From: Brion Vibber Date: Wed, 30 Nov 2011 00:36:34 +0000 (+0000) Subject: * (bug 32712) Fix for search indexing of pages with certain unicode chars following URL X-Git-Tag: 1.31.0-rc.0~26225 X-Git-Url: https://git.cyclocoop.org/%7B%24admin_url%7Dcompta/operations/modifier.php?a=commitdiff_plain;h=f79d1d3ffb9196dc920655d6cfb9a80929391a8b;p=lhc%2Fweb%2Fwiklou.git * (bug 32712) Fix for search indexing of pages with certain unicode chars following URL A regex in SearchUpdate was built for ancient pure ISO 8859-1 and looked for \xa0-\xff bytes -- this caused the regex to cut off partway through if there was a char containing a byte in the \x80-\x9f range. Fixed the regex to pass \x80-\xff instead. Added a test case to SearchUpdateTest which checks for this case (the example text is run through the update squash algorithm, then through preg_replace with the /u modifier so that it is treated as UTF-8, and the test checks whether that breaks.) --- diff --git a/tests/phpunit/includes/search/SearchUpdateTest.php b/tests/phpunit/includes/search/SearchUpdateTest.php index 935425a676..6e49a9a1c9 100644 --- a/tests/phpunit/includes/search/SearchUpdateTest.php +++ b/tests/phpunit/includes/search/SearchUpdateTest.php @@ -77,4 +77,14 @@ EOT 'Bug 18609' ); } + + function testBug32712() { + $text = "text „http://example.com“ text"; + $result = $this->updateText( $text ); + $processed = preg_replace( '/Q/u', 'Q', $result ); + $this->assertTrue( + $processed != '', + 'Link surrounded by unicode quotes should not fail UTF-8 validation' + ); + } }