A regex in SearchUpdate was built for ancient pure ISO 8859-1 and looked for \xa0-\xff bytes -- this caused the regex to cut off partway through if there was a char containing a byte in the \x80-\x9f range.
Fixed regex to pass \x80-\xff instead.
Added a test case to SearchUpdateTest which checks for this case (example text run through the update squash algo, then run through preg_replace with a /u param to make sure it gets treated as UTF-8 and checking whether it breaks.)
'Bug 18609'
);
}
+
+ function testBug32712() {
+ $text = "text „http://example.com“ text";
+ $result = $this->updateText( $text );
+ $processed = preg_replace( '/Q/u', 'Q', $result );
+ $this->assertTrue(
+ $processed != '',
+ 'Link surrounded by unicode quotes should not fail UTF-8 validation'
+ );
+ }
}