Merge "French space armoring: Use unicode regex instead of octal escapes"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Mon, 25 Jun 2018 21:49:55 +0000 (21:49 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Mon, 25 Jun 2018 21:49:55 +0000 (21:49 +0000)
includes/parser/Sanitizer.php

index 89a7c96..21498f8 100644 (file)
@@ -1155,9 +1155,9 @@ class Sanitizer {
                $fixtags = [
                        # French spaces, last one Guillemet-left
                        # only if there is something before the space
-                       '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => "\\1$space",
+                       '/(.) (?=[?:;!%»])/u' => "\\1$space",
                        # French spaces, Guillemet-right
-                       '/(\\302\\253) /' => "\\1$space",
+                       '/(«) /u' => "\\1$space",
                ];
                return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
        }