Merge "Sanitizer: Allow attribute names to use any Unicode "Letter" or "Number""
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 12 Jul 2017 15:13:19 +0000 (15:13 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 12 Jul 2017 15:13:19 +0000 (15:13 +0000)
includes/Sanitizer.php
tests/phpunit/includes/SanitizerTest.php

index dd4a314..b08bc69 100644 (file)
@@ -339,8 +339,8 @@ class Sanitizer {
         */
        static function getAttribsRegex() {
                if ( self::$attribsRegex === null ) {
-                       $attribFirst = '[:A-Z_a-z0-9]';
-                       $attrib = '[:A-Z_a-z-.0-9]';
+                       $attribFirst = "[:_\p{L}\p{N}]";
+                       $attrib = "[:_\.\-\p{L}\p{N}]";
                        $space = '[\x09\x0a\x0c\x0d\x20]';
                        self::$attribsRegex =
                                "/(?:^|$space)({$attribFirst}{$attrib}*)
@@ -351,7 +351,7 @@ class Sanitizer {
                                                | '([^']*)(?:'|\$)
                                                | (((?!$space|>).)*)
                                        )
-                               )?(?=$space|\$)/sx";
+                               )?(?=$space|\$)/sxu";
                }
                return self::$attribsRegex;
        }
index c237c50..abcf1d4 100644 (file)
@@ -178,6 +178,10 @@ class SanitizerTest extends MediaWikiTestCase {
        public static function provideTagAttributesToDecode() {
                return [
                        [ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
+                       [ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
+                       [ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
+                       [ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
+                       [ [], 'ńgh=bar', 'Combining accent is not allowed' ],
                        [ [ 'foo' => 'bar' ], '    foo   =   bar    ', 'Spaced attribute' ],
                        [ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
                        [ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],