*/
static function getAttribsRegex() {
// Lazily builds the attribute-matching pattern: it is assembled only while
// self::$attribsRegex is still null, stored there, and returned on every call.
if ( self::$attribsRegex === null ) {
// NOTE(review): the leading "-" / "+" on some lines below look like patch markers
// (old text vs. new text of one change), not PHP operators - confirm against the real file.
// Old character classes: ASCII letters, digits, ":" and "_" for the first character;
// the same set plus "-" and "." for the characters that follow.
- $attribFirst = '[:A-Z_a-z0-9]';
- $attrib = '[:A-Z_a-z-.0-9]';
// New character classes: ":" and "_" plus the Unicode property classes \p{L} (letters)
// and \p{N} (numbers) for the first character; following characters also allow "." and "-".
// Presumably combining marks (\p{M}) are left out on purpose - verify against the test data.
+ $attribFirst = "[:_\p{L}\p{N}]";
+ $attrib = "[:_\.\-\p{L}\p{N}]";
// Whitespace set used as a separator: tab, line feed, form feed, carriage return, space.
$space = '[\x09\x0a\x0c\x0d\x20]';
// The pattern captures a name built from the classes above, preceded by start-of-string
// or whitespace, and requires whitespace or end-of-string after the optional value part.
// The new closing line adds the "u" modifier to the existing "s" and "x" modifiers.
// NOTE(review): as shown here the pattern has two unmatched ")" - some lines of the
// literal seem to be missing from this view; TODO confirm against the full source.
self::$attribsRegex =
"/(?:^|$space)({$attribFirst}{$attrib}*)
| '([^']*)(?:'|\$)
| (((?!$space|>).)*)
)
- )?(?=$space|\$)/sx";
+ )?(?=$space|\$)/sxu";
}
// Return the stored pattern (freshly built on the first call).
return self::$attribsRegex;
}
public static function provideTagAttributesToDecode() {
return [
[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
+ [ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
+ [ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
+ [ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
+ [ [], 'ńgh=bar', 'Combining accent is not allowed' ],
[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],