From: Platonides Date: Sat, 19 Feb 2011 20:16:54 +0000 (+0000) Subject: (Bug 27539) Allow attributes beginning with a digit in wiktext tag parameters. X-Git-Tag: 1.31.0-rc.0~31881 X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=b856f6605b00061ea14ab8d6af9746ec35daa77f;p=lhc%2Fweb%2Fwiklou.git (Bug 27539) Allow attributes beginning with a digit in wiktext tag parameters. Its removal in r70849 breaks ProofreadPage extension. Restricted r82475 relaxation to just numbers. Added tests. This only affects wikitext (tag hooks). MW_ATTRIBS_REGEX is only used through decodeTagAttributes() calls. fixTagAttributes() calls decodeTagAttributes(), and would be nastier to fix, since it is called with HTML parameters (eg. by removeHTMLtags) but such incorrect parameters grabbed would be removed by validateTagAttributes() --- diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 9a64e2cdf7..0fc5a1cd3f 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -39,7 +39,7 @@ define( 'MW_CHAR_REFS_REGEX', * Allows some... latitude. * Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes */ -$attribFirst = '[:A-Z_a-z-.0-9]'; // more lenient than standards (by allowing [-.0-9]) +$attribFirst = '[:A-Z_a-z0-9]'; $attrib = '[:A-Z_a-z-.0-9]'; $space = '[\x09\x0a\x0d\x20]'; define( 'MW_ATTRIBS_REGEX', diff --git a/tests/phpunit/includes/SanitizerTest.php b/tests/phpunit/includes/SanitizerTest.php index c2e3e6b348..40d6cf77f8 100644 --- a/tests/phpunit/includes/SanitizerTest.php +++ b/tests/phpunit/includes/SanitizerTest.php @@ -68,5 +68,46 @@ class SanitizerTest extends MediaWikiTestCase { 'Self-closing closing div' ); } + + function testDecodeTagAttributes() { + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=bar' ), array( 'foo' => 'bar' ), 'Unquoted attribute' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( ' foo = bar ' ), array( 'foo' => 'bar' ), 'Spaced attribute' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo="bar"' ), array( 'foo' => 'bar' ), 'Double-quoted attribute' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=\'bar\'' ), array( 'foo' => 'bar' ), 'Single-quoted attribute' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=\'bar\' baz="foo"' ), array( 'foo' => 'bar', 'baz' => 'foo' ), 'Several attributes' ); + + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=\'bar\' baz="foo"' ), array( 'foo' => 'bar', 'baz' => 'foo' ), 'Several attributes' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=\'bar\' baz="foo"' ), array( 'foo' => 'bar', 'baz' => 'foo' ), 'Several attributes' ); + + $this->assertEquals( Sanitizer::decodeTagAttributes( ':foo=\'bar\'' ), array( ':foo' => 'bar' ), 'Leading :' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( '_foo=\'bar\'' ), array( '_foo' => 'bar' ), 'Leading _' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'Foo=\'bar\'' ), array( 'foo' => 'bar' ), 'Leading capital' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'FOO=BAR' ), array( 'foo' => 'BAR' ), 'Attribute keys are normalized to lowercase' ); + + # Invalid beginning + $this->assertEquals( Sanitizer::decodeTagAttributes( '-foo=bar' ), array(), 'Leading - is forbidden' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( '.foo=bar' ), array(), 'Leading . is forbidden' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo-bar=bar' ), array( 'foo-bar' => 'bar' ), 'A - is allowed inside the attribute' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo-=bar' ), array( 'foo-' => 'bar' ), 'A - is allowed inside the attribute' ); + + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo.bar=baz' ), array( 'foo.bar' => 'baz' ), 'A . is allowed inside the attribute' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo.=baz' ), array( 'foo.' => 'baz' ), 'A . is allowed as last character' ); + + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo6=baz' ), array( 'foo6' => 'baz' ), 'Numbers are allowed' ); + + # This bit is more relaxed than XML rules, but some extensions use it, like ProofreadPage (see bug 27539) + $this->assertEquals( Sanitizer::decodeTagAttributes( '1foo=baz' ), array( '1foo' => 'baz' ), 'Leading numbers are allowed' ); + + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo$=baz' ), array(), 'Symbols are not allowed' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo@=baz' ), array(), 'Symbols are not allowed' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo~=baz' ), array(), 'Symbols are not allowed' ); + + + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=1[#^`*%w/(' ), array( 'foo' => '1[#^`*%w/(' ), 'All kind of characters are allowed as values' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo="1[#^`*%\'w/("' ), array( 'foo' => '1[#^`*%\'w/(' ), 'Double quotes are allowed if quoted by single quotes' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=\'1[#^`*%"w/(\'' ), array( 'foo' => '1[#^`*%"w/(' ), 'Single quotes are allowed if quoted by double quotes' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=&"' ), array( 'foo' => '&"' ), 'Special chars can be provided as entities' ); + $this->assertEquals( Sanitizer::decodeTagAttributes( 'foo=&foobar;' ), array( 'foo' => '&foobar;' ), 'Entity-like items are accepted' ); + } }