+<?php
+
+/**
+ * Unit tests for the static helper methods of Sanitizer.
+ *
+ * Throughout this class the data providers list the expected value before the
+ * raw input unless their docblock says otherwise.
+ *
+ * @todo Tests covering decodeCharReferences can be refactored into a single
+ * method and dataprovider.
+ *
+ * @group Sanitizer
+ */
+class SanitizerUnitTest extends MediaWikiUnitTestCase {
+
+	/**
+	 * A named character reference is replaced by the UTF-8 character it names.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testDecodeNamedEntities() {
+		$this->assertEquals(
+			"\xc3\xa9cole",
+			Sanitizer::decodeCharReferences( '&eacute;cole' ),
+			'decode named entities'
+		);
+	}
+
+	/**
+	 * Hexadecimal and decimal numeric character references are both decoded.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testDecodeNumericEntities() {
+		$this->assertEquals(
+			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
+			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
+			'decode numeric entities'
+		);
+	}
+
+	/**
+	 * Numeric and named character references may appear in the same string.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testDecodeMixedEntities() {
+		$this->assertEquals(
+			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
+			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
+			'decode mixed numeric/named entities'
+		);
+	}
+
+	/**
+	 * References whose ampersand is itself escaped must be decoded only once:
+	 * the second half of the string has to come out as literal reference text.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testDecodeMixedComplexEntities() {
+		$this->assertEquals(
+			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
+			Sanitizer::decodeCharReferences(
+				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
+			),
+			'decode mixed complex entities'
+		);
+	}
+
+	/**
+	 * An ampersand that does not start a character reference is left untouched.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testInvalidAmpersand() {
+		$this->assertEquals(
+			'a & b',
+			Sanitizer::decodeCharReferences( 'a & b' ),
+			'Invalid ampersand'
+		);
+	}
+
+	/**
+	 * An unknown named reference is returned verbatim.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testInvalidEntities() {
+		$this->assertEquals(
+			'&foo;',
+			Sanitizer::decodeCharReferences( '&foo;' ),
+			'Invalid named entity'
+		);
+	}
+
+	/**
+	 * A numeric reference far outside the Unicode range is expected to decode
+	 * to the UTF-8 replacement character.
+	 *
+	 * @covers Sanitizer::decodeCharReferences
+	 */
+	public function testInvalidNumberedEntities() {
+		$this->assertEquals(
+			UtfNormal\Constants::UTF8_REPLACEMENT,
+			Sanitizer::decodeCharReferences( '&#88888888888888;' ),
+			'Invalid numbered entity'
+		);
+	}
+
+	/**
+	 * @dataProvider provideTagAttributesToDecode
+	 * @covers Sanitizer::decodeTagAttributes
+	 *
+	 * @param array $expected Attribute name => value map expected from the parser
+	 * @param string $attributes Raw attribute text as it appears inside a tag
+	 * @param string $message Assertion failure message
+	 */
+	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
+		$this->assertEquals(
+			$expected,
+			Sanitizer::decodeTagAttributes( $attributes ),
+			$message
+		);
+	}
+
+	/**
+	 * @return array[] Datasets of [ <expected attributes>, <attribute text>, <message> ]
+	 */
+	public static function provideTagAttributesToDecode() {
+		return [
+			[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
+			[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
+			[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
+			[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
+			# "n" followed by U+0301 COMBINING ACUTE ACCENT, spelled as bytes so that
+			# no editor or tool can normalize it into a precomposed letter
+			[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
+			[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
+			[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
+			[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
+
+			# The same pair of attributes with each kind of whitespace separator
+			[
+				[ 'foo' => 'bar', 'baz' => 'foo' ],
+				'foo=\'bar\' baz="foo"',
+				'Several attributes'
+			],
+			[
+				[ 'foo' => 'bar', 'baz' => 'foo' ],
+				"foo='bar'\tbaz=\"foo\"",
+				'Several attributes separated by a tab'
+			],
+			[
+				[ 'foo' => 'bar', 'baz' => 'foo' ],
+				"foo='bar'\nbaz=\"foo\"",
+				'Several attributes separated by a newline'
+			],
+			[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
+			[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
+			[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
+			[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],
+
+			# Invalid beginning
+			[ [], '-foo=bar', 'Leading - is forbidden' ],
+			[ [], '.foo=bar', 'Leading . is forbidden' ],
+			[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
+			[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
+			[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
+			[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
+			[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],
+
+			# This bit is more relaxed than XML rules, but some extensions use
+			# it, like ProofreadPage (see T29539)
+			[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
+			[ [], 'foo$=baz', 'Symbols are not allowed' ],
+			[ [], 'foo@=baz', 'Symbols are not allowed' ],
+			[ [], 'foo~=baz', 'Symbols are not allowed' ],
+			[
+				[ 'foo' => '1[#^`*%w/(' ],
+				'foo=1[#^`*%w/(',
+				'All kind of characters are allowed as values'
+			],
+			[
+				[ 'foo' => '1[#^`*%\'w/(' ],
+				'foo="1[#^`*%\'w/("',
+				'Double quotes are allowed if quoted by single quotes'
+			],
+			[
+				[ 'foo' => '1[#^`*%"w/(' ],
+				'foo=\'1[#^`*%"w/(\'',
+				'Single quotes are allowed if quoted by double quotes'
+			],
+			# The value is written with character references and must come back decoded
+			[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
+			[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
+		];
+	}
+
+	/**
+	 * @dataProvider provideCssCommentsFixtures
+	 * @covers Sanitizer::checkCss
+	 *
+	 * @param string $expected CSS expected back from the checker
+	 * @param string $css Raw CSS handed to the checker
+	 * @param string $message Assertion failure message
+	 */
+	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
+		$this->assertEquals(
+			$expected,
+			Sanitizer::checkCss( $css ),
+			$message
+		);
+	}
+
+	/**
+	 * @return array[] Datasets of [ <expected>, <css>, [message] ]
+	 */
+	public static function provideCssCommentsFixtures() {
+		return [
+			// Valid comments spanning entire input
+			[ '/**/', '/**/' ],
+			[ '/* comment */', '/* comment */' ],
+			// Weird stuff
+			[ ' ', '/****/' ],
+			[ ' ', '/* /* */' ],
+			[ 'display: block;', "display:/* foo */block;" ],
+			[ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
+				'Backslash-escaped comments must be stripped (T30450)' ],
+			[ '', '/* unfinished comment structure',
+				'Remove anything after a comment-start token' ],
+			[ '', "\\2f\\2a unifinished comment'",
+				'Remove anything after a backslash-escaped comment-start token' ],
+			// Constructs that are expected to be rejected wholesale
+			[
+				'/* insecure input */',
+				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
+					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
+			],
+			[
+				'/* insecure input */',
+				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
+					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
+			],
+			[ '/* insecure input */', 'width: expression(1+1);' ],
+			[ '/* insecure input */', 'background-image: image(asdf.png);' ],
+			[ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
+			[ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
+			[ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
+			[
+				'/* insecure input */',
+				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
+			],
+			[
+				'/* insecure input */',
+				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
+			],
+			[ '/* insecure input */', 'foo: attr( title, url );' ],
+			[ '/* insecure input */', 'foo: attr( title url );' ],
+			[ '/* insecure input */', 'foo: var(--evil-attribute)' ],
+		];
+	}
+
+	/**
+	 * @dataProvider provideEscapeHtmlAllowEntities
+	 * @covers Sanitizer::escapeHtmlAllowEntities
+	 *
+	 * @param string $expected Escaped HTML expected back
+	 * @param string $html Raw text to escape
+	 */
+	public function testEscapeHtmlAllowEntities( $expected, $html ) {
+		$this->assertEquals(
+			$expected,
+			Sanitizer::escapeHtmlAllowEntities( $html )
+		);
+	}
+
+	/**
+	 * @return array[] Datasets of [ <expected>, <html> ]
+	 */
+	public static function provideEscapeHtmlAllowEntities() {
+		return [
+			[ 'foo', 'foo' ],
+			// A character reference in the input is accepted and comes back as the character
+			[ 'a¡b', 'a&#161;b' ],
+			// Quotes and angle brackets come back escaped
+			[ 'foo&#039;bar', "foo'bar" ],
+			[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
+		];
+	}
+
+	/**
+	 * Test Sanitizer::escapeId
+	 *
+	 * @dataProvider provideEscapeId
+	 * @covers Sanitizer::escapeId
+	 *
+	 * @param string $input Raw id
+	 * @param string $output Expected escaped id
+	 */
+	public function testEscapeId( $input, $output ) {
+		$this->assertEquals(
+			$output,
+			Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] )
+		);
+	}
+
+	/**
+	 * Unlike the other providers in this class, the input comes first here.
+	 *
+	 * @return array[] Datasets of [ <input>, <expected output> ]
+	 */
+	public static function provideEscapeId() {
+		return [
+			[ '+', '.2B' ],
+			[ '&', '.26' ],
+			[ '=', '.3D' ],
+			[ ':', ':' ],
+			[ ';', '.3B' ],
+			[ '@', '.40' ],
+			[ '$', '.24' ],
+			[ '-_.', '-_.' ],
+			[ '!', '.21' ],
+			[ '*', '.2A' ],
+			[ '/', '.2F' ],
+			[ '[]', '.5B.5D' ],
+			[ '<>', '.3C.3E' ],
+			[ '\'', '.27' ],
+			[ '§', '.C2.A7' ],
+			[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
+			// Each "&" maps to ".26" and each ";" to ".3B"; nothing is entity-decoded first
+			[ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
+		];
+	}
+
+	/**
+	 * Test escapeIdReferenceList for consistency with escapeIdForAttribute
+	 *
+	 * The escaped list must equal the individually escaped ids joined by a space.
+	 *
+	 * @dataProvider provideEscapeIdReferenceList
+	 * @covers Sanitizer::escapeIdReferenceList
+	 *
+	 * @param string $referenceList Space-separated list of two ids
+	 * @param string $id1 First id of the list
+	 * @param string $id2 Second id of the list
+	 */
+	public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
+		// Build the reference value first so that PHPUnit reports it as "expected"
+		$expected = Sanitizer::escapeIdForAttribute( $id1 )
+			. ' '
+			. Sanitizer::escapeIdForAttribute( $id2 );
+
+		$this->assertEquals(
+			$expected,
+			Sanitizer::escapeIdReferenceList( $referenceList )
+		);
+	}
+
+	/**
+	 * @return array[] Datasets of [ <reference list>, <individual id 1>, <individual id 2> ]
+	 */
+	public static function provideEscapeIdReferenceList() {
+		return [
+			[ 'foo bar', 'foo', 'bar' ],
+			[ '#1 #2', '#1', '#2' ],
+			[ '+1 +2', '+1', '+2' ],
+		];
+	}
+
+	/**
+	 * @dataProvider provideIsReservedDataAttribute
+	 * @covers Sanitizer::isReservedDataAttribute
+	 *
+	 * @param string $attr Attribute name to classify
+	 * @param bool $expected Whether the name is expected to be reported as reserved
+	 */
+	public function testIsReservedDataAttribute( $attr, $expected ) {
+		$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
+	}
+
+	/**
+	 * @return array[] Datasets of [ <attribute name>, <expected result> ]
+	 */
+	public static function provideIsReservedDataAttribute() {
+		return [
+			[ 'foo', false ],
+			[ 'data', false ],
+			[ 'data-foo', false ],
+			[ 'data-mw', true ],
+			[ 'data-ooui', true ],
+			[ 'data-parsoid', true ],
+			[ 'data-mw-foo', true ],
+			[ 'data-ooui-foo', true ],
+			[ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
+		];
+	}
+
+	/**
+	 * @dataProvider provideStripAllTags
+	 *
+	 * @covers Sanitizer::stripAllTags
+	 * @covers RemexStripTagHandler
+	 *
+	 * @param string $input
+	 * @param string $expected
+	 */
+	public function testStripAllTags( $input, $expected ) {
+		$this->assertEquals( $expected, Sanitizer::stripAllTags( $input ) );
+	}
+
+	/**
+	 * @return array[] Datasets of [ <input HTML>, <expected plain text> ]
+	 */
+	public static function provideStripAllTags() {
+		return [
+			[ '<p>Foo</p>', 'Foo' ],
+			[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
+			[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
+			// Escaped markup is text, not a tag: it has to come back decoded
+			[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
+			[
+				'<p><small data-foo=\'bar"<baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
+				'Bar Whee!'
+			],
+			[ '1<span class="<?php">2</span>3', '123' ],
+			[ '1<span class="<?">2</span>3', '123' ],
+			[ '<th>1</th><td>2</td>', '1 2' ],
+		];
+	}
+
+}