*/
static function validateTagAttributes( $attribs, $element ) {
return self::validateAttributes( $attribs,
- self::attributeWhitelist( $element ) );
+ self::attributeWhitelistInternal( $element ) );
}
/**
* - Invalid id attributes are re-encoded
*
* @param array $attribs
- * @param array $whitelist List of allowed attribute names
+ * @param array $whitelist List of allowed attribute names,
+ * either as a sequential array of valid attribute names or
+ * as an associative array where keys give valid attribute names
* @return array
*
* @todo Check for legal values where the DTD limits things.
* @todo Check for unique id attribute :P
*/
static function validateAttributes( $attribs, $whitelist ) {
- $whitelist = array_flip( $whitelist );
+ if ( isset( $whitelist[0] ) ) {
+ // We would like to eventually deprecate calling this
+ // function with a sequential array, but for now just
+ // convert it.
+ $whitelist = array_flip( $whitelist );
+ }
$hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';
$out = [];
# * Disallow data attributes used by MediaWiki code
# * Ensure that the attribute is not namespaced by banning
# colons.
- if ( !preg_match( '/^data-[^:]*$/i', $attribute )
- && !isset( $whitelist[$attribute] )
- || self::isReservedDataAttribute( $attribute )
- ) {
+ if ( (
+ !preg_match( '/^data-[^:]*$/i', $attribute ) &&
+ !array_key_exists( $attribute, $whitelist )
+ ) || self::isReservedDataAttribute( $attribute ) ) {
continue;
}
* Fetch the whitelist of acceptable attributes for a given element name.
*
* @param string $element
- * @return array
+ * @return array A sequential array of acceptable attribute names
+ * @deprecated since 1.34; should be private
*/
static function attributeWhitelist( $element ) {
+ wfDeprecated( __METHOD__, '1.34' );
$list = self::setupAttributeWhitelist();
return $list[$element] ?? [];
}
+ /**
+ * Fetch the whitelist of acceptable attributes for a given element name.
+ *
+ * @param string $element
+ * @return array An associative array where keys are acceptable attribute
+ * names
+ */
+ private static function attributeWhitelistInternal( $element ) {
+ $list = self::setupAttributeWhitelistInternal();
+ return $list[$element] ?? [];
+ }
+
/**
* Foreach array key (an allowed HTML element), return an array
* of allowed attributes
* @return array
+ * @deprecated since 1.34; should be private
*/
static function setupAttributeWhitelist() {
+ wfDeprecated( __METHOD__, '1.34' );
+ $wlist = self::setupAttributeWhitelistInternal();
+ // This method is expected to return a sequential array as the
+ // value for each HTML element key.
+ return array_map( function ( $v ) {
+ return array_keys( $v );
+ }, $wlist );
+ }
+
+ /**
+ * Foreach array key (an allowed HTML element), return an array
+ * of allowed attributes
+ * @return array An associative array: keys are HTML element names;
+ * values are associative arrays where the keys are allowed attribute
+ * names.
+ */
+ private static function setupAttributeWhitelistInternal() {
static $whitelist;
if ( $whitelist !== null ) {
return $whitelist;
}
- $common = [
+ // For lookup efficiency flip each attributes array so the keys are
+ // the valid attributes.
+ $merge = function ( $a, $b, $c = [] ) {
+ return array_merge( $a, array_flip( $b ), array_flip( $c ) );
+ };
+ $common = $merge( [], [
# HTML
'id',
'class',
'itemref',
'itemscope',
'itemtype',
- ];
+ ] );
+
+ $block = $merge( $common, [ 'align' ] );
- $block = array_merge( $common, [ 'align' ] );
$tablealign = [ 'align', 'valign' ];
$tablecell = [
'abbr',
# acronym
# 9.2.2
- 'blockquote' => array_merge( $common, [ 'cite' ] ),
- 'q' => array_merge( $common, [ 'cite' ] ),
+ 'blockquote' => $merge( $common, [ 'cite' ] ),
+ 'q' => $merge( $common, [ 'cite' ] ),
# 9.2.3
'sub' => $common,
'p' => $block,
# 9.3.2
- 'br' => array_merge( $common, [ 'clear' ] ),
+ 'br' => $merge( $common, [ 'clear' ] ),
# https://www.w3.org/TR/html5/text-level-semantics.html#the-wbr-element
'wbr' => $common,
# 9.3.4
- 'pre' => array_merge( $common, [ 'width' ] ),
+ 'pre' => $merge( $common, [ 'width' ] ),
# 9.4
- 'ins' => array_merge( $common, [ 'cite', 'datetime' ] ),
- 'del' => array_merge( $common, [ 'cite', 'datetime' ] ),
+ 'ins' => $merge( $common, [ 'cite', 'datetime' ] ),
+ 'del' => $merge( $common, [ 'cite', 'datetime' ] ),
# 10.2
- 'ul' => array_merge( $common, [ 'type' ] ),
- 'ol' => array_merge( $common, [ 'type', 'start', 'reversed' ] ),
- 'li' => array_merge( $common, [ 'type', 'value' ] ),
+ 'ul' => $merge( $common, [ 'type' ] ),
+ 'ol' => $merge( $common, [ 'type', 'start', 'reversed' ] ),
+ 'li' => $merge( $common, [ 'type', 'value' ] ),
# 10.3
'dl' => $common,
'dt' => $common,
# 11.2.1
- 'table' => array_merge( $common,
+ 'table' => $merge( $common,
[ 'summary', 'width', 'border', 'frame',
'rules', 'cellspacing', 'cellpadding',
'align', 'bgcolor',
'tbody' => $common,
# 11.2.4
- 'colgroup' => array_merge( $common, [ 'span' ] ),
- 'col' => array_merge( $common, [ 'span' ] ),
+ 'colgroup' => $merge( $common, [ 'span' ] ),
+ 'col' => $merge( $common, [ 'span' ] ),
# 11.2.5
- 'tr' => array_merge( $common, [ 'bgcolor' ], $tablealign ),
+ 'tr' => $merge( $common, [ 'bgcolor' ], $tablealign ),
# 11.2.6
- 'td' => array_merge( $common, $tablecell, $tablealign ),
- 'th' => array_merge( $common, $tablecell, $tablealign ),
+ 'td' => $merge( $common, $tablecell, $tablealign ),
+ 'th' => $merge( $common, $tablecell, $tablealign ),
# 12.2
# NOTE: <a> is not allowed directly, but the attrib
# whitelist is used from the Parser object
- 'a' => array_merge( $common, [ 'href', 'rel', 'rev' ] ), # rel/rev esp. for RDFa
+ 'a' => $merge( $common, [ 'href', 'rel', 'rev' ] ), # rel/rev esp. for RDFa
# 13.2
# Not usually allowed, but may be used for extension-style hooks
# such as <math> when it is rasterized, or if $wgAllowImageTag is
# true
- 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height', 'srcset' ] ),
+ 'img' => $merge( $common, [ 'alt', 'src', 'width', 'height', 'srcset' ] ),
# Attributes for A/V tags added in T163583 / T133673
- 'audio' => array_merge( $common, [ 'controls', 'preload', 'width', 'height' ] ),
- 'video' => array_merge( $common, [ 'poster', 'controls', 'preload', 'width', 'height' ] ),
- 'source' => array_merge( $common, [ 'type', 'src' ] ),
- 'track' => array_merge( $common, [ 'type', 'src', 'srclang', 'kind', 'label' ] ),
+ 'audio' => $merge( $common, [ 'controls', 'preload', 'width', 'height' ] ),
+ 'video' => $merge( $common, [ 'poster', 'controls', 'preload', 'width', 'height' ] ),
+ 'source' => $merge( $common, [ 'type', 'src' ] ),
+ 'track' => $merge( $common, [ 'type', 'src', 'srclang', 'kind', 'label' ] ),
# 15.2.1
'tt' => $common,
'u' => $common,
# 15.2.2
- 'font' => array_merge( $common, [ 'size', 'color', 'face' ] ),
+ 'font' => $merge( $common, [ 'size', 'color', 'face' ] ),
# basefont
# 15.3
- 'hr' => array_merge( $common, [ 'width' ] ),
+ 'hr' => $merge( $common, [ 'width' ] ),
# HTML Ruby annotation text module, simple ruby only.
# https://www.w3.org/TR/html5/text-level-semantics.html#the-ruby-element
# rbc
'rb' => $common,
'rp' => $common,
- 'rt' => $common, # array_merge( $common, [ 'rbspan' ] ),
+ 'rt' => $common, # $merge( $common, [ 'rbspan' ] ),
'rtc' => $common,
# MathML root element, where used for extensions
# 'title' may not be 100% valid here; it's XHTML
# https://www.w3.org/TR/REC-MathML/
- 'math' => [ 'class', 'style', 'id', 'title' ],
+ 'math' => $merge( [], [ 'class', 'style', 'id', 'title' ] ),
// HTML 5 section 4.5
'figure' => $common,
# HTML5 elements, defined by:
# https://html.spec.whatwg.org/multipage/semantics.html#the-data-element
- 'data' => array_merge( $common, [ 'value' ] ),
- 'time' => array_merge( $common, [ 'datetime' ] ),
+ 'data' => $merge( $common, [ 'value' ] ),
+ 'time' => $merge( $common, [ 'datetime' ] ),
'mark' => $common,
// meta and link are only permitted by removeHTMLtags when Microdata
// Also meta and link are only valid in WikiText as Microdata elements
// (ie: validateTag rejects tags missing the attributes needed for Microdata)
// So we don't bother including $common attributes that have no purpose.
- 'meta' => [ 'itemprop', 'content' ],
- 'link' => [ 'itemprop', 'href', 'title' ],
+ 'meta' => $merge( [], [ 'itemprop', 'content' ] ),
+ 'link' => $merge( [], [ 'itemprop', 'href', 'title' ] ),
];
return $whitelist;
<?php
+use Wikimedia\TestingAccessWrapper;
+
/**
* @todo Tests covering decodeCharReferences can be refactored into a single
* method and dataprovider.
/**
* @dataProvider provideDeprecatedAttributes
* @covers Sanitizer::fixTagAttributes
+ * @covers Sanitizer::validateTagAttributes
+ * @covers Sanitizer::validateAttributes
*/
public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
$this->assertEquals( " $inputAttr",
];
}
+ /**
+ * @dataProvider provideValidateTagAttributes
+ * @covers Sanitizer::validateTagAttributes
+ * @covers Sanitizer::validateAttributes
+ */
+ public function testValidateTagAttributes( $element, $attribs, $expected ) {
+ $actual = Sanitizer::validateTagAttributes( $attribs, $element );
+ $this->assertArrayEquals( $expected, $actual, false, true );
+ }
+
+ public static function provideValidateTagAttributes() {
+ return [
+ [ 'math',
+ [ 'id' => 'foo bar', 'bogus' => 'stripped', 'data-foo' => 'bar' ],
+ [ 'id' => 'foo_bar', 'data-foo' => 'bar' ],
+ ],
+ [ 'meta',
+ [ 'id' => 'foo bar', 'itemprop' => 'foo', 'content' => 'bar' ],
+ [ 'itemprop' => 'foo', 'content' => 'bar' ],
+ ],
+ ];
+ }
+
+ /**
+ * @dataProvider provideAttributeWhitelist
+ * @covers Sanitizer::attributeWhitelist
+ */
+ public function testAttributeWhitelist( $element, $attribs ) {
+ $this->hideDeprecated( 'Sanitizer::attributeWhitelist' );
+ $this->hideDeprecated( 'Sanitizer::setupAttributeWhitelist' );
+ $actual = Sanitizer::attributeWhitelist( $element );
+ $this->assertArrayEquals( $attribs, $actual );
+ }
+
+ /**
+ * @dataProvider provideAttributeWhitelist
+ * @covers Sanitizer::attributeWhitelistInternal
+ */
+ public function testAttributeWhitelistInternal( $element, $attribs ) {
+ $sanitizer = TestingAccessWrapper::newFromClass( Sanitizer::class );
+ $actual = $sanitizer->attributeWhitelistInternal( $element );
+ $this->assertArrayEquals( $attribs, array_keys( $actual ) );
+ }
+
+ public function provideAttributeWhitelist() {
+ /** [ <element>, [ <good attribute 1>, <good attribute 2>, ...] ] */
+ return [
+ [ 'math', [ 'class', 'style', 'id', 'title' ] ],
+ [ 'meta', [ 'itemprop', 'content' ] ],
+ [ 'link', [ 'itemprop', 'href', 'title' ] ],
+ ];
+ }
+
/**
* @dataProvider provideCssCommentsFixtures
* @covers Sanitizer::checkCss