* http://www.w3.org/TR/html4/sgml/entities.html
* As well as &apos; which is only defined starting in XHTML1.
*/
- private static $htmlEntities = array(
+ private static $htmlEntities = [
'Aacute' => 193,
'aacute' => 225,
'Acirc' => 194,
'zeta' => 950,
'zwj' => 8205,
'zwnj' => 8204
- );
+ ];
/**
* Character entity aliases accepted by MediaWiki
*/
- private static $htmlEntityAliases = array(
+ private static $htmlEntityAliases = [
'רלמ' => 'rlm',
'رلم' => 'rlm',
- );
+ ];
/**
* Lazy-initialised attributes regex, see getAttribsRegex()
/**
* Regular expression to match HTML/XML attribute pairs within a tag.
- * Allows some... latitude.
+ * Allows some... latitude. Based on,
+ * http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
* Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
* @return string
*/
if ( self::$attribsRegex === null ) {
$attribFirst = '[:A-Z_a-z0-9]';
$attrib = '[:A-Z_a-z-.0-9]';
- $space = '[\x09\x0a\x0d\x20]';
+ $space = '[\x09\x0a\x0c\x0d\x20]';
self::$attribsRegex =
"/(?:^|$space)({$attribFirst}{$attrib}*)
($space*=$space*
(?:
# The attribute value: quoted or alone
- \"([^<\"]*)(?:\"|\$)
- | '([^<']*)(?:'|\$)
- | ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
+ \"([^\"]*)(?:\"|\$)
+ | '([^']*)(?:'|\$)
+ | (((?!$space|>).)*)
)
)?(?=$space|\$)/sx";
}
* @param array $removetags For any tags (default or extra) to exclude
* @return array
*/
- public static function getRecognizedTagData( $extratags = array(), $removetags = array() ) {
+ public static function getRecognizedTagData( $extratags = [], $removetags = [] ) {
global $wgAllowMicrodataAttributes, $wgAllowImageTag;
static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
// are changed (like in the screwed up test system) we will re-initialise the settings.
$globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
if ( !$staticInitialised || $staticInitialised != $globalContext ) {
- $htmlpairsStatic = array( # Tags that must be closed
+ $htmlpairsStatic = [ # Tags that must be closed
'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
'strike', 'strong', 'tt', 'var', 'div', 'center',
'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
'ruby', 'rb', 'rp', 'rt', 'rtc', 'p', 'span', 'abbr', 'dfn',
'kbd', 'samp', 'data', 'time', 'mark'
- );
- $htmlsingle = array(
+ ];
+ $htmlsingle = [
'br', 'wbr', 'hr', 'li', 'dt', 'dd'
- );
- $htmlsingleonly = array( # Elements that cannot have close tags
+ ];
+ $htmlsingleonly = [ # Elements that cannot have close tags
'br', 'wbr', 'hr'
- );
+ ];
if ( $wgAllowMicrodataAttributes ) {
$htmlsingle[] = $htmlsingleonly[] = 'meta';
$htmlsingle[] = $htmlsingleonly[] = 'link';
}
- $htmlnest = array( # Tags that can be nested--??
+ $htmlnest = [ # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span',
'var', 'kbd', 'samp', 'em', 'strong', 'q', 'ruby', 'bdo'
- );
- $tabletags = array( # Can only appear inside table, we will close them
+ ];
+ $tabletags = [ # Can only appear inside table, we will close them
'td', 'th', 'tr',
- );
- $htmllist = array( # Tags used by list
+ ];
+ $htmllist = [ # Tags used by list
'ul', 'ol',
- );
- $listtags = array( # Tags that can appear in a list
+ ];
+ $listtags = [ # Tags that can appear in a list
'li',
- );
+ ];
if ( $wgAllowImageTag ) {
$htmlsingle[] = 'img';
$htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) );
# Convert them all to hashtables for faster lookup
- $vars = array( 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
- 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' );
+ $vars = [ 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
+ 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' ];
foreach ( $vars as $var ) {
$$var = array_flip( $$var );
}
$htmlpairs = array_merge( $extratags, $htmlpairsStatic );
$htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ), $removetags );
- return array(
+ return [
'htmlpairs' => $htmlpairs,
'htmlsingle' => $htmlsingle,
'htmlsingleonly' => $htmlsingleonly,
'listtags' => $listtags,
'htmlsingleallowed' => $htmlsingleallowed,
'htmlelements' => $htmlelements,
- );
+ ];
}
/**
* @return string
*/
public static function removeHTMLtags( $text, $processCallback = null,
- $args = array(), $extratags = array(), $removetags = array()
+ $args = [], $extratags = [], $removetags = []
) {
extract( self::getRecognizedTagData( $extratags, $removetags ) );
$bits = explode( '<', $text );
$text = str_replace( '>', '&gt;', array_shift( $bits ) );
if ( !MWTidy::isEnabled() ) {
- $tagstack = $tablestack = array();
+ $tagstack = $tablestack = [];
foreach ( $bits as $x ) {
- $regs = array();
+ $regs = [];
# $slash: Does the current element start with a '/'?
# $t: Current element name
# $params: String between element name and >
if ( isset( $htmlsingleallowed[$ot] ) ) {
# Pop all elements with an optional close tag
# and see if we find a match below them
- $optstack = array();
+ $optstack = [];
array_push( $optstack, $ot );
MediaWiki\suppressWarnings();
$ot = array_pop( $tagstack );
} else {
if ( $t == 'table' ) {
array_push( $tablestack, $tagstack );
- $tagstack = array();
+ $tagstack = [];
}
array_push( $tagstack, $t );
}
# Replace any variables or template parameters with
# plaintext results.
if ( is_callable( $processCallback ) ) {
- call_user_func_array( $processCallback, array( &$params, $args ) );
+ call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( !Sanitizer::validateTag( $params, $t ) ) {
$t = strtolower( $t );
if ( isset( $htmlelements[$t] ) ) {
if ( is_callable( $processCallback ) ) {
- call_user_func_array( $processCallback, array( &$params, $args ) );
+ call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( !Sanitizer::validateTag( $params, $t ) ) {
* Take an array of attribute names and values and normalize or discard
* illegal values for the given whitelist.
*
- * - Discards attributes not the given whitelist
+ * - Discards attributes not on the given whitelist
* - Unsafe style attributes are discarded
* - Invalid id attributes are re-encoded
*
$whitelist = array_flip( $whitelist );
$hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';
- $out = array();
+ $out = [];
foreach ( $attribs as $attribute => $value ) {
# allow XML namespace declaration if RDFa is enabled
if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
$value = Sanitizer::checkCss( $value );
}
+ # Escape HTML id attributes
if ( $attribute === 'id' ) {
$value = Sanitizer::escapeId( $value, 'noninitial' );
}
- # WAI-ARIA
- # http://www.w3.org/TR/wai-aria/
- # http://www.whatwg.org/html/elements.html#wai-aria
- # For now we only support role="presentation" until we work out what roles should be
- # usable by content and we ensure that our code explicitly rejects patterns that
- # violate HTML5's ARIA restrictions.
- if ( $attribute === 'role' && $value !== 'presentation' ) {
- continue;
+ # Escape HTML id reference lists
+ if ( $attribute === 'aria-describedby'
+ || $attribute === 'aria-flowto'
+ || $attribute === 'aria-labelledby'
+ || $attribute === 'aria-owns'
+ ) {
+ $value = Sanitizer::escapeIdReferenceList( $value, 'noninitial' );
}
// RDFa and microdata properties allow URLs, URIs and/or CURIs.
)/xu";
}
$value = preg_replace_callback( $decodeRegex,
- array( __CLASS__, 'cssDecodeCallback' ), $value );
+ [ __CLASS__, 'cssDecodeCallback' ], $value );
// Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
$value = preg_replace_callback(
// Convert more characters IE6 might treat as ascii
// U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D
$value = str_replace(
- array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ),
- array( 'r', 'n', 'n', 'l', 'i', '(', '(' ),
+ [ 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ],
+ [ 'r', 'n', 'n', 'l', 'i', '(', '(' ],
$value
);
// Whitespace is normalized during attribute decoding,
// so if we've been passed non-spaces we must encode them
// ahead of time or they won't be preserved.
- $encValue = strtr( $encValue, array(
+ $encValue = strtr( $encValue, [
"\n" => '&#10;',
"\r" => '&#13;',
"\t" => '&#9;',
- ) );
+ ] );
return $encValue;
}
# Templates and links may be expanded in later parsing,
# creating invalid or dangerous output. Suppress this.
- $encValue = strtr( $encValue, array(
+ $encValue = strtr( $encValue, [
'<' => '&lt;', // This should never happen,
'>' => '&gt;', // we've received invalid input
'"' => '&quot;', // which should have been escaped.
'PMID' => '&#80;MID',
'|' => '&#124;',
'__' => '&#95;_',
- ) );
+ ] );
# Stupid hack
$encValue = preg_replace_callback(
'/((?i)' . wfUrlProtocols() . ')/',
- array( 'Sanitizer', 'armorLinksCallback' ),
+ [ 'Sanitizer', 'armorLinksCallback' ],
$encValue );
return $encValue;
}
* anchors and links won't break.
* @return string
*/
- static function escapeId( $id, $options = array() ) {
+ static function escapeId( $id, $options = [] ) {
global $wgExperimentalHtmlIds;
$options = (array)$options;
}
// HTML4-style escaping
- static $replace = array(
+ static $replace = [
'%3A' => ':',
'%' => '.'
- );
+ ];
$id = urlencode( strtr( $id, ' ', '_' ) );
$id = str_replace( array_keys( $replace ), array_values( $replace ), $id );
return $id;
}
+ /**
+ * Escape every id in a space delimited list of ids, so that each one
+ * matches the form escapeId() produces for the same input.
+ *
+ * @since 1.27
+ *
+ * @param string $referenceString Space delimited list of ids
+ * @param string|array $options String or array of strings (default is []),
+ *   handed unchanged to escapeId() for every id in the list:
+ *   'noninitial': This is a non-initial fragment of an id, not a full id,
+ *       so don't pay attention if the first character isn't valid at the
+ *       beginning of an id. Only matters if $wgExperimentalHtmlIds is
+ *       false.
+ *   'legacy': Behave the way the old HTML 4-based ID escaping worked even
+ *       if $wgExperimentalHtmlIds is used, so we can generate extra
+ *       anchors and links won't break.
+ * @return string The escaped ids joined by single spaces, or an empty
+ *   string ('') when the list contains no ids
+ */
+ static function escapeIdReferenceList( $referenceString, $options = [] ) {
+ 	# Split on runs of whitespace; PREG_SPLIT_NO_EMPTY drops the empty
+ 	# tokens that leading or trailing whitespace would otherwise produce
+ 	$references = preg_split( '/\s+/', (string)$referenceString, -1, PREG_SPLIT_NO_EMPTY );
+
+ 	# Escape each token as an id. array_map() is used instead of a
+ 	# by-reference foreach so no reference variable is left behind.
+ 	$escaped = array_map( function ( $ref ) use ( $options ) {
+ 		return Sanitizer::escapeId( $ref, $options );
+ 	}, $references );
+
+ 	# Join back into a space delimited list
+ 	return implode( ' ', $escaped );
+ }
+
/**
* Given a value, escape it so that it can be used as a CSS class and
* return it.
static function escapeClass( $class ) {
// Convert ugly stuff to underscores and kill underscores in ugly places
return rtrim( preg_replace(
- array( '/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/', '/_+/' ),
+ [ '/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/', '/_+/' ],
'_',
$class ), '_' );
}
/**
* Return an associative array of attribute names and values from
- * a partial tag string. Attribute names are forces to lowercase,
+ * a partial tag string. Attribute names are forced to lowercase,
* character references are decoded to UTF-8 text.
*
* @param string $text
*/
public static function decodeTagAttributes( $text ) {
if ( trim( $text ) == '' ) {
- return array();
+ return [];
}
- $attribs = array();
- $pairs = array();
+ $attribs = [];
+ $pairs = [];
if ( !preg_match_all(
self::getAttribsRegex(),
$text,
* @return string
*/
public static function safeEncodeTagAttributes( $assoc_array ) {
- $attribs = array();
+ $attribs = [];
foreach ( $assoc_array as $attribute => $value ) {
$encAttribute = htmlspecialchars( $attribute );
$encValue = Sanitizer::safeEncodeAttribute( $value );
static function normalizeCharReferences( $text ) {
return preg_replace_callback(
self::CHAR_REFS_REGEX,
- array( 'Sanitizer', 'normalizeCharReferencesCallback' ),
+ [ 'Sanitizer', 'normalizeCharReferencesCallback' ],
$text );
}
static function normalizeEntity( $name ) {
if ( isset( self::$htmlEntityAliases[$name] ) ) {
return '&' . self::$htmlEntityAliases[$name] . ';';
- } elseif ( in_array( $name, array( 'lt', 'gt', 'amp', 'quot' ) ) ) {
+ } elseif ( in_array( $name, [ 'lt', 'gt', 'amp', 'quot' ] ) ) {
return "&$name;";
} elseif ( isset( self::$htmlEntities[$name] ) ) {
return '&#' . self::$htmlEntities[$name] . ';';
public static function decodeCharReferences( $text ) {
return preg_replace_callback(
self::CHAR_REFS_REGEX,
- array( 'Sanitizer', 'decodeCharReferencesCallback' ),
+ [ 'Sanitizer', 'decodeCharReferencesCallback' ],
$text );
}
global $wgContLang;
$text = preg_replace_callback(
self::CHAR_REFS_REGEX,
- array( 'Sanitizer', 'decodeCharReferencesCallback' ),
+ [ 'Sanitizer', 'decodeCharReferencesCallback' ],
$text, /* limit */ -1, $count );
if ( $count ) {
$list = Sanitizer::setupAttributeWhitelist();
return isset( $list[$element] )
? $list[$element]
- : array();
+ : [];
}
/**
return $whitelist;
}
- $common = array(
+ $common = [
# HTML
'id',
'class',
'title',
# WAI-ARIA
+ 'aria-describedby',
+ 'aria-flowto',
+ 'aria-label',
+ 'aria-labelledby',
+ 'aria-owns',
'role',
- );
+ ];
if ( $wgAllowRdfaAttributes ) {
# RDFa attributes as specified in section 9 of
# http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
- $common = array_merge( $common, array(
+ $common = array_merge( $common, [
'about', 'property', 'resource', 'datatype', 'typeof',
- ) );
+ ] );
}
if ( $wgAllowMicrodataAttributes ) {
# add HTML5 microdata tags as specified by
# http://www.whatwg.org/html/microdata.html#the-microdata-model
- $common = array_merge( $common, array(
+ $common = array_merge( $common, [
'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
- ) );
+ ] );
}
- $block = array_merge( $common, array( 'align' ) );
- $tablealign = array( 'align', 'valign' );
- $tablecell = array(
+ $block = array_merge( $common, [ 'align' ] );
+ $tablealign = [ 'align', 'valign' ];
+ $tablecell = [
'abbr',
'axis',
'headers',
'width', # deprecated
'height', # deprecated
'bgcolor', # deprecated
- );
+ ];
# Numbers refer to sections in HTML 4.01 standard describing the element.
# See: http://www.w3.org/TR/html4/
- $whitelist = array(
+ $whitelist = [
# 7.5.4
'div' => $block,
'center' => $common, # deprecated
# acronym
# 9.2.2
- 'blockquote' => array_merge( $common, array( 'cite' ) ),
- 'q' => array_merge( $common, array( 'cite' ) ),
+ 'blockquote' => array_merge( $common, [ 'cite' ] ),
+ 'q' => array_merge( $common, [ 'cite' ] ),
# 9.2.3
'sub' => $common,
'p' => $block,
# 9.3.2
- 'br' => array_merge( $common, array( 'clear' ) ),
+ 'br' => array_merge( $common, [ 'clear' ] ),
# http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
'wbr' => $common,
# 9.3.4
- 'pre' => array_merge( $common, array( 'width' ) ),
+ 'pre' => array_merge( $common, [ 'width' ] ),
# 9.4
- 'ins' => array_merge( $common, array( 'cite', 'datetime' ) ),
- 'del' => array_merge( $common, array( 'cite', 'datetime' ) ),
+ 'ins' => array_merge( $common, [ 'cite', 'datetime' ] ),
+ 'del' => array_merge( $common, [ 'cite', 'datetime' ] ),
# 10.2
- 'ul' => array_merge( $common, array( 'type' ) ),
- 'ol' => array_merge( $common, array( 'type', 'start', 'reversed' ) ),
- 'li' => array_merge( $common, array( 'type', 'value' ) ),
+ 'ul' => array_merge( $common, [ 'type' ] ),
+ 'ol' => array_merge( $common, [ 'type', 'start', 'reversed' ] ),
+ 'li' => array_merge( $common, [ 'type', 'value' ] ),
# 10.3
'dl' => $common,
# 11.2.1
'table' => array_merge( $common,
- array( 'summary', 'width', 'border', 'frame',
+ [ 'summary', 'width', 'border', 'frame',
'rules', 'cellspacing', 'cellpadding',
'align', 'bgcolor',
- ) ),
+ ] ),
# 11.2.2
'caption' => $block,
'tbody' => $common,
# 11.2.4
- 'colgroup' => array_merge( $common, array( 'span' ) ),
- 'col' => array_merge( $common, array( 'span' ) ),
+ 'colgroup' => array_merge( $common, [ 'span' ] ),
+ 'col' => array_merge( $common, [ 'span' ] ),
# 11.2.5
- 'tr' => array_merge( $common, array( 'bgcolor' ), $tablealign ),
+ 'tr' => array_merge( $common, [ 'bgcolor' ], $tablealign ),
# 11.2.6
'td' => array_merge( $common, $tablecell, $tablealign ),
# 12.2
# NOTE: <a> is not allowed directly, but the attrib
# whitelist is used from the Parser object
- 'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa
+ 'a' => array_merge( $common, [ 'href', 'rel', 'rev' ] ), # rel/rev esp. for RDFa
# 13.2
# Not usually allowed, but may be used for extension-style hooks
# such as <math> when it is rasterized, or if $wgAllowImageTag is
# true
- 'img' => array_merge( $common, array( 'alt', 'src', 'width', 'height' ) ),
+ 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height' ] ),
# 15.2.1
'tt' => $common,
'u' => $common,
# 15.2.2
- 'font' => array_merge( $common, array( 'size', 'color', 'face' ) ),
+ 'font' => array_merge( $common, [ 'size', 'color', 'face' ] ),
# basefont
# 15.3
- 'hr' => array_merge( $common, array( 'width' ) ),
+ 'hr' => array_merge( $common, [ 'width' ] ),
# HTML Ruby annotation text module, simple ruby only.
# http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
# MathML root element, where used for extensions
# 'title' may not be 100% valid here; it's XHTML
# http://www.w3.org/TR/REC-MathML/
- 'math' => array( 'class', 'style', 'id', 'title' ),
+ 'math' => [ 'class', 'style', 'id', 'title' ],
# HTML 5 section 4.6
'bdi' => $common,
# HTML5 elements, defined by:
# http://www.whatwg.org/html/
- 'data' => array_merge( $common, array( 'value' ) ),
- 'time' => array_merge( $common, array( 'datetime' ) ),
+ 'data' => array_merge( $common, [ 'value' ] ),
+ 'time' => array_merge( $common, [ 'datetime' ] ),
'mark' => $common,
// meta and link are only permitted by removeHTMLtags when Microdata
// Also meta and link are only valid in WikiText as Microdata elements
// (ie: validateTag rejects tags missing the attributes needed for Microdata)
// So we don't bother including $common attributes that have no purpose.
- 'meta' => array( 'itemprop', 'content' ),
- 'link' => array( 'itemprop', 'href' ),
- );
+ 'meta' => [ 'itemprop', 'content' ],
+ 'link' => [ 'itemprop', 'href' ],
+ ];
$staticInitialised = $globalContext;
# Escape any control characters introduced by the above step
$url = preg_replace_callback( '/[\][<>"\\x00-\\x20\\x7F\|]/',
- array( __CLASS__, 'cleanUrlCallback' ), $url );
+ [ __CLASS__, 'cleanUrlCallback' ], $url );
# Validate hostname portion
- $matches = array();
+ $matches = [];
if ( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) {
list( /* $whole */, $protocol, $host, $rest ) = $matches;
*/
public static function validateEmail( $addr ) {
$result = null;
- if ( !Hooks::run( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
+ if ( !Hooks::run( 'isValidEmailAddr', [ $addr, &$result ] ) ) {
return $result;
}