'li',
);
+ global $wgAllowImageTag;
+ if ( $wgAllowImageTag ) {
+ $htmlsingle[] = 'img';
+ $htmlsingleonly[] = 'img';
+ }
+
$htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) );
$htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) );
* @return Mixed
*/
static function checkCss( $value ) {
- $stripped = Sanitizer::decodeCharReferences( $value );
+ $value = Sanitizer::decodeCharReferences( $value );
// Remove any comments; IE gets token splitting wrong
- $stripped = StringUtils::delimiterReplace( '/*', '*/', ' ', $stripped );
-
- $value = $stripped;
-
- // ... and continue checks
- $stripped = preg_replace( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!e',
- 'codepointToUtf8(hexdec("$1"))', $stripped );
- $stripped = str_replace( '\\', '', $stripped );
- if( preg_match( '/(?:expression|tps*:\/\/|url\\s*\().*/is',
- $stripped ) ) {
- # haxx0r
+ $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
+
+ // Decode escape sequences and line continuation
+ // See the grammar in the CSS 2 spec, appendix D, Mozilla implements it accurately.
+ // IE 8 doesn't implement it at all, but there's no way to introduce url() into
+ // IE that doesn't hit Mozilla also.
+ static $decodeRegex;
+ if ( !$decodeRegex ) {
+ $space = '[\\x20\\t\\r\\n\\f]';
+ $nl = '(?:\\n|\\r\\n|\\r|\\f)';
+ $backslash = '\\\\';
+ $decodeRegex = "/ $backslash
+ (?:
+ ($nl) | # 1. Line continuation
+ ([0-9A-Fa-f]{1,6})$space? | # 2. character number
+ (.) # 3. backslash cancelling special meaning
+ )/xu";
+ }
+ $decoded = preg_replace_callback( $decodeRegex,
+ array( __CLASS__, 'cssDecodeCallback' ), $value );
+ if ( preg_match( '!expression|https?://|url\s*\(!i', $decoded ) ) {
+ // Not allowed
return false;
+ } else {
+ // Allowed, return CSS with comments stripped
+ return $value;
}
+ }
- return $value;
+ static function cssDecodeCallback( $matches ) {
+ if ( $matches[1] !== '' ) {
+ return '';
+ } elseif ( $matches[2] !== '' ) {
+ return codepointToUtf8( hexdec( $matches[2] ) );
+ } elseif ( $matches[3] !== '' ) {
+ return $matches[3];
+ } else {
+ throw new MWException( __METHOD__.': invalid match' );
+ }
}
/**
$text );
}
+ /**
+ * Decode any character references, numeric or named entities,
+ * in the next and normalize the resulting string. (bug 14952)
+ *
+ * This is useful for page titles, not for text to be displayed,
+ * MediaWiki allows HTML entities to escape normalization as a feature.
+ *
+ * @param $text String (already normalized, containing entities)
+ * @return String (still normalized, without entities)
+ */
+ public static function decodeCharReferencesAndNormalize( $text ) {
+ global $wgContLang;
+ $text = preg_replace_callback(
+ MW_CHAR_REFS_REGEX,
+ array( 'Sanitizer', 'decodeCharReferencesCallback' ),
+ $text, /* limit */ -1, $count );
+
+ if ( $count ) {
+ return $wgContLang->normalize( $text );
+ } else {
+ return $text;
+ }
+ }
+
/**
* @param $matches String
* @return String
static function setupAttributeWhitelist() {
global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowMicrodataAttributes;
- $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style', 'xml:lang' );
+ $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
if ( $wgAllowRdfaAttributes ) {
#RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
# 13.2
# Not usually allowed, but may be used for extension-style hooks
- # such as <math> when it is rasterized
- 'img' => array_merge( $common, array( 'alt' ) ),
+ # such as <math> when it is rasterized, or if $wgAllowImageTag is
+ # true
+ 'img' => array_merge( $common, array( 'alt', 'src', 'width', 'height' ) ),
# 15.2.1
'tt' => $common,