Reserve data-mw and data-parsoid attribute prefixes for trusted values
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 8179905..5443e68 100644 (file)
@@ -454,15 +454,13 @@ class Sanitizer {
        public static function removeHTMLtags( $text, $processCallback = null,
                $args = array(), $extratags = array(), $removetags = array()
        ) {
-               global $wgUseTidy;
-
                extract( self::getRecognizedTagData( $extratags, $removetags ) );
 
                # Remove HTML comments
                $text = Sanitizer::removeHTMLcomments( $text );
                $bits = explode( '<', $text );
                $text = str_replace( '>', '&gt;', array_shift( $bits ) );
-               if ( !$wgUseTidy ) {
+               if ( !MWTidy::isEnabled() ) {
                        $tagstack = $tablestack = array();
                        foreach ( $bits as $x ) {
                                $regs = array();
@@ -478,7 +476,8 @@ class Sanitizer {
                                }
 
                                $badtag = false;
-                               if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
+                               $t = strtolower( $t );
+                               if ( isset( $htmlelements[$t] ) ) {
                                        # Check our stack
                                        if ( $slash && isset( $htmlsingleonly[$t] ) ) {
                                                $badtag = true;
@@ -538,7 +537,7 @@ class Sanitizer {
                                                        $badtag = true;
                                                } elseif ( in_array( $t, $tagstack ) && !isset( $htmlnest[$t] ) ) {
                                                        $badtag = true;
-                                               # Is it a self closed htmlpair ? (bug 5487)
+                                               # Is it a self-closed htmlpair? (bug 5487)
                                                } elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
                                                        $badtag = true;
                                                } elseif ( isset( $htmlsingleonly[$t] ) ) {
@@ -598,7 +597,8 @@ class Sanitizer {
                                        list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
 
                                        $badtag = false;
-                                       if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
+                                       $t = strtolower( $t );
+                                       if ( isset( $htmlelements[$t] ) ) {
                                                if ( is_callable( $processCallback ) ) {
                                                        call_user_func_array( $processCallback, array( &$params, $args ) );
                                                }
@@ -740,7 +740,7 @@ class Sanitizer {
 
                $out = array();
                foreach ( $attribs as $attribute => $value ) {
-                       #allow XML namespace declaration if RDFa is enabled
+                       # allow XML namespace declaration if RDFa is enabled
                        if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
                                if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) {
                                        $out[$attribute] = $value;
@@ -750,7 +750,15 @@ class Sanitizer {
                        }
 
                        # Allow any attribute beginning with "data-"
-                       if ( !preg_match( '/^data-(?!ooui)/i', $attribute ) && !isset( $whitelist[$attribute] ) ) {
+                       # However, these prefixes are reserved (matched case-insensitively):
+                       # * data-ooui is reserved for OOUI
+                       # * data-mw and data-parsoid are reserved for Parsoid
+                       # * data-mw-<ext name here> is reserved for extensions (or core) that
+                       #   need to communicate some data to the client and must be sure
+                       #   that it isn't coming from an untrusted user.
+                       if ( !preg_match( '/^data-(?!ooui|mw|parsoid)/i', $attribute )
+                               && !isset( $whitelist[$attribute] )
+                       ) {
                                continue;
                        }
 
@@ -786,7 +794,7 @@ class Sanitizer {
                                || $attribute === 'itemref' || $attribute === 'itemscope'
                                || $attribute === 'itemtype'
                        ) {
-                               //Paranoia. Allow "simple" values but suppress javascript
+                               // Paranoia. Allow "simple" values but suppress javascript
                                if ( preg_match( self::EVIL_URI_PATTERN, $value ) ) {
                                        continue;
                                }
@@ -796,7 +804,7 @@ class Sanitizer {
                        #       validation code that can be used by tag hook handlers, etc
                        if ( $attribute === 'href' || $attribute === 'src' ) {
                                if ( !preg_match( $hrefExp, $value ) ) {
-                                       continue; //drop any href or src attributes not using an allowed protocol.
+                                       continue; // drop any href or src attributes not using an allowed protocol.
                                        // NOTE: this also drops all relative URLs
                                }
                        }
@@ -1699,7 +1707,7 @@ class Sanitizer {
                        # rbc
                        'rb'         => $common,
                        'rp'         => $common,
-                       'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
+                       'rt'         => $common, # array_merge( $common, array( 'rbspan' ) ),
                        'rtc'         => $common,
 
                        # MathML root element, where used for extensions
@@ -1809,7 +1817,9 @@ class Sanitizer {
                        $host = preg_replace( $strip, '', $host );
 
                        // IPv6 host names are bracketed with [].  Url-decode these.
-                       if ( substr_compare( "//%5B", $host, 0, 5 ) === 0 && preg_match( '!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches ) ) {
+                       if ( substr_compare( "//%5B", $host, 0, 5 ) === 0 &&
+                               preg_match( '!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches )
+                       ) {
                                $host = '//[' . $matches[1] . ']' . $matches[2];
                        }