Merge "Add 路面 for better word segmentation."
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 478fe1e..a0c77da 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * XHTML sanitizer for MediaWiki
+ * XHTML sanitizer for %MediaWiki.
  *
  * Copyright © 2002-2005 Brion Vibber <brion@pobox.com> et al
  * http://www.mediawiki.org/
@@ -374,7 +374,7 @@ class Sanitizer {
                if ( !$staticInitialised ) {
 
                        $htmlpairsStatic = array( # Tags that must be closed
-                               'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
+                               'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
                                'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
                                'strike', 'strong', 'tt', 'var', 'div', 'center',
                                'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
@@ -457,10 +457,14 @@ class Sanitizer {
                                                                # and see if we find a match below them
                                                                $optstack = array();
                                                                array_push( $optstack, $ot );
-                                                               $ot = @array_pop( $tagstack );
+                                                               wfSuppressWarnings();
+                                                               $ot = array_pop( $tagstack );
+                                                               wfRestoreWarnings();
                                                                while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) {
                                                                        array_push( $optstack, $ot );
-                                                                       $ot = @array_pop( $tagstack );
+                                                                       wfSuppressWarnings();
+                                                                       $ot = array_pop( $tagstack );
+                                                                       wfRestoreWarnings();
                                                                }
                                                                if ( $t != $ot ) {
                                                                        # No match. Push the optional elements back again
@@ -688,6 +692,16 @@ class Sanitizer {
                                }
                        }
 
+                       if ( $attribute === 'align' && !in_array( $element, $cells ) ) {
+                               if ( $value === 'center' ) {
+                                       $style .= ' margin-left: auto;';
+                                       $property = 'margin-right';
+                                       $value = 'auto';
+                               } else {
+                                       $property = 'float';
+                               }
+                       }
+
                        $style .= " $property: $value;";
 
                        unset( $attribs[$attribute] );
@@ -1239,7 +1253,7 @@ class Sanitizer {
         * a. named char refs can only be &lt; &gt; &amp; &quot;, others are
         *   numericized (this way we're well-formed even without a DTD)
         * b. any numeric char refs must be legal chars, not invalid or forbidden
-        * c. use &#x, not &#X
+        * c. use lower cased "&#x", not "&#X"
         * d. fix or reject non-valid attributes
         *
         * @param $text String
@@ -1407,7 +1421,7 @@ class Sanitizer {
        /**
         * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
         * return the UTF-8 encoding of that character. Otherwise, returns
-        * pseudo-entity source (eg &foo;)
+        * pseudo-entity source (eg "&foo;")
         *
         * @param $name String
         * @return String
@@ -1607,6 +1621,10 @@ class Sanitizer {
                        # 'title' may not be 100% valid here; it's XHTML
                        # http://www.w3.org/TR/REC-MathML/
                        'math'       => array( 'class', 'style', 'id', 'title' ),
+
+                       # HTML 5 section 4.6
+                       'bdi' => $common,
+
                        );
                return $whitelist;
        }
@@ -1684,7 +1702,7 @@ class Sanitizer {
                                \xe1\xa0\x8d| # 180d MONGOLIAN FREE VARIATION SELECTOR THREE
                                \xe2\x80\x8c| # 200c ZERO WIDTH NON-JOINER
                                \xe2\x80\x8d| # 200d ZERO WIDTH JOINER
-                               [\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe00f VARIATION SELECTOR-1-16
+                               [\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe0f VARIATION SELECTOR-1-16
                                /xuD";
 
                        $host = preg_replace( $strip, '', $host );