Merge "Remove unreachable block"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Thu, 20 Aug 2015 19:15:19 +0000 (19:15 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Thu, 20 Aug 2015 19:15:19 +0000 (19:15 +0000)
1  2 
includes/Sanitizer.php

diff --combined includes/Sanitizer.php
@@@ -349,9 -349,6 +349,6 @@@ class Sanitizer 
                                          \"([^<\"]*)\"
                                         | '([^<']*)'
                                         |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
-                                        |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
-                                                                                # colors are specified like this.
-                                                                                # We'll be normalizing it.
                                        )
                                )?(?=$space|\$)/sx";
                }
                $value = self::normalizeCss( $value );
  
                // Reject problematic keywords and control characters
 -              if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
 +              if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ||
 +                      strpos( $value, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
                        return '/* invalid control char */';
                } elseif ( preg_match(
                        '! expression
         * @return string
         */
        private static function getTagAttributeCallback( $set ) {
-               if ( isset( $set[6] ) ) {
-                       # Illegal #XXXXXX color with no quotes.
-                       return $set[6];
-               } elseif ( isset( $set[5] ) ) {
+               if ( isset( $set[5] ) ) {
                        # No quotes.
                        return $set[5];
                } elseif ( isset( $set[4] ) ) {
        }
  
        /**
 -       * Returns true if a given Unicode codepoint is a valid character in XML.
 +       * Returns true if a given Unicode codepoint is a valid character in
 +       * both HTML5 and XML.
         * @param int $codepoint
         * @return bool
         */
        private static function validateCodepoint( $codepoint ) {
 +              # U+000C is valid in HTML5 but not allowed in XML.
 +              # U+000D is valid in XML but not allowed in HTML5.
 +              # U+007F - U+009F are disallowed in HTML5 (control characters).
                return $codepoint == 0x09
                        || $codepoint == 0x0a
 -                      || $codepoint == 0x0d
 -                      || ( $codepoint >= 0x20 && $codepoint <= 0xd7ff )
 +                      || ( $codepoint >= 0x20 && $codepoint <= 0x7e )
 +                      || ( $codepoint >= 0xa0 && $codepoint <= 0xd7ff )
                        || ( $codepoint >= 0xe000 && $codepoint <= 0xfffd )
                        || ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff );
        }
  
                        $host = preg_replace( $strip, '', $host );
  
 +                      // IPv6 host names are bracketed with [].  Url-decode these.
 +                      if ( substr_compare( "//%5B", $host, 0, 5 ) === 0 && preg_match( '!^//%5B(.*?)%5D((:\d+)?)$!', $host, $matches ) ) {
 +                              $host = '//[' . $matches[1] . ']' . $matches[2];
 +                      }
 +
                        // @todo FIXME: Validate hostnames here
  
                        return $protocol . $host . $rest;