Merge "Change MediaWiki UI to use skinStyles so skins can customize"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index c5c472a..84bb224 100644 (file)
@@ -1402,7 +1402,7 @@ class Parser {
                                $this->getExternalLinkAttribs( $url ) );
                        # Register it in the output object...
                        # Replace unnecessary URL escape codes with their equivalent characters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
                wfProfileOut( __METHOD__ );
@@ -1710,7 +1710,7 @@ class Parser {
                        # Register link in the output object.
                        # Replace unnecessary URL escape codes with the referenced character
                        # This prevents spammers from hiding links from the filters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
 
@@ -1759,40 +1759,75 @@ class Parser {
        }
 
        /**
-        * Replace unusual URL escape codes with their equivalent characters
+        * Replace unusual escape codes in a URL with their equivalent characters
         *
+        * @deprecated since 1.24, use normalizeLinkUrl
         * @param string $url
         * @return string
-        *
-        * @todo This can merge genuinely required bits in the path or query string,
-        *       breaking legit URLs. A proper fix would treat the various parts of
-        *       the URL differently; as a workaround, just use the output for
-        *       statistical records, not for actual linking/output.
         */
        public static function replaceUnusualEscapes( $url ) {
-               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
-                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+               wfDeprecated( __METHOD__, '1.24' );
+               return self::normalizeLinkUrl( $url );
        }
 
        /**
-        * Callback function used in replaceUnusualEscapes().
-        * Replaces unusual URL escape codes with their equivalent character
+        * Replace unusual escape codes in a URL with their equivalent characters
         *
-        * @param array $matches
+        * This generally follows the syntax defined in RFC 3986, with special
+        * consideration for HTTP query strings.
         *
+        * @param string $url
         * @return string
         */
-       private static function replaceUnusualEscapesCallback( $matches ) {
-               $char = urldecode( $matches[0] );
-               $ord = ord( $char );
-               # Is it an unsafe or HTTP reserved character according to RFC 1738?
-               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
-                       # No, shouldn't be escaped
-                       return $char;
-               } else {
-                       # Yes, leave it escaped
-                       return $matches[0];
+       public static function normalizeLinkUrl( $url ) {
+               # First, make sure unsafe characters are encoded
+               $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+                       function ( $m ) {
+                               return rawurlencode( $m[0] );
+                       },
+                       $url
+               );
+
+               $ret = '';
+               $end = strlen( $url );
+
+               # Fragment part - 'fragment'
+               $start = strpos( $url, '#' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+                       $end = $start;
+               }
+
+               # Query part - 'query' minus &=+;
+               $start = strpos( $url, '?' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+                       $end = $start;
                }
+
+               # Scheme and path part - 'pchar'
+               # (we assume no userinfo or encoded colons in the host)
+               $ret = self::normalizeUrlComponent(
+                       substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+               return $ret;
+       }
+
+       private static function normalizeUrlComponent( $component, $unsafe ) {
+               $callback = function ( $matches ) use ( $unsafe ) {
+                       $char = urldecode( $matches[0] );
+                       $ord = ord( $char );
+                       if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+                               # Unescape it
+                               return $char;
+                       } else {
+                               # Leave it escaped, but use uppercase for a-f
+                               return strtoupper( $matches[0] );
+                       }
+               };
+               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
        }
 
        /**
@@ -4597,13 +4632,13 @@ class Parser {
                                if ( $isTemplate ) {
                                        # Put a T flag in the section identifier, to indicate to extractSections()
                                        # that sections inside <includeonly> should be counted.
-                                       $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
+                                       $editsectionPage = $titleText;
+                                       $editsectionSection = "T-$sectionIndex";
+                                       $editsectionContent = null;
                                } else {
-                                       $editlinkArgs = array(
-                                               $this->mTitle->getPrefixedText(),
-                                               $sectionIndex,
-                                               $headlineHint
-                                       );
+                                       $editsectionPage = $this->mTitle->getPrefixedText();
+                                       $editsectionSection = $sectionIndex;
+                                       $editsectionContent = $headlineHint;
                                }
                                // We use a bit of pesudo-xml for editsection markers. The
                                // language converter is run later on. Using a UNIQ style marker
@@ -4616,10 +4651,11 @@ class Parser {
                                // important bits of data, but put the headline hint inside a
                                // content block because the language converter is supposed to
                                // be able to convert that piece of data.
-                               $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
-                               $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
-                               if ( isset( $editlinkArgs[2] ) ) {
-                                       $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+                               // Gets replaced with html in ParserOutput::getText
+                               $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
+                               $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
+                               if ( $editsectionContent !== null ) {
+                                       $editlink .= '>' . $editsectionContent . '</mw:editsection>';
                                } else {
                                        $editlink .= '/>';
                                }
@@ -5527,7 +5563,7 @@ class Parser {
                                                                $paramName = 'no-link';
                                                                $value = true;
                                                                $validated = true;
-                                                       } elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
+                                                       } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
                                                                if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
                                                                        $paramName = 'link-url';
                                                                        $this->mOutput->addExternalLink( $value );