Merge "buttons: Set min-width of button groups and icon buttons"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index bff9c0d..2d72deb 100644 (file)
@@ -266,6 +266,21 @@ class Parser {
         */
        public function __clone() {
                $this->mInParse = false;
+
+               // Bug 56226: When you create a reference "to" an object field, that
+               // makes the object field itself be a reference too (until the other
+               // reference goes out of scope). When cloning, any field that's a
+               // reference is copied as a reference in the new object. Both of these
+               // are defined PHP5 behaviors, as inconvenient as it is for us when old
+               // hooks from PHP4 days are passing fields by reference.
+               foreach ( array( 'mStripState', 'mVarCache' ) as $k ) {
+                       // Make a non-reference copy of the field, then rebind the field to
+                       // reference the new copy.
+                       $tmp = $this->$k;
+                       $this->$k =& $tmp;
+                       unset( $tmp );
+               }
+
                wfRunHooks( 'ParserCloned', array( $this ) );
        }
 
@@ -1413,7 +1428,7 @@ class Parser {
                                $this->getExternalLinkAttribs( $url ) );
                        # Register it in the output object...
                        # Replace unnecessary URL escape codes with their equivalent characters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
                wfProfileOut( __METHOD__ );
@@ -1721,7 +1736,7 @@ class Parser {
                        # Register link in the output object.
                        # Replace unnecessary URL escape codes with the referenced character
                        # This prevents spammers from hiding links from the filters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
 
@@ -1770,40 +1785,75 @@ class Parser {
        }
 
        /**
-        * Replace unusual URL escape codes with their equivalent characters
+        * Replace unusual escape codes in a URL with their equivalent characters
         *
+        * @deprecated since 1.24, use normalizeLinkUrl
         * @param string $url
         * @return string
-        *
-        * @todo This can merge genuinely required bits in the path or query string,
-        *       breaking legit URLs. A proper fix would treat the various parts of
-        *       the URL differently; as a workaround, just use the output for
-        *       statistical records, not for actual linking/output.
         */
        public static function replaceUnusualEscapes( $url ) {
-               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
-                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+               wfDeprecated( __METHOD__, '1.24' );
+               return self::normalizeLinkUrl( $url );
        }
 
        /**
-        * Callback function used in replaceUnusualEscapes().
-        * Replaces unusual URL escape codes with their equivalent character
+        * Replace unusual escape codes in a URL with their equivalent characters
         *
-        * @param array $matches
+        * This generally follows the syntax defined in RFC 3986, with special
+        * consideration for HTTP query strings.
         *
+        * @param string $url
         * @return string
         */
-       private static function replaceUnusualEscapesCallback( $matches ) {
-               $char = urldecode( $matches[0] );
-               $ord = ord( $char );
-               # Is it an unsafe or HTTP reserved character according to RFC 1738?
-               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
-                       # No, shouldn't be escaped
-                       return $char;
-               } else {
-                       # Yes, leave it escaped
-                       return $matches[0];
+       public static function normalizeLinkUrl( $url ) {
+               # First, make sure unsafe characters are encoded
+               $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+                       function ( $m ) {
+                               return rawurlencode( $m[0] );
+                       },
+                       $url
+               );
+
+               $ret = '';
+               $end = strlen( $url );
+
+               # Fragment part - 'fragment'
+               $start = strpos( $url, '#' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+                       $end = $start;
                }
+
+               # Query part - 'query' minus &=+; (left escaped: they are significant in HTTP query strings)
+               $start = strpos( $url, '?' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+                       $end = $start;
+               }
+
+               # Scheme, host and path part - 'pchar'
+               # (we assume no userinfo or encoded colons in the host)
+               $ret = self::normalizeUrlComponent(
+                       substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+               return $ret;
+       }
+
+       private static function normalizeUrlComponent( $component, $unsafe ) {
+               $callback = function ( $matches ) use ( $unsafe ) {
+                       $char = urldecode( $matches[0] );
+                       $ord = ord( $char );
+                       if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+                               # Unescape it
+                               return $char;
+                       } else {
+                               # Leave it escaped, but use uppercase for a-f
+                               return strtoupper( $matches[0] );
+                       }
+               };
+               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
        }
 
        /**