Merge "buttons: Set min-width of button groups and icon buttons"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index 61fffc5..2d72deb 100644 (file)
@@ -210,6 +210,14 @@ class Parser {
         */
        public $mLangLinkLanguages;
 
+       /**
+        * @var MapCacheLRU|null
+        * @since 1.24
+        *
+        * A cache of the current revisions of titles. Keys are $title->getPrefixedDBkey().
+        * Holds null while unset; fetchCurrentRevisionOfTitle() creates the
+        * MapCacheLRU on demand.
+        */
+       public $currentRevisionCache;
+
        /**
         * @var bool Recursive call protection.
         * This variable should be treated as if it were private.
@@ -258,6 +266,21 @@ class Parser {
         */
        public function __clone() {
                $this->mInParse = false;
+
+               // Bug 56226: When you create a reference "to" an object field, that
+               // makes the object field itself be a reference too (until the other
+               // reference goes out of scope). When cloning, any field that's a
+               // reference is copied as a reference in the new object. Both of these
+               // are defined PHP5 behaviors, as inconvenient as it is for us when old
+               // hooks from PHP4 days are passing fields by reference.
+               foreach ( array( 'mStripState', 'mVarCache' ) as $k ) {
+                       // Copy the field's value into a plain local, then rebind the field
+                       // to that copy so it no longer shares storage with the original.
+                       $tmp = $this->$k;
+                       $this->$k =& $tmp;
+                       unset( $tmp ); // Otherwise the next iteration would assign through $tmp into this field
+               }
+
                wfRunHooks( 'ParserCloned', array( $this ) ); // Pass the new clone to ParserCloned hook handlers
        }
 
@@ -305,6 +328,7 @@ class Parser {
                $this->mVarCache = array();
                $this->mUser = null;
                $this->mLangLinkLanguages = array();
+               $this->currentRevisionCache = null;
 
                /**
                 * Prefix for temporary replacement strings for the multipass parser.
@@ -376,6 +400,7 @@ class Parser {
 
                $this->startParse( $title, $options, self::OT_HTML, $clearState );
 
+               $this->currentRevisionCache = null;
                $this->mInputSize = strlen( $text );
                if ( $this->mOptions->getEnableLimitReport() ) {
                        $this->mOutput->resetParseStartTime();
@@ -588,6 +613,7 @@ class Parser {
                $this->mRevisionUser = $oldRevisionUser;
                $this->mRevisionSize = $oldRevisionSize;
                $this->mInputSize = false;
+               $this->currentRevisionCache = null;
                wfProfileOut( $fname );
                wfProfileOut( __METHOD__ );
 
@@ -1402,7 +1428,7 @@ class Parser {
                                $this->getExternalLinkAttribs( $url ) );
                        # Register it in the output object...
                        # Replace unnecessary URL escape codes with their equivalent characters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
                wfProfileOut( __METHOD__ );
@@ -1710,7 +1736,7 @@ class Parser {
                        # Register link in the output object.
                        # Replace unnecessary URL escape codes with the referenced character
                        # This prevents spammers from hiding links from the filters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
 
@@ -1759,40 +1785,75 @@ class Parser {
        }
 
        /**
-        * Replace unusual URL escape codes with their equivalent characters
+        * Replace unusual escape codes in a URL with their equivalent characters.
         *
+        * Kept only as a backward-compatible alias: it reports itself through
+        * wfDeprecated() and then returns the result of normalizeLinkUrl().
+        *
+        * @deprecated since 1.24, use normalizeLinkUrl() instead
         * @param string $url
         * @return string
-        *
-        * @todo This can merge genuinely required bits in the path or query string,
-        *       breaking legit URLs. A proper fix would treat the various parts of
-        *       the URL differently; as a workaround, just use the output for
-        *       statistical records, not for actual linking/output.
         */
        public static function replaceUnusualEscapes( $url ) {
-               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
-                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+               wfDeprecated( __METHOD__, '1.24' );
+               return self::normalizeLinkUrl( $url );
        }
 
        /**
-        * Callback function used in replaceUnusualEscapes().
-        * Replaces unusual URL escape codes with their equivalent character
+        * Replace unusual escape codes in a URL with their equivalent characters
         *
-        * @param array $matches
+        * This generally follows the syntax defined in RFC 3986, with special
+        * consideration for HTTP query strings.
         *
+        * The URL is processed in two passes. First, control characters, space,
+        * the characters "<>[\]^`{|} and all bytes from 0x7F upwards are
+        * percent-encoded wherever they occur. Then the string is split at the
+        * first '#' (fragment) and at the first '?' if it precedes the fragment
+        * (query). In each of the three pieces, %XX escapes of printable ASCII
+        * characters are decoded unless the character is on that piece's
+        * keep-escaped list; escapes that stay are upper-cased.
+        *
+        * NOTE(review): the first pass also encodes '[' and ']' in the host part,
+        * e.g. for a bracketed IPv6 literal -- confirm whether that is intended.
+        *
+        * @param string $url
         * @return string
         */
-       private static function replaceUnusualEscapesCallback( $matches ) {
-               $char = urldecode( $matches[0] );
-               $ord = ord( $char );
-               # Is it an unsafe or HTTP reserved character according to RFC 1738?
-               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
-                       # No, shouldn't be escaped
-                       return $char;
-               } else {
-                       # Yes, leave it escaped
-                       return $matches[0];
+       public static function normalizeLinkUrl( $url ) {
+               # First, make sure unsafe characters are encoded
+               $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+                       function ( $m ) {
+                               return rawurlencode( $m[0] );
+                       },
+                       $url
+               );
+
+               $ret = ''; # Built back to front: fragment, then query, then scheme and path
+               $end = strlen( $url ); # Exclusive end offset of the part not yet processed
+
+               # Fragment part - 'fragment'
+               $start = strpos( $url, '#' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+                       $end = $start;
+               }
+
+               # Query part - 'query' minus &=+; (escapes of those four are kept as well)
+               $start = strpos( $url, '?' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+                       $end = $start;
                }
+
+               # Scheme and path part - 'pchar'
+               # (we assume no userinfo or encoded colons in the host)
+               $ret = self::normalizeUrlComponent(
+                       substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+               return $ret;
+       }
+
+       /**
+        * Decode the percent-escapes in one URL component that are safe to decode.
+        *
+        * Each %XX sequence is decoded when it stands for a character with code
+        * 33-126 that does not occur in $unsafe; every other sequence is kept,
+        * with its hex digits upper-cased.
+        *
+        * @param string $component Piece of a URL to process
+        * @param string $unsafe Characters whose escapes must be left in place
+        * @return string
+        */
+       private static function normalizeUrlComponent( $component, $unsafe ) {
+               $callback = function ( $matches ) use ( $unsafe ) {
+                       $char = urldecode( $matches[0] );
+                       $ord = ord( $char );
+                       if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+                               # Unescape it
+                               return $char;
+                       } else {
+                               # Leave it escaped, but use uppercase for a-f
+                               return strtoupper( $matches[0] );
+                       }
+               };
+               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
        }
 
        /**
@@ -3794,6 +3855,44 @@ class Parser {
                return array( $dom, $title );
        }
 
+       /**
+        * Fetch the current revision of a given title. Note that the revision
+        * (and even the title) may not exist in the database, so everything
+        * contributing to the output of the parser should use this method
+        * where possible, rather than getting the revisions themselves. This
+        * method also caches its results, so using it benefits performance.
+        *
+        * Results are kept in $this->currentRevisionCache, a MapCacheLRU that is
+        * created here on first use (constructed with 100) and keyed by
+        * $title->getPrefixedDBkey(). On a cache miss the callback returned by
+        * $this->mOptions->getCurrentRevisionCallback() is called with
+        * ( $title, $this ) and whatever it returns is stored.
+        *
+        * @since 1.24
+        * @param Title $title
+        * @return Revision NOTE(review): given that the revision may not exist,
+        *   the callback presumably returns a non-Revision value in that case --
+        *   confirm and widen this type if so.
+        */
+       public function fetchCurrentRevisionOfTitle( $title ) {
+               $cacheKey = $title->getPrefixedDBkey();
+               if ( !$this->currentRevisionCache ) {
+                       $this->currentRevisionCache = new MapCacheLRU( 100 );
+               }
+               if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
+                       $this->currentRevisionCache->set( $cacheKey,
+                               // Defaults to Parser::statelessFetchRevision()
+                               call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
+                       );
+               }
+               return $this->currentRevisionCache->get( $cacheKey );
+       }
+
+       /**
+        * Wrapper around Revision::newFromTitle to allow passing additional parameters
+        * without passing them on to it.
+        *
+        * fetchCurrentRevisionOfTitle() invokes the current-revision callback as
+        * callback( $title, $parser ); this method accepts that second argument
+        * and ignores it.
+        *
+        * @since 1.24
+        * @param Title $title
+        * @param Parser|bool $parser Unused
+        * @return Revision Whatever Revision::newFromTitle( $title ) returns
+        */
+       public static function statelessFetchRevision( $title, $parser = false ) {
+               return Revision::newFromTitle( $title );
+       }
+
        /**
         * Fetch the unparsed text of a template and register a reference to it.
         * @param Title $title
@@ -3859,9 +3958,13 @@ class Parser {
                                break;
                        }
                        # Get the revision
-                       $rev = $id
-                               ? Revision::newFromId( $id )
-                               : Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+                       if ( $id ) {
+                               $rev = Revision::newFromId( $id );
+                       } elseif ( $parser ) {
+                               $rev = $parser->fetchCurrentRevisionOfTitle( $title );
+                       } else {
+                               $rev = Revision::newFromTitle( $title );
+                       }
                        $rev_id = $rev ? $rev->getId() : 0;
                        # If there is no current revision, there is no page
                        if ( $id === false && !$rev ) {
@@ -4597,13 +4700,13 @@ class Parser {
                                if ( $isTemplate ) {
                                        # Put a T flag in the section identifier, to indicate to extractSections()
                                        # that sections inside <includeonly> should be counted.
-                                       $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
+                                       $editsectionPage = $titleText;
+                                       $editsectionSection = "T-$sectionIndex";
+                                       $editsectionContent = null;
                                } else {
-                                       $editlinkArgs = array(
-                                               $this->mTitle->getPrefixedText(),
-                                               $sectionIndex,
-                                               $headlineHint
-                                       );
+                                       $editsectionPage = $this->mTitle->getPrefixedText();
+                                       $editsectionSection = $sectionIndex;
+                                       $editsectionContent = $headlineHint;
                                }
                                // We use a bit of pseudo-xml for editsection markers. The
                                // language converter is run later on. Using a UNIQ style marker
@@ -4616,10 +4719,11 @@ class Parser {
                                // important bits of data, but put the headline hint inside a
                                // content block because the language converter is supposed to
                                // be able to convert that piece of data.
-                               $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
-                               $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
-                               if ( isset( $editlinkArgs[2] ) ) {
-                                       $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+                               // Gets replaced with html in ParserOutput::getText
+                               $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
+                               $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
+                               if ( $editsectionContent !== null ) {
+                                       $editlink .= '>' . $editsectionContent . '</mw:editsection>';
                                } else {
                                        $editlink .= '/>';
                                }