Merge "buttons: Set min-width of button groups and icon buttons"

[lhc/web/wiklou.git] / includes / parser / Parser.php
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php

index 61fffc5..2d72deb 100644 (file)
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -210,6 +210,14 @@ class Parser {
          */
         public $mLangLinkLanguages;
  
+       /**
+        * @var MapCacheLRU|null
+        * @since 1.24
+        *
+        * A cache of the current revisions of titles. Keys are $title->getPrefixedDBkey().
+        */
+       public $currentRevisionCache;
+
         /**
          * @var bool Recursive call protection.
          * This variable should be treated as if it were private.
@@ -258,6 +266,21 @@ class Parser {
          */
         public function __clone() {
                 $this->mInParse = false;
+
+               // Bug 56226: When you create a reference "to" an object field, that
+               // makes the object field itself be a reference too (until the other
+               // reference goes out of scope). When cloning, any field that's a
+               // reference is copied as a reference in the new object. Both of these
+               // are defined PHP5 behaviors, as inconvenient as it is for us when old
+               // hooks from PHP4 days are passing fields by reference.
+               foreach ( array( 'mStripState', 'mVarCache' ) as $k ) {
+                       // Make a non-reference copy of the field, then rebind the field to
+                       // reference the new copy.
+                       $tmp = $this->$k;
+                       $this->$k =& $tmp;
+                       unset( $tmp );
+               }
+
                 wfRunHooks( 'ParserCloned', array( $this ) );
         }
  
@@ -305,6 +328,7 @@ class Parser {
                 $this->mVarCache = array();
                 $this->mUser = null;
                 $this->mLangLinkLanguages = array();
+               $this->currentRevisionCache = null;
  
                 /**
                  * Prefix for temporary replacement strings for the multipass parser.
@@ -376,6 +400,7 @@ class Parser {
  
                 $this->startParse( $title, $options, self::OT_HTML, $clearState );
  
+               $this->currentRevisionCache = null;
                 $this->mInputSize = strlen( $text );
                 if ( $this->mOptions->getEnableLimitReport() ) {
                         $this->mOutput->resetParseStartTime();
@@ -588,6 +613,7 @@ class Parser {
                 $this->mRevisionUser = $oldRevisionUser;
                 $this->mRevisionSize = $oldRevisionSize;
                 $this->mInputSize = false;
+               $this->currentRevisionCache = null;
                 wfProfileOut( $fname );
                 wfProfileOut( __METHOD__ );
  
@@ -1402,7 +1428,7 @@ class Parser {
                                 $this->getExternalLinkAttribs( $url ) );
                         # Register it in the output object...
                         # Replace unnecessary URL escape codes with their equivalent characters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                         $this->mOutput->addExternalLink( $pasteurized );
                 }
                 wfProfileOut( __METHOD__ );
@@ -1710,7 +1736,7 @@ class Parser {
                         # Register link in the output object.
                         # Replace unnecessary URL escape codes with the referenced character
                         # This prevents spammers from hiding links from the filters
-                       $pasteurized = self::replaceUnusualEscapes( $url );
+                       $pasteurized = self::normalizeLinkUrl( $url );
                         $this->mOutput->addExternalLink( $pasteurized );
                 }
  
@@ -1759,40 +1785,75 @@ class Parser {
         }
  
         /**
-        * Replace unusual URL escape codes with their equivalent characters
+        * Replace unusual escape codes in a URL with their equivalent characters
          *
+        * @deprecated since 1.24, use normalizeLinkUrl
          * @param string $url
          * @return string
-        *
-        * @todo This can merge genuinely required bits in the path or query string,
-        *       breaking legit URLs. A proper fix would treat the various parts of
-        *       the URL differently; as a workaround, just use the output for
-        *       statistical records, not for actual linking/output.
          */
         public static function replaceUnusualEscapes( $url ) {
-               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
-                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+               wfDeprecated( __METHOD__, '1.24' );
+               return self::normalizeLinkUrl( $url );
         }
  
         /**
-        * Callback function used in replaceUnusualEscapes().
-        * Replaces unusual URL escape codes with their equivalent character
+        * Replace unusual escape codes in a URL with their equivalent characters
          *
-        * @param array $matches
+        * This generally follows the syntax defined in RFC 3986, with special
+        * consideration for HTTP query strings.
          *
+        * @param string $url
          * @return string
          */
-       private static function replaceUnusualEscapesCallback( $matches ) {
-               $char = urldecode( $matches[0] );
-               $ord = ord( $char );
-               # Is it an unsafe or HTTP reserved character according to RFC 1738?
-               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
-                       # No, shouldn't be escaped
-                       return $char;
-               } else {
-                       # Yes, leave it escaped
-                       return $matches[0];
+       public static function normalizeLinkUrl( $url ) {
+               # First, make sure unsafe characters are encoded
+               $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+                       function ( $m ) {
+                               return rawurlencode( $m[0] );
+                       },
+                       $url
+               );
+
+               $ret = '';
+               $end = strlen( $url );
+
+               # Fragment part - 'fragment'
+               $start = strpos( $url, '#' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+                       $end = $start;
+               }
+
+               # Query part - 'query' minus &=+;
+               $start = strpos( $url, '?' );
+               if ( $start !== false && $start < $end ) {
+                       $ret = self::normalizeUrlComponent(
+                               substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+                       $end = $start;
                 }
+
+               # Scheme and path part - 'pchar'
+               # (we assume no userinfo or encoded colons in the host)
+               $ret = self::normalizeUrlComponent(
+                       substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+               return $ret;
+       }
+
+       private static function normalizeUrlComponent( $component, $unsafe ) {
+               $callback = function ( $matches ) use ( $unsafe ) {
+                       $char = urldecode( $matches[0] );
+                       $ord = ord( $char );
+                       if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+                               # Unescape it
+                               return $char;
+                       } else {
+                               # Leave it escaped, but use uppercase for a-f
+                               return strtoupper( $matches[0] );
+                       }
+               };
+               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
         }
  
         /**
@@ -3794,6 +3855,44 @@ class Parser {
                 return array( $dom, $title );
         }
  
+       /**
+        * Fetch the current revision of a given title. Note that the revision
+        * (and even the title) may not exist in the database, so everything
+        * contributing to the output of the parser should use this method
+        * where possible, rather than getting the revisions themselves. This
+        * method also caches its results, so using it benefits performance.
+        *
+        * @since 1.24
+        * @param Title $title
+        * @return Revision
+        */
+       public function fetchCurrentRevisionOfTitle( $title ) {
+               $cacheKey = $title->getPrefixedDBkey();
+               if ( !$this->currentRevisionCache ) {
+                       $this->currentRevisionCache = new MapCacheLRU( 100 );
+               }
+               if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
+                       $this->currentRevisionCache->set( $cacheKey,
+                               // Defaults to Parser::statelessFetchRevision()
+                               call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
+                       );
+               }
+               return $this->currentRevisionCache->get( $cacheKey );
+       }
+
+       /**
+        * Wrapper around Revision::newFromTitle to allow passing additional parameters
+        * without passing them on to it.
+        *
+        * @since 1.24
+        * @param Title $title
+        * @param Parser|bool $parser
+        * @return Revision
+        */
+       public static function statelessFetchRevision( $title, $parser = false ) {
+               return Revision::newFromTitle( $title );
+       }
+
         /**
          * Fetch the unparsed text of a template and register a reference to it.
          * @param Title $title
@@ -3859,9 +3958,13 @@ class Parser {
                                 break;
                         }
                         # Get the revision
-                       $rev = $id
-                               ? Revision::newFromId( $id )
-                               : Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+                       if ( $id ) {
+                               $rev = Revision::newFromId( $id );
+                       } elseif ( $parser ) {
+                               $rev = $parser->fetchCurrentRevisionOfTitle( $title );
+                       } else {
+                               $rev = Revision::newFromTitle( $title );
+                       }
                         $rev_id = $rev ? $rev->getId() : 0;
                         # If there is no current revision, there is no page
                         if ( $id === false && !$rev ) {
@@ -4597,13 +4700,13 @@ class Parser {
                                 if ( $isTemplate ) {
                                         # Put a T flag in the section identifier, to indicate to extractSections()
                                         # that sections inside <includeonly> should be counted.
-                                       $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
+                                       $editsectionPage = $titleText;
+                                       $editsectionSection = "T-$sectionIndex";
+                                       $editsectionContent = null;
                                 } else {
-                                       $editlinkArgs = array(
-                                               $this->mTitle->getPrefixedText(),
-                                               $sectionIndex,
-                                               $headlineHint
-                                       );
+                                       $editsectionPage = $this->mTitle->getPrefixedText();
+                                       $editsectionSection = $sectionIndex;
+                                       $editsectionContent = $headlineHint;
                                 }
                                 // We use a bit of pseudo-xml for editsection markers. The
                                 // language converter is run later on. Using a UNIQ style marker
@@ -4616,10 +4719,11 @@ class Parser {
                                 // important bits of data, but put the headline hint inside a
                                 // content block because the language converter is supposed to
                                 // be able to convert that piece of data.
-                               $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
-                               $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
-                               if ( isset( $editlinkArgs[2] ) ) {
-                                       $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+                               // Gets replaced with html in ParserOutput::getText
+                               $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
+                               $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
+                               if ( $editsectionContent !== null ) {
+                                       $editlink .= '>' . $editsectionContent . '</mw:editsection>';
                                 } else {
                                         $editlink .= '/>';
                                 }