Merge "Add parser callback to get a page's current revision"

author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>

Thu, 25 Sep 2014 22:52:10 +0000 (22:52 +0000)

committer Gerrit Code Review <gerrit@wikimedia.org>

Thu, 25 Sep 2014 22:52:10 +0000 (22:52 +0000)
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Thu, 25 Sep 2014 22:52:10 +0000 (22:52 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Thu, 25 Sep 2014 22:52:10 +0000 (22:52 +0000)
diff --combined includes/parser/Parser.php

index 8bd96b5,bff9c0d..2d72deb
--- 1/includes/parser/Parser.php
--- 2/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@@ -210,6 -210,14 +210,14 @@@ class Parser 
          */
         public $mLangLinkLanguages;
   
+       /**
+        * @var MapCacheLRU|null
+        * @since 1.24
+        *
+        * A cache of the current revisions of titles. Keys are $title->getPrefixedDBkey().
+        */
+       public $currentRevisionCache;
+ 
         /**
          * @var bool Recursive call protection.
          * This variable should be treated as if it were private.
@@@ -258,21 -266,6 +266,21 @@@
          */
         public function __clone() {
                 $this->mInParse = false;
+ +
+ +              // Bug 56226: When you create a reference "to" an object field, that
+ +              // makes the object field itself be a reference too (until the other
+ +              // reference goes out of scope). When cloning, any field that's a
+ +              // reference is copied as a reference in the new object. Both of these
+ +              // are defined PHP5 behaviors, as inconvenient as it is for us when old
+ +              // hooks from PHP4 days are passing fields by reference.
+ +              foreach ( array( 'mStripState', 'mVarCache' ) as $k ) {
+ +                      // Make a non-reference copy of the field, then rebind the field to
+ +                      // reference the new copy.
+ +                      $tmp = $this->$k;
+ +                      $this->$k =& $tmp;
+ +                      unset( $tmp );
+ +              }
+ +
                 wfRunHooks( 'ParserCloned', array( $this ) );
         }
   
@@@ -320,6 -313,7 +328,7 @@@
                 $this->mVarCache = array();
                 $this->mUser = null;
                 $this->mLangLinkLanguages = array();
+               $this->currentRevisionCache = null;
   
                 /**
                  * Prefix for temporary replacement strings for the multipass parser.
@@@ -391,6 -385,7 +400,7 @@@
   
                 $this->startParse( $title, $options, self::OT_HTML, $clearState );
   
+               $this->currentRevisionCache = null;
                 $this->mInputSize = strlen( $text );
                 if ( $this->mOptions->getEnableLimitReport() ) {
                         $this->mOutput->resetParseStartTime();
@@@ -603,6 -598,7 +613,7 @@@
                 $this->mRevisionUser = $oldRevisionUser;
                 $this->mRevisionSize = $oldRevisionSize;
                 $this->mInputSize = false;
+               $this->currentRevisionCache = null;
                 wfProfileOut( $fname );
                 wfProfileOut( __METHOD__ );
   
@@@ -1417,7 -1413,7 +1428,7 @@@
                                 $this->getExternalLinkAttribs( $url ) );
                         # Register it in the output object...
                         # Replace unnecessary URL escape codes with their equivalent characters
- -                      $pasteurized = self::replaceUnusualEscapes( $url );
+ +                      $pasteurized = self::normalizeLinkUrl( $url );
                         $this->mOutput->addExternalLink( $pasteurized );
                 }
                 wfProfileOut( __METHOD__ );
@@@ -1725,7 -1721,7 +1736,7 @@@
                         # Register link in the output object.
                         # Replace unnecessary URL escape codes with the referenced character
                         # This prevents spammers from hiding links from the filters
- -                      $pasteurized = self::replaceUnusualEscapes( $url );
+ +                      $pasteurized = self::normalizeLinkUrl( $url );
                         $this->mOutput->addExternalLink( $pasteurized );
                 }
   
@@@ -1774,75 -1770,40 +1785,75 @@@
         }
   
         /**
- -       * Replace unusual URL escape codes with their equivalent characters
+ +       * Replace unusual escape codes in a URL with their equivalent characters
          *
+ +       * @deprecated since 1.24, use normalizeLinkUrl
          * @param string $url
          * @return string
- -       *
- -       * @todo This can merge genuinely required bits in the path or query string,
- -       *       breaking legit URLs. A proper fix would treat the various parts of
- -       *       the URL differently; as a workaround, just use the output for
- -       *       statistical records, not for actual linking/output.
          */
         public static function replaceUnusualEscapes( $url ) {
- -              return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
- -                      array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+ +              wfDeprecated( __METHOD__, '1.24' );
+ +              return self::normalizeLinkUrl( $url );
         }
   
         /**
- -       * Callback function used in replaceUnusualEscapes().
- -       * Replaces unusual URL escape codes with their equivalent character
+ +       * Replace unusual escape codes in a URL with their equivalent characters
          *
- -       * @param array $matches
+ +       * This generally follows the syntax defined in RFC 3986, with special
+ +       * consideration for HTTP query strings.
          *
+ +       * @param string $url
          * @return string
          */
- -      private static function replaceUnusualEscapesCallback( $matches ) {
- -              $char = urldecode( $matches[0] );
- -              $ord = ord( $char );
- -              # Is it an unsafe or HTTP reserved character according to RFC 1738?
- -              if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
- -                      # No, shouldn't be escaped
- -                      return $char;
- -              } else {
- -                      # Yes, leave it escaped
- -                      return $matches[0];
+ +      public static function normalizeLinkUrl( $url ) {
+ +              # First, make sure unsafe characters are encoded
+ +              $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+ +                      function ( $m ) {
+ +                              return rawurlencode( $m[0] );
+ +                      },
+ +                      $url
+ +              );
+ +
+ +              $ret = '';
+ +              $end = strlen( $url );
+ +
+ +              # Fragment part - 'fragment'
+ +              $start = strpos( $url, '#' );
+ +              if ( $start !== false && $start < $end ) {
+ +                      $ret = self::normalizeUrlComponent(
+ +                              substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+ +                      $end = $start;
                 }
+ +
+ +              # Query part - 'query' minus &=+;
+ +              $start = strpos( $url, '?' );
+ +              if ( $start !== false && $start < $end ) {
+ +                      $ret = self::normalizeUrlComponent(
+ +                              substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+ +                      $end = $start;
+ +              }
+ +
+ +              # Scheme and path part - 'pchar'
+ +              # (we assume no userinfo or encoded colons in the host)
+ +              $ret = self::normalizeUrlComponent(
+ +                      substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+ +
+ +              return $ret;
+ +      }
+ +
+ +      private static function normalizeUrlComponent( $component, $unsafe ) {
+ +              $callback = function ( $matches ) use ( $unsafe ) {
+ +                      $char = urldecode( $matches[0] );
+ +                      $ord = ord( $char );
+ +                      if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+ +                              # Unescape it
+ +                              return $char;
+ +                      } else {
+ +                              # Leave it escaped, but use uppercase for a-f
+ +                              return strtoupper( $matches[0] );
+ +                      }
+ +              };
+ +              return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
         }
   
         /**
@@@ -3844,6 -3805,44 +3855,44 @@@
                 return array( $dom, $title );
         }
   
+       /**
+        * Fetch the current revision of a given title. Note that the revision
+        * (and even the title) may not exist in the database, so everything
+        * contributing to the output of the parser should use this method
+        * where possible, rather than getting the revisions themselves. This
+        * method also caches its results, so using it benefits performance.
+        *
+        * @since 1.24
+        * @param Title $title
+        * @return Revision|null Result of the current-revision callback; empty when the title has no current revision
+        */
+       public function fetchCurrentRevisionOfTitle( $title ) {
+               $cacheKey = $title->getPrefixedDBkey();
+               if ( !$this->currentRevisionCache ) {
+                       $this->currentRevisionCache = new MapCacheLRU( 100 );
+               }
+               if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
+                       $this->currentRevisionCache->set( $cacheKey,
+                               // Defaults to Parser::statelessFetchRevision()
+                               call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
+                       );
+               }
+               return $this->currentRevisionCache->get( $cacheKey );
+       }
+ 
+       /**
+        * Wrapper around Revision::newFromTitle to allow passing additional parameters
+        * without passing them on to it.
+        *
+        * @since 1.24
+        * @param Title $title
+        * @param Parser|bool $parser
+        * @return Revision|null Null when the title has no current revision
+        */
+       public static function statelessFetchRevision( $title, $parser = false ) {
+               return Revision::newFromTitle( $title );
+       }
+ 
         /**
          * Fetch the unparsed text of a template and register a reference to it.
          * @param Title $title
@@@ -3909,9 -3908,13 +3958,13 @@@
                                 break;
                         }
                         # Get the revision
-                       $rev = $id
-                               ? Revision::newFromId( $id )
-                               : Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+                       if ( $id ) {
+                               $rev = Revision::newFromId( $id );
+                       } elseif ( $parser ) {
+                               $rev = $parser->fetchCurrentRevisionOfTitle( $title );
+                       } else {
+                               $rev = Revision::newFromTitle( $title );
+                       }
                         $rev_id = $rev ? $rev->getId() : 0;
                         # If there is no current revision, there is no page
                         if ( $id === false && !$rev ) {
diff --combined includes/parser/ParserOptions.php

index 7e4059b,9966a1d..b570fa5
--- 1/includes/parser/ParserOptions.php
--- 2/includes/parser/ParserOptions.php
+++ b/includes/parser/ParserOptions.php
@@@ -116,6 -116,12 +116,12 @@@ class ParserOptions 
          */
         public $mRemoveComments = true;
   
+       /**
+        * Callback for current revision fetching. Used as first argument to call_user_func().
+        */
+       public $mCurrentRevisionCallback =
+               array( 'Parser', 'statelessFetchRevision' );
+ 
         /**
          * Callback for template fetching. Used as first argument to call_user_func().
          */
@@@ -211,13 -217,6 +217,13 @@@
          */
         protected $onAccessCallback = null;
   
+ +      /**
+ +       * If the page being parsed is a redirect, this should hold the redirect
+ +       * target.
+ +       * @var Title|null
+ +       */
+ +      private $redirectTarget = null;
+ +
         public function getInterwikiMagic() {
                 return $this->mInterwikiMagic;
         }
@@@ -289,6 -288,11 +295,11 @@@
                 return $this->mRemoveComments;
         }
   
+       /** @since 1.24 */
+       public function getCurrentRevisionCallback() {
+               return $this->mCurrentRevisionCallback;
+       }
+ 
         public function getTemplateCallback() {
                 return $this->mTemplateCallback;
         }
@@@ -462,6 -466,11 +473,11 @@@
                 return wfSetVar( $this->mRemoveComments, $x );
         }
   
+       /** @since 1.24 */
+       public function setCurrentRevisionCallback( $x ) {
+               return wfSetVar( $this->mCurrentRevisionCallback, $x );
+       }
+ 
         public function setTemplateCallback( $x ) {
                 return wfSetVar( $this->mTemplateCallback, $x );
         }
@@@ -522,30 -531,6 +538,30 @@@
                 return wfSetVar( $this->mIsPrintable, $x );
         }
   
+ +      /**
+ +       * Set the redirect target.
+ +       *
+ +       * Note that setting or changing this does not *make* the page a redirect
+ +       * or change its target, it merely records the information for reference
+ +       * during the parse.
+ +       *
+ +       * @since 1.24
+ +       * @param Title|null $title
+ +       */
+ +      function setRedirectTarget( $title ) {
+ +              $this->redirectTarget = $title;
+ +      }
+ +
+ +      /**
+ +       * Get the previously-set redirect target.
+ +       *
+ +       * @since 1.24
+ +       * @return Title|null
+ +       */
+ +      function getRedirectTarget() {
+ +              return $this->redirectTarget;
+ +      }
+ +
         /**
          * Extra key that should be present in the parser cache key.
          * @param string $key
author	jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
	Thu, 25 Sep 2014 22:52:10 +0000 (22:52 +0000)
committer	Gerrit Code Review <gerrit@wikimedia.org>
	Thu, 25 Sep 2014 22:52:10 +0000 (22:52 +0000)
		1	2
includes/parser/Parser.php	patch \|	diff1 \|	diff2 \|	blob \| history
includes/parser/ParserOptions.php	patch \|	diff1 \|	diff2 \|	blob \| history