Consolidate common Preprocessor caching code
authorOri Livneh <ori@wikimedia.org>
Thu, 8 Oct 2015 20:54:15 +0000 (13:54 -0700)
committerTim Starling <tstarling@wikimedia.org>
Sun, 25 Oct 2015 23:06:48 +0000 (23:06 +0000)
* Consolidate nearly-identical caching code in Preprocessor_DOM and
  Preprocessor_Hash by making Preprocessor an abstract class rather than an
  interface and by implementing Preprocessor::cacheSetTree() and
  Preprocessor::cacheGetTree().
* Cache trees for wikitext blobs that have length equal or greater to
  PreprocessorCacheThreshold. Previously they needed to be greater than
  PreprocessorCacheThreshold, so this changes the requirement by one character.
  I did it because it seems more natural.
* Modernize the code to use singleton service objects rather than globals.

We spend a lot of time in the Preprocessor, so it would be nice for this code
to be well-factored and clear.

Change-Id: Ib71c29f14a28445a505e12c774a24ad964330b95

includes/parser/Preprocessor.php
includes/parser/Preprocessor_DOM.php
includes/parser/Preprocessor_Hash.php

index b32593c..30eab19 100644 (file)
  * @ingroup Parser
  */
 
+use MediaWiki\Logger\LoggerFactory;
+
 /**
  * @ingroup Parser
  */
-interface Preprocessor {
+abstract class Preprocessor {
+
+       const CACHE_VERSION = 1;
+
+       /**
+        * Store a document tree in the cache.
+        *
+        * @param string $text
+        * @param int $flags
+        */
+       protected function cacheSetTree( $text, $flags, $tree ) {
+               $config = RequestContext::getMain()->getConfig();
+
+               $length = strlen( $text );
+               $threshold = $config->get( 'PreprocessorCacheThreshold' );
+               if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
+                       return false;
+               }
+
+               $key = wfMemcKey(
+                       defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX : __CLASS__,
+                       md5( $text ), $flags );
+               $value = sprintf( "%08d", self::CACHE_VERSION ) . $tree;
+
+               $cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
+               $cache->set( $key, $value, 86400 );
+
+               LoggerFactory::getInstance( 'Preprocessor' )
+                       ->info( "Cached preprocessor output (key: $key)" );
+       }
+
        /**
-        * Create a new preprocessor object based on an initialised Parser object
+        * Attempt to load a precomputed document tree for some given wikitext
+        * from the cache.
         *
-        * @param Parser $parser
+        * @param string $text
+        * @param int $flags
+        * @return PPNode_Hash_Tree|bool
         */
-       public function __construct( $parser );
+       protected function cacheGetTree( $text, $flags ) {
+               $config = RequestContext::getMain()->getConfig();
+
+               $length = strlen( $text );
+               $threshold = $config->get( 'PreprocessorCacheThreshold' );
+               if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
+                       return false;
+               }
+
+               $cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
+
+               $key = wfMemcKey(
+                       defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX : __CLASS__,
+                       md5( $text ), $flags );
+
+               $value = $cache->get( $key );
+               if ( !$value ) {
+                       return false;
+               }
+
+               $version = intval( substr( $value, 0, 8 ) );
+               if ( $version !== self::CACHE_VERSION ) {
+                       return false;
+               }
+
+               LoggerFactory::getInstance( 'Preprocessor' )
+                       ->info( "Loaded preprocessor output from cache (key: $key)" );
+
+               return substr( $value, 8 );
+       }
 
        /**
         * Create a new top-level frame for expansion of a page
         *
         * @return PPFrame
         */
-       public function newFrame();
+       abstract public function newFrame();
 
        /**
         * Create a new custom frame for programmatic use of parameter replacement
@@ -47,7 +111,7 @@ interface Preprocessor {
         *
         * @return PPFrame
         */
-       public function newCustomFrame( $args );
+       abstract public function newCustomFrame( $args );
 
        /**
         * Create a new custom node for programmatic use of parameter replacement
@@ -55,7 +119,7 @@ interface Preprocessor {
         *
         * @param array $values
         */
-       public function newPartNodeArray( $values );
+       abstract public function newPartNodeArray( $values );
 
        /**
         * Preprocess text to a PPNode
@@ -65,7 +129,7 @@ interface Preprocessor {
         *
         * @return PPNode
         */
-       public function preprocessToObj( $text, $flags = 0 );
+       abstract public function preprocessToObj( $text, $flags = 0 );
 }
 
 /**
index c329689..b71b9d2 100644 (file)
@@ -25,7 +25,7 @@
  * @ingroup Parser
  */
 // @codingStandardsIgnoreStart Squiz.Classes.ValidClassName.NotCamelCaps
-class Preprocessor_DOM implements Preprocessor {
+class Preprocessor_DOM extends Preprocessor {
        // @codingStandardsIgnoreEnd
 
        /**
@@ -35,7 +35,7 @@ class Preprocessor_DOM implements Preprocessor {
 
        public $memoryLimit;
 
-       const CACHE_VERSION = 1;
+       const CACHE_PREFIX = 'preprocess-xml';
 
        public function __construct( $parser ) {
                $this->parser = $parser;
@@ -148,30 +148,11 @@ class Preprocessor_DOM implements Preprocessor {
         * @return PPNode_DOM
         */
        public function preprocessToObj( $text, $flags = 0 ) {
-               global $wgMemc, $wgPreprocessorCacheThreshold;
-
-               $xml = false;
-               $cacheable = ( $wgPreprocessorCacheThreshold !== false
-                       && strlen( $text ) > $wgPreprocessorCacheThreshold );
-               if ( $cacheable ) {
-                       $cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ), $flags );
-                       $cacheValue = $wgMemc->get( $cacheKey );
-                       if ( $cacheValue ) {
-                               $version = substr( $cacheValue, 0, 8 );
-                               if ( intval( $version ) == self::CACHE_VERSION ) {
-                                       $xml = substr( $cacheValue, 8 );
-                                       // From the cache
-                                       wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
-                               }
-                       }
-                       if ( $xml === false ) {
-                               $xml = $this->preprocessToXml( $text, $flags );
-                               $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
-                               $wgMemc->set( $cacheKey, $cacheValue, 86400 );
-                               wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
-                       }
-               } else {
+
+               $xml = $this->cacheGetTree( $text, $flags );
+               if ( $xml === false ) {
                        $xml = $this->preprocessToXml( $text, $flags );
+                       $this->cacheSetTree( $text, $flags, $xml );
                }
 
                // Fail if the number of elements exceeds acceptable limits
@@ -179,8 +160,7 @@ class Preprocessor_DOM implements Preprocessor {
                $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' );
                $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
                if ( $this->parser->mGeneratedPPNodeCount > $max ) {
-                       if ( $cacheable ) {
-                       }
+                       // if ( $cacheable ) { ... }
                        throw new MWException( __METHOD__ . ': generated node count limit exceeded' );
                }
 
@@ -199,8 +179,7 @@ class Preprocessor_DOM implements Preprocessor {
                        $obj = new PPNode_DOM( $dom->documentElement );
                }
 
-               if ( $cacheable ) {
-               }
+               // if ( $cacheable ) { ... }
 
                if ( !$result ) {
                        throw new MWException( __METHOD__ . ' generated invalid XML' );
index 49fa8a1..54824da 100644 (file)
@@ -28,7 +28,7 @@
  * @ingroup Parser
  */
 // @codingStandardsIgnoreStart Squiz.Classes.ValidClassName.NotCamelCaps
-class Preprocessor_Hash implements Preprocessor {
+class Preprocessor_Hash extends Preprocessor {
        // @codingStandardsIgnoreEnd
 
        /**
@@ -36,7 +36,7 @@ class Preprocessor_Hash implements Preprocessor {
         */
        public $parser;
 
-       const CACHE_VERSION = 1;
+       const CACHE_PREFIX = 'preprocess-hash';
 
        public function __construct( $parser ) {
                $this->parser = $parser;
@@ -88,6 +88,7 @@ class Preprocessor_Hash implements Preprocessor {
                return $node;
        }
 
+
        /**
         * Preprocess some wikitext and return the document tree.
         * This is the ghost of Parser::replace_variables().
@@ -112,25 +113,9 @@ class Preprocessor_Hash implements Preprocessor {
         * @return PPNode_Hash_Tree
         */
        public function preprocessToObj( $text, $flags = 0 ) {
-               // Check cache.
-               global $wgMemc, $wgPreprocessorCacheThreshold;
-
-               $cacheable = $wgPreprocessorCacheThreshold !== false
-                       && strlen( $text ) > $wgPreprocessorCacheThreshold;
-
-               if ( $cacheable ) {
-                       $cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ), $flags );
-                       $cacheValue = $wgMemc->get( $cacheKey );
-                       if ( $cacheValue ) {
-                               $version = substr( $cacheValue, 0, 8 );
-                               if ( intval( $version ) == self::CACHE_VERSION ) {
-                                       $hash = unserialize( substr( $cacheValue, 8 ) );
-                                       // From the cache
-                                       wfDebugLog( "Preprocessor",
-                                               "Loaded preprocessor hash from memcached (key $cacheKey)" );
-                                       return $hash;
-                               }
-                       }
+               $tree = $this->cacheGetTree( $text, $flags );
+               if ( $tree !== false ) {
+                       return unserialize( $tree );
                }
 
                $rules = array(
@@ -629,13 +614,11 @@ class Preprocessor_Hash implements Preprocessor {
                                                                $lastNode = $node;
                                                        }
                                                        if ( !$node ) {
-                                                               if ( $cacheable ) {
-                                                               }
+                                                               // if ( $cacheable ) { ... }
                                                                throw new MWException( __METHOD__ . ': eqpos not found' );
                                                        }
                                                        if ( $node->name !== 'equals' ) {
-                                                               if ( $cacheable ) {
-                                                               }
+                                                               // if ( $cacheable ) { ... }
                                                                throw new MWException( __METHOD__ . ': eqpos is not equals' );
                                                        }
                                                        $equalsNode = $node;
@@ -732,15 +715,7 @@ class Preprocessor_Hash implements Preprocessor {
                $rootNode->lastChild = $stack->rootAccum->lastNode;
 
                // Cache
-               if ( $cacheable ) {
-                       $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );
-
-                       // T111289: Cache values should not exceed 1 Mb, but they do.
-                       if ( strlen( $cacheValue ) <= 1e6 ) {
-                               $wgMemc->set( $cacheKey, $cacheValue, 86400 );
-                               wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
-                       }
-               }
+               $this->cacheSetTree( $text, $flags, serialize( $rootNode ) );
 
                return $rootNode;
        }