From 1559be9f7aefb57dafbfd5dd5a363ff225b05fa1 Mon Sep 17 00:00:00 2001 From: Ori Livneh Date: Thu, 8 Oct 2015 13:54:15 -0700 Subject: [PATCH] Consolidate common Preprocessor caching code * Consolidate nearly-identical caching code in Preprocessor_DOM and Preprocessor_Hash by making Preprocessor an abstract class rather than an interface and by implementing Preprocessor::cacheSetTree() and Preprocessor::cacheGetTree(). * Cache trees for wikitext blobs that have length equal or greater to PreprocessorCacheThreshold. Previously they needed to be greater than PreprocessorCacheThreshold, so this changes the requirement by one character. I did it because it seems more natural. * Modernize the code to use singleton service objects rather than globals. We spend a lot of time in the Preprocessor, so it would be nice for this code to be well-factored and clear. Change-Id: Ib71c29f14a28445a505e12c774a24ad964330b95 --- includes/parser/Preprocessor.php | 80 ++++++++++++++++++++++++--- includes/parser/Preprocessor_DOM.php | 37 +++---------- includes/parser/Preprocessor_Hash.php | 43 +++----------- 3 files changed, 89 insertions(+), 71 deletions(-) diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php index b32593c9f7..30eab19a57 100644 --- a/includes/parser/Preprocessor.php +++ b/includes/parser/Preprocessor.php @@ -21,23 +21,87 @@ * @ingroup Parser */ +use MediaWiki\Logger\LoggerFactory; + /** * @ingroup Parser */ -interface Preprocessor { +abstract class Preprocessor { + + const CACHE_VERSION = 1; + + /** + * Store a document tree in the cache. + * + * @param string $text + * @param int $flags + */ + protected function cacheSetTree( $text, $flags, $tree ) { + $config = RequestContext::getMain()->getConfig(); + + $length = strlen( $text ); + $threshold = $config->get( 'PreprocessorCacheThreshold' ); + if ( $threshold === false || $length < $threshold || $length > 1e6 ) { + return false; + } + + $key = wfMemcKey( + defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX : __CLASS__, + md5( $text ), $flags ); + $value = sprintf( "%08d", self::CACHE_VERSION ) . $tree; + + $cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) ); + $cache->set( $key, $value, 86400 ); + + LoggerFactory::getInstance( 'Preprocessor' ) + ->info( "Cached preprocessor output (key: $key)" ); + } + /** - * Create a new preprocessor object based on an initialised Parser object + * Attempt to load a precomputed document tree for some given wikitext + * from the cache. * - * @param Parser $parser + * @param string $text + * @param int $flags + * @return PPNode_Hash_Tree|bool */ - public function __construct( $parser ); + protected function cacheGetTree( $text, $flags ) { + $config = RequestContext::getMain()->getConfig(); + + $length = strlen( $text ); + $threshold = $config->get( 'PreprocessorCacheThreshold' ); + if ( $threshold === false || $length < $threshold || $length > 1e6 ) { + return false; + } + + $cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) ); + + $key = wfMemcKey( + defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX : __CLASS__, + md5( $text ), $flags ); + + $value = $cache->get( $key ); + if ( !$value ) { + return false; + } + + $version = intval( substr( $value, 0, 8 ) ); + if ( $version !== self::CACHE_VERSION ) { + return false; + } + + LoggerFactory::getInstance( 'Preprocessor' ) + ->info( "Loaded preprocessor output from cache (key: $key)" ); + + return substr( $value, 8 ); + } /** * Create a new top-level frame for expansion of a page * * @return PPFrame */ - public function newFrame(); + abstract public function newFrame(); /** * Create a new custom frame for programmatic use of parameter replacement @@ -47,7 +111,7 @@ interface Preprocessor { * * @return PPFrame */ - public function newCustomFrame( $args ); + abstract public function newCustomFrame( $args ); /** * Create a new custom node for programmatic use of parameter replacement @@ -55,7 +119,7 @@ interface Preprocessor { * * @param array $values */ - public function newPartNodeArray( $values ); + abstract public function newPartNodeArray( $values ); /** * Preprocess text to a PPNode @@ -65,7 +129,7 @@ interface Preprocessor { * * @return PPNode */ - public function preprocessToObj( $text, $flags = 0 ); + abstract public function preprocessToObj( $text, $flags = 0 ); } /** diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index c3296894c2..b71b9d242f 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -25,7 +25,7 @@ * @ingroup Parser */ // @codingStandardsIgnoreStart Squiz.Classes.ValidClassName.NotCamelCaps -class Preprocessor_DOM implements Preprocessor { +class Preprocessor_DOM extends Preprocessor { // @codingStandardsIgnoreEnd /** @@ -35,7 +35,7 @@ class Preprocessor_DOM implements Preprocessor { public $memoryLimit; - const CACHE_VERSION = 1; + const CACHE_PREFIX = 'preprocess-xml'; public function __construct( $parser ) { $this->parser = $parser; @@ -148,30 +148,11 @@ class Preprocessor_DOM implements Preprocessor { * @return PPNode_DOM */ public function preprocessToObj( $text, $flags = 0 ) { - global $wgMemc, $wgPreprocessorCacheThreshold; - - $xml = false; - $cacheable = ( $wgPreprocessorCacheThreshold !== false - && strlen( $text ) > $wgPreprocessorCacheThreshold ); - if ( $cacheable ) { - $cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ), $flags ); - $cacheValue = $wgMemc->get( $cacheKey ); - if ( $cacheValue ) { - $version = substr( $cacheValue, 0, 8 ); - if ( intval( $version ) == self::CACHE_VERSION ) { - $xml = substr( $cacheValue, 8 ); - // From the cache - wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" ); - } - } - if ( $xml === false ) { - $xml = $this->preprocessToXml( $text, $flags ); - $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml; - $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" ); - } - } else { + + $xml = $this->cacheGetTree( $text, $flags ); + if ( $xml === false ) { $xml = $this->preprocessToXml( $text, $flags ); + $this->cacheSetTree( $text, $flags, $xml ); } // Fail if the number of elements exceeds acceptable limits @@ -179,8 +160,7 @@ class Preprocessor_DOM implements Preprocessor { $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' ); $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount(); if ( $this->parser->mGeneratedPPNodeCount > $max ) { - if ( $cacheable ) { - } + // if ( $cacheable ) { ... } throw new MWException( __METHOD__ . ': generated node count limit exceeded' ); } @@ -199,8 +179,7 @@ class Preprocessor_DOM implements Preprocessor { $obj = new PPNode_DOM( $dom->documentElement ); } - if ( $cacheable ) { - } + // if ( $cacheable ) { ... } if ( !$result ) { throw new MWException( __METHOD__ . ' generated invalid XML' ); diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 49fa8a1ebf..54824da69a 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -28,7 +28,7 @@ * @ingroup Parser */ // @codingStandardsIgnoreStart Squiz.Classes.ValidClassName.NotCamelCaps -class Preprocessor_Hash implements Preprocessor { +class Preprocessor_Hash extends Preprocessor { // @codingStandardsIgnoreEnd /** @@ -36,7 +36,7 @@ class Preprocessor_Hash implements Preprocessor { */ public $parser; - const CACHE_VERSION = 1; + const CACHE_PREFIX = 'preprocess-hash'; public function __construct( $parser ) { $this->parser = $parser; @@ -88,6 +88,7 @@ class Preprocessor_Hash implements Preprocessor { return $node; } + /** * Preprocess some wikitext and return the document tree. * This is the ghost of Parser::replace_variables(). @@ -112,25 +113,9 @@ class Preprocessor_Hash implements Preprocessor { * @return PPNode_Hash_Tree */ public function preprocessToObj( $text, $flags = 0 ) { - // Check cache. - global $wgMemc, $wgPreprocessorCacheThreshold; - - $cacheable = $wgPreprocessorCacheThreshold !== false - && strlen( $text ) > $wgPreprocessorCacheThreshold; - - if ( $cacheable ) { - $cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ), $flags ); - $cacheValue = $wgMemc->get( $cacheKey ); - if ( $cacheValue ) { - $version = substr( $cacheValue, 0, 8 ); - if ( intval( $version ) == self::CACHE_VERSION ) { - $hash = unserialize( substr( $cacheValue, 8 ) ); - // From the cache - wfDebugLog( "Preprocessor", - "Loaded preprocessor hash from memcached (key $cacheKey)" ); - return $hash; - } - } + $tree = $this->cacheGetTree( $text, $flags ); + if ( $tree !== false ) { + return unserialize( $tree ); } $rules = array( @@ -629,13 +614,11 @@ class Preprocessor_Hash implements Preprocessor { $lastNode = $node; } if ( !$node ) { - if ( $cacheable ) { - } + // if ( $cacheable ) { ... } throw new MWException( __METHOD__ . ': eqpos not found' ); } if ( $node->name !== 'equals' ) { - if ( $cacheable ) { - } + // if ( $cacheable ) { ... } throw new MWException( __METHOD__ . ': eqpos is not equals' ); } $equalsNode = $node; @@ -732,15 +715,7 @@ class Preprocessor_Hash implements Preprocessor { $rootNode->lastChild = $stack->rootAccum->lastNode; // Cache - if ( $cacheable ) { - $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode ); - - // T111289: Cache values should not exceed 1 Mb, but they do. - if ( strlen( $cacheValue ) <= 1e6 ) { - $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); - } - } + $this->cacheSetTree( $text, $flags, serialize( $rootNode ) ); return $rootNode; } -- 2.20.1