From 6014f0db91a823a5c02b545c0d589408a307e77f Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Thu, 29 Sep 2011 08:18:20 +0000 Subject: [PATCH] HTMLFileCache refactoring: * Rewrote class and split into three classes: a base class, and html cache and a more generic cache to be used later. * The new classes now use RequestContext. * Renamed fetchPageText() -> fetchText(). * Split out new saveText() function from saveToFileCache(). * Various other cleanups and fixes. Also fixed backwards setting of $wgDisableCounters in rebuildFileCache.php. --- includes/Article.php | 11 +- includes/AutoLoader.php | 2 + includes/HistoryPage.php | 7 +- includes/Wiki.php | 12 +- includes/cache/FileCacheBase.php | 173 ++++++++++++++++++++ includes/cache/HTMLFileCache.php | 252 +++++++++++------------------ includes/cache/ObjectFileCache.php | 41 +++++ includes/db/DatabaseError.php | 6 +- maintenance/rebuildFileCache.php | 6 +- 9 files changed, 327 insertions(+), 183 deletions(-) create mode 100644 includes/cache/FileCacheBase.php create mode 100644 includes/cache/ObjectFileCache.php diff --git a/includes/Article.php b/includes/Article.php index 2c736a2d1c..d738f5afbc 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -1715,10 +1715,10 @@ class Article extends Page { $called = true; if ( $this->isFileCacheable() ) { - $cache = new HTMLFileCache( $this->getTitle() ); - if ( $cache->isFileCacheGood( $this->mPage->getTouched() ) ) { + $cache = HTMLFileCache::newFromTitle( $this->getTitle(), 'view' ); + if ( $cache->isCacheGood( $this->mPage->getTouched() ) ) { wfDebug( "Article::tryFileCache(): about to load file\n" ); - $cache->loadFromFileCache(); + $cache->loadFromFileCache( $this->getContext() ); return true; } else { wfDebug( "Article::tryFileCache(): starting buffer\n" ); @@ -1738,8 +1738,9 @@ class Article extends Page { public function isFileCacheable() { $cacheable = false; - if ( HTMLFileCache::useFileCache() ) { - $cacheable = $this->mPage->getID() && !$this->mRedirectedFrom && !$this->getTitle()->isRedirect(); + if ( HTMLFileCache::useFileCache( $this->getContext() ) ) { + $cacheable = $this->mPage->getID() + && !$this->mRedirectedFrom && !$this->getTitle()->isRedirect(); // Extension may have reason to disable file caching on some pages. if ( $cacheable ) { $cacheable = wfRunHooks( 'IsFileCacheable', array( &$this ) ); diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 6e8bd44792..e596739b64 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -363,6 +363,7 @@ $wgAutoloadLocalClasses = array( 'CacheDependency' => 'includes/cache/CacheDependency.php', 'ConstantDependency' => 'includes/cache/CacheDependency.php', 'DependencyWrapper' => 'includes/cache/CacheDependency.php', + 'FileCacheBase' => 'includes/cache/FileCacheBase.php', 'FileDependency' => 'includes/cache/CacheDependency.php', 'GlobalDependency' => 'includes/cache/CacheDependency.php', 'HTMLCacheUpdate' => 'includes/cache/HTMLCacheUpdate.php', @@ -371,6 +372,7 @@ $wgAutoloadLocalClasses = array( 'LinkBatch' => 'includes/cache/LinkBatch.php', 'LinkCache' => 'includes/cache/LinkCache.php', 'MessageCache' => 'includes/cache/MessageCache.php', + 'ObjectFileCache' => 'includes/cache/ObjectFileCache.php', 'SquidUpdate' => 'includes/cache/SquidUpdate.php', 'TitleDependency' => 'includes/cache/CacheDependency.php', 'TitleListDependency' => 'includes/cache/CacheDependency.php', diff --git a/includes/HistoryPage.php b/includes/HistoryPage.php index c40fec48a6..cef9b6642a 100644 --- a/includes/HistoryPage.php +++ b/includes/HistoryPage.php @@ -75,10 +75,11 @@ class HistoryPage { wfProfileIn( __METHOD__ ); + $context = RequestContext::getMain(); # Fill in the file cache if not set already - if ( $wgUseFileCache && HTMLFileCache::useFileCache() ) { - $cache = new HTMLFileCache( $this->title, 'history' ); - if ( !$cache->isFileCacheGood( /* Assume up to date */ ) ) { + if ( $wgUseFileCache && HTMLFileCache::useFileCache( $context ) ) { + $cache = HTMLFileCache::newFromTitle( $this->title, 'history' ); + if ( !$cache->isCacheGood( /* Assume up to date */ ) ) { ob_start( array( &$cache, 'saveToFileCache' ) ); } } diff --git a/includes/Wiki.php b/includes/Wiki.php index f5f6d8426d..c17b9cbd87 100644 --- a/includes/Wiki.php +++ b/includes/Wiki.php @@ -596,16 +596,14 @@ class MediaWiki { if ( $wgUseFileCache && $wgTitle->getNamespace() >= 0 ) { wfProfileIn( 'main-try-filecache' ); - // Raw pages should handle cache control on their own, - // even when using file cache. This reduces hits from clients. - if ( HTMLFileCache::useFileCache() ) { + if ( HTMLFileCache::useFileCache( $this->context ) ) { /* Try low-level file cache hit */ - $cache = new HTMLFileCache( $wgTitle, $action ); - if ( $cache->isFileCacheGood( /* Assume up to date */ ) ) { + $cache = HTMLFileCache::newFromTitle( $wgTitle, $action ); + if ( $cache->isCacheGood( /* Assume up to date */ ) ) { /* Check incoming headers to see if client has this cached */ - $timestamp = $cache->fileCacheTime(); + $timestamp = $cache->cacheTimestamp(); if ( !$this->context->getOutput()->checkLastModified( $timestamp ) ) { - $cache->loadFromFileCache(); + $cache->loadFromFileCache( $this->context ); } # Do any stats increment/watchlist stuff $article = WikiPage::factory( $wgTitle ); diff --git a/includes/cache/FileCacheBase.php b/includes/cache/FileCacheBase.php new file mode 100644 index 0000000000..4c5d41e381 --- /dev/null +++ b/includes/cache/FileCacheBase.php @@ -0,0 +1,173 @@ +mUseGzip = (bool)$wgUseGzip; + $this->mExt = 'cache'; + } + + /** + * Get the base cache directory (not speficic to this file) + * @return string + */ + abstract protected function cacheDirectory(); + + /** + * Get the path to the cache file + * @return string + */ + protected function cachePath() { + if ( $this->mFilePath !== null ) { + return $this->mFilePath; + } + + $dir = $this->cacheDirectory(); + $subDirs = $this->mType . '/' . $this->hashSubdirectory(); // includes '/' + # Avoid extension confusion + $key = str_replace( '.', '%2E', urlencode( $this->mKey ) ); + # Build the full file path + $this->mFilePath = "{$dir}/{$subDirs}{$key}.{$this->mExt}"; + if ( $this->useGzip() ) { + $this->mFilePath .= '.gz'; + } + + return $this->mFilePath; + } + + /** + * Check if the cache file exists + * @return bool + */ + public function isCached() { + return file_exists( $this->cachePath() ); + } + + /** + * Get the last-modified timestamp of the cache file + * @return string|false TS_MW timestamp + */ + public function cacheTimestamp() { + $timestamp = filemtime( $this->cachePath() ); + return ( $timestamp !== false ) + ? wfTimestamp( TS_MW, $timestamp ) + : false; + } + + /** + * Check if up to date cache file exists + * @param $timestamp string MW_TS timestamp + * + * @return bool + */ + public function isCacheGood( $timestamp = '' ) { + global $wgCacheEpoch; + + if ( !$this->isCached() ) { + return false; + } + + $cachetime = $this->cacheTimestamp(); + $good = ( $timestamp <= $cachetime && $wgCacheEpoch <= $cachetime ); + wfDebug( __METHOD__ . ": cachetime $cachetime, touched '{$timestamp}' epoch {$wgCacheEpoch}, good $good\n"); + + return $good; + } + + /** + * Check if the cache is gzipped + * @return bool + */ + protected function useGzip() { + return $this->mUseGzip; + } + + /** + * Get the uncompressed text from the cache + * @return string + */ + public function fetchText() { + if ( $this->useGzip() ) { + /* Why is there no gzfile_get_contents() or gzdecode()? */ + return implode( '', gzfile( $this->cachePath() ) ); + } else { + return file_get_contents( $this->cachePath() ); + } + } + + /** + * Save and compress text to the cache + * @return string compressed text + */ + public function saveText( $text ) { + global $wgUseFileCache; + if ( !$wgUseFileCache ) { + return false; + } + + if ( $this->useGzip() ) { + $text = gzencode( $text ); + } + + $this->checkCacheDirs(); // build parent dir + if ( !file_put_contents( $this->cachePath(), $text ) ) { + return false; + } + + return $text; + } + + /* + * Clear the cache for this page + * @return void + */ + public function clearCache() { + wfSuppressWarnings(); + unlink( $this->cachePath() ); + wfRestoreWarnings(); + } + + /* + * Create parent directors of $this->cachePath() + * @TODO: why call wfMkdirParents() twice? + * @return void + */ + protected function checkCacheDirs() { + $filename = $this->cachePath(); + $mydir2 = substr( $filename, 0, strrpos( $filename, '/') ); # subdirectory level 2 + $mydir1 = substr( $mydir2, 0, strrpos( $mydir2, '/') ); # subdirectory level 1 + + wfMkdirParents( $mydir1, null, __METHOD__ ); + wfMkdirParents( $mydir2, null, __METHOD__ ); + } + + /* + * Return relative multi-level hash subdirectory with the trailing + * slash or the empty string if $wgFileCacheDepth is off + * @return string + */ + protected function hashSubdirectory() { + global $wgFileCacheDepth; + + $subdir = ''; + if ( $wgFileCacheDepth > 0 ) { + $hash = md5( $this->mKey ); + for ( $i = 1; $i <= $wgFileCacheDepth; $i++ ) { + $subdir .= substr( $hash, 0, $i ) . '/'; + } + } + + return $subdir; + } +} diff --git a/includes/cache/HTMLFileCache.php b/includes/cache/HTMLFileCache.php index 59a8ea7719..a60d5d27d3 100644 --- a/includes/cache/HTMLFileCache.php +++ b/includes/cache/HTMLFileCache.php @@ -4,167 +4,96 @@ * @file * @ingroup Cache */ +class HTMLFileCache extends FileCacheBase { -/** - * Handles talking to the file cache, putting stuff in and taking it back out. - * Mostly called from Article.php for the emergency abort/fallback to cache. - * - * Global options that affect this module: - * - $wgCachePages - * - $wgCacheEpoch - * - $wgUseFileCache - * - $wgCacheDirectory - * - $wgFileCacheDirectory - * - $wgUseGzip - * - * @ingroup Cache - */ -class HTMLFileCache { + public static function newFromTitle( Title $title, $action ) { + $cache = new self(); - /** - * @var Title - */ - var $mTitle; - var $mFileCache, $mType; + $allowedTypes = self::cacheablePageActions(); + if ( !in_array( $action, $allowedTypes ) ) { + throw new MWException( "Invalid filecache type given." ); + } + $cache->mKey = $title->getPrefixedDBkey(); + $cache->mType = (string)$action; + $cache->mExt = 'html'; - public function __construct( $title, $type = 'view' ) { - $this->mTitle = $title; - $this->mType = in_array( $type, self::cacheableActions() ) ? $type : false; - $this->fileCacheName(); // init name + return $cache; } - protected static function cacheableActions() { + /* + * Cacheable actions + * @return array + */ + protected static function cacheablePageActions() { return array( 'view', 'history' ); } - public function fileCacheName() { - if( !$this->mFileCache ) { - global $wgCacheDirectory, $wgFileCacheDirectory, $wgFileCacheDepth; - - if ( $wgFileCacheDirectory ) { - $dir = $wgFileCacheDirectory; - } elseif ( $wgCacheDirectory ) { - $dir = "$wgCacheDirectory/html"; - } else { - throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' ); - } - - # Store other views of aspects of pages elsewhere - $subdir = ($this->mType === 'view') ? '' : "{$this->mType}/"; - - $key = $this->mTitle->getPrefixedDbkey(); - if ( $wgFileCacheDepth > 0 ) { - $hash = md5( $key ); - for ( $i = 1; $i <= $wgFileCacheDepth; $i++ ) { - $subdir .= substr( $hash, 0, $i ) . '/'; - } - } - # Avoid extension confusion - $key = str_replace( '.', '%2E', urlencode( $key ) ); - $this->mFileCache = "{$dir}/{$subdir}{$key}.html"; - - if( $this->useGzip() ) { - $this->mFileCache .= '.gz'; - } - - wfDebug( __METHOD__ . ": {$this->mFileCache}\n" ); - } - return $this->mFileCache; - } - - public function isFileCached() { - if( $this->mType === false ) { - return false; + /** + * Get the base file cache directory + * @return string + */ + protected function cacheDirectory() { + global $wgCacheDirectory, $wgFileCacheDirectory, $wgFileCacheDepth; + if ( $wgFileCacheDirectory ) { + $dir = $wgFileCacheDirectory; + } elseif ( $wgCacheDirectory ) { + $dir = "$wgCacheDirectory/object"; + } else { + throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' ); } - return file_exists( $this->fileCacheName() ); - } - - public function fileCacheTime() { - return wfTimestamp( TS_MW, filemtime( $this->fileCacheName() ) ); + return $dir; } /** * Check if pages can be cached for this request/user + * @param $context RequestContext * @return bool */ - public static function useFileCache() { - global $wgUser, $wgUseFileCache, $wgShowIPinHeader, $wgRequest, $wgLang, $wgContLang; - if( !$wgUseFileCache ) { + public static function useFileCache( RequestContext $context ) { + global $wgUseFileCache, $wgShowIPinHeader, $wgContLang; + if ( !$wgUseFileCache ) { return false; } // Get all query values - $queryVals = $wgRequest->getValues(); - foreach( $queryVals as $query => $val ) { - if( $query == 'title' || $query == 'curid' ) { + $queryVals = $context->getRequest()->getValues(); + foreach ( $queryVals as $query => $val ) { + if ( $query == 'title' || $query == 'curid' ) { continue; // note: curid sets title // Normal page view in query form can have action=view. // Raw hits for pages also stored, like .css pages for example. - } elseif( $query == 'action' && in_array( $val, self::cacheableActions() ) ) { + } elseif ( $query == 'action' && in_array( $val, self::cacheablePageActions() ) ) { continue; // Below are header setting params - } elseif( $query == 'maxage' || $query == 'smaxage' ) { + } elseif ( $query == 'maxage' || $query == 'smaxage' ) { continue; } else { return false; } } + $user = $context->getUser(); // Check for non-standard user language; this covers uselang, // and extensions for auto-detecting user language. - $ulang = $wgLang->getCode(); + $ulang = $context->getLang()->getCode(); $clang = $wgContLang->getCode(); // Check that there are no other sources of variation - return !$wgShowIPinHeader && !$wgUser->getId() && !$wgUser->getNewtalk() && $ulang == $clang; + return !$wgShowIPinHeader && !$user->getId() && !$user->getNewtalk() && $ulang == $clang; } /** - * Check if up to date cache file exists - * @param $timestamp string - * - * @return bool + * Read from cache to context output + * @param $context RequestContext + * @return void */ - public function isFileCacheGood( $timestamp = '' ) { - global $wgCacheEpoch; - - if( !$this->isFileCached() ) { - return false; - } - - $cachetime = $this->fileCacheTime(); - $good = $timestamp <= $cachetime && $wgCacheEpoch <= $cachetime; - - wfDebug( __METHOD__ . ": cachetime $cachetime, touched '{$timestamp}' epoch {$wgCacheEpoch}, good $good\n"); - return $good; - } - - public function useGzip() { - global $wgUseGzip; - return $wgUseGzip; - } - - /* In handy string packages */ - public function fetchRawText() { - return file_get_contents( $this->fileCacheName() ); - } + public function loadFromFileCache( RequestContext $context ) { + global $wgMimeType, $wgLanguageCode; - public function fetchPageText() { - if( $this->useGzip() ) { - /* Why is there no gzfile_get_contents() or gzdecode()? */ - return implode( '', gzfile( $this->fileCacheName() ) ); - } else { - return $this->fetchRawText(); - } - } - - /* Working directory to/from output */ - public function loadFromFileCache() { - global $wgOut, $wgMimeType, $wgLanguageCode; wfDebug( __METHOD__ . "()\n"); - $filename = $this->fileCacheName(); - $wgOut->sendCacheControl(); + $filename = $this->cachePath(); + $context->getOutput()->sendCacheControl(); header( "Content-Type: $wgMimeType; charset=UTF-8" ); header( "Content-Language: $wgLanguageCode" ); - if( $this->useGzip() ) { - if( wfClientAcceptsGzip() ) { + if ( $this->useGzip() ) { + if ( wfClientAcceptsGzip() ) { header( 'Content-Encoding: gzip' ); } else { /* Send uncompressed */ @@ -173,71 +102,70 @@ class HTMLFileCache { } } readfile( $filename ); - $wgOut->disable(); // tell $wgOut that output is taken care of - } - - protected function checkCacheDirs() { - $filename = $this->fileCacheName(); - $mydir2 = substr($filename,0,strrpos($filename,'/')); # subdirectory level 2 - $mydir1 = substr($mydir2,0,strrpos($mydir2,'/')); # subdirectory level 1 - - wfMkdirParents( $mydir1, null, __METHOD__ ); - wfMkdirParents( $mydir2, null, __METHOD__ ); + $context->getOutput()->disable(); // tell $wgOut that output is taken care of } + /** + * Save this cache object with the given text. + * Use this as an ob_start() handler. + * @param $text string + * @return bool Whether $wgUseFileCache is enabled + */ public function saveToFileCache( $text ) { global $wgUseFileCache; - if( !$wgUseFileCache || strlen( $text ) < 512 ) { + + if ( !$wgUseFileCache || strlen( $text ) < 512 ) { // Disabled or empty/broken output (OOM and PHP errors) return $text; } wfDebug( __METHOD__ . "()\n", false); - $this->checkCacheDirs(); + $now = wfTimestampNow(); + if ( $this->useGzip() ) { + $text = str_replace( + '', '\n", $text ); + } else { + $text = str_replace( + '', '\n", $text ); + } - $f = fopen( $this->fileCacheName(), 'w' ); - if($f) { - $now = wfTimestampNow(); - if( $this->useGzip() ) { - $rawtext = str_replace( '', - '\n", - $text ); - $text = gzencode( $rawtext ); - } else { - $text = str_replace( '', - '\n", - $text ); - } - fwrite( $f, $text ); - fclose( $f ); - if( $this->useGzip() ) { - if( wfClientAcceptsGzip() ) { - header( 'Content-Encoding: gzip' ); - return $text; - } else { - return $rawtext; - } + // Store text to FS... + $compressed = $this->saveText( $text ); + if ( $compressed === false ) { + return $text; // error + } + + // gzip output to buffer as needed and set headers... + if ( $this->useGzip() ) { + // @TODO: ugly wfClientAcceptsGzip() function - use context! + if ( wfClientAcceptsGzip() ) { + header( 'Content-Encoding: gzip' ); + return $compressed; } else { return $text; } + } else { + return $text; } - return $text; } - public static function clearFileCache( $title ) { + /** + * Clear the file caches for a page for all actions + * @param $title Title + * @return bool Whether $wgUseFileCache is enabled + */ + public static function clearFileCache( Title $title ) { global $wgUseFileCache; if ( !$wgUseFileCache ) { return false; } - wfSuppressWarnings(); - foreach( self::cacheableActions() as $type ) { - $fc = new self( $title, $type ); - unlink( $fc->fileCacheName() ); + foreach ( self::cacheablePageActions() as $type ) { + $fc = self::newFromTitle( $title, $type ); + $fc->clearCache(); } - wfRestoreWarnings(); return true; } diff --git a/includes/cache/ObjectFileCache.php b/includes/cache/ObjectFileCache.php new file mode 100644 index 0000000000..d4f98e22d1 --- /dev/null +++ b/includes/cache/ObjectFileCache.php @@ -0,0 +1,41 @@ +mKey = (string)$key; + $cache->mType = (string)$type; + $cache->mExt = $allowedTypes[$cache->mType]; + + return $cache; + } + + /* + * Get the type => extension mapping + * @return array + */ + protected static function cacheableTypes() { + return array( 'resources-js' => 'js', 'resources-css' => 'css' ); + } + + /** + * Get the base file cache directory + * @return string + */ + protected function cacheDirectory() { + global $wgCacheDirectory, $wgFileCacheDirectory, $wgFileCacheDepth; + if ( $wgFileCacheDirectory ) { + $dir = $wgFileCacheDirectory; + } elseif ( $wgCacheDirectory ) { + $dir = "$wgCacheDirectory/object"; + } else { + throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' ); + } + return $dir; + } +} diff --git a/includes/db/DatabaseError.php b/includes/db/DatabaseError.php index 2fe409b8f2..acf97b98f0 100644 --- a/includes/db/DatabaseError.php +++ b/includes/db/DatabaseError.php @@ -232,9 +232,9 @@ EOT; $t = Title::newFromText( $this->msg( 'mainpage', 'Main Page' ) ); } - $cache = new HTMLFileCache( $t ); - if ( $cache->isFileCached() ) { - return $cache->fetchPageText(); + $cache = HTMLFileCache::newFromTitle( $t, 'view' ); + if ( $cache->isCached() ) { + return $cache->fetchText(); } else { return ''; } diff --git a/maintenance/rebuildFileCache.php b/maintenance/rebuildFileCache.php index 84ada11c95..d5a3de6664 100644 --- a/maintenance/rebuildFileCache.php +++ b/maintenance/rebuildFileCache.php @@ -37,7 +37,7 @@ class RebuildFileCache extends Maintenance { if ( !$wgUseFileCache ) { $this->error( "Nothing to do -- \$wgUseFileCache is disabled.", true ); } - $wgDisableCounters = false; + $wgDisableCounters = true; $start = $this->getArg( 0, "0" ); if ( !ctype_digit( $start ) ) { $this->error( "Invalid value for start parameter.", true ); @@ -83,8 +83,8 @@ class RebuildFileCache extends Maintenance { $article = new Article( $wgTitle ); // If the article is cacheable, then load it if ( $article->isFileCacheable() ) { - $cache = new HTMLFileCache( $wgTitle ); - if ( $cache->isFileCacheGood() ) { + $cache = HTMLFileCache::newFromTitle( $wgTitle, 'view' ); + if ( $cache->isCacheGood() ) { if ( $overwrite ) { $rebuilt = true; } else { -- 2.20.1