From 93d358cd0c97c79ab9da6ad8db17b9770a6eccb2 Mon Sep 17 00:00:00 2001
From: Aaron Schulz
Date: Sun, 2 Oct 2011 17:53:33 +0000
Subject: [PATCH] FileCache:
 * Added FileCacheBase::*MissesRecent() functions for counting cache misses from different visitors.
 * Made ObjectFileCache more generic.
 * Cleaned up FileCacheBase::checkCacheDirs().
 * Added FileCacheBase::typeSubdirectory() function and overwrote in HTMLFileCache. Fixes r98405 invalidating all existing cache due to directory change.
 * Simplified FileCacheBase::checkCacheDirs() a bit
 ResourceLoader:
 * Use ResourceFileCache to handle load() requests, if $wgUseFileCache. Only caches requests for default language and skins. Single modules requests are always cached, whereas others require a certain threshold of traffic.
 * Added ResourceFileCache class (functionality was initially to be in ObjectFileCache).

---
Review notes (this section is ignored when the patch is applied):
- ResourceLoader::tryRespondFromFileCache() now returns true after a 304 has
  been sent; the bare "return;" evaluated as false in respond(), which then
  went on to generate and echo a complete response.
- Dropped the unused $wgFileCacheDepth global from
  FileCacheBase::baseCacheDirectory().
- The first 20 lines of includes/cache/ResourceFileCache.php were lost when
  this patch was extracted and have been restored from the surrounding
  context. NOTE(review): confirm the MISS_THRESHOLD value and the docblock
  wording against the upstream revision.
- NOTE(review): ObjectFileCache becomes abstract while newFromKey() still
  calls "new self()"; that line is unchanged context and is left as is.
- Blob ids on the "index" lines predate the edits above.

 includes/AutoLoader.php                    |  1 +
 includes/cache/FileCacheBase.php           | 99 +++++++++++++++++++---
 includes/cache/HTMLFileCache.php           | 16 +++-
 includes/cache/ObjectFileCache.php         | 26 +-----
 includes/cache/ResourceFileCache.php       | 84 ++++++++++++++++++
 includes/resourceloader/ResourceLoader.php | 68 ++++++++++++++-
 6 files changed, 255 insertions(+), 39 deletions(-)
 create mode 100644 includes/cache/ResourceFileCache.php

diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php
index 92686fe3f3..3c25190b1d 100644
--- a/includes/AutoLoader.php
+++ b/includes/AutoLoader.php
@@ -373,6 +373,7 @@ $wgAutoloadLocalClasses = array(
 	'LinkCache' => 'includes/cache/LinkCache.php',
 	'MessageCache' => 'includes/cache/MessageCache.php',
 	'ObjectFileCache' => 'includes/cache/ObjectFileCache.php',
+	'ResourceFileCache' => 'includes/cache/ResourceFileCache.php',
 	'SquidUpdate' => 'includes/cache/SquidUpdate.php',
 	'TitleDependency' => 'includes/cache/CacheDependency.php',
 	'TitleListDependency' => 'includes/cache/CacheDependency.php',
diff --git a/includes/cache/FileCacheBase.php b/includes/cache/FileCacheBase.php
index 3a51af4abe..1ed9546f93 100644
--- a/includes/cache/FileCacheBase.php
+++ b/includes/cache/FileCacheBase.php
@@ -6,16 +6,34 @@
  */
 abstract class FileCacheBase {
 	protected $mKey;
-	protected $mType;
-	protected $mExt;
+	protected $mType = 'object';
+	protected $mExt = 'cache';
 	protected $mFilePath;
 	protected $mUseGzip;
 
+	/* @TODO: configurable? */
+	const MISS_FACTOR = 10; // log 1 every MISS_FACTOR cache misses
+
 	protected function __construct() {
 		global $wgUseGzip;
 
 		$this->mUseGzip = (bool)$wgUseGzip;
-		$this->mExt = 'cache';
+	}
+
+	/**
+	 * Get the base file cache directory
+	 * @return string
+	 */
+	final protected function baseCacheDirectory() {
+		global $wgCacheDirectory, $wgFileCacheDirectory;
+		if ( $wgFileCacheDirectory ) {
+			$dir = $wgFileCacheDirectory;
+		} elseif ( $wgCacheDirectory ) {
+			$dir = $wgCacheDirectory;
+		} else {
+			throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' );
+		}
+		return $dir;
 	}
 
 	/**
@@ -34,7 +52,8 @@ abstract class FileCacheBase {
 		}
 
 		$dir = $this->cacheDirectory();
-		$subDirs = $this->mType . '/' . $this->hashSubdirectory(); // includes '/'
+		# Build directories (methods include the trailing "/")
+		$subDirs = $this->typeSubdirectory() . $this->hashSubdirectory();
 		# Avoid extension confusion
 		$key = str_replace( '.', '%2E', urlencode( $this->mKey ) );
 		# Build the full file path
@@ -112,6 +131,7 @@ abstract class FileCacheBase {
 	 */
 	public function saveText( $text ) {
 		global $wgUseFileCache;
+
 		if ( !$wgUseFileCache ) {
 			return false;
 		}
@@ -121,7 +141,7 @@ abstract class FileCacheBase {
 		}
 
 		$this->checkCacheDirs(); // build parent dir
-		if ( !file_put_contents( $this->cachePath(), $text ) ) {
+		if ( !file_put_contents( $this->cachePath(), $text, LOCK_EX ) ) {
 			return false;
 		}
 
@@ -140,21 +160,23 @@ abstract class FileCacheBase {
 
 	/**
 	 * Create parent directors of $this->cachePath()
-	 * @TODO: why call wfMkdirParents() twice?
 	 * @return void
 	 */
 	protected function checkCacheDirs() {
-		$filename = $this->cachePath();
-		$mydir2 = substr( $filename, 0, strrpos( $filename, '/') ); # subdirectory level 2
-		$mydir1 = substr( $mydir2, 0, strrpos( $mydir2, '/') ); # subdirectory level 1
+		wfMkdirParents( dirname( $this->cachePath() ), null, __METHOD__ );
+	}
 
-		wfMkdirParents( $mydir1, null, __METHOD__ );
-		wfMkdirParents( $mydir2, null, __METHOD__ );
+	/**
+	 * Get the cache type subdirectory (with trailing slash) or the empty string
+	 * @return string
+	 */
+	protected function typeSubdirectory() {
+		return $this->mType . '/';
 	}
 
 	/**
-	 * Return relative multi-level hash subdirectory with the trailing
-	 * slash or the empty string if $wgFileCacheDepth is off
+	 * Return relative multi-level hash subdirectory (with trailing slash)
+	 * or the empty string if not $wgFileCacheDepth
 	 * @return string
 	 */
 	protected function hashSubdirectory() {
@@ -170,4 +192,55 @@ abstract class FileCacheBase {
 
 		return $subdir;
 	}
+
+	/**
+	 * Roughly increments the cache misses in the last hour by unique visitors
+	 * @param $request WebRequest
+	 * @return void
+	 */
+	public function incrMissesRecent( WebRequest $request ) {
+		global $wgMemc;
+		if ( mt_rand( 0, self::MISS_FACTOR - 1 ) == 0 ) {
+			# Get a large IP range that should include the user
+			# even if that person's IP address changes...
+			$ip = $request->getIP();
+			if ( !IP::isValid( $ip ) ) {
+				return;
+			}
+			$ip = IP::isIPv6( $ip )
+				? IP::sanitizeRange( "$ip/64" )
+				: IP::sanitizeRange( "$ip/16" );
+
+			# Bail out if a request already came from this range...
+			$key = wfMemcKey( get_class( $this ), 'attempt', $this->mType, $this->mKey, $ip );
+			if ( $wgMemc->get( $key ) ) {
+				return; // possibly the same user
+			}
+			$wgMemc->set( $key, 1, 3600 );
+
+			# Increment the number of cache misses...
+			$key = $this->cacheMissKey();
+			if ( $wgMemc->get( $key ) === false ) {
+				$wgMemc->set( $key, 1, 3600 );
+			} else {
+				$wgMemc->incr( $key );
+			}
+		}
+	}
+
+	/**
+	 * Roughly gets the cache misses in the last hour by unique visitors
+	 * @return int
+	 */
+	public function getMissesRecent() {
+		global $wgMemc;
+		return self::MISS_FACTOR * $wgMemc->get( $this->cacheMissKey() );
+	}
+
+	/**
+	 * @return string
+	 */
+	protected function cacheMissKey() {
+		return wfMemcKey( get_class( $this ), 'misses', $this->mType, $this->mKey );
+	}
 }
diff --git a/includes/cache/HTMLFileCache.php b/includes/cache/HTMLFileCache.php
index d8313f60d6..ac63a2a5b3 100644
--- a/includes/cache/HTMLFileCache.php
+++ b/includes/cache/HTMLFileCache.php
@@ -35,6 +35,7 @@ class HTMLFileCache extends FileCacheBase {
 
 	/**
 	 * Get the base file cache directory
+	 * Note: avoids baseCacheDirectory() for b/c to not skip existing cache
 	 * @return string
 	 */
 	protected function cacheDirectory() {
@@ -49,6 +50,18 @@ class HTMLFileCache extends FileCacheBase {
 		return $dir;
 	}
 
+	/**
+	 * Get the cache type subdirectory (with the trailing slash) or the empty string
+	 * @return string
+	 */
+	protected function typeSubdirectory() {
+		if ( $this->mType === 'view' ) {
+			return ''; // b/c to not skip existing cache
+		} else {
+			return $this->mType . '/';
+		}
+	}
+
 	/**
 	 * Check if pages can be cached for this request/user
 	 * @param $context IContextSource
@@ -71,9 +84,8 @@ class HTMLFileCache extends FileCacheBase {
 			// Below are header setting params
 			} elseif ( $query == 'maxage' || $query == 'smaxage' ) {
 				continue;
-			} else {
-				return false;
 			}
+			return false;
 		}
 		$user = $context->getUser();
 		// Check for non-standard user language; this covers uselang,
diff --git a/includes/cache/ObjectFileCache.php b/includes/cache/ObjectFileCache.php
index 5503226931..dc5f9f612b 100644
--- a/includes/cache/ObjectFileCache.php
+++ b/includes/cache/ObjectFileCache.php
@@ -4,7 +4,7 @@
  * @file
  * @ingroup Cache
  */
-class ObjectFileCache extends FileCacheBase {
+abstract class ObjectFileCache extends FileCacheBase {
 	/**
 	 * Construct an ObjectFileCache from a key and a type
 	 * @param $key string
@@ -14,38 +14,18 @@ class ObjectFileCache extends FileCacheBase {
 	public static function newFromKey( $key, $type ) {
 		$cache = new self();
 
-		$allowedTypes = self::cacheableTypes();
-		if ( !isset( $allowedTypes[$type] ) ) {
-			throw new MWException( "Invalid filecache type given." );
-		}
 		$cache->mKey = (string)$key;
 		$cache->mType = (string)$type;
-		$cache->mExt = $allowedTypes[$cache->mType];
+		$cache->mExt = 'cache';
 
 		return $cache;
 	}
 
-	/**
-	 * Get the type => extension mapping
-	 * @return array
-	 */
-	protected static function cacheableTypes() {
-		return array( 'resources-js' => 'js', 'resources-css' => 'css' );
-	}
-
 	/**
 	 * Get the base file cache directory
 	 * @return string
 	 */
 	protected function cacheDirectory() {
-		global $wgCacheDirectory, $wgFileCacheDirectory, $wgFileCacheDepth;
-		if ( $wgFileCacheDirectory ) {
-			$dir = $wgFileCacheDirectory;
-		} elseif ( $wgCacheDirectory ) {
-			$dir = "$wgCacheDirectory/object";
-		} else {
-			throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' );
-		}
-		return $dir;
+		return $this->baseCacheDirectory() . '/object';
 	}
 }
diff --git a/includes/cache/ResourceFileCache.php b/includes/cache/ResourceFileCache.php
new file mode 100644
index 0000000000..3d725e72a7
--- /dev/null
+++ b/includes/cache/ResourceFileCache.php
@@ -0,0 +1,84 @@
+<?php
+/**
+ * Contain the ResourceFileCache class
+ * @file
+ * @ingroup Cache
+ */
+class ResourceFileCache extends FileCacheBase {
+	protected $mCacheWorthy;
+
+	/* @TODO: configurable? */
+	const MISS_THRESHOLD = 360; // 6/min * 60 min
+
+	/**
+	 * Construct a ResourceFileCache from a context
+	 * @param $context ResourceLoaderContext
+	 * @return ResourceFileCache
+	 */
+	public static function newFromContext( ResourceLoaderContext $context ) {
+		$cache = new self();
+
+		if ( $context->getOnly() === 'styles' ) {
+			$cache->mType = $cache->mExt = 'css';
+		} else {
+			$cache->mType = $cache->mExt = 'js';
+		}
+		$modules = array_unique( $context->getModules() ); // remove duplicates
+		sort( $modules ); // normalize the order (permutation => combination)
+		$cache->mKey = sha1( $context->getHash() . implode( '|', $modules ) );
+		if ( count( $modules ) == 1 ) {
+			$cache->mCacheWorthy = true; // won't take up much space
+		}
+
+		return $cache;
+	}
+
+	/**
+	 * Check if an RL request can be cached.
+	 * Caller is responsible for checking if any modules are private.
+	 * @param $context ResourceLoaderContext
+	 * @return bool
+	 */
+	public static function useFileCache( ResourceLoaderContext $context ) {
+		global $wgUseFileCache, $wgDefaultSkin, $wgLanguageCode;
+		if ( !$wgUseFileCache ) {
+			return false;
+		}
+		// Get all query values
+		$queryVals = $context->getRequest()->getValues();
+		foreach ( $queryVals as $query => $val ) {
+			if ( $query === 'modules' || $query === '*' ) { // &* added as IE fix
+				continue;
+			} elseif ( $query === 'skin' && $val === $wgDefaultSkin ) {
+				continue;
+			} elseif ( $query === 'lang' && $val === $wgLanguageCode ) {
+				continue;
+			} elseif ( $query === 'only' && in_array( $val, array( 'styles', 'scripts' ) ) ) {
+				continue;
+			} elseif ( $query === 'debug' && $val === 'false' ) {
+				continue;
+			}
+			return false;
+		}
+		return true; // cacheable
+	}
+
+	/**
+	 * Get the base file cache directory
+	 * @return string
+	 */
+	protected function cacheDirectory() {
+		return $this->baseCacheDirectory() . '/resources';
+	}
+
+	/**
+	 * Recent cache misses
+	 * @return bool
+	 */
+	public function isCacheWorthy() {
+		if ( $this->mCacheWorthy === null ) {
+			$this->mCacheWorthy = ( $this->getMissesRecent() >= self::MISS_THRESHOLD );
+		}
+		return $this->mCacheWorthy;
+	}
+}
diff --git a/includes/resourceloader/ResourceLoader.php b/includes/resourceloader/ResourceLoader.php
index b1c4b727ab..1a4817aae9 100644
--- a/includes/resourceloader/ResourceLoader.php
+++ b/includes/resourceloader/ResourceLoader.php
@@ -353,7 +353,15 @@ class ResourceLoader {
 	 * @param $context ResourceLoaderContext: Context in which a response should be formed
 	 */
 	public function respond( ResourceLoaderContext $context ) {
-		global $wgCacheEpoch;
+		global $wgCacheEpoch, $wgUseFileCache;
+
+		// Use file cache if enabled and available...
+		if ( $wgUseFileCache ) {
+			$fileCache = ResourceFileCache::newFromContext( $context );
+			if ( $this->tryRespondFromFileCache( $fileCache, $context ) ) {
+				return; // output handled
+			}
+		}
 
 		// Buffer output to catch warnings. Normally we'd use ob_clean() on the
 		// top-level output buffer to clear warnings, but that breaks when ob_gzhandler
@@ -432,6 +440,18 @@ class ResourceLoader {
 		ob_end_clean();
 		echo $response;
 
+		// Save response to file cache unless there are private modules or errors
+		if ( isset( $fileCache ) && !$private && !$exceptions && !$missing ) {
+			// Cache single modules...and other requests if there are enough hits
+			if ( ResourceFileCache::useFileCache( $context ) ) {
+				if ( $fileCache->isCacheWorthy() ) {
+					$fileCache->saveText( $response );
+				} else {
+					$fileCache->incrMissesRecent( $context->getRequest() );
+				}
+			}
+		}
+
 		wfProfileOut( __METHOD__ );
 	}
 
@@ -519,6 +539,52 @@ class ResourceLoader {
 		return false;
 	}
 
+	/**
+	 * Send out code for a response from file cache if possible
+	 *
+	 * @param $fileCache ResourceFileCache: Cache object for this request URL
+	 * @param $context ResourceLoaderContext: Context in which to generate a response
+	 * @return bool If this found a cache file and handled the response
+	 */
+	protected function tryRespondFromFileCache(
+		ResourceFileCache $fileCache, ResourceLoaderContext $context
+	) {
+		global $wgResourceLoaderMaxage;
+		// Buffer output to catch warnings.
+		ob_start();
+		// Get the maximum age the cache can be
+		$maxage = is_null( $context->getVersion() )
+			? $wgResourceLoaderMaxage['unversioned']['server']
+			: $wgResourceLoaderMaxage['versioned']['server'];
+		// Minimum timestamp the cache file must have
+		$good = $fileCache->isCacheGood( wfTimestamp( TS_MW, time() - $maxage ) );
+		if ( !$good ) {
+			try { // RL always hits the DB on file cache miss...
+				wfGetDB( DB_SLAVE );
+			} catch( DBConnectionError $e ) { // ...check if we need to fallback to cache
+				$good = $fileCache->isCacheGood(); // cache existence check
+			}
+		}
+		if ( $good ) {
+			$ts = $fileCache->cacheTimestamp();
+			// Send content type and cache headers
+			$this->sendResponseHeaders( $context, $ts, false );
+			// If there's an If-Modified-Since header, respond with a 304 appropriately
+			if ( $this->tryRespondLastModified( $context, $ts ) ) {
+				return true; // output handled (buffers cleared)
+			}
+			$response = $fileCache->fetchText();
+			// Remove the output buffer and output the response
+			ob_end_clean();
+			echo $response . "\n/* Cached {$ts} */";
+			return true; // cache hit
+		}
+		// Clear buffer
+		ob_end_clean();
+
+		return false; // cache miss
+	}
+
 	/**
 	 * Generates code for a response
 	 *
-- 
2.20.1