FileCache:
authorAaron Schulz <aaron@users.mediawiki.org>
Sun, 2 Oct 2011 17:53:33 +0000 (17:53 +0000)
committerAaron Schulz <aaron@users.mediawiki.org>
Sun, 2 Oct 2011 17:53:33 +0000 (17:53 +0000)
* Added FileCacheBase::*MissesRecent() functions for counting cache misses from different visitors.
* Made ObjectFileCache more generic.
* Cleaned up FileCacheBase::checkCacheDirs().
* Added FileCacheBase::typeSubdirectory() function and overrode it in HTMLFileCache. Fixes r98405 invalidating all existing cache due to directory change.
* Simplified FileCacheBase::checkCacheDirs() a bit

ResourceLoader:
* Use ResourceFileCache to handle load() requests, if $wgUseFileCache. Only caches requests for the default language and skin. Single-module requests are always cached, whereas others require a certain threshold of traffic.
* Added ResourceFileCache class (functionality was initially to be in ObjectFileCache).

includes/AutoLoader.php
includes/cache/FileCacheBase.php
includes/cache/HTMLFileCache.php
includes/cache/ObjectFileCache.php
includes/cache/ResourceFileCache.php [new file with mode: 0644]
includes/resourceloader/ResourceLoader.php

index 92686fe..3c25190 100644 (file)
@@ -373,6 +373,7 @@ $wgAutoloadLocalClasses = array(
        'LinkCache' => 'includes/cache/LinkCache.php',
        'MessageCache' => 'includes/cache/MessageCache.php',
        'ObjectFileCache' => 'includes/cache/ObjectFileCache.php',
+       'ResourceFileCache' => 'includes/cache/ResourceFileCache.php',
        'SquidUpdate' => 'includes/cache/SquidUpdate.php',
        'TitleDependency' => 'includes/cache/CacheDependency.php',
        'TitleListDependency' => 'includes/cache/CacheDependency.php',
index 3a51af4..1ed9546 100644 (file)
@@ -6,16 +6,34 @@
  */
 abstract class FileCacheBase {
        protected $mKey;
-       protected $mType;
-       protected $mExt;
+       protected $mType = 'object';
+       protected $mExt = 'cache';
        protected $mFilePath;
        protected $mUseGzip;
 
+       /* @TODO: configurable? */
+       const MISS_FACTOR = 10; // log 1 every MISS_FACTOR cache misses
+
        protected function __construct() {
                global $wgUseGzip;
 
                $this->mUseGzip = (bool)$wgUseGzip;
-               $this->mExt = 'cache';
+       }
+
+       /**
+        * Get the base file cache directory ($wgFileCacheDirectory, else $wgCacheDirectory)
+        * @return string
+        * @throws MWException if neither directory setting is set
+        */
+       final protected function baseCacheDirectory() {
+               global $wgCacheDirectory, $wgFileCacheDirectory;
+               if ( $wgFileCacheDirectory ) {
+                       return $wgFileCacheDirectory; // dedicated file cache dir takes precedence
+               }
+               if ( $wgCacheDirectory ) {
+                       return $wgCacheDirectory;
+               }
+               throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' );
         }
 
        /**
@@ -34,7 +52,8 @@ abstract class FileCacheBase {
                }
 
                $dir = $this->cacheDirectory();
-               $subDirs = $this->mType . '/' . $this->hashSubdirectory(); // includes '/'
+               # Build directories (methods include the trailing "/")
+               $subDirs = $this->typeSubdirectory() . $this->hashSubdirectory();
                # Avoid extension confusion
                $key = str_replace( '.', '%2E', urlencode( $this->mKey ) );
                # Build the full file path
@@ -112,6 +131,7 @@ abstract class FileCacheBase {
         */
        public function saveText( $text ) {
                global $wgUseFileCache;
+
                if ( !$wgUseFileCache ) {
                        return false;
                }
@@ -121,7 +141,7 @@ abstract class FileCacheBase {
                }
 
                $this->checkCacheDirs(); // build parent dir
-               if ( !file_put_contents( $this->cachePath(), $text ) ) {
+               if ( !file_put_contents( $this->cachePath(), $text, LOCK_EX ) ) {
                        return false;
                }
 
@@ -140,21 +160,23 @@ abstract class FileCacheBase {
 
        /**
         * Create parent directors of $this->cachePath()
-        * @TODO: why call wfMkdirParents() twice?
         * @return void
         */
        protected function checkCacheDirs() {
-               $filename = $this->cachePath();
-               $mydir2 = substr( $filename, 0, strrpos( $filename, '/') ); # subdirectory level 2
-               $mydir1 = substr( $mydir2, 0, strrpos( $mydir2, '/') ); # subdirectory level 1
+               wfMkdirParents( dirname( $this->cachePath() ), null, __METHOD__ );
+       }
 
-               wfMkdirParents( $mydir1, null, __METHOD__ );
-               wfMkdirParents( $mydir2, null, __METHOD__ );
+       /**
+        * Relative subdirectory for this cache type, ending in "/".
+        * Subclasses may override this (e.g. to return the empty string).
+        * @return string
+        */
+       protected function typeSubdirectory() {
+               $type = $this->mType;
+               return "{$type}/";
         }
 
        /**
-        * Return relative multi-level hash subdirectory with the trailing
-        * slash or the empty string if $wgFileCacheDepth is off
+        * Return relative multi-level hash subdirectory (with trailing slash)
+        * or the empty string if not $wgFileCacheDepth
         * @return string
         */
        protected function hashSubdirectory() {
@@ -170,4 +192,55 @@ abstract class FileCacheBase {
 
                return $subdir;
        }
+
+       /**
+        * Roughly increments the last-hour count of cache misses by unique visitors (sampled 1 in MISS_FACTOR)
+        * @param $request WebRequest
+        * @return void
+        */
+       public function incrMissesRecent( WebRequest $request ) {
+               global $wgMemc;
+               # Only sample about one in every MISS_FACTOR calls
+               if ( mt_rand( 0, self::MISS_FACTOR - 1 ) != 0 ) {
+                       return;
+               }
+               $addr = $request->getIP();
+               if ( !IP::isValid( $addr ) ) {
+                       return;
+               }
+               # Get a large IP range that should include the user
+               # even if that person's IP address changes...
+               $prefixLen = IP::isIPv6( $addr ) ? 64 : 16;
+               $range = IP::sanitizeRange( "$addr/$prefixLen" );
+
+               # Bail out if a request already came from this range...
+               $seenKey = wfMemcKey( get_class( $this ), 'attempt', $this->mType, $this->mKey, $range );
+               if ( $wgMemc->get( $seenKey ) ) {
+                       return; // possibly the same user
+               }
+               $wgMemc->set( $seenKey, 1, 3600 );
+
+               # Increment the number of cache misses...
+               $missKey = $this->cacheMissKey();
+               if ( $wgMemc->get( $missKey ) !== false ) {
+                       $wgMemc->incr( $missKey );
+               } else {
+                       $wgMemc->set( $missKey, 1, 3600 );
+               }
+       }
+
+       /**
+        * Estimate recent cache misses by unique visitors: the sampled counter scaled by MISS_FACTOR
+        * @return int
+        */
+       public function getMissesRecent() {
+               global $wgMemc;
+               return $wgMemc->get( $this->cacheMissKey() ) * self::MISS_FACTOR;
+       }
+
+       /**
+        * Key under which the sampled miss counter for this cache entry is kept in $wgMemc
+        * @return string
+        */
+       protected function cacheMissKey() {
+               $class = get_class( $this );
+               return wfMemcKey( $class, 'misses', $this->mType, $this->mKey );
+       }
 }
index d8313f6..ac63a2a 100644 (file)
@@ -35,6 +35,7 @@ class HTMLFileCache extends FileCacheBase {
 
        /**
         * Get the base file cache directory
+        * Note: avoids baseCacheDirectory() for b/c to not skip existing cache
         * @return string
         */
        protected function cacheDirectory() {
@@ -49,6 +50,18 @@ class HTMLFileCache extends FileCacheBase {
                return $dir;
        }
 
+       /**
+        * Cache type subdirectory with a trailing slash, except that the
+        * 'view' type gets no subdirectory at all (empty string).
+        * @return string
+        */
+       protected function typeSubdirectory() {
+               // No type subdirectory for 'view': b/c to not skip existing cache
+               return ( $this->mType === 'view' ) ? '' : $this->mType . '/';
+       }
+
        /**
         * Check if pages can be cached for this request/user
         * @param $context IContextSource
@@ -71,9 +84,8 @@ class HTMLFileCache extends FileCacheBase {
                        // Below are header setting params
                        } elseif ( $query == 'maxage' || $query == 'smaxage' ) {
                                continue;
-                       } else {
-                               return false;
                        }
+                       return false;
                }
                $user = $context->getUser();
                // Check for non-standard user language; this covers uselang,
index 5503226..dc5f9f6 100644 (file)
@@ -4,7 +4,7 @@
  * @file
  * @ingroup Cache
  */
-class ObjectFileCache extends FileCacheBase {
+abstract class ObjectFileCache extends FileCacheBase {
        /**
         * Construct an ObjectFileCache from a key and a type
         * @param $key string
@@ -14,38 +14,18 @@ class ObjectFileCache extends FileCacheBase {
        public static function newFromKey( $key, $type ) {
                $cache = new self();
 
-               $allowedTypes = self::cacheableTypes();
-               if ( !isset( $allowedTypes[$type] ) ) {
-                       throw new MWException( "Invalid filecache type given." );
-               }
                $cache->mKey = (string)$key;
                $cache->mType = (string)$type;
-               $cache->mExt = $allowedTypes[$cache->mType];
+               $cache->mExt = 'cache';
 
                return $cache;
        }
 
-       /**
-        * Get the type => extension mapping
-        * @return array
-        */
-       protected static function cacheableTypes() {
-               return array( 'resources-js' => 'js', 'resources-css' => 'css' );
-       }
-
        /**
         * Get the base file cache directory
         * @return string
         */
        protected function cacheDirectory() {
-               global $wgCacheDirectory, $wgFileCacheDirectory, $wgFileCacheDepth;
-               if ( $wgFileCacheDirectory ) {
-                       $dir = $wgFileCacheDirectory;
-               } elseif ( $wgCacheDirectory ) {
-                       $dir = "$wgCacheDirectory/object";
-               } else {
-                       throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' );
-               }
-               return $dir;
+               return $this->baseCacheDirectory() . '/object';
        }
 }
diff --git a/includes/cache/ResourceFileCache.php b/includes/cache/ResourceFileCache.php
new file mode 100644 (file)
index 0000000..3d725e7
--- /dev/null
@@ -0,0 +1,84 @@
+<?php
+/**
+ * Contains the ResourceFileCache class
+ * @file
+ * @ingroup Cache
+ */
+class ResourceFileCache extends FileCacheBase {
+       /** @var bool|null Whether this request is worth caching; null until decided */
+       protected $mCacheWorthy;
+
+       /* @TODO: configurable? */
+       const MISS_THRESHOLD = 360; // 6/min * 60 min
+
+       /**
+        * Construct a ResourceFileCache from a context
+        * @param $context ResourceLoaderContext
+        * @return ResourceFileCache
+        */
+       public static function newFromContext( ResourceLoaderContext $context ) {
+               $cache = new self();
+
+               // Cache type and file extension follow the kind of content requested
+               if ( $context->getOnly() === 'styles' ) {
+                       $cache->mType = $cache->mExt = 'css';
+               } else {
+                       $cache->mType = $cache->mExt = 'js';
+               }
+               $modules = array_unique( $context->getModules() ); // remove duplicates
+               sort( $modules ); // normalize the order (permutation => combination)
+               $cache->mKey = sha1( $context->getHash() . implode( '|', $modules ) );
+               if ( count( $modules ) === 1 ) {
+                       $cache->mCacheWorthy = true; // won't take up much space
+               }
+
+               return $cache;
+       }
+
+       /**
+        * Check if an RL request can be cached.
+        * Only requests whose query consists of whitelisted parameters with
+        * default values (default skin and language, debug off) qualify.
+        * Caller is responsible for checking if any modules are private.
+        * @param $context ResourceLoaderContext
+        * @return bool
+        */
+       public static function useFileCache( ResourceLoaderContext $context ) {
+               global $wgUseFileCache, $wgDefaultSkin, $wgLanguageCode;
+               if ( !$wgUseFileCache ) {
+                       return false;
+               }
+               // Get all query values
+               $queryVals = $context->getRequest()->getValues();
+               foreach ( $queryVals as $query => $val ) {
+                       if ( $query === 'modules' || $query === '*' ) { // &* added as IE fix
+                               continue;
+                       } elseif ( $query === 'skin' && $val === $wgDefaultSkin ) {
+                               continue;
+                       } elseif ( $query === 'lang' && $val === $wgLanguageCode ) {
+                               continue;
+                       } elseif ( $query === 'only' && in_array( $val, array( 'styles', 'scripts' ), true ) ) {
+                               // strict comparison: only the exact strings are acceptable
+                               continue;
+                       } elseif ( $query === 'debug' && $val === 'false' ) {
+                               continue;
+                       }
+                       return false; // any other parameter or value makes the request uncacheable
+               }
+               return true; // cacheable
+       }
+
+       /**
+        * Get the base file cache directory
+        * @return string
+        */
+       protected function cacheDirectory() {
+               return $this->baseCacheDirectory() . '/resources';
+       }
+
+       /**
+        * Check whether this request is worth caching to a file.
+        * Single-module requests are marked worthy at construction; for others
+        * the estimate of recent cache misses must reach MISS_THRESHOLD.
+        * The result is computed once and remembered.
+        * @return bool
+        */
+       public function isCacheWorthy() {
+               if ( $this->mCacheWorthy === null ) {
+                       $this->mCacheWorthy = ( $this->getMissesRecent() >= self::MISS_THRESHOLD );
+               }
+               return $this->mCacheWorthy;
+       }
+}
index b1c4b72..1a4817a 100644 (file)
@@ -353,7 +353,15 @@ class ResourceLoader {
         * @param $context ResourceLoaderContext: Context in which a response should be formed
         */
        public function respond( ResourceLoaderContext $context ) {
-               global $wgCacheEpoch;
+               global $wgCacheEpoch, $wgUseFileCache;
+
+               // Use file cache if enabled and available...
+               if ( $wgUseFileCache ) {
+                       $fileCache = ResourceFileCache::newFromContext( $context );
+                       if ( $this->tryRespondFromFileCache( $fileCache, $context ) ) {
+                               return; // output handled
+                       }
+               }
 
                // Buffer output to catch warnings. Normally we'd use ob_clean() on the
                // top-level output buffer to clear warnings, but that breaks when ob_gzhandler
@@ -432,6 +440,18 @@ class ResourceLoader {
                ob_end_clean();
                echo $response;
 
+               // Save response to file cache unless there are private modules or errors
+               if ( isset( $fileCache ) && !$private && !$exceptions && !$missing ) {
+                       // Cache single modules...and other requests if there are enough hits
+                       if ( ResourceFileCache::useFileCache( $context ) ) {
+                               if ( $fileCache->isCacheWorthy() ) {
+                                       $fileCache->saveText( $response );
+                               } else {
+                                       $fileCache->incrMissesRecent( $context->getRequest() );
+                               }
+                       }
+               }
+
                wfProfileOut( __METHOD__ );
        }
 
@@ -519,6 +539,52 @@ class ResourceLoader {
                return false;
        }
 
+       /**
+        * Send out code for a response from file cache if possible
+        *
+        * @param $fileCache ResourceFileCache: Cache object for this request URL
+        * @param $context ResourceLoaderContext: Context in which to generate a response
+        * @return bool If this found a cache file and handled the response
+        */
+       protected function tryRespondFromFileCache(
+               ResourceFileCache $fileCache, ResourceLoaderContext $context
+       ) {
+               global $wgResourceLoaderMaxage;
+               // Buffer output to catch warnings.
+               ob_start();
+               // Get the maximum age the cache can be
+               $maxage = is_null( $context->getVersion() )
+                       ? $wgResourceLoaderMaxage['unversioned']['server']
+                       : $wgResourceLoaderMaxage['versioned']['server'];
+               // Minimum timestamp the cache file must have
+               $good = $fileCache->isCacheGood( wfTimestamp( TS_MW, time() - $maxage ) );
+               if ( !$good ) {
+                       try { // RL always hits the DB on file cache miss...
+                               wfGetDB( DB_SLAVE );
+                       } catch( DBConnectionError $e ) { // ...check if we need to fallback to cache
+                               $good = $fileCache->isCacheGood(); // cache existence check
+                       }
+               }
+               if ( $good ) {
+                       $ts = $fileCache->cacheTimestamp();
+                       // Send content type and cache headers
+                       $this->sendResponseHeaders( $context, $ts, false );
+                       // If there's an If-Modified-Since header, respond with a 304 appropriately
+                       if ( $this->tryRespondLastModified( $context, $ts ) ) {
+                               return true; // output handled (buffers cleared); caller must not respond again
+                       }
+                       $response = $fileCache->fetchText();
+                       // Remove the output buffer and output the response
+                       ob_end_clean();
+                       echo $response . "\n/* Cached {$ts} */";
+                       return true; // cache hit
+               }
+               // Clear buffer
+               ob_end_clean();
+
+               return false; // cache miss
+       }
+
        /**
         * Generates code for a response
         *