* @author Aaron Schulz
*/
+use Psr\Log\LoggerAwareInterface;
+use Psr\Log\LoggerInterface;
+use Psr\Log\NullLogger;
+
/**
* Multi-datacenter aware caching interface
*
* This class is intended for caching data from primary stores.
* If the get() method does not return a value, then the caller
* should query the new value and backfill the cache using set().
+ * When querying the store on cache miss, the closest DB replica
+ * should be used. Try to avoid heavyweight DB master or quorum reads.
* When the source data changes, a purge method should be called.
* Since purges are expensive, they should be avoided. One can do so if:
* - a) The object cached is immutable; or
* - b) Validity is checked against the source after get(); or
* - c) Using a modest TTL is reasonably correct and performant
+ *
* The simplest purge method is delete().
*
* Instances of this class must be configured to point to a valid
* @ingroup Cache
* @since 1.26
*/
-class WANObjectCache {
+class WANObjectCache implements IExpiringStore, LoggerAwareInterface {
/** @var BagOStuff The local datacenter cache */
protected $cache;
+ /** @var HashBagOStuff Script instance PHP cache */
+ protected $procCache;
/** @var string Cache pool name */
protected $pool;
- /** @var EventRelayer */
+ /** @var EventRelayer Bus that handles purge broadcasts */
protected $relayer;
+ /** @var LoggerInterface */
+ protected $logger;
- /** @var int */
+ /** @var int ERR_* constant for the "last error" registry */
protected $lastRelayError = self::ERR_NONE;
/** Max time expected to pass between delete() and DB commit finishing */
const HOLDOFF_TTL = 14; // MAX_COMMIT_DELAY + MAX_REPLICA_LAG + MAX_SNAPSHOT_LAG + 1
/** Seconds to keep dependency purge keys around */
- const CHECK_KEY_TTL = 31536000; // 1 year
+ const CHECK_KEY_TTL = self::TTL_YEAR;
/** Seconds to keep lock keys around */
const LOCK_TTL = 5;
/** Default remaining TTL at which to consider pre-emptive regeneration */
/** Default time-since-expiry on a miss that makes a key "hot" */
const LOCK_TSE = 1;
- /** Idiom for set()/getWithSetCallback() TTL */
- const TTL_NONE = 0;
+ /** Idiom for set()/getWithSetCallback() TTL being "forever" */
+ const TTL_INDEFINITE = 0;
/** Idiom for getWithSetCallback() callbacks to avoid calling set() */
const TTL_UNCACHEABLE = -1;
/** Idiom for getWithSetCallback() callbacks to 'lockTSE' logic */
/** Cache format version number */
const VERSION = 1;
- /** Fields of value holder arrays */
const FLD_VERSION = 0;
const FLD_VALUE = 1;
const FLD_TTL = 2;
const FLD_TIME = 3;
- /** Possible values for getLastError() */
const ERR_NONE = 0; // no error
const ERR_NO_RESPONSE = 1; // no response
const ERR_UNREACHABLE = 2; // can't connect
* - cache : BagOStuff object
* - pool : pool name
* - relayer : EventRelayer object
+ * - logger : LoggerInterface object [optional; a NullLogger is used if omitted]
*/
public function __construct( array $params ) {
$this->cache = $params['cache'];
$this->pool = $params['pool'];
$this->relayer = $params['relayer'];
+ $this->procCache = new HashBagOStuff(); // always a fresh per-instance process cache; not taken from $params
+ $this->setLogger( isset( $params['logger'] ) ? $params['logger'] : new NullLogger() );
+ }
+
+ public function setLogger( LoggerInterface $logger ) {
+ $this->logger = $logger; // receives set() diagnostics (rejected or TTL-lowered writes)
}
/**
- * @return WANObjectCache Cache that wraps EmptyBagOStuff
+ * Get an instance that wraps EmptyBagOStuff
+ *
+ * @return WANObjectCache
*/
public static function newEmpty() {
return new self( array(
* However, pre-snapshot values might still be seen if an update was made
* in a remote datacenter but the purge from delete() didn't relay yet.
*
- * Consider using getWithSetCallback() instead of get()/set() cycles.
+ * Consider using getWithSetCallback() instead of get() and set() cycles.
* That method has cache slam avoiding features for hot/expensive keys.
*
* @param string $key Cache key
*
* @param array $keys List of cache keys
* @param array $curTTLs Map of (key => approximate TTL left) for existing keys [returned]
- * @param array $checkKeys List of "check" keys
+ * @param array $checkKeys List of "check" keys to apply to all of $keys
* @return array Map of (key => value) for keys that exist
*/
final public function getMulti(
}
/**
- * Set the value of a key from cache
+ * Set the value of a key in cache
*
* Simply calling this method when source data changes is not valid because
* the changes do not replicate to the other WAN sites. In that case, delete()
* - d) T1 reads the row and calls set() due to a cache miss
* - e) Stale value is stuck in cache
*
- * Setting 'lag' helps avoids keys getting stuck in long-term stale states.
+ * Setting 'lag' and 'since' helps avoid keys getting stuck in stale states.
*
* Example usage:
* @code
* $setOpts = Database::getCacheSetOptions( $dbr );
* // Fetch the row from the DB
* $row = $dbr->selectRow( ... );
- * $key = wfMemcKey( 'building', $buildingId );
- * $cache->set( $key, $row, 86400, $setOpts );
+ * $key = $cache->makeKey( 'building', $buildingId );
+ * $cache->set( $key, $row, $cache::TTL_DAY, $setOpts );
* @endcode
*
* @param string $key Cache key
* @param mixed $value
- * @param integer $ttl Seconds to live [0=forever]
+ * @param integer $ttl Seconds to live. Special values are:
+ * - WANObjectCache::TTL_INDEFINITE: Cache forever
* @param array $opts Options map:
* - lag : Seconds of slave lag. Typically, this is either the slave lag
* before the data was read or, if applicable, the slave lag before
* the snapshot-isolated transaction the data was read from started.
- * [Default: 0 seconds]
+ * Default: 0 seconds
* - since : UNIX timestamp of the data in $value. Typically, this is either
* the current time the data was read or (if applicable) the time when
* the snapshot-isolated transaction the data was read from started.
- * [Default: 0 seconds]
+ * Default: 0 seconds
+ * - pending : Whether this data is possibly from an uncommitted write transaction.
+ * Generally, other threads should not see values from the future and
+ * they certainly should not see ones that ended up getting rolled back.
+ * If true, set() skips the write (no-op) and still returns true.
+ * Default: false
* - lockTSE : if excessive possible snapshot lag is detected,
* then stash the value into a temporary location
* with this TTL. This is only useful if the reads
* use getWithSetCallback() with "lockTSE" set.
- * [Default: WANObjectCache::TSE_NONE]
+ * Default: WANObjectCache::TSE_NONE
* @return bool Success
*/
final public function set( $key, $value, $ttl = 0, array $opts = array() ) {
$age = isset( $opts['since'] ) ? max( 0, microtime( true ) - $opts['since'] ) : 0;
$lag = isset( $opts['lag'] ) ? $opts['lag'] : 0;
+ if ( !empty( $opts['pending'] ) ) {
+ $this->logger->info( "Rejected set() for $key due to pending writes." );
+
+ return true; // no-op the write for being unsafe
+ }
+
if ( $lag > self::MAX_REPLICA_LAG ) {
// Too much lag detected; lower TTL so it converges faster
$ttl = $ttl ? min( $ttl, self::TTL_LAGGED ) : self::TTL_LAGGED;
+ $this->logger->warning( "Lowered set() TTL for $key due to replication lag." );
}
if ( $age > self::MAX_SNAPSHOT_LAG ) {
$tempTTL = max( 1, (int)$lockTSE ); // set() expects seconds
$this->cache->set( self::STASH_KEY_PREFIX . $key, $value, $tempTTL );
}
+ $this->logger->warning( "Rejected set() for $key due to snapshot lag." );
return true; // no-op the write for being unsafe
}
* This is implemented by storing a special "tombstone" value at the cache
* key that this class recognizes; get() calls will return false for the key
* and any set() calls will refuse to replace tombstone values at the key.
- * For this to always avoid writing stale values, the following must hold:
+ * For this to always avoid stale value writes, the following must hold:
* - a) Replication lag is bounded to being less than HOLDOFF_TTL; or
* - b) If lag is higher, the DB will have gone into read-only mode already
*
+ * Note that set() can also be lag-aware and lower the TTL if the replication lag is high.
+ *
* When using potentially long-running ACID transactions, a good pattern is
* to use a pre-commit hook to issue the delete. This means that immediately
* after commit, callers will see the tombstone in cache in the local datacenter
* ... <execute some stuff> ...
* // Update the row in the DB
* $dbw->update( ... );
- * $key = wfMemcKey( 'homes', $homeId );
+ * $key = $cache->makeKey( 'homes', $homeId );
* // Purge the corresponding cache entry just before committing
* $dbw->onTransactionPreCommitOrIdle( function() use ( $cache, $key ) {
* $cache->delete( $key );
* if the key was evicted from cache, such calculations may show the
* time since expiry as ~0 seconds.
*
- * Note that "check" keys won't collide with other regular keys
+ * Note that "check" keys won't collide with other regular keys.
*
* @param string $key
* @return float UNIX timestamp of the key
* keys, the relevant "check" keys must be supplied for this to work.
*
* The "check" key essentially represents a last-modified field.
- * It is set in the future a few seconds when this is called, to
- * avoid race conditions where dependent keys get updated with a
- * stale value (e.g. from a DB slave).
+ * When touched, keys using it via get(), getMulti(), or getWithSetCallback()
+ * will be invalidated. It is treated as being HOLDOFF_TTL seconds in the future
+ * by those methods to avoid race conditions where dependent keys get updated
+ * with stale values (e.g. from a DB slave).
*
- * This is typically useful for keys with static names or some cases
+ * This is typically useful for keys with hardcoded names or in some cases
* dynamically generated names where a low number of combinations exist.
* When a few important keys get a large number of hits, a high cache
- * time is usually desired as well as lockTSE logic. The resetCheckKey()
+ * time is usually desired as well as "lockTSE" logic. The resetCheckKey()
* method is less appropriate in such cases since the "time since expiry"
* cannot be inferred.
*
- * Note that "check" keys won't collide with other regular keys
+ * Note that "check" keys won't collide with other regular keys.
*
* @see WANObjectCache::get()
+ * @see WANObjectCache::getWithSetCallback()
+ * @see WANObjectCache::resetCheckKey()
*
* @param string $key Cache key
* @return bool True if the item was purged or not found, false on failure
/**
* Delete a "check" key from all datacenters, invalidating keys that use it
*
- * This is similar to touchCheckKey() in that keys using it via
- * getWithSetCallback() will be invalidated. The differences are:
+ * This is similar to touchCheckKey() in that keys using it via get(), getMulti(),
+ * or getWithSetCallback() will be invalidated. The differences are:
* - a) The timestamp will be deleted from all caches and lazily
* re-initialized when accessed (rather than set everywhere)
* - b) Thus, dependent keys will be known to be invalid, but not
* This is typically useful for keys with dynamically generated names
* where a high number of combinations exist.
*
- * Note that "check" keys won't collide with other regular keys
+ * Note that "check" keys won't collide with other regular keys.
*
- * @see WANObjectCache::touchCheckKey()
* @see WANObjectCache::get()
+ * @see WANObjectCache::getWithSetCallback()
+ * @see WANObjectCache::touchCheckKey()
*
* @param string $key Cache key
* @return bool True if the item was purged or not found, false on failure
* Method to fetch/regenerate cache keys
*
* On cache miss, the key will be set to the callback result via set()
- * unless the callback returns false. The arguments supplied to it are:
- * (current value or false, &$ttl, &$setOpts)
- * The callback function returns the new value given the current
- * value (false if not present). Preemptive re-caching and $checkKeys
- * can result in a non-false current value. The TTL of the new value
- * can be set dynamically by altering $ttl in the callback (by reference).
- * The $setOpts array can be altered and is given to set() when called;
- * it is recommended to set the 'since' field to avoid race conditions.
- * Setting 'lag' helps avoids keys getting stuck in long-term stale states.
- *
- * Usually, callbacks ignore the current value, but it can be used
- * to maintain "most recent X" values that come from time or sequence
- * based source data, provided that the "as of" id/time is tracked.
- *
- * Usage of $checkKeys is similar to get() and getMulti(). However,
- * rather than the caller having to inspect a "current time left"
- * variable (e.g. $curTTL, $curTTLs), a cache regeneration will be
- * triggered using the callback.
+ * (unless the callback returns false) and that result will be returned.
+ * The arguments supplied to the callback are:
+ * - $oldValue : current cache value or false if not present
+ * - &$ttl : a reference to the TTL which can be altered
+ * - &$setOpts : a reference to options for set() which can be altered
+ *
+ * It is strongly recommended to set the 'lag' and 'since' fields to avoid race conditions
+ * that can cause stale values to get stuck at keys. Usually, callbacks ignore the current
+ * value, but it can be used to maintain "most recent X" values that come from time or
+ * sequence based source data, provided that the "as of" id/time is tracked. Note that
+ * preemptive regeneration and $checkKeys can result in a non-false current value.
+ *
+ * Usage of $checkKeys is similar to get() and getMulti(). However, rather than the caller
+ * having to inspect a "current time left" variable (e.g. $curTTL, $curTTLs), a cache
+ * regeneration will automatically be triggered using the callback.
*
* The simplest way to avoid stampedes for hot keys is to use
* the 'lockTSE' option in $opts. If cache purges are needed, also:
* @code
* $catInfo = $cache->getWithSetCallback(
* // Key to store the cached value under
- * wfMemcKey( 'cat-attributes', $catId ),
+ * $cache->makeKey( 'cat-attributes', $catId ),
+ * // Time-to-live (in seconds)
+ * $cache::TTL_MINUTE,
* // Function that derives the new key value
* function ( $oldValue, &$ttl, array &$setOpts ) {
* $dbr = wfGetDB( DB_SLAVE );
* $setOpts += Database::getCacheSetOptions( $dbr );
*
* return $dbr->selectRow( ... );
- * },
- * // Time-to-live (seconds)
- * 60
+ * }
* );
* @endcode
*
* @code
* $catConfig = $cache->getWithSetCallback(
* // Key to store the cached value under
- * wfMemcKey( 'site-cat-config' ),
+ * $cache->makeKey( 'site-cat-config' ),
+ * // Time-to-live (in seconds)
+ * $cache::TTL_DAY,
* // Function that derives the new key value
* function ( $oldValue, &$ttl, array &$setOpts ) {
* $dbr = wfGetDB( DB_SLAVE );
* $setOpts += Database::getCacheSetOptions( $dbr );
*
* return CatConfig::newFromRow( $dbr->selectRow( ... ) );
- * },
- * // Time-to-live (seconds)
- * 86400,
- * // Calling touchCheckKey() on this key invalidates the cache
- * wfMemcKey( 'site-cat-config' ),
- * // Try to only let one datacenter thread manage cache updates at a time
- * array( 'lockTSE' => 30 )
+ * },
+ * array(
+ * // Calling touchCheckKey() on this key invalidates the cache
+ * 'checkKeys' => array( $cache->makeKey( 'site-cat-config' ) ),
+ * // Try to only let one datacenter thread manage cache updates at a time
+ * 'lockTSE' => 30
+ * )
* );
* @endcode
*
* @code
* $catState = $cache->getWithSetCallback(
* // Key to store the cached value under
- * wfMemcKey( 'cat-state', $cat->getId() ),
+ * $cache->makeKey( 'cat-state', $cat->getId() ),
+ * // Time-to-live (in seconds)
+ * 900,
* // Function that derives the new key value
* function ( $oldValue, &$ttl, array &$setOpts ) {
* // Determine new value from the DB
* $setOpts += Database::getCacheSetOptions( $dbr );
*
* return CatState::newFromResults( $dbr->select( ... ) );
- * },
- * // Time-to-live (seconds)
- * 900,
- * // The "check" keys that represent things the value depends on;
- * // Calling touchCheckKey() on any of them invalidates the cache
- * array(
- * wfMemcKey( 'sustenance-bowls', $cat->getRoomId() ),
- * wfMemcKey( 'people-present', $cat->getHouseId() ),
- * wfMemcKey( 'cat-laws', $cat->getCityId() ),
+ * },
+ * array(
+ * // The "check" keys that represent things the value depends on;
+ * // Calling touchCheckKey() on any of them invalidates the cache
+ * 'checkKeys' => array(
+ * $cache->makeKey( 'sustenance-bowls', $cat->getRoomId() ),
+ * $cache->makeKey( 'people-present', $cat->getHouseId() ),
+ * $cache->makeKey( 'cat-laws', $cat->getCityId() ),
+ * )
* )
* );
* @endcode
* @code
* $lastCatActions = $cache->getWithSetCallback(
* // Key to store the cached value under
- * wfMemcKey( 'cat-last-actions', 100 ),
+ * $cache->makeKey( 'cat-last-actions', 100 ),
+ * // Time-to-live (in seconds)
+ * 10,
* // Function that derives the new key value
* function ( $oldValue, &$ttl, array &$setOpts ) {
* $dbr = wfGetDB( DB_SLAVE );
* // Merge them and get the new "last 100" rows
* return array_slice( array_merge( $new, $list ), 0, 100 );
* },
- * // Time-to-live (seconds)
- * 10,
- * // No "check" keys
- * array(),
* // Try to only let one datacenter thread manage cache updates at a time
* array( 'lockTSE' => 30 )
* );
*
* @param string $key Cache key
* @param integer $ttl Seconds to live for key updates. Special values are:
- * - WANObjectCache::TTL_NONE : Cache forever
+ * - WANObjectCache::TTL_INDEFINITE: Cache forever
* - WANObjectCache::TTL_UNCACHEABLE: Do not cache at all
* @param callable $callback Value generation function
* @param array $opts Options map:
- * - checkKeys: List of "check" keys.
+ * - checkKeys: List of "check" keys. The key at $key will be seen as invalid when either
+ * touchCheckKey() or resetCheckKey() is called on any of these keys.
* - lowTTL: Consider pre-emptive updates when the current TTL (sec) of the key is less than
* this. It becomes more likely over time, becoming a certainty once the key is expired.
* Default: WANObjectCache::LOW_TTL seconds.
* expiration is low, the assumption is that the key is hot and that a stampede is worth
* avoiding. Setting this above WANObjectCache::HOLDOFF_TTL makes no difference. The
* higher this is set, the higher the worst-case staleness can be.
- * Use WANObjectCache::TSE_NONE to disable this logic. Default: WANObjectCache::TSE_NONE.
+ * Use WANObjectCache::TSE_NONE to disable this logic.
+ * Default: WANObjectCache::TSE_NONE.
+ * - pcTTL : Process-cache the value in this PHP instance with this TTL. This avoids
+ * network I/O when a key is read several times. The value will not be process-cached
+ * if the callback returns false, however. Note that any purges will not be seen while
+ * the value is process-cached; since the callback should use slave DBs and they may be
+ * lagged or have snapshot isolation anyway, this should not typically matter.
+ * Default: WANObjectCache::TTL_UNCACHEABLE.
* @return mixed Value to use for the key
*/
- final public function getWithSetCallback(
- $key, $ttl, $callback, array $opts = array(), $oldOpts = array()
- ) {
- // Back-compat with 1.26: Swap $ttl and $callback
- if ( is_int( $callback ) ) {
- $temp = $ttl;
- $ttl = $callback;
- $callback = $temp;
- }
- // Back-compat with 1.26: $checkKeys as separate parameter
- if ( $oldOpts || ( is_array( $opts ) && isset( $opts[0] ) ) ) {
- $checkKeys = $opts;
- $opts = $oldOpts;
- } else {
- $checkKeys = isset( $opts['checkKeys'] ) ? $opts['checkKeys'] : array();
+ final public function getWithSetCallback( $key, $ttl, $callback, array $opts = array() ) {
+ $pcTTL = isset( $opts['pcTTL'] ) ? $opts['pcTTL'] : self::TTL_UNCACHEABLE;
+
+ // Try the process cache if enabled ('pcTTL' >= 0); a false result is treated as a miss
+ $value = ( $pcTTL >= 0 ) ? $this->procCache->get( $key ) : false;
+
+ if ( $value === false ) {
+ // Not process-cached: fetch (or regenerate) the value over the network
+ $value = $this->doGetWithSetCallback( $key, $ttl, $callback, $opts );
+ // Update the process cache if enabled; false results are never stored there
+ if ( $pcTTL >= 0 && $value !== false ) {
+ $this->procCache->set( $key, $value, $pcTTL );
+ }
}
+ return $value;
+ }
+
+ /**
+ * Do the actual I/O for getWithSetCallback() when needed
+ *
+ * @see WANObjectCache::getWithSetCallback()
+ *
+ * @param string $key
+ * @param integer $ttl
+ * @param callable $callback
+ * @param array $opts
+ * @return mixed
+ */
+ protected function doGetWithSetCallback( $key, $ttl, $callback, array $opts ) {
$lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( self::LOW_TTL, $ttl );
$lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : self::TSE_NONE;
+ $checkKeys = isset( $opts['checkKeys'] ) ? $opts['checkKeys'] : array();
// Get the current key value
$curTTL = null;
return $value;
}
+ /**
+ * Build a cache key by forwarding all arguments to makeKey() on the local datacenter cache
+ *
+ * @see BagOStuff::makeKey()
+ * @param string ... Key components (variable-length argument list, passed through unchanged)
+ * @return string
+ * @since 1.27
+ */
+ public function makeKey() {
+ // __FUNCTION__ resolves to this method's own name, so the same-named cache method is called
+ $target = array( $this->cache, __FUNCTION__ );
+
+ return call_user_func_array( $target, func_get_args() );
+ }
+
+ /**
+ * Build a cache key by forwarding all arguments to makeGlobalKey() on the local datacenter cache
+ *
+ * @see BagOStuff::makeGlobalKey()
+ * @param string ... Key components (variable-length argument list, passed through unchanged)
+ * @return string
+ * @since 1.27
+ */
+ public function makeGlobalKey() {
+ // __FUNCTION__ resolves to this method's own name, so the same-named cache method is called
+ $target = array( $this->cache, __FUNCTION__ );
+
+ return call_user_func_array( $target, func_get_args() );
+ }
+
/**
* Get the "last error" registered; clearLastError() should be called manually
* @return int ERR_* constant for the "last error" registry