protected $logger;
/** @var StatsdDataFactoryInterface */
protected $stats;
+ /** @var bool Whether to use "interim" caching while keys are tombstoned */
+ protected $useInterimHoldOffCaching = true;
+ /** @var callable|null Function that takes a WAN cache callback and runs it later */
+ protected $asyncHandler;
/** @var int ERR_* constant for the "last error" registry */
protected $lastRelayError = self::ERR_NONE;
/** Seconds to keep dependency purge keys around */
const CHECK_KEY_TTL = self::TTL_YEAR;
+ /** Seconds to keep interim value keys for tombstoned keys around */
+ const INTERIM_KEY_TTL = 1;
+
/** Seconds to keep lock keys around */
const LOCK_TTL = 10;
/** Default remaining TTL at which to consider pre-emptive regeneration */
const HOLDOFF_NONE = 0;
/** Idiom for set()/getWithSetCallback() for "do not augment the storage medium TTL" */
const STALE_TTL_NONE = 0;
+ /** Idiom for set()/getWithSetCallback() for "no post-expired grace period" */
+ const GRACE_TTL_NONE = 0;
/** Idiom for getWithSetCallback() for "no minimum required as-of timestamp" */
const MIN_TIMESTAMP_NONE = 0.0;
* - relayers : Map of (action => EventRelayer object). Actions include "purge".
* - logger : LoggerInterface object
* - stats : LoggerInterface object
+ * - asyncHandler : A function that takes a callback and runs it later. If supplied,
+ * whenever a preemptive refresh would be triggered in getWithSetCallback(), the
+ * current cache value is still used instead. However, the async-handler function
+ * receives a WAN cache callback that, when run, will execute the value generation
+ * callback supplied by the getWithSetCallback() caller. The result will be saved
+ * as normal. The handler is expected to call the WAN cache callback at an opportune
+ * time (e.g. HTTP post-send), though generally within a few 100ms. [optional]
*/
public function __construct( array $params ) {
$this->cache = $params['cache'];
: new EventRelayerNull( [] );
$this->setLogger( isset( $params['logger'] ) ? $params['logger'] : new NullLogger() );
$this->stats = isset( $params['stats'] ) ? $params['stats'] : new NullStatsdDataFactory();
+ $this->asyncHandler = isset( $params['asyncHandler'] ) ? $params['asyncHandler'] : null;
}
public function setLogger( LoggerInterface $logger ) {
*/
public static function newEmpty() {
return new static( [
- 'cache' => new EmptyBagOStuff(),
- 'pool' => 'empty'
+ 'cache' => new EmptyBagOStuff()
] );
}
$purgeValues = [];
foreach ( $timeKeys as $timeKey ) {
$purge = isset( $wrappedValues[$timeKey] )
- ? self::parsePurgeValue( $wrappedValues[$timeKey] )
+ ? $this->parsePurgeValue( $wrappedValues[$timeKey] )
: false;
if ( $purge === false ) {
// Key is not set or invalid; regenerate
$newVal = $this->makePurgeValue( $now, self::HOLDOFF_TTL );
$this->cache->add( $timeKey, $newVal, self::CHECK_KEY_TTL );
- $purge = self::parsePurgeValue( $newVal );
+ $purge = $this->parsePurgeValue( $newVal );
}
$purgeValues[] = $purge;
}
* Note that "check" keys won't collide with other regular keys.
*
* @param string $key
- * @return float UNIX timestamp of the check key
+ * @return float UNIX timestamp
*/
final public function getCheckKeyTime( $key ) {
- $key = self::TIME_KEY_PREFIX . $key;
+ return $this->getMultiCheckKeyTime( [ $key ] )[$key];
+ }
- $purge = self::parsePurgeValue( $this->cache->get( $key ) );
- if ( $purge !== false ) {
- $time = $purge[self::FLD_TIME];
- } else {
- // Casting assures identical floats for the next getCheckKeyTime() calls
- $now = (string)$this->getCurrentTime();
- $this->cache->add( $key,
- $this->makePurgeValue( $now, self::HOLDOFF_TTL ),
- self::CHECK_KEY_TTL
- );
- $time = (float)$now;
+ /**
+ * Fetch the values of each timestamp "check" key
+ *
+ * This works like getCheckKeyTime() except it takes a list of keys
+ * and returns a map of timestamps instead of just that of one key
+ *
+ * This might be useful if both:
+ * - a) a class of entities each depend on hundreds of other entities
+ * - b) these other entities are depended upon by millions of entities
+ *
+ * The later entities can each use a "check" key to invalidate their dependee entities.
+ * However, it is expensive for the former entities to verify against all of the relevant
+ * "check" keys during each getWithSetCallback() call. A less expensive approach is to do
+ * these verifications only after a "time-till-verify" (TTV) has passed. This is a middle
+ * ground between using blind TTLs and using constant verification. The adaptiveTTL() method
+ * can be used to dynamically adjust the TTV. Also, the initial TTV can make use of the
+ * last-modified times of the dependant entities (either from the DB or the "check" keys).
+ *
+ * Example usage:
+ * @code
+ * $value = $cache->getWithSetCallback(
+ * $cache->makeGlobalKey( 'wikibase-item', $id ),
+ * self::INITIAL_TTV, // initial time-till-verify
+ * function ( $oldValue, &$ttv, &$setOpts, $oldAsOf ) use ( $checkKeys, $cache ) {
+ * $now = microtime( true );
+ * // Use $oldValue if it passes max ultimate age and "check" key comparisons
+ * if ( $oldValue &&
+ * $oldAsOf > max( $cache->getMultiCheckKeyTime( $checkKeys ) ) &&
+ * ( $now - $oldValue['ctime'] ) <= self::MAX_CACHE_AGE
+ * ) {
+ * // Increase time-till-verify by 50% of last time to reduce overhead
+ * $ttv = $cache->adaptiveTTL( $oldAsOf, self::MAX_TTV, self::MIN_TTV, 1.5 );
+ * // Unlike $oldAsOf, "ctime" is the ultimate age of the cached data
+ * return $oldValue;
+ * }
+ *
+ * $mtimes = []; // dependency last-modified times; passed by reference
+ * $value = [ 'data' => $this->fetchEntityData( $mtimes ), 'ctime' => $now ];
+ * // Guess time-till-change among the dependencies, e.g. 1/(total change rate)
+ * $ttc = 1 / array_sum( array_map(
+ * function ( $mtime ) use ( $now ) {
+ * return 1 / ( $mtime ? ( $now - $mtime ) : 900 );
+ * },
+ * $mtimes
+ * ) );
+ * // The time-to-verify should not be overly pessimistic nor optimistic
+ * $ttv = min( max( $ttc, self::MIN_TTV ), self::MAX_TTV );
+ *
+ * return $value;
+ * },
+ * [ 'staleTTL' => $cache::TTL_DAY ] // keep around to verify and re-save
+ * );
+ * @endcode
+ *
+ * @see WANObjectCache::getCheckKeyTime()
+ * @see WANObjectCache::getWithSetCallback()
+ *
+ * @param array $keys
+ * @return float[] Map of (key => UNIX timestamp)
+ * @since 1.31
+ */
+ final public function getMultiCheckKeyTime( array $keys ) {
+ $rawKeys = [];
+ foreach ( $keys as $key ) {
+ $rawKeys[$key] = self::TIME_KEY_PREFIX . $key;
}
- return $time;
+ $rawValues = $this->cache->getMulti( $rawKeys );
+ $rawValues += array_fill_keys( $rawKeys, false );
+
+ $times = [];
+ foreach ( $rawKeys as $key => $rawKey ) {
+ $purge = $this->parsePurgeValue( $rawValues[$rawKey] );
+ if ( $purge !== false ) {
+ $time = $purge[self::FLD_TIME];
+ } else {
+ // Casting assures identical floats for the next getCheckKeyTime() calls
+ $now = (string)$this->getCurrentTime();
+ $this->cache->add(
+ $rawKey,
+ $this->makePurgeValue( $now, self::HOLDOFF_TTL ),
+ self::CHECK_KEY_TTL
+ );
+ $time = (float)$now;
+ }
+
+ $times[$key] = $time;
+ }
+
+ return $times;
}
/**
* The "check" key essentially represents a last-modified time of an entity.
* When the key is touched, the timestamp will be updated to the current time.
* Keys using the "check" key via get(), getMulti(), or getWithSetCallback() will
- * be invalidated. The timestamp of "check" is treated as being HOLDOFF_TTL seconds
- * in the future by get*() methods in order to avoid race conditions where keys are
- * updated with stale values (e.g. from a DB replica DB).
+ * be invalidated. This approach is useful if many keys depend on a single entity.
*
- * This method is typically useful for keys with hardcoded names or in some cases
- * dynamically generated names, provided the number of such keys is modest. It sets a
- * high TTL on the "check" key, making it possible to know the timestamp of the last
- * change to the corresponding entities in most cases.
+ * The timestamp of the "check" key is treated as being HOLDOFF_TTL seconds in the
+ * future by get*() methods in order to avoid race conditions where keys are updated
+ * with stale values (e.g. from a lagged replica DB). A high TTL is set on the "check"
+ * key, making it possible to know the timestamp of the last change to the corresponding
+ * entities in most cases. This might use more cache space than resetCheckKey().
*
* When a few important keys get a large number of hits, a high cache time is usually
* desired as well as "lockTSE" logic. The resetCheckKey() method is less appropriate
*
* @param string $key Cache key made from makeKey() or makeGlobalKey()
* @param int $ttl Seconds to live for key updates. Special values are:
- * - WANObjectCache::TTL_INDEFINITE: Cache forever
- * - WANObjectCache::TTL_UNCACHEABLE: Do not cache at all
+ * - WANObjectCache::TTL_INDEFINITE: Cache forever (subject to LRU-style evictions)
+ * - WANObjectCache::TTL_UNCACHEABLE: Do not cache (if the key exists, it is not deleted)
* @param callable $callback Value generation function
* @param array $opts Options map:
* - checkKeys: List of "check" keys. The key at $key will be seen as invalid when either
- * touchCheckKey() or resetCheckKey() is called on any of these keys.
+ * touchCheckKey() or resetCheckKey() is called on any of the keys in this list. This
+ * is useful if thousands or millions of keys depend on the same entity. The entity can
+ * simply have its "check" key updated whenever the entity is modified.
* Default: [].
+ * - graceTTL: Consider reusing expired values instead of refreshing them if they expired
+ * less than this many seconds ago. The odds of a refresh becomes more likely over time,
+ * becoming certain once the grace period is reached. This can reduce traffic spikes
+ * when millions of keys are compared to the same "check" key and touchCheckKey()
+ * or resetCheckKey() is called on that "check" key.
+ * Default: WANObjectCache::GRACE_TTL_NONE.
* - lockTSE: If the key is tombstoned or expired (by checkKeys) less than this many seconds
* ago, then try to have a single thread handle cache regeneration at any given time.
* Other threads will try to use stale values if possible. If, on miss, the time since
* This is useful if the source of a key is suspected of having possibly changed
* recently, and the caller wants any such changes to be reflected.
* Default: WANObjectCache::MIN_TIMESTAMP_NONE.
- * - hotTTR: Expected time-till-refresh (TTR) for keys that average ~1 hit/second (1 Hz).
- * Keys with a hit rate higher than 1Hz will refresh sooner than this TTR and vise versa.
- * Such refreshes won't happen until keys are "ageNew" seconds old. The TTR is useful at
+ * - hotTTR: Expected time-till-refresh (TTR) in seconds for keys that average ~1 hit per
+ * second (e.g. 1Hz). Keys with a hit rate higher than 1Hz will refresh sooner than this
+ * TTR and vise versa. Such refreshes won't happen until keys are "ageNew" seconds old.
+ * This uses randomization to avoid triggering cache stampedes. The TTR is useful at
* reducing the impact of missed cache purges, since the effect of a heavily referenced
* key being stale is worse than that of a rarely referenced key. Unlike simply lowering
- * $ttl, seldomly used keys are largely unaffected by this option, which makes it possible
- * to have a high hit rate for the "long-tail" of less-used keys.
+ * $ttl, seldomly used keys are largely unaffected by this option, which makes it
+ * possible to have a high hit rate for the "long-tail" of less-used keys.
* Default: WANObjectCache::HOT_TTR.
* - lowTTL: Consider pre-emptive updates when the current TTL (seconds) of the key is less
* than this. It becomes more likely over time, becoming certain once the key is expired.
+ * This helps avoid cache stampedes that might be triggered due to the key expiring.
* Default: WANObjectCache::LOW_TTL.
* - ageNew: Consider popularity refreshes only once a key reaches this age in seconds.
* Default: WANObjectCache::AGE_NEW.
* Default: WANObjectCache::STALE_TTL_NONE
* @return mixed Value found or written to the key
* @note Options added in 1.28: version, busyValue, hotTTR, ageNew, pcGroup, minAsOf
+ * @note Options added in 1.31: staleTTL, graceTTL
* @note Callable type hints are not used to avoid class-autoloading
*/
final public function getWithSetCallback( $key, $ttl, $callback, array $opts = [] ) {
use ( $callback, $version ) {
if ( is_array( $oldValue )
&& array_key_exists( self::VFLD_DATA, $oldValue )
+ && array_key_exists( self::VFLD_VERSION, $oldValue )
+ && $oldValue[self::VFLD_VERSION] === $version
) {
$oldData = $oldValue[self::VFLD_DATA];
} else {
// VFLD_DATA is not set if an old, unversioned, key is present
$oldData = false;
+ $oldAsOf = null;
}
return [
$lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( self::LOW_TTL, $ttl );
$lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : self::TSE_NONE;
$staleTTL = isset( $opts['staleTTL'] ) ? $opts['staleTTL'] : self::STALE_TTL_NONE;
+ $graceTTL = isset( $opts['graceTTL'] ) ? $opts['graceTTL'] : self::GRACE_TTL_NONE;
$checkKeys = isset( $opts['checkKeys'] ) ? $opts['checkKeys'] : [];
$busyValue = isset( $opts['busyValue'] ) ? $opts['busyValue'] : null;
$popWindow = isset( $opts['hotTTR'] ) ? $opts['hotTTR'] : self::HOT_TTR;
$preCallbackTime = $this->getCurrentTime();
// Determine if a cached value regeneration is needed or desired
if ( $value !== false
- && $curTTL > 0
+ && $this->isAliveOrInGracePeriod( $curTTL, $graceTTL )
&& $this->isValid( $value, $versioned, $asOf, $minTime )
- && !$this->worthRefreshExpiring( $curTTL, $lowTTL )
- && !$this->worthRefreshPopular( $asOf, $ageNew, $popWindow, $preCallbackTime )
) {
- $this->stats->increment( "wanobjectcache.$kClass.hit.good" );
+ $preemptiveRefresh = (
+ $this->worthRefreshExpiring( $curTTL, $lowTTL ) ||
+ $this->worthRefreshPopular( $asOf, $ageNew, $popWindow, $preCallbackTime )
+ );
- return $value;
+ if ( !$preemptiveRefresh ) {
+ $this->stats->increment( "wanobjectcache.$kClass.hit.good" );
+
+ return $value;
+ } elseif ( $this->asyncHandler ) {
+ // Update the cache value later, such during post-send of an HTTP request
+ $func = $this->asyncHandler;
+ $func( function () use ( $key, $ttl, $callback, $opts, $asOf ) {
+ $opts['minAsOf'] = INF; // force a refresh
+ $this->doGetWithSetCallback( $key, $ttl, $callback, $opts, $asOf );
+ } );
+ $this->stats->increment( "wanobjectcache.$kClass.hit.refresh" );
+
+ return $value;
+ }
}
// A deleted key with a negative TTL left must be tombstoned
$isTombstone = ( $curTTL !== null && $value === false );
if ( $isTombstone && $lockTSE <= 0 ) {
// Use the INTERIM value for tombstoned keys to reduce regeneration load
- $lockTSE = 1;
+ $lockTSE = self::INTERIM_KEY_TTL;
}
// Assume a key is hot if requested soon after invalidation
$isHot = ( $curTTL !== null && $curTTL <= 0 && abs( $curTTL ) <= $lockTSE );
* @return mixed
*/
protected function getInterimValue( $key, $versioned, $minTime, &$asOf ) {
+ if ( !$this->useInterimHoldOffCaching ) {
+ return false; // disabled
+ }
+
$wrapped = $this->cache->get( self::INTERIM_KEY_PREFIX . $key );
list( $value ) = $this->unwrap( $wrapped, $this->getCurrentTime() );
if ( $value !== false && $this->isValid( $value, $versioned, $asOf, $minTime ) ) {
* @return bool Success
* @since 1.28
*/
- public function reap( $key, $purgeTimestamp, &$isStale = false ) {
+ final public function reap( $key, $purgeTimestamp, &$isStale = false ) {
$minAsOf = $purgeTimestamp + self::HOLDOFF_TTL;
$wrapped = $this->cache->get( self::VALUE_KEY_PREFIX . $key );
if ( is_array( $wrapped ) && $wrapped[self::FLD_TIME] < $minAsOf ) {
* @return bool Success
* @since 1.28
*/
- public function reapCheckKey( $key, $purgeTimestamp, &$isStale = false ) {
+ final public function reapCheckKey( $key, $purgeTimestamp, &$isStale = false ) {
$purge = $this->parsePurgeValue( $this->cache->get( self::TIME_KEY_PREFIX . $key ) );
if ( $purge && $purge[self::FLD_TIME] < $purgeTimestamp ) {
$isStale = true;
* @return ArrayIterator Iterator yielding (cache key => entity ID) in $entities order
* @since 1.28
*/
- public function makeMultiKeys( array $entities, callable $keyFunc ) {
+ final public function makeMultiKeys( array $entities, callable $keyFunc ) {
$map = [];
foreach ( $entities as $entity ) {
$map[$keyFunc( $entity, $this )] = $entity;
$this->processCaches = [];
}
+ /**
+ * Enable or disable the use of brief caching for tombstoned keys
+ *
+ * When a key is purged via delete(), there normally is a period where caching
+ * is hold-off limited to an extremely short time. This method will disable that
+ * caching, forcing the callback to run for any of:
+ * - WANObjectCache::getWithSetCallback()
+ * - WANObjectCache::getMultiWithSetCallback()
+ * - WANObjectCache::getMultiWithUnionSetCallback()
+ *
+ * This is useful when both:
+ * - a) the database used by the callback is known to be up-to-date enough
+ * for some particular purpose (e.g. replica DB has applied transaction X)
+ * - b) the caller needs to exploit that fact, and therefore needs to avoid the
+ * use of inherently volatile and possibly stale interim keys
+ *
+ * @see WANObjectCache::delete()
+ * @param bool $enabled Whether to enable interim caching
+ * @since 1.31
+ */
+ final public function useInterimHoldOffCaching( $enabled ) {
+ $this->useInterimHoldOffCaching = $enabled;
+ }
+
/**
* @param int $flag ATTR_* class constant
* @return int QOS_* class constant
* @return int Number of warmup key cache misses last round
* @since 1.30
*/
- public function getWarmupKeyMisses() {
+ final public function getWarmupKeyMisses() {
return $this->warmupKeyMisses;
}
return $ok;
}
+ /**
+ * Check if a key is fresh or in the grace window and thus due for randomized reuse
+ *
+ * If $curTTL > 0 (e.g. not expired) this returns true. Otherwise, the chance of returning
+ * true decrease steadily from 100% to 0% as the |$curTTL| moves from 0 to $graceTTL seconds.
+ * This handles widely varying levels of cache access traffic.
+ *
+ * If $curTTL <= -$graceTTL (e.g. already expired), then this returns false.
+ *
+ * @param float $curTTL Approximate TTL left on the key if present
+ * @param int $graceTTL Consider using stale values if $curTTL is greater than this
+ * @return bool
+ */
+ protected function isAliveOrInGracePeriod( $curTTL, $graceTTL ) {
+ if ( $curTTL > 0 ) {
+ return true;
+ } elseif ( $graceTTL <= 0 ) {
+ return false;
+ }
+
+ $ageStale = abs( $curTTL ); // seconds of staleness
+ $curGTTL = ( $graceTTL - $ageStale ); // current grace-time-to-live
+ if ( $curGTTL <= 0 ) {
+ return false; // already out of grace period
+ }
+
+ // Chance of using a stale value is the complement of the chance of refreshing it
+ return !$this->worthRefreshExpiring( $curGTTL, $graceTTL );
+ }
+
/**
* Check if a key is nearing expiration and thus due for randomized regeneration
*
- * This returns false if $curTTL >= $lowTTL. Otherwise, the chance
- * of returning true increases steadily from 0% to 100% as the $curTTL
- * moves from $lowTTL to 0 seconds. This handles widely varying
- * levels of cache access traffic.
+ * This returns false if $curTTL >= $lowTTL. Otherwise, the chance of returning true
+ * increases steadily from 0% to 100% as the $curTTL moves from $lowTTL to 0 seconds.
+ * This handles widely varying levels of cache access traffic.
+ *
+ * If $curTTL <= 0 (e.g. already expired), then this returns false.
*
* @param float $curTTL Approximate TTL left on the key if present
* @param float $lowTTL Consider a refresh when $curTTL is less than this
} elseif ( $curTTL >= $lowTTL ) {
return false;
} elseif ( $curTTL <= 0 ) {
- return true;
+ return false;
}
$chance = ( 1 - $curTTL / $lowTTL );
*/
protected function unwrap( $wrapped, $now ) {
// Check if the value is a tombstone
- $purge = self::parsePurgeValue( $wrapped );
+ $purge = $this->parsePurgeValue( $wrapped );
if ( $purge !== false ) {
// Purged values should always have a negative current $ttl
$curTTL = min( $purge[self::FLD_TIME] - $now, self::TINY_NEGATIVE );
* @return array|bool Array containing a UNIX timestamp (float) and holdoff period (integer),
* or false if value isn't a valid purge value
*/
- protected static function parsePurgeValue( $value ) {
+ protected function parsePurgeValue( $value ) {
if ( !is_string( $value ) ) {
return false;
}