From c7b932af6bbcb380cf250d5e3d7cd480514ef747 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Thu, 1 Oct 2015 00:24:18 -0700 Subject: [PATCH] Lower CDN cache TTL when slave lag is high * $wgCdnMaxageLagged controls exactly what that TTL is and the usual "max lag" settings determine what "high" is for lag (which already makes the site read-only). * This helps avoid stale content getting stuck in CDN for a month just because a slave was lagged for a minute. Of course race conditions with normal slave lag and WAN cache relay purges can still lead to this problem, though the scope of it is reduced. Bug: T113204 Change-Id: I7ff0a8d88665f4e557566e7b412e75edee2627fe --- RELEASE-NOTES-1.27 | 3 +++ includes/DefaultSettings.php | 15 +++++++++++---- includes/MediaWiki.php | 7 +++++++ includes/OutputPage.php | 15 ++++++++++++++- includes/db/loadbalancer/LBFactory.php | 15 +++++++++++++++ includes/db/loadbalancer/LoadBalancer.php | 10 ++++++++++ 6 files changed, 60 insertions(+), 5 deletions(-) diff --git a/RELEASE-NOTES-1.27 b/RELEASE-NOTES-1.27 index d9d835949a..2310cdb175 100644 --- a/RELEASE-NOTES-1.27 +++ b/RELEASE-NOTES-1.27 @@ -29,6 +29,9 @@ production. * Added a new hook, 'UserMailerSplitTo', to control which users have to be emailed separately (ie. there is a single address in the To: field) so user-specific changes to the email can be applied safely. +* $wgCdnMaxageLagged was added, which limits the CDN cache TTL + when any load balancer uses a DB that is lagged beyond the 'max lag' + setting in the relevant section of $wgLBFactoryConf. ==== External libraries ==== diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index deb85f5b2b..3153afbf03 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2567,14 +2567,21 @@ $wgVaryOnXFP = false; $wgInternalServer = false; /** - * Cache timeout for the squid, will be sent as s-maxage (without ESI) or - * Surrogate-Control (with ESI). 
Without ESI, you should strip out s-maxage in - * the Squid config. + * Cache TTL for the CDN sent as s-maxage (without ESI) or + * Surrogate-Control (with ESI). Without ESI, you should strip + * out s-maxage in the Squid config. * -* 18000 seconds = 5 hours, more cache hits with 2678400 = 31 days. + * 18000 seconds = 5 hours, more cache hits with 2678400 = 31 days. */ $wgSquidMaxage = 18000; +/** + * Cache timeout for the CDN when DB slave lag is high + * @see $wgSquidMaxage + * @since 1.27 + */ +$wgCdnMaxageLagged = 30; + /** * Default maximum age for raw CSS/JS accesses * diff --git a/includes/MediaWiki.php b/includes/MediaWiki.php index e29319b594..418ed8b008 100644 --- a/includes/MediaWiki.php +++ b/includes/MediaWiki.php @@ -511,6 +511,13 @@ class MediaWiki { $expires = time() + $this->config->get( 'DataCenterUpdateStickTTL' ); $request->response()->setCookie( 'UseDC', 'master', $expires ); } + + // Avoid letting a few seconds of slave lag cause a month of stale data + if ( $factory->laggedSlaveUsed() ) { + $maxAge = $this->config->get( 'CdnMaxageLagged' ); + $this->context->getOutput()->lowerCdnMaxage( $maxAge ); + wfDebugLog( 'replication', "Lagged DB used; CDN cache TTL limited to $maxAge seconds" ); + } } /** diff --git a/includes/OutputPage.php b/includes/OutputPage.php index f680d456d2..03ae8c951a 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -236,6 +236,8 @@ class OutputPage extends ContextSource { /** @var int Cache stuff. Looks like mEnableClientCache */ protected $mSquidMaxage = 0; + /** @var int Upper limit on mSquidMaxage */ + protected $mCdnMaxageLimit = INF; /** * @var bool Controls if anti-clickjacking / frame-breaking headers will @@ -1945,7 +1947,18 @@ class OutputPage extends ContextSource { * @param int $maxage Maximum cache time on the Squid, in seconds. 
*/ public function setSquidMaxage( $maxage ) { - $this->mSquidMaxage = $maxage; + $this->mSquidMaxage = min( $maxage, $this->mCdnMaxageLimit ); + } + + /** + * Lower the value of the "s-maxage" part of the "Cache-control" HTTP header + * + * @param int $maxage Maximum cache time on the Squid, in seconds + * @since 1.27 + */ + public function lowerCdnMaxage( $maxage ) { + $this->mCdnMaxageLimit = min( $maxage, $this->mCdnMaxageLimit ); + $this->setSquidMaxage( $this->mSquidMaxage ); } /** diff --git a/includes/db/loadbalancer/LBFactory.php b/includes/db/loadbalancer/LBFactory.php index e5fb09435f..a06d826e2c 100644 --- a/includes/db/loadbalancer/LBFactory.php +++ b/includes/db/loadbalancer/LBFactory.php @@ -211,6 +211,21 @@ abstract class LBFactory { $this->forEachLB( function ( LoadBalancer $lb ) use ( &$ret ) { $ret = $ret || $lb->hasMasterChanges(); } ); + + return $ret; + } + + /** + * Determine if any lagged slave connection was used + * @since 1.27 + * @return bool + */ + public function laggedSlaveUsed() { + $ret = false; + $this->forEachLB( function ( LoadBalancer $lb ) use ( &$ret ) { + $ret = $ret || $lb->laggedSlaveUsed(); + } ); + return $ret; } diff --git a/includes/db/loadbalancer/LoadBalancer.php b/includes/db/loadbalancer/LoadBalancer.php index a0ef753b42..95e71c7d65 100644 --- a/includes/db/loadbalancer/LoadBalancer.php +++ b/includes/db/loadbalancer/LoadBalancer.php @@ -1131,6 +1131,7 @@ class LoadBalancer { } /** + * @note This method will trigger a DB connection if not yet done * @return bool Whether the generic connection for reads is highly "lagged" */ public function getLaggedSlaveMode() { @@ -1140,6 +1141,15 @@ class LoadBalancer { return $this->mLaggedSlaveMode; } + /** + * @note This method will never cause a new DB connection + * @return bool Whether any generic connection used for reads was highly "lagged" + * @since 1.27 + */ + public function laggedSlaveUsed() { + return $this->mLaggedSlaveMode; + } + /** * Disables/enables lag 
checks * @param null|bool $mode -- 2.20.1