From: Aaron Schulz Date: Tue, 3 May 2016 00:31:03 +0000 (-0700) Subject: Tweak RefreshLinksJob cache logic X-Git-Tag: 1.31.0-rc.0~7075^2 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/banques/?a=commitdiff_plain;h=a6f75ac03cf5b2d42a8b6b50f932d9e672fcf22e;p=lhc%2Fweb%2Fwiklou.git Tweak RefreshLinksJob cache logic * Make this actually use the cache beyond edge cases by making the page_touched check less strict. The final check on the cache timestamp is good enough. * Log metrics to statsd to give visibility. Change-Id: I14c14846a7b68d079e1a29c6d50e354a3c1926d6 --- diff --git a/includes/jobqueue/jobs/RefreshLinksJob.php b/includes/jobqueue/jobs/RefreshLinksJob.php index c2977152f2..b7653be79f 100644 --- a/includes/jobqueue/jobs/RefreshLinksJob.php +++ b/includes/jobqueue/jobs/RefreshLinksJob.php @@ -20,6 +20,7 @@ * @file * @ingroup JobQueue */ +use MediaWiki\MediaWikiServices; /** * Job to update link tables for pages @@ -149,14 +150,16 @@ class RefreshLinksJob extends Job { $revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST ); } + $stats = MediaWikiServices::getInstance()->getStatsdDataFactory(); + if ( !$revision ) { + $stats->increment( 'refreshlinks.rev_not_found' ); $this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" ); return false; // just deleted? - } - - if ( !$revision->isCurrent() ) { + } elseif ( !$revision->isCurrent() ) { // If the revision isn't current, there's no point in doing a bunch // of work just to fail at the lockAndGetLatest() check later. + $stats->increment( 'refreshlinks.rev_not_current' ); $this->setLastError( "Revision {$revision->getId()} is not current" ); return false; } @@ -188,12 +191,12 @@ class RefreshLinksJob extends Job { if ( $page->getLinksTimestamp() > $skewedTimestamp ) { // Something already updated the backlinks since this job was made + $stats->increment( 'refreshlinks.update_skipped' ); return true; } - if ( $page->getTouched() >= $skewedTimestamp || $opportunistic ) { - // Something bumped page_touched since this job was made or the cache is - // otherwise suspected to be up-to-date. As long as the cache rev ID matches + if ( $page->getTouched() >= $this->params['rootJobTimestamp'] || $opportunistic ) { + // Cache is suspected to be up-to-date. As long as the cache rev ID matches // and it reflects the job's triggering change, then it is usable. $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions ); if ( !$parserOutput @@ -206,7 +209,9 @@ class RefreshLinksJob extends Job { } // Fetch the current revision and parse it if necessary... - if ( !$parserOutput ) { + if ( $parserOutput ) { + $stats->increment( 'refreshlinks.parser_cached' ); + } else { $start = microtime( true ); // Revision ID must be passed to the parser output to get revision variables correct $parserOutput = $content->getParserOutput( @@ -224,6 +229,7 @@ class RefreshLinksJob extends Job { $parserOutput, $page, $parserOptions, $ctime, $revision->getId() ); } + $stats->increment( 'refreshlinks.parser_uncached' ); } $updates = $content->getSecondaryDataUpdates( @@ -257,6 +263,7 @@ class RefreshLinksJob extends Job { // serialized, it would be OK to update links based on older revisions since it // would eventually get to the latest. Since that is not the case (by design), // only update the link tables to a state matching the current revision's output. + $stats->increment( 'refreshlinks.rev_cas_failure' ); $this->setLastError( "page_latest changed from {$revision->getId()} to $latestNow" ); return false; }