From 187fd647232a1887c202d3624a5b8e7c7f674093 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Fri, 8 May 2015 22:20:25 -0700 Subject: [PATCH] Made triggerOpportunisticLinksUpdate() jobs make use of parser cache * On Wikipedia, for example, these jobs are a good percentage of all refreshLinks jobs; skipping the parse step should avoid runner CPU overhead * Also fixed bad TS_MW/TS_UNIX comparison * Moved the fudge factor to a constant and raised it a bit Bug: T98621 Change-Id: Id6d64972739df4b26847e4374f30ddcc7f93b54a --- includes/jobqueue/jobs/RefreshLinksJob.php | 32 +++++++++++++++++----- includes/page/WikiPage.php | 3 ++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/includes/jobqueue/jobs/RefreshLinksJob.php b/includes/jobqueue/jobs/RefreshLinksJob.php index 1252b0b5e2..749913aef7 100644 --- a/includes/jobqueue/jobs/RefreshLinksJob.php +++ b/includes/jobqueue/jobs/RefreshLinksJob.php @@ -37,6 +37,8 @@ class RefreshLinksJob extends Job { const PARSE_THRESHOLD_SEC = 1.0; + const CLOCK_FUDGE = 10; + function __construct( $title, $params = '' ) { parent::__construct( 'refreshLinks', $title, $params ); // A separate type is used just for cascade-protected backlinks @@ -140,22 +142,38 @@ class RefreshLinksJob extends Job { $parserOutput = false; $parserOptions = $page->makeParserOptions( 'canonical' ); - // If page_touched changed after this root job (with a good slave lag skew factor), - // then it is likely that any views of the pages already resulted in re-parses which - // are now in cache. This can be reused to avoid expensive parsing in some cases. + // If page_touched changed after this root job, then it is likely that + // any views of the pages already resulted in re-parses which are now in + // cache. The cache can be reused to avoid expensive parsing in some cases. 
if ( isset( $this->params['rootJobTimestamp'] ) ) { - $skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5; - if ( $page->getLinksTimestamp() > wfTimestamp( TS_MW, $skewedTimestamp ) ) { + $opportunistic = !empty( $this->params['isOpportunistic'] ); + + $skewedTimestamp = $this->params['rootJobTimestamp']; + if ( $opportunistic ) { + // Neither clock skew nor DB snapshot/slave lag matter much for such + // updates; focus on reusing the (often recently updated) cache + } else { + // For transclusion updates, the template changes must be reflected + $skewedTimestamp = wfTimestamp( TS_MW, + wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE + ); + } + + if ( $page->getLinksTimestamp() > $skewedTimestamp ) { // Something already updated the backlinks since this job was made return true; } - if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) { + + if ( $page->getTouched() >= $skewedTimestamp || $opportunistic ) { + // Something bumped page_touched since this job was made + // or the cache is otherwise suspected to be up-to-date $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions ); - if ( $parserOutput && $parserOutput->getCacheTime() <= $skewedTimestamp ) { + if ( $parserOutput && $parserOutput->getCacheTime() < $skewedTimestamp ) { $parserOutput = false; // too stale } } } + // Fetch the current revision and parse it if necessary... 
if ( $parserOutput == false ) { $start = microtime( true ); diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php index cc182a4258..7b33b02e71 100644 --- a/includes/page/WikiPage.php +++ b/includes/page/WikiPage.php @@ -3411,6 +3411,9 @@ class WikiPage implements Page, IDBAccessObject { // Check if the last link refresh was before page_touched if ( $this->getLinksTimestamp() < $this->getTouched() ) { + $params['isOpportunistic'] = true; + $params['rootJobTimestamp'] = $parserOutput->getCacheTime(); + JobQueueGroup::singleton()->push( EnqueueJob::newFromLocalJobs( new JobSpecification( 'refreshLinks', $params, array( 'removeDuplicates' => true ), $this->mTitle ) -- 2.20.1