From 48a77e1d830e93e28375e1be2252c572af44eae5 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Wed, 27 Nov 2013 22:43:00 -0800 Subject: [PATCH] Avoid parsing more in refreshLinksJobs * This reuses the parser cache in some cases when possible * Clarified the return value of CacheTime::getCacheTime() * A few documentation tweaks Change-Id: I80b7c6404b3f8c48b53c3bba96115dbf94d80873 --- includes/job/Job.php | 4 ++- includes/job/jobs/RefreshLinksJob.php | 40 +++++++++++++++++++-------- includes/parser/CacheTime.php | 7 +++-- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/includes/job/Job.php b/includes/job/Job.php index 3f44a918ff..e33baf535a 100644 --- a/includes/job/Job.php +++ b/includes/job/Job.php @@ -241,7 +241,9 @@ abstract class Job { /** * @see JobQueue::deduplicateRootJob() * @param string $key A key that identifies the task - * @return array + * @return array Map of: + * - rootJobSignature : hash (e.g. SHA1) that identifies the task + * - rootJobTimestamp : TS_MW timestamp of this instance of the task * @since 1.21 */ public static function newRootJobParams( $key ) { diff --git a/includes/job/jobs/RefreshLinksJob.php b/includes/job/jobs/RefreshLinksJob.php index 0372d85927..ea1d5962ec 100644 --- a/includes/job/jobs/RefreshLinksJob.php +++ b/includes/job/jobs/RefreshLinksJob.php @@ -119,20 +119,38 @@ class RefreshLinksJob extends Job { wfGetLB()->waitFor( $this->params['masterPos'] ); } - $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); - if ( !$revision ) { - $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" ); - return false; // XXX: what if it was just deleted? + $parserOutput = false; + // If page_touched changed after this root job (with a good slave lag skew factor), + // then it is likely that any views of the pages already resulted in re-parses which + // are now in cache. This can be reused to avoid expensive parsing in some cases. + if ( isset( $this->params['rootJobTimestamp'] ) ) { + $page = WikiPage::factory( $title ); + $skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5; + if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) { + $parserOptions = $page->makeParserOptions( 'canonical' ); + $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions ); + if ( $parserOutput->getCacheTime() <= $skewedTimestamp ) { + $parserOutput = false; // too stale + } + } } + // Fetch the current revision and parse it if necessary... + if ( $parserOutput == false ) { + $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); + if ( !$revision ) { + $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" ); + return false; // XXX: what if it was just deleted? + } - $content = $revision->getContent( Revision::RAW ); - if ( !$content ) { - // If there is no content, pretend the content is empty - $content = $revision->getContentHandler()->makeEmptyContent(); - } + $content = $revision->getContent( Revision::RAW ); + if ( !$content ) { + // If there is no content, pretend the content is empty + $content = $revision->getContentHandler()->makeEmptyContent(); + } - // Revision ID must be passed to the parser output to get revision variables correct - $parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false ); + // Revision ID must be passed to the parser output to get revision variables correct + $parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false ); + } $updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput ); DataUpdate::runUpdates( $updates ); diff --git a/includes/parser/CacheTime.php b/includes/parser/CacheTime.php index a4203b0894..7b8935a646 100644 --- a/includes/parser/CacheTime.php +++ b/includes/parser/CacheTime.php @@ -37,14 +37,17 @@ class CacheTime { $mCacheExpiry = null, # Seconds after which the object should expire, use 0 for uncachable. Used in ParserCache. $mContainsOldMagic; # Boolean variable indicating if the input contained variables like {{CURRENTDAY}} - function getCacheTime() { return $this->mCacheTime; } + /** + * @return string TS_MW timestamp + */ + function getCacheTime() { return wfTimestamp( TS_MW, $this->mCacheTime ); } function containsOldMagic() { return $this->mContainsOldMagic; } function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); } /** * setCacheTime() sets the timestamp expressing when the page has been rendered. - * This doesn not control expiry, see updateCacheExpiry() for that! + * This does not control expiry, see updateCacheExpiry() for that! * @param $t string * @return string */ -- 2.20.1