Removed doCascadeProtectionUpdates method to avoid DB writes on page views
author Aaron Schulz <aschulz@wikimedia.org>
Thu, 12 Feb 2015 23:03:24 +0000 (15:03 -0800)
committer Aaron Schulz <aschulz@wikimedia.org>
Sun, 22 Feb 2015 21:36:13 +0000 (13:36 -0800)
* Use special prioritized refreshLinksJobs instead, which triggers when
  transcluded pages are changed
* Also added a triggerOpportunisticLinksUpdate() method to handle
  dynamic transcludes

bug: T89389
Change-Id: Iea952d4d2e660b7957eafb5f73fc87fab347dbe7

includes/DefaultSettings.php
includes/cache/BacklinkCache.php
includes/deferred/HTMLCacheUpdate.php
includes/deferred/LinksUpdate.php
includes/jobqueue/jobs/RefreshLinksJob.php
includes/page/Article.php
includes/page/WikiPage.php
includes/parser/ParserOutput.php
includes/poolcounter/PoolWorkArticleView.php

index d4cdf9e..bddccec 100644 (file)
@@ -6422,6 +6422,7 @@ $wgJobClasses = array(
        'PublishStashedFile' => 'PublishStashedFileJob',
        'ThumbnailRender' => 'ThumbnailRenderJob',
        'recentChangesUpdate' => 'RecentChangesUpdateJob',
+       'refreshLinksPrioritized' => 'RefreshLinksJob', // for cascading protection
        'null' => 'NullJob'
 );
 
index c6d9a18..c3f455e 100644 (file)
@@ -487,4 +487,55 @@ class BacklinkCache {
 
                return array( 'numRows' => $numRows, 'batches' => $batches );
        }
+
+       /**
+        * Get a Title iterator for cascade-protected template/file use backlinks
+        *
+        * @return TitleArray
+        * @since 1.25
+        */
+       public function getCascadeProtectedLinks() {
+               $dbr = $this->getDB();
+
+               // @todo: use UNION without breaking tests that use temp tables
+               $resSets = array();
+               $resSets[] = $dbr->select(
+                       array( 'templatelinks', 'page_restrictions', 'page' ),
+                       array( 'page_namespace', 'page_title', 'page_id' ),
+                       array(
+                               'tl_namespace' => $this->title->getNamespace(),
+                               'tl_title' => $this->title->getDBkey(),
+                               'tl_from = pr_page',
+                               'pr_cascade' => 1,
+                               'page_id = tl_from'
+                       ),
+                       __METHOD__,
+                       array( 'DISTINCT' )
+               );
+               if ( $this->title->getNamespace() == NS_FILE ) {
+                       $resSets[] = $dbr->select(
+                               array( 'imagelinks', 'page_restrictions', 'page' ),
+                               array( 'page_namespace', 'page_title', 'page_id' ),
+                               array(
+                                       'il_to' => $this->title->getDBkey(),
+                                       'il_from = pr_page',
+                                       'pr_cascade' => 1,
+                                       'page_id = il_from'
+                               ),
+                               __METHOD__,
+                               array( 'DISTINCT' )
+                       );
+               }
+
+               // Combine and de-duplicate the results
+               $mergedRes = array();
+               foreach ( $resSets as $res ) {
+                       foreach ( $res as $row ) {
+                               $mergedRes[$row->page_id] = $row;
+                       }
+               }
+
+               return TitleArray::newFromResult(
+                       new FakeResultWrapper( array_values( $mergedRes ) ) );
+       }
 }
index e02cfbc..862ac27 100644 (file)
@@ -43,7 +43,6 @@ class HTMLCacheUpdate implements DeferrableUpdate {
        }
 
        public function doUpdate() {
-
                $job = new HTMLCacheUpdateJob(
                        $this->mTitle,
                        array(
@@ -63,6 +62,5 @@ class HTMLCacheUpdate implements DeferrableUpdate {
                                $job->run(); // just do the purge query now
                        } );
                }
-
        }
 }
index 9c377df..e4f00e7 100644 (file)
@@ -228,12 +228,24 @@ class LinksUpdate extends SqlDataUpdate {
         * Which means do LinksUpdate on all pages that include the current page,
         * using the job queue.
         */
-       function queueRecursiveJobs() {
+       protected function queueRecursiveJobs() {
                self::queueRecursiveJobsForTable( $this->mTitle, 'templatelinks' );
                if ( $this->mTitle->getNamespace() == NS_FILE ) {
                        // Process imagelinks in case the title is or was a redirect
                        self::queueRecursiveJobsForTable( $this->mTitle, 'imagelinks' );
                }
+
+               $bc = $this->mTitle->getBacklinkCache();
+               // Get jobs for cascade-protected backlinks for a high priority queue.
+               // If meta-templates change to using a new template, the new template
+               // should be implicitly protected as soon as possible, if applicable.
+               // These jobs duplicate a subset of the above ones, but can run sooner.
+               // Whichever runs first generally no-ops the other one.
+               $jobs = array();
+               foreach ( $bc->getCascadeProtectedLinks() as $title ) {
+                       $jobs[] = new RefreshLinksJob( $title, array( 'prioritize' => true ) );
+               }
+               JobQueueGroup::singleton()->push( $jobs );
        }
 
        /**
@@ -253,6 +265,7 @@ class LinksUpdate extends SqlDataUpdate {
                                        "refreshlinks:{$table}:{$title->getPrefixedText()}"
                                )
                        );
+
                        JobQueueGroup::singleton()->push( $job );
                        JobQueueGroup::singleton()->deduplicateRootJob( $job );
                }
index 5d95792..1252b0b 100644 (file)
@@ -39,6 +39,10 @@ class RefreshLinksJob extends Job {
 
        function __construct( $title, $params = '' ) {
                parent::__construct( 'refreshLinks', $title, $params );
+               // A separate type is used just for cascade-protected backlinks
+               if ( !empty( $this->params['prioritize'] ) ) {
+                       $this->command .= 'Prioritized';
+               }
                // Base backlink update jobs and per-title update jobs can be de-duplicated.
                // If template A changes twice before any jobs run, a clean queue will have:
                //              (A base, A base)
@@ -100,6 +104,10 @@ class RefreshLinksJob extends Job {
                return true;
        }
 
+       /**
+        * @param Title $title
+        * @return bool
+        */
        protected function runForTitle( Title $title = null ) {
                $linkCache = LinkCache::singleton();
                $linkCache->clear();
index 59f2ae7..83c3241 100644 (file)
@@ -707,7 +707,7 @@ class Article implements Page {
                }
 
                # Get the ParserOutput actually *displayed* here.
-               # Note that $this->mParserOutput is the *current* version output.
+               # Note that $this->mParserOutput is the *current*/oldid version output.
                $pOutput = ( $outputDone instanceof ParserOutput )
                        ? $outputDone // object fetched by hook
                        : $this->mParserOutput;
index d30f589..fe61f6f 100644 (file)
@@ -3378,70 +3378,35 @@ class WikiPage implements Page, IDBAccessObject {
        }
 
        /**
-        * Updates cascading protections
+        * Opportunistically enqueue link update jobs given fresh parser output if useful
         *
-        * @param ParserOutput $parserOutput ParserOutput object for the current version
+        * @param ParserOutput $parserOutput Current version page output
+        * @return bool Whether a job was pushed
+        * @since 1.25
         */
-       public function doCascadeProtectionUpdates( ParserOutput $parserOutput ) {
-               if ( wfReadOnly() || !$this->mTitle->areRestrictionsCascading() ) {
-                       return;
-               }
-
-               // templatelinks or imagelinks tables may have become out of sync,
-               // especially if using variable-based transclusions.
-               // For paranoia, check if things have changed and if
-               // so apply updates to the database. This will ensure
-               // that cascaded protections apply as soon as the changes
-               // are visible.
-
-               // Get templates from templatelinks and images from imagelinks
-               $id = $this->getId();
-
-               $dbLinks = array();
-
-               $dbr = wfGetDB( DB_SLAVE );
-               $res = $dbr->select( array( 'templatelinks' ),
-                       array( 'tl_namespace', 'tl_title' ),
-                       array( 'tl_from' => $id ),
-                       __METHOD__
-               );
-
-               foreach ( $res as $row ) {
-                       $dbLinks["{$row->tl_namespace}:{$row->tl_title}"] = true;
+       public function triggerOpportunisticLinksUpdate( ParserOutput $parserOutput ) {
+               if ( wfReadOnly() ) {
+                       return false;
                }
 
-               $dbr = wfGetDB( DB_SLAVE );
-               $res = $dbr->select( array( 'imagelinks' ),
-                       array( 'il_to' ),
-                       array( 'il_from' => $id ),
-                       __METHOD__
-               );
-
-               foreach ( $res as $row ) {
-                       $dbLinks[NS_FILE . ":{$row->il_to}"] = true;
+               if ( $this->mTitle->areRestrictionsCascading() ) {
+                       // If the page is cascade protecting, the links should really be up-to-date
+                       $params = array( 'prioritize' => true );
+               } elseif ( $parserOutput->hasDynamicContent() ) {
+                       // Assume the output contains time/random based magic words
+                       $params = array();
+               } else {
+                       // If the inclusions are deterministic, the edit-triggered link jobs are enough
+                       return false;
                }
 
-               // Get templates and images from parser output.
-               $poLinks = array();
-               foreach ( $parserOutput->getTemplates() as $ns => $templates ) {
-                       foreach ( $templates as $dbk => $id ) {
-                               $poLinks["$ns:$dbk"] = true;
-                       }
-               }
-               foreach ( $parserOutput->getImages() as $dbk => $id ) {
-                       $poLinks[NS_FILE . ":$dbk"] = true;
+               // Check if the last link refresh was before page_touched
+               if ( $this->getLinksTimestamp() < $this->getTouched() ) {
+                       JobQueueGroup::singleton()->push( new RefreshLinksJob( $this->mTitle, $params ) );
+                       return true;
                }
 
-               // Get the diff
-               $links_diff = array_diff_key( $poLinks, $dbLinks );
-
-               if ( count( $links_diff ) > 0 ) {
-                       // Whee, link updates time.
-                       // Note: we are only interested in links here. We don't need to get
-                       // other DataUpdate items from the parser output.
-                       $u = new LinksUpdate( $this->mTitle, $parserOutput, false );
-                       $u->doUpdate();
-               }
+               return false;
        }
 
        /**
index e9e72be..da7842a 100644 (file)
@@ -879,6 +879,22 @@ class ParserOutput extends CacheTime {
                $this->mLimitReportData[$key] = $value;
        }
 
+       /**
+        * Check whether the cache TTL was lowered due to dynamic content
+        *
+        * When content is determined by more than hard state (e.g. page edits),
+        * such as template/file transclusions based on the current timestamp or
+        * extension tags that generate lists based on queries, this returns true.
+        *
+        * @return bool
+        * @since 1.25
+        */
+       public function hasDynamicContent() {
+               global $wgParserCacheExpireTime;
+
+               return $this->getCacheExpiry() < $wgParserCacheExpireTime;
+       }
+
        /**
         * Get or set the prevent-clickjacking flag
         *
index da20f94..54cbb27 100644 (file)
@@ -159,7 +159,7 @@ class PoolWorkArticleView extends PoolCounterWork {
                }
 
                if ( $isCurrent ) {
-                       $this->page->doCascadeProtectionUpdates( $this->parserOutput );
+                       $this->page->triggerOpportunisticLinksUpdate( $this->parserOutput );
                }
 
                return true;