merging latest master
[lhc/web/wiklou.git] / includes / job / RefreshLinksJob.php
index cc03070..4848e0a 100644 (file)
@@ -37,7 +37,6 @@ class RefreshLinksJob extends Job {
         * @return boolean success
         */
        function run() {
-               global $wgParser, $wgContLang;
                wfProfileIn( __METHOD__ );
 
                $linkCache = LinkCache::singleton();
@@ -49,26 +48,40 @@ class RefreshLinksJob extends Job {
                        return false;
                }
 
-               $revision = Revision::newFromTitle( $this->title );
+               # Wait for the DB of the current/next slave DB handle to catch up to the master.
+               # This way, we get the correct page_latest for templates or files that just changed
+               # milliseconds ago, having triggered this job to begin with.
+               if ( isset( $this->params['masterPos'] ) ) {
+                       wfGetLB()->waitFor( $this->params['masterPos'] );
+               }
+
+               $revision = Revision::newFromTitle( $this->title, 0, Revision::READ_NORMAL );
                if ( !$revision ) {
-                       $this->error = 'refreshLinks: Article not found "' . $this->title->getPrefixedDBkey() . '"';
+                       $this->error = 'refreshLinks: Article not found "' .
+                               $this->title->getPrefixedDBkey() . '"';
                        wfProfileOut( __METHOD__ );
-                       return false;
+                       return false; // XXX: what if it was just deleted?
                }
 
-               wfProfileIn( __METHOD__.'-parse' );
+               self::runForTitleInternal( $this->title, $revision, __METHOD__ );
+
+               wfProfileOut( __METHOD__ );
+               return true;
+       }
+
+       public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
+               global $wgContLang;
+
+               wfProfileIn( $fname . '-parse' );
                $options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
                $content = $revision->getContent();
-               $parserOutput = $content->getParserOutput( $this->title, $revision->getId(), $options, false );
-               wfProfileOut( __METHOD__.'-parse' );
-               wfProfileIn( __METHOD__.'-update' );
+               $parserOutput = $content->getParserOutput( $title, $revision->getId(), $options, false );
+               wfProfileOut( $fname . '-parse' );
 
-               $updates = $content->getSecondaryDataUpdates( $this->title, null, false, $parserOutput  );
+               wfProfileIn( $fname . '-update' );
+               $updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput  );
                DataUpdate::runUpdates( $updates );
-
-               wfProfileOut( __METHOD__.'-update' );
-               wfProfileOut( __METHOD__ );
-               return true;
+               wfProfileOut( $fname . '-update' );
        }
 }
 
@@ -79,6 +92,7 @@ class RefreshLinksJob extends Job {
  * @ingroup JobQueue
  */
 class RefreshLinksJob2 extends Job {
+       const MAX_TITLES_RUN = 10;
 
        function __construct( $title, $params, $id = 0 ) {
                parent::__construct( 'refreshLinks2', $title, $params, $id );
@@ -89,64 +103,100 @@ class RefreshLinksJob2 extends Job {
         * @return boolean success
         */
        function run() {
-               global $wgParser, $wgContLang;
-
                wfProfileIn( __METHOD__ );
 
                $linkCache = LinkCache::singleton();
                $linkCache->clear();
 
-               if( is_null( $this->title ) ) {
+               if ( is_null( $this->title ) ) {
                        $this->error = "refreshLinks2: Invalid title";
                        wfProfileOut( __METHOD__ );
                        return false;
-               }
-               if( !isset($this->params['start']) || !isset($this->params['end']) ) {
+               } elseif ( !isset( $this->params['start'] ) || !isset( $this->params['end'] ) ) {
                        $this->error = "refreshLinks2: Invalid params";
                        wfProfileOut( __METHOD__ );
                        return false;
                }
+
                // Back compat for pre-r94435 jobs
                $table = isset( $this->params['table'] ) ? $this->params['table'] : 'templatelinks';
-               $titles = $this->title->getBacklinkCache()->getLinks( 
-                       $table, $this->params['start'], $this->params['end']);
-               
-               # Not suitable for page load triggered job running!
-               # Gracefully switch to refreshLinks jobs if this happens.
-               if( php_sapi_name() != 'cli' ) {
+
+               // Avoid slave lag when fetching templates
+               if ( isset( $this->params['masterPos'] ) ) {
+                       $masterPos = $this->params['masterPos'];
+               } elseif ( wfGetLB()->getServerCount() > 1  ) {
+                       $masterPos = wfGetLB()->getMasterPos();
+               } else {
+                       $masterPos = false;
+               }
+
+               $titles = $this->title->getBacklinkCache()->getLinks(
+                       $table, $this->params['start'], $this->params['end'] );
+
+               if ( $titles->count() > self::MAX_TITLES_RUN ) {
+                       # We don't want to parse too many pages per job as it can starve other jobs.
+                       # If there are too many pages to parse, break this up into smaller jobs. By passing
+                       # in the master position here we can cut down on the time spent waiting for slaves to
+                       # catch up by the runners handling these jobs since time will have passed between now
+                       # and when they pop these jobs off the queue.
+                       $start = 0; // batch start
+                       $end   = 0; // batch end
+                       $bsize = 0; // batch size
+                       $first = true; // first of batch
+                       $jobs  = array();
+                       foreach ( $titles as $title ) {
+                               $start = $first ? $title->getArticleId() : $start;
+                               $end   = $title->getArticleId();
+                               $first = false;
+                               if ( ++$bsize >= self::MAX_TITLES_RUN ) {
+                                       $jobs[] = new RefreshLinksJob2( $this->title, array(
+                                               'table'     => $table,
+                                               'start'     => $start,
+                                               'end'       => $end,
+                                               'masterPos' => $masterPos
+                                       ) );
+                                       $first = true;
+                                       $start = $end = $bsize = 0;
+                               }
+                       }
+                       if ( $bsize > 0 ) { // group remaining pages into a job
+                               $jobs[] = new RefreshLinksJob2( $this->title, array(
+                                       'table'     => $table,
+                                       'start'     => $start,
+                                       'end'       => $end,
+                                       'masterPos' => $masterPos
+                               ) );
+                       }
+                       Job::batchInsert( $jobs );
+               } elseif ( php_sapi_name() != 'cli' ) {
+                       # Not suitable for page load triggered job running!
+                       # Gracefully switch to refreshLinks jobs if this happens.
                        $jobs = array();
                        foreach ( $titles as $title ) {
-                               $jobs[] = new RefreshLinksJob( $title, '' );
+                               $jobs[] = new RefreshLinksJob( $title, array( 'masterPos' => $masterPos ) );
                        }
                        Job::batchInsert( $jobs );
-
-                       wfProfileOut( __METHOD__ );
-                       return true;
-               }
-               $options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
-               # Re-parse each page that transcludes this page and update their tracking links...
-               foreach ( $titles as $title ) {
-                       $revision = Revision::newFromTitle( $title );
-                       if ( !$revision ) {
-                               $this->error = 'refreshLinks: Article not found "' . $title->getPrefixedDBkey() . '"';
-                               wfProfileOut( __METHOD__ );
-                               return false;
+               } else {
+                       # Wait for the DB of the current/next slave DB handle to catch up to the master.
+                       # This way, we get the correct page_latest for templates or files that just changed
+                       # milliseconds ago, having triggered this job to begin with.
+                       if ( $masterPos ) {
+                               wfGetLB()->waitFor( $masterPos );
+                       }
+                       # Re-parse each page that transcludes this page and update their tracking links...
+                       foreach ( $titles as $title ) {
+                               $revision = Revision::newFromTitle( $title, 0, Revision::READ_NORMAL );
+                               if ( !$revision ) {
+                                       $this->error = 'refreshLinks: Article not found "' .
+                                               $title->getPrefixedDBkey() . '"';
+                                       continue; // skip this page
+                               }
+                               RefreshLinksJob::runForTitleInternal( $title, $revision, __METHOD__ );
+                               wfWaitForSlaves();
                        }
-                       wfProfileIn( __METHOD__.'-parse' );
-                       $options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
-                       $content = $revision->getContent();
-                       $parserOutput = $content->getParserOutput( $title, $revision->getId(), $options, false );
-                       wfProfileOut( __METHOD__.'-parse' );
-                       wfProfileIn( __METHOD__.'-update' );
-
-                       $updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput  );
-                       DataUpdate::runUpdates( $updates );
-
-                       wfProfileOut( __METHOD__.'-update' );
-                       wfWaitForSlaves();
                }
-               wfProfileOut( __METHOD__ );
 
+               wfProfileOut( __METHOD__ );
                return true;
        }
 }