Merge branch 'master' into Wikidata
[lhc/web/wiklou.git] / maintenance / refreshLinks.php
index d481a5b..dd8c8a7 100644 (file)
@@ -1,5 +1,7 @@
 <?php
 /**
+ * Refresh link tables.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -58,19 +60,17 @@ class RefreshLinks extends Maintenance {
         */
        private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
                                                $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
-               global $wgUser, $wgParser, $wgUseTidy;
+               global $wgParser, $wgUseTidy;
 
                $reportingInterval = 100;
                $dbr = wfGetDB( DB_SLAVE );
                $start = intval( $start );
 
-               # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
-               $wgUser->setOption( 'math', MW_MATH_SOURCE );
+               // Give extensions a chance to optimize settings
+               wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );
 
                # Don't generate extension images (e.g. Timeline)
-               if ( method_exists( $wgParser, "clearTagHooks" ) ) {
-                       $wgParser->clearTagHooks();
-               }
+               $wgParser->clearTagHooks();
 
                # Don't use HTML tidy
                $wgUseTidy = false;
@@ -79,22 +79,35 @@ class RefreshLinks extends Maintenance {
 
                if ( $oldRedirectsOnly ) {
                        # This entire code path is cut-and-pasted from below.  Hurrah.
-                       $res = $dbr->query(
-                               "SELECT page_id " .
-                               "FROM page " .
-                               "LEFT JOIN redirect ON page_id=rd_from " .
-                               "WHERE page_is_redirect=1 AND rd_from IS NULL AND " .
-                               ( $end == 0 ? "page_id >= $start"
-                                                  : "page_id BETWEEN $start AND $end" ),
-                               __METHOD__
+
+                       $conds = array(
+                               "page_is_redirect=1",
+                               "rd_from IS NULL"
+                       );
+
+                       if ( $end == 0 ) {
+                               $conds[] = "page_id >= $start";
+                       } else {
+                               $conds[] = "page_id BETWEEN $start AND $end";
+                       }
+
+                       $res = $dbr->select(
+                               array( 'page', 'redirect' ),
+                               'page_id',
+                               $conds,
+                               __METHOD__,
+                               array(),
+                               array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
                        );
                        $num = $dbr->numRows( $res );
                        $this->output( "Refreshing $num old redirects from $start...\n" );
 
+                       $i = 0;
+
                        foreach ( $res as $row ) {
                                if ( !( ++$i % $reportingInterval ) ) {
                                        $this->output( "$i\n" );
-                                       wfWaitForSlaves( $maxLag );
+                                       wfWaitForSlaves();
                                }
                                $this->fixRedirect( $row->page_id );
                        }
@@ -114,12 +127,13 @@ class RefreshLinks extends Maintenance {
                        foreach ( $res as $row ) {
                                if ( !( ++$i % $reportingInterval ) ) {
                                        $this->output( "$i\n" );
-                                       wfWaitForSlaves( $maxLag );
+                                       wfWaitForSlaves();
                                }
-                               if ( $redirectsOnly )
+                               if ( $redirectsOnly ) {
                                        $this->fixRedirect( $row->page_id );
-                               else
+                               } else {
                                        self::fixLinksFromArticle( $row->page_id );
+                               }
                        }
                } else {
                        if ( !$end ) {
@@ -134,7 +148,7 @@ class RefreshLinks extends Maintenance {
 
                                if ( !( $id % $reportingInterval ) ) {
                                        $this->output( "$id\n" );
-                                       wfWaitForSlaves( $maxLag );
+                                       wfWaitForSlaves();
                                }
                                $this->fixRedirect( $id );
                        }
@@ -147,7 +161,7 @@ class RefreshLinks extends Maintenance {
 
                                        if ( !( $id % $reportingInterval ) ) {
                                                $this->output( "$id\n" );
-                                               wfWaitForSlaves( $maxLag );
+                                               wfWaitForSlaves();
                                        }
                                        self::fixLinksFromArticle( $id );
                                }
@@ -170,17 +184,15 @@ class RefreshLinks extends Maintenance {
                                __METHOD__ );
                        return;
                }
-               $article = new Article( $title );
 
-               $rt = $article->followRedirect();
+               $page = WikiPage::factory( $title );
+               $rt = $page->getRedirectTarget();
 
-               if ( !$rt || !is_object( $rt ) ) {
+               if ( $rt === null ) {
                        // $title is not a redirect
                        // Delete any redirect table entry for it
                        $dbw->delete( 'redirect', array( 'rd_from' => $id ),
                                __METHOD__ );
-               } else {
-                       $article->updateRedirectOn( $dbw, $rt );
                }
        }
 
@@ -199,31 +211,38 @@ class RefreshLinks extends Maintenance {
                if ( is_null( $title ) ) {
                        return;
                }
-               $dbw->begin();
 
                $revision = Revision::newFromTitle( $title );
                if ( !$revision ) {
                        return;
                }
 
+               $dbw->begin( __METHOD__ );
+
                $options = new ParserOptions;
                $parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() );
+
+        $updates = $parserOutput->getLinksUpdateAndOtherUpdates( $title, false );
+        SecondaryDataUpdate::runUpdates( $updates );
+
+        $dbw->commit();
+        // TODO: We don't know what happens here.
                $update = new LinksUpdate( $title, $parserOutput, false );
                $update->doUpdate();
-               $dbw->commit();
+               $dbw->commit( __METHOD__ );
        }
 
-       /*
+       /**
         * Removes non-existing links from pages from pagelinks, imagelinks,
-        * categorylinks, templatelinks and externallinks tables.
+        * categorylinks, templatelinks, externallinks, interwikilinks, langlinks and redirect tables.
         *
-        * @param $maxLag
-        * @param $batchSize The size of deletion batches
+        * @param $maxLag int
+        * @param $batchSize int The size of deletion batches
         *
         * @author Merlijn van Deen <valhallasw@arctus.nl>
         */
        private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
-               wfWaitForSlaves( $maxLag );
+               wfWaitForSlaves();
 
                $dbw = wfGetDB( DB_MASTER );
 
@@ -237,6 +256,10 @@ class RefreshLinks extends Maintenance {
                        'categorylinks' => 'cl_from',
                        'templatelinks' => 'tl_from',
                        'externallinks' => 'el_from',
+                       'iwlinks' => 'iwl_from',
+                       'langlinks' => 'll_from',
+                       'redirect' => 'rd_from',
+                       'page_props' => 'pp_page',
                );
 
                foreach ( $linksTables as $table => $field ) {
@@ -254,12 +277,11 @@ class RefreshLinks extends Maintenance {
                        $counter = 0;
                        $list = array();
                        $this->output( "0.." );
-
                        foreach ( $results as $row ) {
                                $counter++;
                                $list[] = $row->$field;
                                if ( ( $counter % $batchSize ) == 0 ) {
-                                       wfWaitForSlaves( 5 );
+                                       wfWaitForSlaves();
                                        $dbw->delete( $table, array( $field => $list ), __METHOD__ );
 
                                        $this->output( $counter . ".." );
@@ -277,4 +299,4 @@ class RefreshLinks extends Maintenance {
 }
 
 $maintClass = 'RefreshLinks';
-require_once( DO_MAINTENANCE );
+require_once( RUN_MAINTENANCE_IF_MAIN );