ported refreshLinks.php improvements (and associated change to commandLine.inc and...
authorTim Starling <tstarling@users.mediawiki.org>
Sun, 19 Jun 2005 01:05:56 +0000 (01:05 +0000)
committerTim Starling <tstarling@users.mediawiki.org>
Sun, 19 Jun 2005 01:05:56 +0000 (01:05 +0000)
includes/Database.php
maintenance/commandLine.inc
maintenance/refreshLinks.inc
maintenance/refreshLinks.php

index d46b4cd..8d110ae 100644 (file)
@@ -1447,7 +1447,12 @@ class Database {
-        * Ping the server and try to reconnect if it there is no connection
+        * Ping the server and try to reconnect if there is no connection
         */
        function ping() {
-               return mysql_ping( $this->mConn );
+               if( function_exists( 'mysql_ping' ) ) {
+                       return mysql_ping( $this->mConn );
+               } else {
+                       wfDebug( "Tried to call mysql_ping but this is ancient PHP version. Faking it!\n" );
+                       return true;
+               }
        }
        
        /**
index 86f0731..3e308ab 100644 (file)
@@ -18,6 +18,7 @@ define("MEDIAWIKI",true);
 # $options becomes an array with keys set to the option names
 # $optionsWithArgs is an array of GNU-style options that take an argument. The arguments are returned
 # in the values of $options.
+# $args becomes a zero-based array containing the non-option arguments
 
 if ( !isset( $optionsWithArgs ) ) {
        $optionsWithArgs = array();
@@ -129,7 +130,10 @@ if ( $sep == ":" && strpos( `hostname`, "wikimedia.org" ) !== false ) {
        require_once( "$IP/includes/Defines.php" );
        require_once( $settingsFile );
        ini_set( "include_path", ".$sep$IP$sep$IP/includes$sep$IP/languages$sep$IP/maintenance" );
-       require_once( "$IP/AdminSettings.php" );
+       
+       if ( is_readable( "$IP/AdminSettings.php" ) ) {
+               require_once( "$IP/AdminSettings.php" );
+       }
 }
 
 # Turn off output buffering again, it might have been turned on in the settings files
@@ -139,9 +143,43 @@ $wgCommandLineMode = true;
-$wgDBuser = $wgDBadminuser;
-$wgDBpassword = $wgDBadminpassword;

+# Switch to the admin DB account unless the calling script set
+# $wgUseNormalUser, or no admin account is defined (AdminSettings.php is
+# only loaded when it is readable, so $wgDBadminuser may not exist).
+if ( empty( $wgUseNormalUser ) && isset( $wgDBadminuser ) ) {
+       $wgDBuser = $wgDBadminuser;
+       $wgDBpassword = $wgDBadminpassword;
+
+       # Apply the same credentials to every configured server entry; skip
+       # this when $wgDBservers is not an array so foreach has valid input.
+       if ( isset( $wgDBservers ) && is_array( $wgDBservers ) ) {
+               foreach ( $wgDBservers as $i => $server ) {
+                       $wgDBservers[$i]['user'] = $wgDBuser;
+                       $wgDBservers[$i]['password'] = $wgDBpassword;
+               }
+       }
+}
+
+
+ini_set( 'memory_limit', -1 );
 
 require_once( "Setup.php" );
 require_once( "install-utils.inc" );
 $wgTitle = Title::newFromText( "Command line script" );
 set_time_limit(0);
-?>
\ No newline at end of file
+
+// --------------------------------------------------------------------
+// Functions
+// --------------------------------------------------------------------
+
+/**
+ * Block while the lag reported by $wgLoadBalancer->getMaxLag() exceeds
+ * $maxLag, printing a status line and sleeping $maxLag seconds per round.
+ *
+ * @param mixed $maxLag Lag threshold in seconds; a false-ish value means
+ *                      "do not wait at all"
+ */
+function wfWaitForSlaves( $maxLag ) {
+       global $wgLoadBalancer;
+       if ( !$maxLag ) {
+               return;
+       }
+       while ( true ) {
+               list( $server, $seconds ) = $wgLoadBalancer->getMaxLag();
+               if ( !( $seconds > $maxLag ) ) {
+                       break;
+               }
+               # Prefer a resolved name in the message when the lookup succeeds
+               $resolved = @gethostbyaddr( $server );
+               if ( $resolved !== false ) {
+                       $server = $resolved;
+               }
+               print "Waiting for $server (lagged $seconds seconds)...\n";
+               sleep($maxLag);
+       }
+}
+
+
+
+?>
index 7eb6c60..355cf52 100644 (file)
  */
 
 /** */
-define( "REPORTING_INTERVAL", 50 );
-define( "PAUSE_INTERVAL", 50 );
+define( "REPORTING_INTERVAL", 100 );
 
-function refreshLinks( $start ) {
-       global $wgUser, $wgTitle, $wgArticle, $wgLinkCache, $wgOut;
+/**
+ * Rebuild link table entries by calling fixLinksFromArticle() for a set of
+ * pages, reporting progress every REPORTING_INTERVAL pages.
+ *
+ * @param int   $start   First page_id to process (inclusive in both modes)
+ * @param bool  $newOnly If true, only visit rows with page_is_new = 1;
+ *                       otherwise walk every id from $start to max(page_id)
+ * @param mixed $maxLag  Passed to wfWaitForSlaves() at each progress report
+ */
+function refreshLinks( $start, $newOnly = false, $maxLag = false ) {
+       global $wgUser, $wgParser, $wgUseImageResize;
 
+       $fname = 'refreshLinks';
+       $dbr =& wfGetDB( DB_SLAVE );
        $dbw =& wfGetDB( DB_MASTER );
+       # Force an integer: $start is interpolated into the SQL condition below
+       $start = intval( $start );
        
-       $end = $dbw->selectField( 'page', 'max(page_id)', false );
-
-       print("Refreshing link table. Starting from page_id $start of $end.\n");
-
        # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
        $wgUser->setOption("math", 3);
+
+       # Don't generate extension images (e.g. Timeline)
+       $wgParser->mTagHooks = array();
        
+       # Don't generate thumbnail images
+       $wgUseImageResize = false;
+
+       if ( $newOnly ) {
+               print "Refreshing links from ";
+               # ">=" so that $start itself is included, as in the full scan below
+               $res = $dbr->select( 'page', array( 'page_id' ),
+                       array( 'page_is_new' => 1, "page_id >= $start" ), $fname );
+               $num = $dbr->numRows( $res );
+               print "$num new articles...\n";
+
+               $i = 0;
+               while ( $row = $dbr->fetchObject( $res ) ) {
+                       # Report and let lagged slaves catch up once per interval
+                       if ( !( ++$i % REPORTING_INTERVAL ) ) {
+                               print "$i\n";
+                               wfWaitForSlaves( $maxLag );
+                       }
 
-       for ($id = $start; $id <= $end; $id++) {
-               if ( !($id % REPORTING_INTERVAL) ) {
-                       print "$id\n";
+                       fixLinksFromArticle( $row->page_id );
                }
+       } else {
+               print "Refreshing link table.\n";
+               $end = $dbr->selectField( 'page', 'max(page_id)', false );
+               print("Starting from page_id $start of $end.\n");
 
-               if ( !($id % PAUSE_INTERVAL) ) {
-                       sleep(1);
+               # Ids with no page are skipped inside fixLinksFromArticle()
+               for ($id = $start; $id <= $end; $id++) {
+
+                       if ( !($id % REPORTING_INTERVAL) ) {
+                               print "$id\n";
+                               wfWaitForSlaves( $maxLag );
+                       }
+                       fixLinksFromArticle( $id );
                }
                
-               $wgTitle = Title::newFromID( $id );
-               if ( is_null( $wgTitle ) ) {
-                       continue;
+
+       }
+}
+
+/**
+ * Re-parse one page and rewrite its link table rows.
+ *
+ * Loads the title for $id into the global $wgTitle, parses the article text
+ * through $wgOut with a fresh LinkCache, then runs LinksUpdate::doDumbUpdate()
+ * between begin() and immediateCommit() on the master connection.
+ * Returns without touching the database when no title exists for $id.
+ *
+ * @param int $id page_id of the article to process
+ */
+function fixLinksFromArticle( $id ) {
+       global $wgTitle, $wgArticle, $wgLinkCache, $wgOut;
+       
+       $wgTitle = Title::newFromID( $id );
+       $dbw =& wfGetDB( DB_MASTER );
+       
+       # No title for this id: nothing to refresh
+       if ( is_null( $wgTitle ) ) {
+               return;
+       }
+       $dbw->begin();
+
+       $wgArticle = new Article( $wgTitle );
+       $text = $wgArticle->getContent( true );
+       # Start from an empty link cache for every article
+       $wgLinkCache = new LinkCache;
+       $wgLinkCache->forUpdate( true );
+       
+       # Reset the global link placeholder state before parsing
+       global $wgLinkHolders;
+       $wgLinkHolders = array(
+               'namespaces' => array(),
+               'dbkeys' => array(),
+               'queries' => array(),
+               'texts' => array(),
+               'titles' => array()
+       );
+
+
+       # Parse the text and replace links with placeholders
+       $wgOut->addWikiText( $text );
+       
+       # Look up the links in the DB and add them to the link cache
+       $wgOut->transformBuffer();
+       # The rendered HTML itself is not needed, discard it
+       $wgOut->clearHTML();
+
+       $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() );
+       $linksUpdate->doDumbUpdate();
+       $dbw->immediateCommit();
+}
+
+/**
+ * Delete rows from the pagelinks, imagelinks and categorylinks tables whose
+ * source page id has no matching row in the page table.
+ *
+ * @param mixed $maxLag Passed to wfWaitForSlaves() once, before any deletion
+ */
+function deleteLinksFromNonexistent( $maxLag = 0 ) {
+       $fname = 'deleteLinksFromNonexistent';
+       
+       wfWaitForSlaves( $maxLag );
+
+       $dbw =& wfGetDB( DB_WRITE );
+       
+       # Link table name => column that is joined against page_id
+       $linksTables = array( 
+               'pagelinks' => 'pl_from',
+               'imagelinks' => 'il_from',
+               'categorylinks' => 'cl_from',
+       );
+
+       $page = $dbw->tableName( 'page' );
+
+
+       foreach ( $linksTables as $table => $field ) {
+               # Block until the connection answers a ping again, retrying every 10 seconds
+               if ( !$dbw->ping() ) {
+                       print "DB disconnected, reconnecting...";
+                       while ( !$dbw->ping() ) {
+                               print ".";
+                               sleep(10);
+                       }
+                       print "\n";
                }
-               $dbw->query("BEGIN");
 
-               $wgArticle = new Article( $wgTitle );
-               $text = $wgArticle->getContent( true );
-               $wgLinkCache = new LinkCache;
-               $wgLinkCache->forUpdate( true );
-               
-               global $wgLinkHolders;
-               $wgLinkHolders = array(
-                       'namespaces' => array(),
-                       'dbkeys' => array(),
-                       'queries' => array(),
-                       'texts' => array(),
-                       'titles' => array()
-               );
-
-
-               # Parse the text and replace links with placeholders
-               $wgOut->addWikiText( $text );
+               $pTable = $dbw->tableName( $table );
+               global $wgDBmysql4, $wgDBtype;
+               # Single-statement delete via LEFT JOIN: rows with no page match have page_id NULL
+               # NOTE(review): this branch is also taken for non-MySQL $wgDBtype; confirm
+               # those backends accept the "DELETE t FROM t LEFT JOIN" form.
+               if( $wgDBmysql4 || $wgDBtype != 'mysql' ) {
+                       $sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL";
+               } else {
+                       # Hack-around for MySQL 3.x, which lacks support
+                       # for multi-table deletes.
+                       
+                       # Collect the orphaned ids first, then delete them with an IN list
+                       $sql = "SELECT DISTINCT $field AS id FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL";
+                       echo "Looking in $table from non-existent articles...";
+                       $result = $dbw->query( $sql );
+                       $ids = array();
+                       while( $row = $dbw->fetchObject( $result ) ) {
+                               $ids[] = $row->id;
+                       }
+                       $dbw->freeResult( $result );
+                       
+                       # Nothing orphaned in this table: move on to the next one
+                       if( empty( $ids ) ) {
+                               echo " none.\n";
+                               continue;
+                       }
+                       echo " found.\n";
+                       $sql = "DELETE FROM $pTable WHERE $field IN (" . implode( ",", $ids ) . ")";
+               }
                
-               # Look up the links in the DB and add them to the link cache
-               $wgOut->transformBuffer( RLH_FOR_UPDATE );
-               $wgOut->clearHTML();
-
-               $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() );
-               $linksUpdate->doDumbUpdate();
-               $dbw->query("COMMIT");
+               # Run whichever DELETE statement was built above and report the row count
+               print "Deleting $table from non-existent articles...";
+               $dbw->query( $sql, $fname );
+               print " fixed " .$dbw->affectedRows() . " row(s)\n";
        }
 }
+
 ?>
index d319406..f37c8e8 100644 (file)
@@ -6,20 +6,21 @@
  */
 
 /** */
+$optionsWithArgs = array( 'm' );
 require_once( "commandLine.inc" );
 require_once( "refreshLinks.inc" );
 
 error_reporting( E_ALL & (~E_NOTICE) );
 
+# Read the optional switches once, without touching undefined array keys
+$maxLag = isset( $options['m'] ) ? $options['m'] : false;
+$newOnly = !empty( $options['new-only'] );
+
+# --dfn-only skips the refresh pass and only runs the clean-up at the end
+if ( empty( $options['dfn-only'] ) ) {
+       # The first non-option argument is the page_id to start from (default 1)
+       if ( !empty( $args[0] ) ) {
+               $start = (int)$args[0];
+       } else {
+               $start = 1;
+       }
 
-if ($argv[2]) {
-       $start = (int)$argv[2];
-} else {
-       $start = 1;
+       refreshLinks( $start, $newOnly, $maxLag );
 }
-
-refreshLinks( $start );
-
-exit();
+# Apply the -m lag limit to the deletion pass as well
+deleteLinksFromNonexistent( $maxLag );
 
 ?>