From: Tim Starling Date: Sun, 19 Jun 2005 01:05:56 +0000 (+0000) Subject: ported refreshLinks.php improvements (and associated change to commandLine.inc and... X-Git-Tag: 1.5.0beta1~160 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/banques/ajouter.php?a=commitdiff_plain;h=d5e212999a16c7f89e24902846290e948abadb2e;p=lhc%2Fweb%2Fwiklou.git ported refreshLinks.php improvements (and associated change to commandLine.inc and Database.php) from 1.4 --- diff --git a/includes/Database.php b/includes/Database.php index d46b4cd104..8d110ae76c 100644 --- a/includes/Database.php +++ b/includes/Database.php @@ -1447,7 +1447,12 @@ class Database { * Ping the server and try to reconnect if it there is no connection */ function ping() { - return mysql_ping( $this->mConn ); + if( function_exists( 'mysql_ping' ) ) { + return mysql_ping( $this->mConn ); + } else { + wfDebug( "Tried to call mysql_ping but this is ancient PHP version. Faking it!\n" ); + return true; + } } /** diff --git a/maintenance/commandLine.inc b/maintenance/commandLine.inc index 86f0731840..3e308ab38e 100644 --- a/maintenance/commandLine.inc +++ b/maintenance/commandLine.inc @@ -18,6 +18,7 @@ define("MEDIAWIKI",true); # $options becomes an array with keys set to the option names # $optionsWithArgs is an array of GNU-style options that take an argument. The arguments are returned # in the values of $options. +# $args becomes a zero-based array containing the non-option arguments if ( !isset( $optionsWithArgs ) ) { $optionsWithArgs = array(); @@ -129,7 +130,10 @@ if ( $sep == ":" && strpos( `hostname`, "wikimedia.org" ) !== false ) { require_once( "$IP/includes/Defines.php" ); require_once( $settingsFile ); ini_set( "include_path", ".$sep$IP$sep$IP/includes$sep$IP/languages$sep$IP/maintenance" ); - require_once( "$IP/AdminSettings.php" ); + + if ( is_readable( "$IP/AdminSettings.php" ) ) { + require_once( "$IP/AdminSettings.php" ); + } } # Turn off output buffering again, it might have been turned on in the settings files @@ -139,9 +143,43 @@ $wgCommandLineMode = true; $wgDBuser = $wgDBadminuser; $wgDBpassword = $wgDBadminpassword; +if ( !empty( $wgUseNormalUser ) && isset( $wgDBadminuser ) ) { + $wgDBuser = $wgDBadminuser; + $wgDBpassword = $wgDBadminpassword; + + foreach ( $wgDBservers as $i => $server ) { + $wgDBservers[$i]['user'] = $wgDBuser; + $wgDBservers[$i]['password'] = $wgDBpassword; + } +} + +ini_set( 'memory_limit', -1 ); require_once( "Setup.php" ); require_once( "install-utils.inc" ); $wgTitle = Title::newFromText( "Command line script" ); set_time_limit(0); -?> \ No newline at end of file + +// -------------------------------------------------------------------- +// Functions +// -------------------------------------------------------------------- + +function wfWaitForSlaves( $maxLag ) { + global $wgLoadBalancer; + if ( $maxLag ) { + list( $host, $lag ) = $wgLoadBalancer->getMaxLag(); + while ( $lag > $maxLag ) { + $name = @gethostbyaddr( $host ); + if ( $name !== false ) { + $host = $name; + } + print "Waiting for $host (lagged $lag seconds)...\n"; + sleep($maxLag); + list( $host, $lag ) = $wgLoadBalancer->getMaxLag(); + } + } +} + + + +?> diff --git a/maintenance/refreshLinks.inc b/maintenance/refreshLinks.inc index 7eb6c60e21..355cf52002 100644 --- a/maintenance/refreshLinks.inc +++ b/maintenance/refreshLinks.inc @@ -6,62 +6,152 @@ */ /** */ -define( "REPORTING_INTERVAL", 50 ); -define( "PAUSE_INTERVAL", 50 ); +define( "REPORTING_INTERVAL", 100 ); -function refreshLinks( $start ) { - global $wgUser, $wgTitle, $wgArticle, $wgLinkCache, $wgOut; +function refreshLinks( $start, $newOnly = false, $maxLag = false ) { + global $wgUser, $wgParser, $wgUseImageResize; + $fname = 'refreshLinks'; + $dbr =& wfGetDB( DB_SLAVE ); $dbw =& wfGetDB( DB_MASTER ); + $start = intval( $start ); - $end = $dbw->selectField( 'page', 'max(page_id)', false ); - - print("Refreshing link table. Starting from page_id $start of $end.\n"); - # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway) $wgUser->setOption("math", 3); + + # Don't generate extension images (e.g. Timeline) + $wgParser->mTagHooks = array(); + # Don't generate thumbnail images + $wgUseImageResize = false; + + if ( $newOnly ) { + print "Refreshing links from "; + $res = $dbr->select( 'page', array( 'page_id' ), + array( 'page_is_new' => 1, "page_id > $start" ), $fname ); + $num = $dbr->numRows( $res ); + print "$num new articles...\n"; + + $i = 0; + while ( $row = $dbr->fetchObject( $res ) ) { + if ( !( ++$i % REPORTING_INTERVAL ) ) { + print "$i\n"; + wfWaitForSlaves( $maxLag ); + } - for ($id = $start; $id <= $end; $id++) { - if ( !($id % REPORTING_INTERVAL) ) { - print "$id\n"; + fixLinksFromArticle( $row->page_id ); } + } else { + print "Refreshing link table.\n"; + $end = $dbr->selectField( 'page', 'max(page_id)', false ); + print("Starting from page_id $start of $end.\n"); - if ( !($id % PAUSE_INTERVAL) ) { - sleep(1); + for ($id = $start; $id <= $end; $id++) { + + if ( !($id % REPORTING_INTERVAL) ) { + print "$id\n"; + wfWaitForSlaves( $maxLag ); + } + fixLinksFromArticle( $id ); } - $wgTitle = Title::newFromID( $id ); - if ( is_null( $wgTitle ) ) { - continue; + + } +} + +function fixLinksFromArticle( $id ) { + global $wgTitle, $wgArticle, $wgLinkCache, $wgOut; + + $wgTitle = Title::newFromID( $id ); + $dbw =& wfGetDB( DB_MASTER ); + + if ( is_null( $wgTitle ) ) { + return; + } + $dbw->begin(); + + $wgArticle = new Article( $wgTitle ); + $text = $wgArticle->getContent( true ); + $wgLinkCache = new LinkCache; + $wgLinkCache->forUpdate( true ); + + global $wgLinkHolders; + $wgLinkHolders = array( + 'namespaces' => array(), + 'dbkeys' => array(), + 'queries' => array(), + 'texts' => array(), + 'titles' => array() + ); + + + # Parse the text and replace links with placeholders + $wgOut->addWikiText( $text ); + + # Look up the links in the DB and add them to the link cache + $wgOut->transformBuffer(); + $wgOut->clearHTML(); + + $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() ); + $linksUpdate->doDumbUpdate(); + $dbw->immediateCommit(); +} + +function deleteLinksFromNonexistent( $maxLag = 0 ) { + $fname = 'deleteLinksFromNonexistent'; + + wfWaitForSlaves( $maxLag ); + + $dbw =& wfGetDB( DB_WRITE ); + + $linksTables = array( + 'pagelinks' => 'pl_from', + 'imagelinks' => 'il_from', + 'categorylinks' => 'cl_from', + ); + + $page = $dbw->tableName( 'page' ); + + + foreach ( $linksTables as $table => $field ) { + if ( !$dbw->ping() ) { + print "DB disconnected, reconnecting..."; + while ( !$dbw->ping() ) { + print "."; + sleep(10); + } + print "\n"; } - $dbw->query("BEGIN"); - $wgArticle = new Article( $wgTitle ); - $text = $wgArticle->getContent( true ); - $wgLinkCache = new LinkCache; - $wgLinkCache->forUpdate( true ); - - global $wgLinkHolders; - $wgLinkHolders = array( - 'namespaces' => array(), - 'dbkeys' => array(), - 'queries' => array(), - 'texts' => array(), - 'titles' => array() - ); - - - # Parse the text and replace links with placeholders - $wgOut->addWikiText( $text ); + $pTable = $dbw->tableName( $table ); + global $wgDBmysql4, $wgDBtype; + if( $wgDBmysql4 || $wgDBtype != 'mysql' ) { + $sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL"; + } else { + # Hack-around for MySQL 3.x, which lacks support + # for multi-table deletes. + + $sql = "SELECT DISTINCT $field AS id FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL"; + echo "Looking in $table from non-existent articles..."; + $result = $dbw->query( $sql ); + $ids = array(); + while( $row = $dbw->fetchObject( $result ) ) { + $ids[] = $row->id; + } + $dbw->freeResult( $result ); + + if( empty( $ids ) ) { + echo " none.\n"; + continue; + } + echo " found.\n"; + $sql = "DELETE FROM $pTable WHERE $field IN (" . implode( ",", $ids ) . ")"; + } - # Look up the links in the DB and add them to the link cache - $wgOut->transformBuffer( RLH_FOR_UPDATE ); - $wgOut->clearHTML(); - - $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() ); - $linksUpdate->doDumbUpdate(); - $dbw->query("COMMIT"); + print "Deleting $table from non-existent articles..."; + $dbw->query( $sql, $fname ); + print " fixed " .$dbw->affectedRows() . " row(s)\n"; } } + ?> diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php index d31940681e..f37c8e874f 100644 --- a/maintenance/refreshLinks.php +++ b/maintenance/refreshLinks.php @@ -6,20 +6,21 @@ */ /** */ +$optionsWithArgs = array( 'm' ); require_once( "commandLine.inc" ); require_once( "refreshLinks.inc" ); error_reporting( E_ALL & (~E_NOTICE) ); +if ( !$options['dfn-only'] ) { + if ($args[0]) { + $start = (int)$args[0]; + } else { + $start = 1; + } -if ($argv[2]) { - $start = (int)$argv[2]; -} else { - $start = 1; + refreshLinks( $start, $options['new-only'], $options['m'] ); } - -refreshLinks( $start ); - -exit(); +deleteLinksFromNonexistent(); ?>