setOption('math', MW_MATH_SOURCE);

	# Don't generate extension images (e.g. Timeline)
	if( method_exists( $wgParser, "clearTagHooks" ) ) {
		$wgParser->clearTagHooks();
	}

	# Don't use HTML tidy
	$wgUseTidy = false;

	$what = $redirectsOnly ? "redirects" : "links";

	if( $oldRedirectsOnly ) {
		# This entire code path is cut-and-pasted from below. Hurrah.
		$res = $dbr->query(
			"SELECT page_id ".
			"FROM page ".
			"LEFT JOIN redirect ON page_id=rd_from ".
			"WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
			($end == 0 ? "page_id >= $start"
			           : "page_id BETWEEN $start AND $end"),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "Refreshing $num old redirects from $start...\n";

		# NOTE(review): $i is not initialised anywhere in the visible part of
		# this branch (the new-only branch below sets it to 0) -- confirm it is
		# set earlier in the function.
		while( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			fixRedirect( $row->page_id );
		}
	} elseif( $newOnly ) {
		print "Refreshing $what from ";
		$res = $dbr->select( 'page',
			array( 'page_id' ),
			array(
				'page_is_new' => 1,
				"page_id >= $start" ),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "$num new articles...\n";

		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			if($redirectsOnly)
				fixRedirect( $row->page_id );
			else
				fixLinksFromArticle( $row->page_id );
		}
	} else {
		print "Refreshing $what table.\n";
		if ( !$end ) {
			$end = $dbr->selectField( 'page', 'max(page_id)', false );
		}
		print("Starting from page_id $start of $end.\n");

		for ($id = $start; $id <= $end; $id++) {

			if ( !($id % $reportingInterval) ) {
				print "$id\n";
				wfWaitForSlaves( $maxLag );
			}
			if($redirectsOnly)
				fixRedirect( $id );
			else
				fixLinksFromArticle( $id );
		}
	}
}

/**
 * Re-evaluates the redirect target of a single page.
 *
 * Sets the globals $wgTitle and $wgArticle to the page being processed.
 * Returns silently when no title exists for the ID or when
 * Article::followRedirect() does not yield an object; otherwise hands that
 * object to Article::updateRedirectOn() together with a master connection
 * (presumably this rewrites the page's redirect table row -- confirm against
 * Article).
 *
 * @param $id page_id of the page to process
 */
function fixRedirect( $id ){
	global $wgTitle, $wgArticle;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	if ( is_null( $wgTitle ) ) {
		return;
	}
	$wgArticle = new Article($wgTitle);

	$rt = $wgArticle->followRedirect();

	if($rt == false || !is_object($rt))
		return;

	$wgArticle->updateRedirectOn($dbw,$rt);
}

/**
 * Re-parses the current revision of a single page and runs a LinksUpdate
 * on the resulting parser output.
 *
 * Sets the global $wgTitle to the page being processed and clears the
 * LinkCache singleton on every call. Returns silently when no title exists
 * for the ID or when no revision can be loaded for the title.
 *
 * @param $id page_id of the page to process
 */
function fixLinksFromArticle( $id ) {
	global $wgTitle, $wgParser;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	$linkCache =& LinkCache::singleton();
	$linkCache->clear();

	if ( is_null( $wgTitle ) ) {
		return;
	}

	$revision = Revision::newFromTitle( $wgTitle );
	if ( !$revision ) {
		return;
	}

	# Open the transaction only once we know there is something to update,
	# so the early return above cannot leave a transaction without its commit.
	$dbw->begin();

	$options = new ParserOptions;
	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
	$update->doUpdate();
	$dbw->immediateCommit();
}

/**
 * Removes non-existing links from pages from pagelinks, imagelinks,
 * categorylinks, templatelinks and externallinks tables.
 *
 * For each table, the source-page IDs that have no matching row in the page
 * table are read from the slave connection and deleted on the master in
 * batches via deleteBatch(). Progress is printed to standard output.
 *
 * @param $maxLag value passed to wfWaitForSlaves() before the cleanup starts
 * @param $batchSize The size of deletion batches
 *
 * @author Merlijn van Deen
 */
function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
	$fname = 'deleteLinksFromNonexistent';

	wfWaitForSlaves( $maxLag );

	$dbw = wfGetDB( DB_MASTER );
	$dbr = wfGetDB( DB_SLAVE );
	$dbr->bufferResults(false);

	$linksTables = array(
		// table name => page_id field
		'pagelinks' => 'pl_from',
		'imagelinks' => 'il_from',
		'categorylinks' => 'cl_from',
		'templatelinks' => 'tl_from',
		'externallinks' => 'el_from',
	);

	$readPage = $dbr->tableName( 'page' );

	foreach ( $linksTables as $table => $field ) {
		$readLinks = $dbr->tableName( $table );

		print "Retrieving illegal entries from $table... ";

		$sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;";
		$results = $dbr->query( $sql, $fname . ':' . $readLinks );

		# NOTE(review): result buffering was switched off above; confirm that
		# numRows() is reliable on an unbuffered result before any row has
		# been fetched, and whether buffering should be restored afterwards.
		print $results->numRows() . " illegal " . $field. "s. ";

		if ( $results->numRows() > 0 ) {
			$counter = 0;
			$list = array();
			print "Removing illegal links: 1..";

			foreach( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				if ( ( $counter % $batchSize ) == 0 ) {
					print $counter . "..";
					deleteBatch($dbw, $table, $field, $list);
					$list = array();
				}
			}

			# Flush the final, partially filled batch (no-op when empty).
			print $counter;
			deleteBatch($dbw, $table, $field, $list);
		}

		print "\n";
	}
}

/**
 * Deletes a batch of items from a table.
 * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>)
 *
 * Does nothing when $list is empty. If the connection does not answer a
 * ping, retries every 10 seconds until it does before running the query.
 *
 * @param $dbw Database Database object to run the DELETE query on
 * @param $table table to work on; will be converted via $dbw->tableName.
 * @param $field column to search in
 * @param $list values to remove. Array with SQL-safe (!) values; they are
 *              joined into the query without any further escaping.
 *
 * @author Merlijn van Deen
 */
function deleteBatch($dbw, $table, $field, $list) {
	if (count($list) == 0) return;

	$masterLinks = $dbw->tableName( $table );

	# Label the query with the real table name, in the same way
	# deleteLinksFromNonexistent() labels its SELECT.
	$fname = "deleteBatch:$masterLinks";

	if ( !$dbw->ping() ) {
		print "\nDB disconnected, reconnecting...";
		while ( !$dbw->ping() ) {
			print ".";
			sleep(10);
		}
		print "\n";
	}

	$sql = "DELETE FROM $masterLinks WHERE $field IN (" . join("," , $list) . ");";
	$dbw->query($sql, $fname);
}