X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=maintenance%2FrefreshLinks.php;h=b5aa85f891547c1346e3aace23ad61b618f54bb2;hb=73994ed5968b127eead3f0f0d6571c289b557abe;hp=7c22644af175bdec512fc1f80d0cb6da788c7384;hpb=26505b170adb24a6ae68945920db322c9382e470;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php index 7c22644af1..b5aa85f891 100644 --- a/maintenance/refreshLinks.php +++ b/maintenance/refreshLinks.php @@ -60,19 +60,17 @@ class RefreshLinks extends Maintenance { */ private function doRefreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) { - global $wgUser, $wgParser, $wgUseTidy; + global $wgParser, $wgUseTidy; $reportingInterval = 100; $dbr = wfGetDB( DB_SLAVE ); $start = intval( $start ); - # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway) - $wgUser->setOption( 'math', MW_MATH_SOURCE ); + // Give extensions a chance to optimize settings + wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) ); # Don't generate extension images (e.g. Timeline) - if ( method_exists( $wgParser, "clearTagHooks" ) ) { - $wgParser->clearTagHooks(); - } + $wgParser->clearTagHooks(); # Don't use HTML tidy $wgUseTidy = false; @@ -81,22 +79,35 @@ class RefreshLinks extends Maintenance { if ( $oldRedirectsOnly ) { # This entire code path is cut-and-pasted from below. Hurrah. - $res = $dbr->query( - "SELECT page_id " . - "FROM page " . - "LEFT JOIN redirect ON page_id=rd_from " . - "WHERE page_is_redirect=1 AND rd_from IS NULL AND " . - ( $end == 0 ? "page_id >= $start" - : "page_id BETWEEN $start AND $end" ), - __METHOD__ + + $conds = array( + "page_is_redirect=1", + "rd_from IS NULL" + ); + + if ( $end == 0 ) { + $conds[] = "page_id >= $start"; + } else { + $conds[] = "page_id BETWEEN $start AND $end"; + } + + $res = $dbr->select( + array( 'page', 'redirect' ), + 'page_id', + $conds, + __METHOD__, + array(), + array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) ) ); $num = $dbr->numRows( $res ); $this->output( "Refreshing $num old redirects from $start...\n" ); + $i = 0; + foreach ( $res as $row ) { if ( !( ++$i % $reportingInterval ) ) { $this->output( "$i\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } $this->fixRedirect( $row->page_id ); } @@ -116,12 +127,13 @@ class RefreshLinks extends Maintenance { foreach ( $res as $row ) { if ( !( ++$i % $reportingInterval ) ) { $this->output( "$i\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } - if ( $redirectsOnly ) + if ( $redirectsOnly ) { $this->fixRedirect( $row->page_id ); - else + } else { self::fixLinksFromArticle( $row->page_id ); + } } } else { if ( !$end ) { @@ -136,7 +148,7 @@ class RefreshLinks extends Maintenance { if ( !( $id % $reportingInterval ) ) { $this->output( "$id\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } $this->fixRedirect( $id ); } @@ -149,7 +161,7 @@ class RefreshLinks extends Maintenance { if ( !( $id % $reportingInterval ) ) { $this->output( "$id\n" ); - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); } self::fixLinksFromArticle( $id ); } @@ -162,27 +174,24 @@ class RefreshLinks extends Maintenance { * @param $id int The page_id of the redirect */ private function fixRedirect( $id ) { - $title = Title::newFromID( $id ); + $page = WikiPage::newFromID( $id ); $dbw = wfGetDB( DB_MASTER ); - if ( is_null( $title ) ) { + if ( $page === null ) { // This page doesn't exist (any more) // Delete any redirect table entry for it $dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ ); return; } - $article = new Article( $title ); - $rt = $article->followRedirect(); + $rt = $page->getRedirectTarget(); - if ( !$rt || !is_object( $rt ) ) { - // $title is not a redirect + if ( $rt === null ) { + // The page is not a redirect // Delete any redirect table entry for it $dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ ); - } else { - $article->updateRedirectOn( $dbw, $rt ); } } @@ -191,41 +200,43 @@ class RefreshLinks extends Maintenance { * @param $id int The page_id */ public static function fixLinksFromArticle( $id ) { - global $wgParser; + global $wgParser, $wgContLang; - $title = Title::newFromID( $id ); - $dbw = wfGetDB( DB_MASTER ); + $page = WikiPage::newFromID( $id ); LinkCache::singleton()->clear(); - if ( is_null( $title ) ) { + if ( $page === null ) { return; } - $dbw->begin(); - $revision = Revision::newFromTitle( $title ); - if ( !$revision ) { + $text = $page->getRawText(); + if ( $text === false ) { return; } - $options = new ParserOptions; - $parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() ); - $update = new LinksUpdate( $title, $parserOutput, false ); + $dbw = wfGetDB( DB_MASTER ); + $dbw->begin( __METHOD__ ); + + $options = ParserOptions::newFromUserAndLang( new User, $wgContLang ); + $parserOutput = $wgParser->parse( $text, $page->getTitle(), $options, true, true, $page->getLatest() ); + $update = new LinksUpdate( $page->getTitle(), $parserOutput, false ); $update->doUpdate(); - $dbw->commit(); + + $dbw->commit( __METHOD__ ); } - /* + /** * Removes non-existing links from pages from pagelinks, imagelinks, - * categorylinks, templatelinks and externallinks tables. + * categorylinks, templatelinks, externallinks, interwikilinks, langlinks and redirect tables. * - * @param $maxLag - * @param $batchSize The size of deletion batches + * @param $maxLag int + * @param $batchSize int The size of deletion batches * * @author Merlijn van Deen */ private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) { - wfWaitForSlaves( $maxLag ); + wfWaitForSlaves(); $dbw = wfGetDB( DB_MASTER ); @@ -239,6 +250,10 @@ class RefreshLinks extends Maintenance { 'categorylinks' => 'cl_from', 'templatelinks' => 'tl_from', 'externallinks' => 'el_from', + 'iwlinks' => 'iwl_from', + 'langlinks' => 'll_from', + 'redirect' => 'rd_from', + 'page_props' => 'pp_page', ); foreach ( $linksTables as $table => $field ) { @@ -256,12 +271,11 @@ class RefreshLinks extends Maintenance { $counter = 0; $list = array(); $this->output( "0.." ); - foreach ( $results as $row ) { $counter++; $list[] = $row->$field; if ( ( $counter % $batchSize ) == 0 ) { - wfWaitForSlaves( 5 ); + wfWaitForSlaves(); $dbw->delete( $table, array( $field => $list ), __METHOD__ ); $this->output( $counter . ".." ); @@ -273,6 +287,7 @@ class RefreshLinks extends Maintenance { $dbw->delete( $table, array( $field => $list ), __METHOD__ ); } $this->output( "\n" ); + wfWaitForSlaves(); } $lb->closeAll(); }