From: Brion Vibber Date: Mon, 27 Sep 2004 06:48:15 +0000 (+0000) Subject: Obsoleted by refreshlinks, apparently forgot to remove these files. X-Git-Tag: 1.5.0alpha1~1749 X-Git-Url: http://git.cyclocoop.org/fichier?a=commitdiff_plain;h=42e0d0bfdb9a7a6a61b17a5c32c3deab418ce6c0;p=lhc%2Fweb%2Fwiklou.git Obsoleted by refreshlinks, apparently forgot to remove these files. --- diff --git a/maintenance/rebuildlinks.inc b/maintenance/rebuildlinks.inc deleted file mode 100644 index 12148d6b23..0000000000 --- a/maintenance/rebuildlinks.inc +++ /dev/null @@ -1,400 +0,0 @@ -count; - print "$total\n"; - - print "Finding highest article id\n"; - $sql = "SELECT MIN(cur_id) AS min, MAX(cur_id) AS max FROM cur"; - $res = wfQuery( $sql, DB_SLAVE ); - $obj = wfFetchObject( $res ); - - $cur_pulser = new SelectPulser("SELECT cur_id,cur_namespace,cur_title,cur_text " . - "FROM cur WHERE cur_id ", - $obj->min, $obj->max, 100); - - $brokenlinks_inserter = new InsertBuffer( - "INSERT IGNORE INTO brokenlinks (bl_from,bl_to) VALUES " , $rowbuf_size); - - $links_inserter = new InsertBuffer( - "INSERT IGNORE INTO links (l_from,l_to) VALUES ", $rowbuf_size); - - $imagelinks_inserter = new InsertBuffer("INSERT IGNORE INTO imagelinks ". - "(il_from,il_to) VALUES ", $rowbuf_size); - - print "Starting processing\n"; - - $ins = $wgLang->getNsText( Namespace::getImage() ); - $inslen = strlen($ins)+1; - - $tc = Title::legalChars(); - - $titleCache = new MRUCache( 10000 ); - $titlecount = 0; - $start_time = time(); - - while ( $row = $cur_pulser->next() ) { - - $from_id = intval($row->cur_id); - $ns = $wgLang->getNsText( $row->cur_namespace ); - $from_full_title = $row->cur_title; - if ( "" != $ns ) { - $from_full_title = "$ns:{$from_full_title}"; - } - $from_full_title_with_slashes = addslashes( $from_full_title ); - $text = $row->cur_text; - - $numlinks = preg_match_all( "/\\[\\[([{$tc}]+)(]|\\|)/", $text, - $m, PREG_PATTERN_ORDER ); - - $seen_dbtitles = array(); // seen links (normalized and with ns, see below) - $titles_ready_for_insertion = array(); - $titles_needing_curdata = array(); - $titles_needing_curdata_pos = array(); - $links_corresponding_to_titles = array(); - - for ( $i = 0 ; $i < $numlinks; ++$i ) { - $link = $m[1][$i]; - if( preg_match( '/^(http|https|ftp|mailto|news):/', $m[1][$i] ) ) { - # an URL link; not for us! - continue; - } - - # FIXME: Handle subpage links - $nt = $titleCache->get( $link ); - if( $nt != false ){ - // Only process each unique link once per page - $nt_key = $nt->getDBkey() . $nt->getNamespace(); - if( isset( $seen_dbtitles[$nt_key] ) ) - continue; - $seen_dbtitles[$nt_key] = 1; - - $titles_ready_for_insertion[] = $nt; - } else { - $nt = Title::newFromText( $link ); - if (! $nt) { - // Invalid link, probably something like "[[ ]]" - continue; - } - - // Only process each unique link once per page - $nt_key = $nt->getDBkey() . $nt->getNamespace(); - if( isset( $seen_dbtitles[$nt_key] ) ) - continue; - $seen_dbtitles[$nt_key] = 1; - - if( $nt->getInterwiki() != "" ) { - # Interwiki links are not stored in the link tables - continue; - } - if( $nt->getNamespace() == Namespace::getSpecial() ) { - # Special links not stored in link tables - continue; - } - if( $nt->getNamespace() == Namespace::getMedia() ) { - # treat media: links as image: links - $nt = Title::makeTitle( Namespace::getImage(), $nt->getDBkey() ); - } - $nt->mArticleID = 0; // assume broken link until proven otherwise - - $pos = array_push($titles_needing_curdata, $nt) - 1; - $titles_needing_curdata_pos[$nt->getDBkey() . $nt->getNamespace()] = $pos; - $links_corresponding_to_titles[] = $link; - unset( $link ); // useless outside this loop, but tempting - } - } - - - if ( count( $titles_needing_curdata ) > 0 ){ - $parts = array(); - foreach ($titles_needing_curdata as $nt ) { - $parts[] = " (cur_namespace = " . $nt->getNamespace() . " AND " . - "cur_title='" . wfStrencode( $nt->getDBkey() ) . "')"; - } - $sql = "SELECT cur_namespace, cur_title, cur_id FROM cur_fast WHERE " . - implode(" OR ", $parts); - $res = wfQuery( $sql, DB_MASTER ); - while($row = wfFetchObject( $res ) ){ - $pos = $titles_needing_curdata_pos[$row->cur_title . $row->cur_namespace]; - $titles_needing_curdata[$pos]->mArticleID = intval($row->cur_id); - } - for( $k = 0; $k < count( $titles_needing_curdata ) ; $k++) { - $tmplink = $links_corresponding_to_titles[$k]; - $titleCache->set( $tmplink, $titles_needing_curdata[$k] ); - $titles_ready_for_insertion[] = $titles_needing_curdata[$k]; - } - } - - foreach ( $titles_ready_for_insertion as $nt ) { - $dest_noslashes = $nt->getPrefixedDBkey(); - $dest = addslashes( $dest_noslashes ); - $dest_id = $nt->getArticleID(); - $from = $from_full_title_with_slashes; - - # print "\nLINK '$from_full_title' ($from_id) -> '$dest' ($dest_id)\n"; - - if ( 0 == strncmp( "$ins:", $dest_noslashes, $inslen ) ) { - $iname = addslashes( substr( $dest_noslashes, $inslen ) ); - $imagelinks_inserter->insert( "('{$from}','{$iname}')" ); - } else if ( 0 == $dest_id ) { - $brokenlinks_inserter->insert( "({$from_id},'{$dest}')" ); - } else { - $links_inserter->insert( "('{$from}',{$dest_id})" ); - } - $titlecount++; - } - - if ( ( $count % 20 ) == 0 ) - print "."; - - if ( ( ++$count % 1000 ) == 0 ) { - $dt = time() - $start_time; - $start_time = time(); - $rps = persec(1000, $dt); - $tps = persec($titlecount, $dt); - $titlecount = 0; - print "\n$count of $total articles scanned ({$rps} articles ". - "and {$tps} titles per second)\n"; - print "Title cache hits: " . $titleCache->getPerformance() . "%\n"; - - } - - } - - print "\nFlushing insertion buffers..."; - $imagelinks_inserter->flush(); - $links_inserter->flush(); - $brokenlinks_inserter->flush(); - print "ok\n"; - - print "$count articles scanned.\n"; - - $sql = "UNLOCK TABLES"; - wfQuery( $sql, DB_MASTER ); - print "Done\n"; -} - -/* private */ function persec($n, $t){ - if($n == 0) - return "zero"; - if($t == 0) - return "lots of"; - return intval($n/$t); -} - -/** - * InsertBuffer increases performance slightly by inserting many rows - * at once. The gain is small (<5%) when running against a local, idle - * database, but may be significant in other circumstances. It also - * limits the number of inserted rows uppwards, which should avoid - * problems with huge articles and certain mysql settings that limits - * the size of queries. It's also convenient. - * - * @deprecated - * @package MediaWiki - * @subpackage Maintenance - */ -class InsertBuffer { - /* private */ var $mBuf, $mSql, $mBufcount, $mMaxsize; - - function InsertBuffer( $sql, $bufsize ){ - $this->mSql = $sql; - $this->mBuf = array(); - $this->mBufcount = 0; - $this->mMaxsize = $bufsize; - } - - function insert( $value ){ - // print $this->mSql . " -> " . $value . "\n"; - $this->mBuf[] = $value; - $this->mBufcount++; - if($this->mBufcount > $this->mMaxsize){ - $this->flush(); - } - } - - function flush(){ - if( $this->mBufcount > 0 ){ - $sql = $this->mSql . implode(",", $this->mBuf); - wfQuery( $sql, DB_MASTER ); - $this->mBuf = array(); - $this->mBufcount = 0; - // print "Wrote query of size " . strlen( $sql ) . "\n"; - } - } - -} - -/** - * Select parts from a large table by using the "BETWEEN X AND Y" - * operator on the id column. Avoids buffering the whole thing in - * RAM. It's also convenient. - * - * @deprecated - * @package MediaWiki - * @subpackage Maintenance - */ -class SelectPulser { - /* private */ var $mSql, $mSetsize, $mPos, $mMax, $mSet; - - function SelectPulser( $sql, $min, $max, $setsize) { - $this->mSql = $sql; - $this->mSet = array(); - $this->mPos = $min; - $this->mMax = $max; - $this->mSetsize = $setsize; - } - - function next(){ - $result = current( $this->mSet ); - next( $this->mSet ); - if( false !== $result ){ - return $result; - } - while( $this->mPos <= $this->mMax ){ - $this->mSet = array(); - $sql = $this->mSql . " BETWEEN " . $this->mPos . - " AND " . ($this->mPos + $this->mSetsize - 1); - $this->mPos += $this->mSetsize; - - $res = wfQuery( $sql, DB_SLAVE ); - while ( $row = wfFetchObject( $res ) ) { - $this->mSet[] = $row; - } - wfFreeResult( $res ); - if( count( $this->mSet ) > 0 ){ - return $this->next(); - } - } - return false; - } -} - -/** - * A simple MRU for general cacheing. - * @deprecated - * @todo document - * @package MediaWiki - * @subpackage Maintenance - */ -class MRUCache { - /* private */ var $mMru, $mCache, $mSize, $mPurgefreq, $nexti; - /* private */ var $hits, $misses; - - function MRUCache( $size, $purgefreq = -1 ) { - // purgefreq is 1/10 of $size if not stated - $purgefreq = ($purgefreq == -1 ? intval($size/10) : $purgefreq); - $purgefreq = ($purgefreq <= 0 ? 1 : $purgefreq); - - $this->mSize = $size; - $this->mMru = array(); - $this->mCache = array(); - $this->mPurgefreq = $purgefreq; - $this->nexti = 1; - print "purgefreq = " . $this->mPurgefreq . "\n"; - } - - function get( $key ){ - if ( ! array_key_exists( $key, $this->mCache) ){ - $this->misses++; - return false; - } - $this->hits++; - $this->mMru[$key] = $this->nexti++; - return $this->mCache[$key]; - } - - function set( $key, $value ){ - $this->mMru[$key] = $this->nexti++; - $this->mCache[$key] = $value; - - if($this->nexti % $this->mPurgefreq == 0) - $this->purge(); - } - - function purge(){ - $to_remove = count( $this->mMru ) - $this->mSize; - if( $to_remove <= 0 ){ - return; - } - asort( $this->mMru ); - $removed = array_splice( $this->mMru, 0, $to_remove ); - foreach( array_keys( $removed ) as $key ){ - unset( $this->mCache[$key] ); - } - } - - function getPerformance(){ - $tot = $this->hits + $this->misses; - if($tot > 0) - return intval(100.0 * $this->hits / $tot); - else - return 0; - } -} - -?> diff --git a/maintenance/rebuildlinks.php b/maintenance/rebuildlinks.php deleted file mode 100644 index 3a004f68ad..0000000000 --- a/maintenance/rebuildlinks.php +++ /dev/null @@ -1,24 +0,0 @@ -