From: Brion Vibber Date: Wed, 17 Jan 2007 00:54:54 +0000 (+0000) Subject: Apply most of the code tweaks from the live site: X-Git-Tag: 1.31.0-rc.0~54352 X-Git-Url: https://git.cyclocoop.org//%22?a=commitdiff_plain;h=d88bf87284c59097878f761cdbcfa27c75b6262c;p=lhc%2Fweb%2Fwiklou.git Apply most of the code tweaks from the live site: * use configured cache servers for mctest.php * bucket details in mcc.php * fix input validation and remove debugging code in compressOld * full ID range for moveToExternal * fix resolveStubs.php for compatibility with older serialized data * maximum line length for bar graphs in getLagTimes.php * recognize specieswiki in rebuildInterwiki.inc * --purge option to do additional parser-cache purging for purgeList.php * default changed in MiniDonation extension * profile unicode cleanup in Xml * log slow parses in Article.php * profile wfMsgReal * log mkdir failures * profile AutoLoader * rebuild empty DjVu metadata containing '' * security fix for DjVu metadata retrieval --- diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 3f66ad36b5..b2146f8df1 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -82,6 +82,22 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * Introduce "BookInformation" hook; see docs/hooks.txt for more details * English Wikipedia added to interwiki table as wikipedia: No idea why it wasn't there before. +* use configured cache servers for mctest.php +* bucket details in mcc.php +* fix input validation and remove debugging code in compressOld +* full ID range for moveToExternal +* fix resolveStubs.php for compatibility with older serialized data +* maximum line length for bar graphs in getLagTimes.php +* recognize specieswiki in rebuildInterwiki.inc +* --purge option to do additional parser-cache purging for purgeList.php +* profile unicode cleanup in Xml +* log slow parses in Article.php +* profile wfMsgReal +* log mkdir failures +* profile AutoLoader +* rebuild empty DjVu metadata containing '' +* security fix for DjVu metadata retrieval + == Languages updated == diff --git a/includes/Article.php b/includes/Article.php index ea6035ed50..180f6aa589 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -808,11 +808,21 @@ class Article { $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false ); } # Display content and don't save to parser cache + # With timing hack -- TS 2006-07-26 + $time = -wfTime(); $this->outputWikiText( $text, false ); + $time += wfTime(); + + # Timing hack + if ( $time > 3 ) { + wfDebugLog( 'slow-parse', sprintf( "%-5.2f %s", $time, + $this->mTitle->getPrefixedDBkey())); + } if( !$this->isCurrent() ) { $wgOut->parserOptions()->setEditSection( $oldEditSectionSetting ); } + } } /* title may have been set from the cache */ diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 0b1c82bf72..747e9ec06c 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -273,6 +273,7 @@ function __autoload($className) { 'ApiResult' => 'includes/api/ApiResult.php', ); + wfProfileIn( __METHOD__ ); if ( isset( $localClasses[$className] ) ) { $filename = $localClasses[$className]; } elseif ( isset( $wgAutoloadClasses[$className] ) ) { @@ -289,6 +290,7 @@ function __autoload($className) { } if ( !$filename ) { # Give up + wfProfileOut( __METHOD__ ); return; } } @@ -299,6 +301,7 @@ function __autoload($className) { $filename = "$IP/$filename"; } require( $filename ); + wfProfileOut( __METHOD__ ); } function wfLoadAllExtensions() { diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php index ceb7aa1a37..a0a73057ea 100644 --- a/includes/DjVuImage.php +++ b/includes/DjVuImage.php @@ -221,7 +221,7 @@ class DjVuImage { function retrieveMetaData() { global $wgDjvuToXML; if ( isset( $wgDjvuToXML ) ) { - $cmd = $wgDjvuToXML . ' --without-anno --without-text ' . $this->mFilename; + $cmd = $wgDjvuToXML . ' --without-anno --without-text ' . escapeshellarg( $this->mFilename ); $xml = wfShellExec( $cmd ); } else { $xml = null; diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 777c02c417..49760553c3 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -376,8 +376,11 @@ function wfMsgNoDBForContent( $key ) { * @return String: the requested message. */ function wfMsgReal( $key, $args, $useDB = true, $forContent=false, $transform = true ) { + $fname = 'wfMsgReal'; + wfProfileIn( $fname ); $message = wfMsgGetKey( $key, $useDB, $forContent, $transform ); $message = wfMsgReplaceArgs( $message, $args ); + wfProfileOut( $fname ); return $message; } @@ -1624,6 +1627,7 @@ function wfMkdirParents( $fullDir, $mode = 0777 ) { foreach ( $createList as $dir ) { # use chmod to override the umask, as suggested by the PHP manual if ( !mkdir( $dir, $mode ) || !chmod( $dir, $mode ) ) { + wfDebugLog( 'mkdir', "Unable to create directory $dir\n" ); return false; } } diff --git a/includes/Image.php b/includes/Image.php index 1f3895c600..7a6442c36c 100644 --- a/includes/Image.php +++ b/includes/Image.php @@ -2271,7 +2271,7 @@ class Image # Check for files uploaded prior to DJVU support activation # They have a '0' in their metadata field. # - if ( $this->metadata == '0' ) { + if ( $this->metadata == '0' || $this->metadata == '' ) { $deja = new DjVuImage( $this->imagePath ); $this->metadata = $deja->retrieveMetaData(); $this->purgeMetadataCache(); diff --git a/includes/Xml.php b/includes/Xml.php index 67dda7fea9..30b9eacce0 100644 --- a/includes/Xml.php +++ b/includes/Xml.php @@ -50,7 +50,9 @@ class Xml { $attribs = array_map( array( 'UtfNormal', 'cleanUp' ), $attribs ); } if( $contents ) { + wfProfileIn( __METHOD__ . '-norm' ); $contents = UtfNormal::cleanUp( $contents ); + wfProfileOut( __METHOD__ . '-norm' ); } return self::element( $element, $attribs, $contents ); } diff --git a/maintenance/getLagTimes.php b/maintenance/getLagTimes.php index f2c06f6a7a..5c55d52c9d 100644 --- a/maintenance/getLagTimes.php +++ b/maintenance/getLagTimes.php @@ -15,7 +15,8 @@ if( empty( $wgDBservers ) ) { } else { $ip = gethostbyname( $host ); } - $stars = str_repeat( '*', intval( $lag ) ); + $starLen = min( intval( $lag ), 40 ); + $stars = str_repeat( '*', $starLen ); printf( "%10s %20s %3d %s\n", $ip, $host, $lag, $stars ); } } diff --git a/maintenance/mcc.php b/maintenance/mcc.php index 93b6ec183a..a4977d6333 100644 --- a/maintenance/mcc.php +++ b/maintenance/mcc.php @@ -112,7 +112,11 @@ do { case 'server': $res = $mcc->get( $args[0] ); - print $mcc->_buckets[$mcc->_hashfunc( $args[0] ) % $mcc->_bucketcount] . "\n"; + $hv = $mcc->_hashfunc( $args[0] ); + for ( $i = 0; $i < 3; $i++ ) { + print $mcc->_buckets[$hv % $mcc->_bucketcount] . "\n"; + $hv += $mcc->_hashfunc( $i . $args[0] ); + } break; case 'set': diff --git a/maintenance/mctest.php b/maintenance/mctest.php index 316620d78e..90c4420559 100644 --- a/maintenance/mctest.php +++ b/maintenance/mctest.php @@ -16,8 +16,6 @@ function microtime_float() if ( isset( $args[0] ) ) { $wgMemCachedServers = array( $args[0] ); -} else { - $wgMemCachedServers[] = 'localhost'; } if ( isset( $options['i'] ) ) { $iterations = $options['i']; diff --git a/maintenance/purgeList.php b/maintenance/purgeList.php index 9bf7c1bfcd..abe76683d6 100644 --- a/maintenance/purgeList.php +++ b/maintenance/purgeList.php @@ -19,6 +19,9 @@ while( !feof( $stdin ) ) { $url = $title->getFullUrl(); echo "$url\n"; $urls[] = $url; + if( isset( $options['purge'] ) ) { + $title->invalidateCache(); + } } else { echo "(Invalid title '$page')\n"; } diff --git a/maintenance/rebuildInterwiki.inc b/maintenance/rebuildInterwiki.inc index 458693d3bc..a6dd885d84 100644 --- a/maintenance/rebuildInterwiki.inc +++ b/maintenance/rebuildInterwiki.inc @@ -59,6 +59,7 @@ function getRebuildInterwikiSQL() { 'sep11wiki' => 'sep11.wikipedia.org', 'metawiki' => 'meta.wikimedia.org', 'commonswiki' => 'commons.wikimedia.org', + 'specieswiki' => 'species.wikimedia.org', ); # Extra interwiki links that can't be in the intermap for some reason diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc index 30a294dad8..e634a6693d 100644 --- a/maintenance/storage/compressOld.inc +++ b/maintenance/storage/compressOld.inc @@ -111,9 +111,17 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh "old_flags NOT LIKE '%object%' AND old_flags NOT LIKE '%external%'"); if ( $beginDate ) { + if ( !preg_match( '/^\d{14}$/', $beginDate ) ) { + print "Invalid begin date \"$beginDate\"\n"; + return false; + } $conds[] = "rev_timestamp>'" . $beginDate . "'"; } if ( $endDate ) { + if ( !preg_match( '/^\d{14}$/', $endDate ) ) { + print "Invalid end date \"$endDate\"\n"; + return false; + } $conds[] = "rev_timestamp<'" . $endDate . "'"; } if ( $loadStyle == LS_CHUNKED ) { @@ -151,26 +159,15 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title ); print "$pageId\t" . $titleObj->getPrefixedDBkey() . " "; - print_r( - array( - 'rev_page' => $pageRow->page_id, - # Don't operate on the current revision - # Use < instead of <> in case the current revision has changed - # since the page select, which wasn't locking - 'rev_id < ' . $pageRow->page_latest - ) + $conds - ); - exit; // FIXME: is this "exit" supposed to be here? If no, delete, else delete dead code below. - # Load revisions $revRes = $dbw->select( $tables, $fields, - array( + array_merge( array( 'rev_page' => $pageRow->page_id, # Don't operate on the current revision # Use < instead of <> in case the current revision has changed # since the page select, which wasn't locking 'rev_id < ' . $pageRow->page_latest - ) + $conds, + ), $conds ), $fname, $revLoadOptions ); diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php index d597f1dfc5..18bc2bd997 100644 --- a/maintenance/storage/compressOld.php +++ b/maintenance/storage/compressOld.php @@ -31,7 +31,7 @@ * */ -$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid' ); +$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid', 'e' ); require_once( "../commandLine.inc" ); require_once( "compressOld.inc" ); diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php index 0b46f70bdf..2b58615144 100644 --- a/maintenance/storage/moveToExternal.php +++ b/maintenance/storage/moveToExternal.php @@ -1,9 +1,9 @@ ] \n"; + print "Usage: php moveToExternal.php [-s ] [-e ] \n"; exit; } $cluster = $args[0]; $dbw =& wfGetDB( DB_MASTER ); - if ( isset( $options['m'] ) ) { - $maxID = $options['m']; + if ( isset( $options['e'] ) ) { + $maxID = $options['e']; } else { $maxID = $dbw->selectField( 'text', 'MAX(old_id)', false, $fname ); } + $minID = isset( $options['s'] ) ? $options['s'] : 1; - moveToExternal( $cluster, $maxID ); + moveToExternal( $cluster, $maxID, $minID ); } -function moveToExternal( $cluster, $maxID ) { +function moveToExternal( $cluster, $maxID, $minID = 1 ) { $fname = 'moveToExternal'; $dbw =& wfGetDB( DB_MASTER ); + $dbr =& wfGetDB( DB_SLAVE ); - print "Moving $maxID text rows to external storage\n"; + $count = $maxID - $minID + 1; + $blockSize = 1000; + $numBlocks = ceil( $count / $blockSize ); + print "Moving text rows from $minID to $maxID to external storage\n"; $ext = new ExternalStoreDB; - for ( $id = 1; $id <= $maxID; $id++ ) { - if ( !($id % REPORTING_INTERVAL) ) { - print "$id\n"; - wfWaitForSlaves( 5 ); + $numMoved = 0; + $numStubs = 0; + + for ( $block = 0; $block < $numBlocks; $block++ ) { + $blockStart = $block * $blockSize + $minID; + $blockEnd = $blockStart + $blockSize - 1; + + if ( !($block % REPORTING_INTERVAL) ) { + print "oldid=$blockStart, moved=$numMoved\n"; + wfWaitForSlaves( 2 ); } - $row = $dbw->selectRow( 'text', array( 'old_flags', 'old_text' ), + + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), array( - 'old_id' => $id, + "old_id BETWEEN $blockStart AND $blockEnd", "old_flags NOT LIKE '%external%'", ), $fname ); - if ( !$row ) { - # Non-existent or already done - continue; - } - - # Resolve stubs - $text = $row->old_text; - if ( $row->old_flags === '' ) { - $flags = 'external'; - } else { - $flags = "{$row->old_flags},external"; - } - - if ( strpos( $flags, 'object' ) !== false ) { - $obj = unserialize( $text ); - $className = strtolower( get_class( $obj ) ); - if ( $className == 'historyblobstub' ) { - resolveStub( $id, $row->old_text, $row->old_flags ); - continue; - } elseif ( $className == 'historyblobcurstub' ) { - $text = gzdeflate( $obj->getText() ); - $flags = 'utf-8,gzip,external'; - } elseif ( $className == 'concatenatedgziphistoryblob' ) { - // Do nothing + while ( $row = $dbr->fetchObject( $res ) ) { + # Resolve stubs + $text = $row->old_text; + $id = $row->old_id; + if ( $row->old_flags === '' ) { + $flags = 'external'; } else { - print "Warning: unrecognised object class \"$className\"\n"; - continue; + $flags = "{$row->old_flags},external"; + } + + if ( strpos( $flags, 'object' ) !== false ) { + $obj = unserialize( $text ); + $className = strtolower( get_class( $obj ) ); + if ( $className == 'historyblobstub' ) { + #resolveStub( $id, $row->old_text, $row->old_flags ); + #$numStubs++; + continue; + } elseif ( $className == 'historyblobcurstub' ) { + $text = gzdeflate( $obj->getText() ); + $flags = 'utf-8,gzip,external'; + } elseif ( $className == 'concatenatedgziphistoryblob' ) { + // Do nothing + } else { + print "Warning: unrecognised object class \"$className\"\n"; + continue; + } + } else { + $className = false; } - } - if ( strlen( $text ) < 100 ) { - // Don't move tiny revisions - continue; - } + if ( strlen( $text ) < 100 && $className === false ) { + // Don't move tiny revisions + continue; + } - #print "Storing " . strlen( $text ) . " bytes to $url\n"; + #print "Storing " . strlen( $text ) . " bytes to $url\n"; + #print "old_id=$id\n"; - $url = $ext->store( $cluster, $text ); - if ( !$url ) { - print "Error writing to external storage\n"; - exit; + $url = $ext->store( $cluster, $text ); + if ( !$url ) { + print "Error writing to external storage\n"; + exit; + } + $dbw->update( 'text', + array( 'old_flags' => $flags, 'old_text' => $url ), + array( 'old_id' => $id ), $fname ); + $numMoved++; } - $dbw->update( 'text', - array( 'old_flags' => $flags, 'old_text' => $url ), - array( 'old_id' => $id ), $fname ); + $dbr->freeResult( $res ); } } diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php index 343222adb8..8eb4df247b 100644 --- a/maintenance/storage/resolveStubs.php +++ b/maintenance/storage/resolveStubs.php @@ -24,7 +24,7 @@ function resolveStubs() { $numBlocks = intval( $maxID / $blockSize ) + 1; for ( $b = 0; $b < $numBlocks; $b++ ) { - wfWaitForSlaves( 5 ); + wfWaitForSlaves( 2 ); printf( "%5.2f%%\n", $b / $numBlocks * 100 ); $start = intval($maxID / $numBlocks) * $b + 1; @@ -36,7 +36,7 @@ function resolveStubs() { #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". "AND old_flags='object' " . - "AND old_text LIKE 'O:15:\"historyblobstub\"%'", $fname ); + "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname ); while ( $row = $dbr->fetchObject( $res ) ) { resolveStub( $row->old_id, $row->old_text, $row->old_flags ); } @@ -83,6 +83,7 @@ function resolveStub( $id, $stubText, $flags ) { } # Update the row + #print "oldid=$id\n"; $dbw->update( 'text', array( /* SET */ 'old_flags' => $newFlags,