Apply most of the code tweaks from the live site:
author Brion Vibber <brion@users.mediawiki.org>
Wed, 17 Jan 2007 00:54:54 +0000 (00:54 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Wed, 17 Jan 2007 00:54:54 +0000 (00:54 +0000)
* use configured cache servers for mctest.php
* bucket details in mcc.php
* fix input validation and remove debugging code in compressOld
* full ID range for moveToExternal
* fix resolveStubs.php for compatibility with older serialized data
* maximum line length for bar graphs in getLagTimes.php
* recognize specieswiki in rebuildInterwiki.inc
* --purge option to do additional parser-cache purging for purgeList.php
* default changed in MiniDonation extension
* profile unicode cleanup in Xml
* log slow parses in Article.php
* profile wfMsgReal
* log mkdir failures
* profile AutoLoader
* rebuild empty DjVu metadata containing ''
* security fix for DjVu metadata retrieval

16 files changed:
RELEASE-NOTES
includes/Article.php
includes/AutoLoader.php
includes/DjVuImage.php
includes/GlobalFunctions.php
includes/Image.php
includes/Xml.php
maintenance/getLagTimes.php
maintenance/mcc.php
maintenance/mctest.php
maintenance/purgeList.php
maintenance/rebuildInterwiki.inc
maintenance/storage/compressOld.inc
maintenance/storage/compressOld.php
maintenance/storage/moveToExternal.php
maintenance/storage/resolveStubs.php

index 3f66ad3..b2146f8 100644 (file)
@@ -82,6 +82,22 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * Introduce "BookInformation" hook; see docs/hooks.txt for more details
 * English Wikipedia added to interwiki table as wikipedia: No idea why it wasn't
   there before.
+* use configured cache servers for mctest.php
+* bucket details in mcc.php
+* fix input validation and remove debugging code in compressOld
+* full ID range for moveToExternal
+* fix resolveStubs.php for compatibility with older serialized data
+* maximum line length for bar graphs in getLagTimes.php
+* recognize specieswiki in rebuildInterwiki.inc
+* --purge option to do additional parser-cache purging for purgeList.php
+* profile unicode cleanup in Xml
+* log slow parses in Article.php
+* profile wfMsgReal
+* log mkdir failures
+* profile AutoLoader
+* rebuild empty DjVu metadata containing ''
+* security fix for DjVu metadata retrieval
+
 
 == Languages updated ==
 
index ea6035e..180f6aa 100644 (file)
@@ -808,11 +808,21 @@ class Article {
                                        $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false );
                                }
                                # Display content and don't save to parser cache
+                               # With timing hack -- TS 2006-07-26
+                               $time = -wfTime();
                                $this->outputWikiText( $text, false );
+                               $time += wfTime();
+
+                               # Timing hack
+                               if ( $time > 3 ) {
+                                       wfDebugLog( 'slow-parse', sprintf( "%-5.2f %s", $time, 
+                                               $this->mTitle->getPrefixedDBkey()));
+                               }
 
                                if( !$this->isCurrent() ) {
                                        $wgOut->parserOptions()->setEditSection( $oldEditSectionSetting );
                                }
+
                        }
                }
                /* title may have been set from the cache */
index 0b1c82b..747e9ec 100644 (file)
@@ -273,6 +273,7 @@ function __autoload($className) {
                'ApiResult' => 'includes/api/ApiResult.php',
        );
        
+       wfProfileIn( __METHOD__ );
        if ( isset( $localClasses[$className] ) ) {
                $filename = $localClasses[$className];
        } elseif ( isset( $wgAutoloadClasses[$className] ) ) {
@@ -289,6 +290,7 @@ function __autoload($className) {
                }
                if ( !$filename ) {
                        # Give up
+                       wfProfileOut( __METHOD__ );
                        return;
                }
        }
@@ -299,6 +301,7 @@ function __autoload($className) {
                $filename = "$IP/$filename";
        }
        require( $filename );
+       wfProfileOut( __METHOD__ );
 }
 
 function wfLoadAllExtensions() {
index ceb7aa1..a0a7305 100644 (file)
@@ -221,7 +221,7 @@ class DjVuImage {
        function retrieveMetaData() {
                global $wgDjvuToXML;
                if ( isset( $wgDjvuToXML ) ) {
-                       $cmd = $wgDjvuToXML . ' --without-anno --without-text ' . $this->mFilename;
+                       $cmd = $wgDjvuToXML . ' --without-anno --without-text ' . escapeshellarg( $this->mFilename );
                        $xml = wfShellExec( $cmd );
                } else {
                        $xml = null;
index 777c02c..4976055 100644 (file)
@@ -376,8 +376,11 @@ function wfMsgNoDBForContent( $key ) {
  * @return String: the requested message.
  */
 function wfMsgReal( $key, $args, $useDB = true, $forContent=false, $transform = true ) {
+       $fname = 'wfMsgReal';
+       wfProfileIn( $fname );
        $message = wfMsgGetKey( $key, $useDB, $forContent, $transform );
        $message = wfMsgReplaceArgs( $message, $args );
+       wfProfileOut( $fname );
        return $message;
 }
 
@@ -1624,6 +1627,7 @@ function wfMkdirParents( $fullDir, $mode = 0777 ) {
        foreach ( $createList as $dir ) {
                # use chmod to override the umask, as suggested by the PHP manual
                if ( !mkdir( $dir, $mode ) || !chmod( $dir, $mode ) ) {
+                       wfDebugLog( 'mkdir', "Unable to create directory $dir\n" );
                        return false;
                } 
        }
index 1f3895c..7a6442c 100644 (file)
@@ -2271,7 +2271,7 @@ class Image
                # Check for files uploaded prior to DJVU support activation
                # They have a '0' in their metadata field.
                #
-               if ( $this->metadata == '0' ) {
+               if ( $this->metadata == '0' || $this->metadata == '' ) {
                        $deja = new DjVuImage( $this->imagePath );
                        $this->metadata = $deja->retrieveMetaData();
                        $this->purgeMetadataCache();
index 67dda7f..30b9eac 100644 (file)
@@ -50,7 +50,9 @@ class Xml {
                        $attribs = array_map( array( 'UtfNormal', 'cleanUp' ), $attribs );
                }
                if( $contents ) {
+                       wfProfileIn( __METHOD__ . '-norm' );
                        $contents = UtfNormal::cleanUp( $contents );
+                       wfProfileOut( __METHOD__ . '-norm' );
                }
                return self::element( $element, $attribs, $contents );
        }
index f2c06f6..5c55d52 100644 (file)
@@ -15,7 +15,8 @@ if( empty( $wgDBservers ) ) {
                } else {
                        $ip = gethostbyname( $host );
                }
-               $stars = str_repeat( '*', intval( $lag ) );
+               $starLen = min( intval( $lag ), 40 );
+               $stars = str_repeat( '*', $starLen );
                printf( "%10s %20s %3d %s\n", $ip, $host, $lag, $stars );
        }
 }
index 93b6ec1..a4977d6 100644 (file)
@@ -112,7 +112,11 @@ do {
 
                case 'server':
                        $res = $mcc->get( $args[0] );
-                       print $mcc->_buckets[$mcc->_hashfunc( $args[0] ) % $mcc->_bucketcount] . "\n";
+                       $hv = $mcc->_hashfunc( $args[0] );
+                       for ( $i = 0; $i < 3; $i++ ) {
+                               print $mcc->_buckets[$hv % $mcc->_bucketcount] . "\n";
+                               $hv += $mcc->_hashfunc( $i . $args[0] );
+                       }
                        break;
 
                case 'set':
index 316620d..90c4420 100644 (file)
@@ -16,8 +16,6 @@ function microtime_float()
 
 if ( isset( $args[0] ) ) {
        $wgMemCachedServers = array( $args[0] );
-} else {
-       $wgMemCachedServers[] = 'localhost';
 }
 if ( isset( $options['i'] ) ) {
        $iterations = $options['i'];
index 9bf7c1b..abe7668 100644 (file)
@@ -19,6 +19,9 @@ while( !feof( $stdin ) ) {
                        $url = $title->getFullUrl();
                        echo "$url\n";
                        $urls[] = $url;
+                       if( isset( $options['purge'] ) ) {
+                               $title->invalidateCache();
+                       }
                } else {
                        echo "(Invalid title '$page')\n";
                }
index 458693d..a6dd885 100644 (file)
@@ -59,6 +59,7 @@ function getRebuildInterwikiSQL() {
                'sep11wiki' => 'sep11.wikipedia.org',
                'metawiki' => 'meta.wikimedia.org',
                'commonswiki' => 'commons.wikimedia.org',
+               'specieswiki' => 'species.wikimedia.org',
        );
 
        # Extra interwiki links that can't be in the intermap for some reason
index 30a294d..e634a66 100644 (file)
@@ -111,9 +111,17 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh
                "old_flags NOT LIKE '%object%' AND old_flags NOT LIKE '%external%'");
 
        if ( $beginDate ) {
+               if ( !preg_match( '/^\d{14}$/', $beginDate ) ) {
+                       print "Invalid begin date \"$beginDate\"\n";
+                       return false;
+               }
                $conds[] = "rev_timestamp>'" . $beginDate . "'";
        }
        if ( $endDate )  {
+               if ( !preg_match( '/^\d{14}$/', $endDate ) ) {
+                       print "Invalid end date \"$endDate\"\n";
+                       return false;
+               }
                $conds[] = "rev_timestamp<'" . $endDate . "'";
        }
        if ( $loadStyle == LS_CHUNKED ) {
@@ -151,26 +159,15 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh
                $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title );
                print "$pageId\t" . $titleObj->getPrefixedDBkey() . " ";
 
-               print_r(
-                       array( 
-                               'rev_page' => $pageRow->page_id, 
-                               # Don't operate on the current revision
-                               # Use < instead of <> in case the current revision has changed 
-                               # since the page select, which wasn't locking
-                               'rev_id < ' . $pageRow->page_latest
-                       ) + $conds
-               );
-               exit; // FIXME: is this "exit" supposed to be here? If no, delete, else delete dead code below.
-
                # Load revisions
                $revRes = $dbw->select( $tables, $fields,
-                       array( 
+                       array_merge( array
                                'rev_page' => $pageRow->page_id, 
                                # Don't operate on the current revision
                                # Use < instead of <> in case the current revision has changed 
                                # since the page select, which wasn't locking
                                'rev_id < ' . $pageRow->page_latest
-                       ) + $conds,
+                       ), $conds ),
                        $fname,
                        $revLoadOptions
                );
index d597f1d..18bc2bd 100644 (file)
@@ -31,7 +31,7 @@
  *
  */
 
-$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid' );
+$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid', 'e' );
 require_once( "../commandLine.inc" );
 require_once( "compressOld.inc" );
 
index 0b46f70..2b58615 100644 (file)
@@ -1,9 +1,9 @@
 <?php
 
-define( 'REPORTING_INTERVAL', 100 );
+define( 'REPORTING_INTERVAL', 1 );
 
 if ( !defined( 'MEDIAWIKI' ) ) {
-       $optionsWithArgs = array( 'm' );
+       $optionsWithArgs = array( 'm', 's' );
 
        require_once( '../commandLine.inc' );
        require_once( 'ExternalStoreDB.php' );
@@ -12,85 +12,101 @@ if ( !defined( 'MEDIAWIKI' ) ) {
        $fname = 'moveToExternal';
 
        if ( !isset( $args[0] ) ) {
-               print "Usage: php moveToExternal.php [-m <maxid>] <cluster>\n";
+               print "Usage: php moveToExternal.php [-s <startid>] [-e <endid>] <cluster>\n";
                exit;
        }
 
        $cluster = $args[0];
        $dbw =& wfGetDB( DB_MASTER );
 
-       if ( isset( $options['m'] ) ) {
-               $maxID = $options['m'];
+       if ( isset( $options['e'] ) ) {
+               $maxID = $options['e'];
        } else {
                $maxID = $dbw->selectField( 'text', 'MAX(old_id)', false, $fname );
        }
+       $minID = isset( $options['s'] ) ? $options['s'] : 1;
 
-       moveToExternal( $cluster, $maxID );
+       moveToExternal( $cluster, $maxID, $minID );
 }
 
 
 
-function moveToExternal( $cluster, $maxID ) {
+function moveToExternal( $cluster, $maxID, $minID = 1 ) {
        $fname = 'moveToExternal';
        $dbw =& wfGetDB( DB_MASTER );
+       $dbr =& wfGetDB( DB_SLAVE );
 
-       print "Moving $maxID text rows to external storage\n";
+       $count = $maxID - $minID + 1;
+       $blockSize = 1000;
+       $numBlocks = ceil( $count / $blockSize );
+       print "Moving text rows from $minID to $maxID to external storage\n";
        $ext = new ExternalStoreDB;
-       for ( $id = 1; $id <= $maxID; $id++ ) {
-               if ( !($id % REPORTING_INTERVAL) ) {
-                       print "$id\n";
-                       wfWaitForSlaves( 5 );
+       $numMoved = 0;
+       $numStubs = 0;
+       
+       for ( $block = 0; $block < $numBlocks; $block++ ) {
+               $blockStart = $block * $blockSize + $minID;
+               $blockEnd = $blockStart + $blockSize - 1;
+               
+               if ( !($block % REPORTING_INTERVAL) ) {
+                       print "oldid=$blockStart, moved=$numMoved\n";
+                       wfWaitForSlaves( 2 );
                }
-               $row = $dbw->selectRow( 'text', array( 'old_flags', 'old_text' ),
+               
+               $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
                        array(
-                               'old_id' => $id,
+                               "old_id BETWEEN $blockStart AND $blockEnd",
                                "old_flags NOT LIKE '%external%'",
                        ), $fname );
-               if ( !$row ) {
-                       # Non-existent or already done
-                       continue;
-               }
-
-               # Resolve stubs
-               $text = $row->old_text;
-               if ( $row->old_flags === '' ) {
-                       $flags = 'external';
-               } else {
-                       $flags = "{$row->old_flags},external";
-               }
-               
-               if ( strpos( $flags, 'object' ) !== false ) {
-                       $obj = unserialize( $text );
-                       $className = strtolower( get_class( $obj ) );
-                       if ( $className == 'historyblobstub' ) {
-                               resolveStub( $id, $row->old_text, $row->old_flags );
-                               continue;
-                       } elseif ( $className == 'historyblobcurstub' ) {
-                               $text = gzdeflate( $obj->getText() );
-                               $flags = 'utf-8,gzip,external';
-                       } elseif ( $className == 'concatenatedgziphistoryblob' ) {
-                               // Do nothing
+               while ( $row = $dbr->fetchObject( $res ) ) {
+                       # Resolve stubs
+                       $text = $row->old_text;
+                       $id = $row->old_id;
+                       if ( $row->old_flags === '' ) {
+                               $flags = 'external';
                        } else {
-                               print "Warning: unrecognised object class \"$className\"\n";
-                               continue;
+                               $flags = "{$row->old_flags},external";
+                       }
+                       
+                       if ( strpos( $flags, 'object' ) !== false ) {
+                               $obj = unserialize( $text );
+                               $className = strtolower( get_class( $obj ) );
+                               if ( $className == 'historyblobstub' ) {
+                                       #resolveStub( $id, $row->old_text, $row->old_flags );
+                                       #$numStubs++;
+                                       continue;
+                               } elseif ( $className == 'historyblobcurstub' ) {
+                                       $text = gzdeflate( $obj->getText() );
+                                       $flags = 'utf-8,gzip,external';
+                               } elseif ( $className == 'concatenatedgziphistoryblob' ) {
+                                       // Do nothing
+                               } else {
+                                       print "Warning: unrecognised object class \"$className\"\n";
+                                       continue;
+                               }
+                       } else {
+                               $className = false;
                        }
-               }
 
-               if ( strlen( $text ) < 100 ) {
-                       // Don't move tiny revisions
-                       continue;
-               }
+                       if ( strlen( $text ) < 100 && $className === false ) {
+                               // Don't move tiny revisions
+                               continue;
+                       }
 
-               #print "Storing "  . strlen( $text ) . " bytes to $url\n";
+                       #print "Storing "  . strlen( $text ) . " bytes to $url\n";
+                       #print "old_id=$id\n";
 
-               $url = $ext->store( $cluster, $text );
-               if ( !$url ) {
-                       print "Error writing to external storage\n";
-                       exit;
+                       $url = $ext->store( $cluster, $text );
+                       if ( !$url ) {
+                               print "Error writing to external storage\n";
+                               exit;
+                       }
+                       $dbw->update( 'text',
+                               array( 'old_flags' => $flags, 'old_text' => $url ),
+                               array( 'old_id' => $id ), $fname );
+                       $numMoved++;
                }
-               $dbw->update( 'text',
-                       array( 'old_flags' => $flags, 'old_text' => $url ),
-                       array( 'old_id' => $id ), $fname );
+               $dbr->freeResult( $res );
        }
 }
 
index 343222a..8eb4df2 100644 (file)
@@ -24,7 +24,7 @@ function resolveStubs() {
        $numBlocks = intval( $maxID / $blockSize ) + 1;
 
        for ( $b = 0; $b < $numBlocks; $b++ ) {
-               wfWaitForSlaves( 5 );
+               wfWaitForSlaves( 2 );
                
                printf( "%5.2f%%\n", $b / $numBlocks * 100 );
                $start = intval($maxID / $numBlocks) * $b + 1;
@@ -36,7 +36,7 @@ function resolveStubs() {
                        #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
                        
                        "AND old_flags='object' " .
-                       "AND old_text LIKE 'O:15:\"historyblobstub\"%'", $fname );
+                       "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname );
                while ( $row = $dbr->fetchObject( $res ) ) {
                        resolveStub( $row->old_id, $row->old_text, $row->old_flags );
                }
@@ -83,6 +83,7 @@ function resolveStub( $id, $stubText, $flags ) {
        }
 
        # Update the row
+       #print "oldid=$id\n";
        $dbw->update( 'text',
                array( /* SET */
                        'old_flags' => $newFlags,