From: Tim Starling Date: Sat, 1 Nov 2008 12:06:23 +0000 (+0000) Subject: * Added $wgDebugLogPrefix, to allow users (or in this case, multiprocess command... X-Git-Tag: 1.31.0-rc.0~44484 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/comptes/?a=commitdiff_plain;h=38707d9565de59fbec83f4303cb2256efbf61752;p=lhc%2Fweb%2Fwiklou.git * Added $wgDebugLogPrefix, to allow users (or in this case, multiprocess command-line scripts) to set a debug log line prefix * Improved blob_tracking.sql docs * Allow testCompression.php to run until happy instead of a specified revision count * Added docs for some debugging globals in DefaultSettings.php In experimental script recompressTracked.php: * Fixed crippling bugs, seems to work now. Needs more testing. * Improved usage documentation * Use DiffHistoryBlob by default if available * Set a process-specific log prefix, added more debugging output * Optimised commit() slightly by merging the locking read queries using IN() * Fixed finishIncompleteMoves() so doPage() acts on only that page. Also run finishIncompleteMoves() for orphan lists. --- diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 75bc1ed7e9..8d3817dd88 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -838,7 +838,6 @@ $wgTranslateNumerals = true; /** * Translation using MediaWiki: namespace. - * This will increase load times by 25-60% unless memcached is installed. * Interface messages will be loaded from the database. */ $wgUseDatabaseMessages = true; @@ -952,6 +951,16 @@ $wgCleanSignatures = true; $wgExtraSubtitle = ''; $wgSiteSupportPage = ''; # A page where you users can receive donations +/** + * Set this to a string to put the wiki into read-only mode. The text will be + * used as an explanation to users. + * + * This prevents most write operations via the web interface. Cache updates may + * still be possible. To prevent database writes completely, use the read_only + * option in MySQL. + */ +$wgReadOnly = null; + /*** * If this lock file exists, the wiki will be forced into read-only mode. * Its contents will be shown to users as part of the read-only warning @@ -960,15 +969,42 @@ $wgSiteSupportPage = ''; # A page where you users can receive donations $wgReadOnlyFile = false; ///< defaults to "{$wgUploadDirectory}/lock_yBgMBwiR"; /** + * Filename for debug logging. * The debug log file should be not be publicly accessible if it is used, as it - * may contain private data. */ + * may contain private data. + */ $wgDebugLogFile = ''; +/** + * Prefix for debug log lines + */ +$wgDebugLogPrefix = ''; + +/** + * If true, instead of redirecting, show a page with a link to the redirect + * destination. This allows for the inspection of PHP error messages, and easy + * resubmission of form data. For developer use only. + */ $wgDebugRedirects = false; -$wgDebugRawPage = false; # Avoid overlapping debug entries by leaving out CSS +/** + * If true, log debugging data from action=raw. + * This is normally false to avoid overlapping debug entries due to gen=css and + * gen=js requests. + */ +$wgDebugRawPage = false; + +/** + * Send debug data to an HTML comment in the output. + * + * This may occasionally be useful when supporting a non-technical end-user. It's + * more secure than exposing the debug log file to the web, since the output only + * contains private data for the current user. But it's not ideal for development + * use since data is lost on fatal errors and redirects. + */ $wgDebugComments = false; -$wgReadOnly = null; + +/** Does nothing. Obsolete? */ $wgLogQueries = false; /** @@ -1027,7 +1063,8 @@ $wgUseCategoryBrowser = false; * same options. * * This can provide a significant speedup for medium to large pages, - * so you probably want to keep it on. + * so you probably want to keep it on. Extensions that conflict with the + * parser cache should disable the cache on a per-page basis instead. */ $wgEnableParserCache = true; diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index ead550fa6c..e7792a3525 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -195,6 +195,7 @@ function wfUrlencode( $s ) { */ function wfDebug( $text, $logonly = false ) { global $wgOut, $wgDebugLogFile, $wgDebugComments, $wgProfileOnly, $wgDebugRawPage; + global $wgDebugLogPrefix; static $recursion = 0; static $cache = array(); // Cache of unoutputted messages @@ -227,6 +228,7 @@ function wfDebug( $text, $logonly = false ) { # Strip unprintables; they can switch terminal modes when binary data # gets dumped, which is pretty annoying. $text = preg_replace( '![\x00-\x08\x0b\x0c\x0e-\x1f]!', ' ', $text ); + $text = $wgDebugLogPrefix . $text; wfErrorLog( $text, $wgDebugLogFile ); } } diff --git a/maintenance/storage/blob_tracking.sql b/maintenance/storage/blob_tracking.sql index b136609c9a..6cac9a3836 100644 --- a/maintenance/storage/blob_tracking.sql +++ b/maintenance/storage/blob_tracking.sql @@ -4,10 +4,14 @@ CREATE TABLE /*$wgDBprefix*/blob_tracking ( -- page.page_id -- This may be zero for orphan or deleted text + -- Note that this is for compression grouping only -- it doesn't need to be + -- accurate at the time recompressTracked is run. Operations such as a + -- delete/undelete cycle may make it inaccurate. bt_page integer not null, -- revision.rev_id -- This may be zero for orphan or deleted text + -- Like bt_page, it does not need to be accurate when recompressTracked is run. bt_rev_id integer not null, -- text.old_id diff --git a/maintenance/storage/recompressTracked.php b/maintenance/storage/recompressTracked.php index 9c32f126a7..1bb15b7c67 100644 --- a/maintenance/storage/recompressTracked.php +++ b/maintenance/storage/recompressTracked.php @@ -4,9 +4,13 @@ $optionsWithArgs = RecompressTracked::getOptionsWithArgs(); require( dirname( __FILE__ ) .'/../commandLine.inc' ); if ( count( $args ) < 1 ) { - echo "Usage: php recompressTracked.php [... ...]\n"; - echo "Moves blobs indexed by trackBlobs.php to a specified list of destination -clusters, and recompresses them in the process. Restartable.\n"; + echo "Usage: php recompressTracked.php [options] [... ...] +Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process. Restartable. + +Options: + --procs Set the number of child processes (default 8) + --copy-only Copy only, do not update the text table. Restart without this option to complete. +"; exit( 1 ); } @@ -18,17 +22,16 @@ class RecompressTracked { var $batchSize = 1000; var $reportingInterval = 10; var $numProcs = 8; + var $useDiff, $pageBlobClass, $orphanBlobClass; var $slavePipes, $slaveProcs, $prevSlaveId; - var $blobClass = 'DiffHistoryBlob'; var $copyOnly = false; var $isChild = false; var $slaveId = false; var $store; - static $optionsWithArgs = array( 'procs', 'class' ); + static $optionsWithArgs = array( 'procs', 'slave-id' ); static $cmdLineOptionMap = array( 'procs' => 'numProcs', - 'class' => 'blobClass', 'copy-only' => 'copyOnly', 'child' => 'isChild', 'slave-id' => 'slaveId', @@ -53,14 +56,18 @@ class RecompressTracked { $this->$name = $value; } $this->store = new ExternalStoreDB; + if ( !$this->isChild ) { + $GLOBALS['wgDebugLogPrefix'] = "RCT M: "; + } elseif ( $this->slaveId !== false ) { + $GLOBALS['wgDebugLogPrefix'] = "RCT {$this->slaveId}: "; + } + $this->useDiff = function_exists( 'xdiff_string_bdiff' ); + $this->pageBlobClass = $this->useDiff ? 'DiffHistoryBlob' : 'ConcatenatedGzipHistoryBlob'; + $this->orphanBlobClass = 'ConcatenatedGzipHistoryBlob'; } function debug( $msg ) { - if ( $this->slaveId !== false ) { - $msg = "{$this->slaveId}: $msg"; - } - $msg .= "\n"; - wfDebug( $msg ); + wfDebug( "$msg\n" ); } /** @@ -146,7 +153,7 @@ class RecompressTracked { array( 'file', '/dev/stderr', 'w' ) ); wfSuppressWarnings(); - $proc = proc_open( $cmd, $spec, $pipes ); + $proc = proc_open( "$cmd --slave-id $i", $spec, $pipes ); wfRestoreWarnings(); if ( !$proc ) { echo "Error opening slave process\n"; @@ -299,6 +306,7 @@ class RecompressTracked { * Main entry point for worker processes */ function executeChild() { + $this->debug( 'starting' ); $this->syncDBs(); while ( !feof( STDIN ) ) { @@ -306,6 +314,7 @@ class RecompressTracked { if ( $line == '' ) { continue; } + $this->debug( $line ); $args = explode( ' ', $line ); $cmd = array_shift( $args ); switch ( $cmd ) { @@ -325,15 +334,21 @@ class RecompressTracked { * Move tracked text in a given page */ function doPage( $pageId ) { + $title = Title::newFromId( $pageId ); + if ( $title ) { + $titleText = $title->getPrefixedText(); + } else { + $titleText = '[deleted]'; + } $dbr = wfGetDB( DB_SLAVE ); // Finish any incomplete transactions if ( !$this->copyOnly ) { - $this->finishIncompleteMoves(); + $this->finishIncompleteMoves( array( 'bt_page' => $pageId ) ); } $startId = 0; - $trx = new CgzCopyTransaction( $this ); + $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); while ( true ) { $res = $dbr->select( @@ -343,7 +358,7 @@ class RecompressTracked { 'bt_page' => $pageId, 'bt_text_id > ' . $dbr->addQuotes( $startId ), 'bt_moved' => 0, - 'bt_new_url' => '', + 'bt_new_url IS NULL', 'bt_text_id=old_id', ), __METHOD__, @@ -372,12 +387,15 @@ class RecompressTracked { // Queue it if ( !$trx->addItem( $text, $row->bt_text_id ) ) { + $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); $trx->commit(); - $trx = new CgzCopyTransaction( $this ); + $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); } } $startId = $row->bt_text_id; } + + $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); $trx->commit(); } @@ -420,18 +438,18 @@ class RecompressTracked { * This function completes any moves that only have done bt_new_url. This * can happen when the script is interrupted, or when --copy-only is used. */ - function finishIncompleteMoves() { + function finishIncompleteMoves( $conds ) { $dbr = wfGetDB( DB_SLAVE ); $startId = 0; + $conds = array_merge( $conds, array( + 'bt_moved' => 0, + 'bt_new_url IS NOT NULL' + )); while ( true ) { $res = $dbr->select( 'blob_tracking', '*', - array( - 'bt_text_id > ' . $dbr->addQuotes( $startId ), - 'bt_moved' => 0, - "bt_new_url <> ''", - ), + array_merge( $conds, array( 'bt_text_id > ' . $dbr->addQuotes( $startId ) ) ), __METHOD__, array( 'ORDER BY' => 'bt_text_id', @@ -441,6 +459,7 @@ class RecompressTracked { if ( !$res->numRows() ) { break; } + $this->debug( 'Incomplete: ' . $row->numRows() . ' rows' ); foreach ( $res as $row ) { $this->moveTextRow( $row->bt_text_id, $row->bt_new_url ); } @@ -471,7 +490,10 @@ class RecompressTracked { * Move an orphan text_id to the new cluster */ function doOrphanList( $textIds ) { - $trx = new CgzCopyTransaction( $this ); + // Finish incomplete moves + $this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) ); + + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); foreach ( $textIds as $textId ) { $row = wfGetDB( DB_SLAVE )->selectRow( 'text', array( 'old_text', 'old_flags' ), array( 'old_id' => $textId ), __METHOD__ ); @@ -482,10 +504,13 @@ class RecompressTracked { } if ( !$trx->addItem( $text, $textId ) ) { + $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); $trx->commit(); - $trx = new CgzCopyTransaction( $this ); + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); } } + $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); + $trx->commit(); } } @@ -493,6 +518,7 @@ class RecompressTracked { * Class to represent a recompression operation for a single CGZ blob */ class CgzCopyTransaction { + var $parent; var $blobClass; var $cgz; var $referrers; @@ -500,10 +526,11 @@ class CgzCopyTransaction { /** * Create a transaction from a RecompressTracked object */ - function __construct( $parent ) { - $this->blobClass = $parent->blobClass; + function __construct( $parent, $blobClass ) { + $this->blobClass = $blobClass; $this->cgz = false; $this->texts = array(); + $this->parent = $parent; } /** @@ -521,6 +548,10 @@ class CgzCopyTransaction { return $this->cgz->isHappy(); } + function getSize() { + return count( $this->texts ); + } + /** * Recompress text after some aberrant modification */ @@ -554,16 +585,16 @@ class CgzCopyTransaction { // We do a locking read to prevent closer-run race conditions. $dbw = wfGetDB( DB_MASTER ); $dbw->begin(); + $res = $dbw->select( 'blob_tracking', + array( 'bt_text_id', 'bt_moved' ), + array( 'bt_text_id' => array_keys( $this->referrers ) ), + __METHOD__, array( 'FOR UPDATE' ) ); $dirty = false; - foreach ( $this->referrers as $textId => $hash ) { - $moved = $dbw->selectField( 'blob_tracking', 'bt_moved', - array( 'bt_text_id' => $textId ), - __METHOD__, - array( 'FOR UPDATE' ) - ); - if ( !$moved ) { + foreach ( $res as $row ) { + if ( $row->bt_moved ) { # This row has already been moved, remove it - unset( $this->texts[$textId] ); + $this->parent->debug( "TRX: conflict detected in old_id={$row->bt_text_id}" ); + unset( $this->texts[$row->bt_text_id] ); $dirty = true; } } @@ -574,7 +605,7 @@ class CgzCopyTransaction { // All have been moved already if ( $originalCount > 1 ) { // This is suspcious, make noise - echo "Warning: concurrent operation detected, are there two conflicting\n" . + echo "Warning: concurrent operation detected, are there two conflicting " . "processes running, doing the same job?\n"; } return; @@ -616,9 +647,5 @@ class CgzCopyTransaction { } } } - - function signalHandler() { - $this->signalled = true; - } } diff --git a/maintenance/storage/testCompression.php b/maintenance/storage/testCompression.php index eaf7e35fa7..9c96c9f81d 100644 --- a/maintenance/storage/testCompression.php +++ b/maintenance/storage/testCompression.php @@ -15,7 +15,13 @@ if ( isset( $options['start'] ) ) { } else { $start = '19700101000000'; } -$limit = isset( $options['limit'] ) ? $options['limit'] : 10; +if ( isset( $options['limit'] ) ) { + $limit = $options['limit']; + $untilHappy = false; +} else { + $limit = 1000; + $untilHappy = true; +} $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob'; @@ -43,16 +49,21 @@ foreach ( $res as $row ) { $uncompressedSize += strlen( $text ); $hashes[$row->rev_id] = md5( $text ); $keys[$row->rev_id] = $blob->addItem( $text ); + if ( $untilHappy && !$blob->isHappy() ) { + break; + } } $serialized = serialize( $blob ); $t += microtime( true ); +#print_r( $blob->mDiffMap ); -printf( "Compression ratio for %d revisions: %5.2f, %s -> %s\n", - $res->numRows(), +printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n", + $type, + count( $hashes ), $uncompressedSize / strlen( $serialized ), $wgLang->formatSize( $uncompressedSize ), - $wgLang->formatSize( strlen( $serialized ) ) + strlen( $serialized ) ); printf( "Compression time: %5.2f ms\n", $t * 1000 );