From 094b5bd4811786fe5f7c485ccb5ea3f6ef3e7144 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 6 May 2005 11:31:18 +0000 Subject: [PATCH] * Add $wgLegacySchemaConversion update-time option to reduce amount of copying during the schema upgrade: creates HistoryBlobCurStub reference records in text instead of copying all the cur_text fields. Requires that the cur table be left in place until/unless such fields are migrated into the main text store. In my test with a nl.wikipedia.org dump, this sped up the cur-to-old copy operation from a few hours to a few minutes. Also added some hacky timer echos in the updater for debug purposes. They should be prettied up but probably kept. --- RELEASE-NOTES | 6 +++++- includes/DefaultSettings.php | 17 ++++++++++++++++ includes/HistoryBlob.php | 39 ++++++++++++++++++++++++++++++++++++ includes/Parser.php | 1 + maintenance/updaters.inc | 30 +++++++++++++++++++++++++-- 5 files changed, 90 insertions(+), 3 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index bd8bfc491e..2dde35894d 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -166,7 +166,11 @@ Various bugfixes, small features, and a few experimental things: * Fix for reading incorrectly re-gzipped HistoryBlob entries * (bug 1906) Generalize project namespace for Latin localization, update namespaces * (bug 2075) Corrected namespace definitions in Tamil localization - +* Add $wgLegacySchemaConversion update-time option to reduce amount of + copying during the schema upgrade: creates HistoryBlobCurStub reference + records in text instead of copying all the cur_text fields. Requires + that the cur table be left in place until/unless such fields are migrated + into the main text store. 
=== Caveats === diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 587870c2af..6b3c96cdb8 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -373,6 +373,23 @@ $wgEditEncoding = ''; # en masse in the database before continuing as a UTF-8 wiki. $wgLegacyEncoding = false; +/** + * If set to true, the MediaWiki 1.4 to 1.5 schema conversion will + * create stub reference rows in the text table instead of copying + * the full text of all current entries from 'cur' to 'text'. + * + * This will speed up the conversion step for large sites, but + * requires that the cur table be kept around for those revisions + * to remain viewable. + * + * maintenance/migrateCurStubs.php can be used to complete the + * migration in the background once the wiki is back online. + * + * This option affects the updaters *only*. Any present cur stub + * revisions will be readable at runtime regardless of this setting. + */ +$wgLegacySchemaConversion = false; + $wgMimeType = 'text/html'; $wgJsMimeType = 'text/javascript'; $wgDocType = '-//W3C//DTD XHTML 1.0 Transitional//EN'; diff --git a/includes/HistoryBlob.php b/includes/HistoryBlob.php index 2fe5d1f5d0..5b1f453e84 100644 --- a/includes/HistoryBlob.php +++ b/includes/HistoryBlob.php @@ -218,4 +218,43 @@ class HistoryBlobStub { return $this->mHash; } } + + +/** + * To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the + * leftover cur table as the backend. This avoids expensively copying hundreds + * of megabytes of data during the conversion downtime. + * + * Serialized HistoryBlobCurStub objects will be inserted into the text table + * on conversion if $wgLegacySchemaConversion is set to true. 
+ * + * @package MediaWiki + */ +class HistoryBlobCurStub { + var $mCurId; + + /** @todo document */ + function HistoryBlobCurStub( $curid = 0 ) { + $this->mCurId = $curid; + } + + /** + * Sets the location (cur_id) of the main object to which this object + * points + */ + function setLocation( $id ) { + $this->mCurId = $id; + } + + /** @todo document */ + function getText() { + $dbr =& wfGetDB( DB_SLAVE ); + $row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) ); + if( !$row ) { + return false; + } + return $row->cur_text; + } +} + ?> diff --git a/includes/Parser.php b/includes/Parser.php index f4a8896b6f..c24cb82439 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -3081,6 +3081,7 @@ class Parser # remember to set an alignment, don't render immediately $align = 'none'; } elseif ( $wgUseImageResize && ! is_null( $match = $mwWidth->matchVariableStartToEnd($val) ) ) { + wfDebug( "MAG_IMG_WIDTH match: $match\n" ); # $match is the image width in pixels if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $match, $m ) ) { $width = intval( $m[1] ); diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc index 01c6a68bb0..6a0f24e042 100644 --- a/maintenance/updaters.inc +++ b/maintenance/updaters.inc @@ -282,6 +282,7 @@ function do_schema_restructuring() { echo "...page table already exists.\n"; } else { echo "...converting from cur/old to page/revision/text DB structure.\n"; flush(); + echo wfTimestamp(); echo "......checking for duplicate entries.\n"; flush(); extract( $wgDatabase->tableNames( 'cur', 'old', 'page', 'revision', 'text' ) ); @@ -290,6 +291,7 @@ function do_schema_restructuring() { FROM $cur GROUP BY cur_title, cur_namespace HAVING c>1", $fname ); if ( $wgDatabase->numRows( $rows ) > 0 ) { + echo wfTimestamp(); echo "......Found duplicate entries\n"; echo ( sprintf( " %-60s %3s %5s\n", 'Title', 'NS', 'Count' ) ); while ( $row = $wgDatabase->fetchObject( $rows ) ) { @@ -337,10 +339,12 @@ function 
do_schema_restructuring() { } $sql = "DELETE FROM $cur WHERE cur_id IN ( " . join( ',', $deleteId ) . ')'; $rows = $wgDatabase->query( $sql, $fname ); + echo wfTimestamp(); echo "......Deleted ".$wgDatabase->affectedRows()." records.\n"; } + echo wfTimestamp(); echo "......Creating tables.\n"; $wgDatabase->query("CREATE TABLE $page ( page_id int(8) unsigned NOT NULL auto_increment, @@ -378,18 +382,35 @@ function do_schema_restructuring() { INDEX usertext_timestamp (rev_user_text,rev_timestamp) ) TYPE=InnoDB", $fname ); + echo wfTimestamp(); echo "......Locking tables.\n"; $wgDatabase->query( "LOCK TABLES $page WRITE, $revision WRITE, $old WRITE, $cur WRITE", $fname ); $maxold = $wgDatabase->selectField( 'old', 'max(old_id)', '', $fname ); + echo wfTimestamp(); echo "......maxold is {$maxold}\n"; - echo "......Moving text from cur.\n"; + echo wfTimestamp(); + global $wgLegacySchemaConversion; + if( $wgLegacySchemaConversion ) { + // Create HistoryBlobCurStub entries. + // Text will be pulled from the leftover 'cur' table at runtime. + echo "......Moving metadata from cur; using blob references to text in cur table.\n"; + $cur_text = "concat('O:18:\"historyblobcurstub\":1:{s:6:\"mCurId\";i:',cur_id,';}')"; + $cur_flags = "'object'"; + } else { + // Copy all cur text in immediately: this may take longer but avoids + // having to keep an extra table around. 
+ echo "......Moving text from cur.\n"; + $cur_text = 'cur_text'; + $cur_flags = "''"; + } $wgDatabase->query( "INSERT INTO $old (old_namespace, old_title, old_text, old_comment, old_user, old_user_text, old_timestamp, old_minor_edit, old_flags) - SELECT cur_namespace, cur_title, cur_text, cur_comment, cur_user, cur_user_text, cur_timestamp, cur_minor_edit,'' + SELECT cur_namespace, cur_title, $cur_text, cur_comment, cur_user, cur_user_text, cur_timestamp, cur_minor_edit, $cur_flags FROM $cur", $fname ); + echo wfTimestamp(); echo "......Setting up revision table.\n"; $wgDatabase->query( "INSERT INTO $revision (rev_id, rev_page, rev_comment, rev_user, rev_user_text, rev_timestamp, rev_minor_edit) @@ -397,6 +418,7 @@ function do_schema_restructuring() { old_timestamp, old_minor_edit FROM $old,$cur WHERE old_namespace=cur_namespace AND old_title=cur_title", $fname ); + echo wfTimestamp(); echo "......Setting up page table.\n"; $wgDatabase->query( "INSERT INTO $page (page_id, page_namespace, page_title, page_restrictions, page_counter, page_is_redirect, page_is_new, page_random, page_touched, page_latest, page_len) @@ -405,11 +427,15 @@ function do_schema_restructuring() { FROM $cur,$revision WHERE cur_id=rev_page AND rev_timestamp=cur_timestamp AND rev_id > {$maxold}", $fname ); + echo wfTimestamp(); echo "......Unlocking tables.\n"; $wgDatabase->query( "UNLOCK TABLES", $fname ); + echo wfTimestamp(); echo "......Renaming old.\n"; $wgDatabase->query( "ALTER TABLE $old RENAME TO $text", $fname ); + + echo wfTimestamp(); echo "...done.\n"; } } -- 2.20.1