From: James D. Forrester Date: Mon, 20 Feb 2017 22:48:21 +0000 (-0800) Subject: maintenance: Replace implicit Bugzilla bug numbers with Phab ones X-Git-Tag: 1.31.0-rc.0~3967^2 X-Git-Url: http://git.cyclocoop.org/%28?a=commitdiff_plain;h=242df680cee7241c01ef8b8e62dac62b1bad4d6d;p=lhc%2Fweb%2Fwiklou.git maintenance: Replace implicit Bugzilla bug numbers with Phab ones It's unreasonable to expect newbies to know that "bug 12345" means "Task T14345" except where it doesn't, so let's just standardise on the real numbers. This includes renaming fixBug20757.php to fixT22757.php for similar consistency. Change-Id: If81a590d658fbd82c20c54ac47dfdc8856745ca3 --- diff --git a/autoload.php b/autoload.php index b21310e04c..372ae3c6d8 100644 --- a/autoload.php +++ b/autoload.php @@ -485,10 +485,10 @@ $wgAutoloadLocalClasses = [ 'FindHooks' => __DIR__ . '/maintenance/findHooks.php', 'FindMissingFiles' => __DIR__ . '/maintenance/findMissingFiles.php', 'FindOrphanedFiles' => __DIR__ . '/maintenance/findOrphanedFiles.php', - 'FixBug20757' => __DIR__ . '/maintenance/storage/fixBug20757.php', 'FixDefaultJsonContentPages' => __DIR__ . '/maintenance/fixDefaultJsonContentPages.php', 'FixDoubleRedirects' => __DIR__ . '/maintenance/fixDoubleRedirects.php', 'FixExtLinksProtocolRelative' => __DIR__ . '/maintenance/fixExtLinksProtocolRelative.php', + 'FixT22757' => __DIR__ . '/maintenance/storage/fixT22757.php', 'FixTimestamps' => __DIR__ . '/maintenance/fixTimestamps.php', 'FixUserRegistration' => __DIR__ . '/maintenance/fixUserRegistration.php', 'ForeignAPIFile' => __DIR__ . '/includes/filerepo/file/ForeignAPIFile.php', diff --git a/maintenance/archives/patch-archive-ar_id.sql b/maintenance/archives/patch-archive-ar_id.sql index ddd1d7b482..08287cd5e7 100644 --- a/maintenance/archives/patch-archive-ar_id.sql +++ b/maintenance/archives/patch-archive-ar_id.sql @@ -1,7 +1,7 @@ -- -- patch-archive-ar_id.sql -- --- Bug 39675. Add archive.ar_id. +-- T41675. Add archive.ar_id. ALTER TABLE /*$wgDBprefix*/archive ADD COLUMN ar_id int unsigned NOT NULL AUTO_INCREMENT FIRST, diff --git a/maintenance/archives/patch-categorylinks-better-collation.sql b/maintenance/archives/patch-categorylinks-better-collation.sql index f5ff1f1d1a..f8b6340573 100644 --- a/maintenance/archives/patch-categorylinks-better-collation.sql +++ b/maintenance/archives/patch-categorylinks-better-collation.sql @@ -1,11 +1,11 @@ -- -- patch-categorylinks-better-collation.sql -- --- Bugs 164, 1211, 23682. This is the second version of this patch; the +-- T2164, T3211, T25682. This is the second version of this patch; the -- changes are also incorporated into patch-categorylinks-better-collation2.sql, -- for the benefit of trunk users who applied the original. -- --- Due to bug 25254, the length limit of 255 bytes for cl_sortkey_prefix +-- Due to T27254, the length limit of 255 bytes for cl_sortkey_prefix -- is also enforced in php. If you change the length of that field, make -- sure to also change the check in LinksUpdate.php. ALTER TABLE /*$wgDBprefix*/categorylinks diff --git a/maintenance/archives/patch-externallinks-el_id.sql b/maintenance/archives/patch-externallinks-el_id.sql index d4b51b5175..ded84543b9 100644 --- a/maintenance/archives/patch-externallinks-el_id.sql +++ b/maintenance/archives/patch-externallinks-el_id.sql @@ -1,7 +1,7 @@ -- -- patch-extenallinks-el_id.sql -- --- Bug 15441. Add externallinks.el_id. +-- T17441. Add externallinks.el_id. ALTER TABLE /*$wgDBprefix*/externallinks ADD COLUMN el_id int unsigned NOT NULL AUTO_INCREMENT FIRST, diff --git a/maintenance/archives/patch-up_property.sql b/maintenance/archives/patch-up_property.sql index 742841e412..c516aafdb1 100644 --- a/maintenance/archives/patch-up_property.sql +++ b/maintenance/archives/patch-up_property.sql @@ -1,4 +1,4 @@ --- Increase the length of up_property from 32 -> 255 bytes. Bug 19408 +-- Increase the length of up_property from 32 -> 255 bytes. T21408 ALTER TABLE /*_*/user_properties MODIFY up_property varbinary(255); diff --git a/maintenance/benchmarks/bench_utf8_title_check.php b/maintenance/benchmarks/bench_utf8_title_check.php index c92a720868..b2f7e96138 100644 --- a/maintenance/benchmarks/bench_utf8_title_check.php +++ b/maintenance/benchmarks/bench_utf8_title_check.php @@ -41,7 +41,7 @@ class BenchUtf8TitleCheck extends Benchmarker { "United States of America", // 7bit ASCII "S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e", "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn", - // This comes from bug 36839 + // This comes from T38839 "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C" . "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C" . "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C" diff --git a/maintenance/cleanupImages.php b/maintenance/cleanupImages.php index c8f393d841..e0da027f76 100644 --- a/maintenance/cleanupImages.php +++ b/maintenance/cleanupImages.php @@ -146,7 +146,7 @@ class ImageCleanup extends TableCleanup { * if the target title exists in the image table, or if both the * original and target titles exist in the page table, append * increasing version numbers until the target title exists in - * neither. (See also bug 16916.) + * neither. (See also T18916.) */ $version = 0; $final = $new; diff --git a/maintenance/cleanupTitles.php b/maintenance/cleanupTitles.php index 650fae085a..ccc6406809 100644 --- a/maintenance/cleanupTitles.php +++ b/maintenance/cleanupTitles.php @@ -138,14 +138,14 @@ class TitleCleanup extends TableCleanup { $prior = $title->getDBkey(); } - # Old cleanupTitles could move articles there. See bug 23147. + # Old cleanupTitles could move articles there. See T25147. $ns = $row->page_namespace; if ( $ns < 0 ) { $ns = 0; } # Namespace which no longer exists. Put the page in the main namespace - # since we don't have any idea of the old namespace name. See bug 68501. + # since we don't have any idea of the old namespace name. See T70501. if ( !MWNamespace::exists( $ns ) ) { $ns = 0; } diff --git a/maintenance/fixDoubleRedirects.php b/maintenance/fixDoubleRedirects.php index 1d6f31de08..79f75ef7ed 100644 --- a/maintenance/fixDoubleRedirects.php +++ b/maintenance/fixDoubleRedirects.php @@ -72,7 +72,7 @@ class FixDoubleRedirects extends Maintenance { 'rd_from = pa.page_id', 'rd_namespace = pb.page_namespace', 'rd_title = pb.page_title', - 'rd_interwiki IS NULL OR rd_interwiki = ' . $dbr->addQuotes( '' ), // bug 40352 + 'rd_interwiki IS NULL OR rd_interwiki = ' . $dbr->addQuotes( '' ), // T42352 'pb.page_is_redirect' => 1, ]; diff --git a/maintenance/generateSitemap.php b/maintenance/generateSitemap.php index 87af5b83cc..fb00bede07 100644 --- a/maintenance/generateSitemap.php +++ b/maintenance/generateSitemap.php @@ -523,7 +523,7 @@ class GenerateSitemap extends Maintenance { function fileEntry( $url, $date, $priority ) { return "\t\n" . - // bug 34666: $url may contain bad characters such as ampersands. + // T36666: $url may contain bad characters such as ampersands. "\t\t" . htmlspecialchars( $url ) . "\n" . "\t\t$date\n" . "\t\t$priority\n" . @@ -545,7 +545,7 @@ class GenerateSitemap extends Maintenance { * @param int $namespace */ function generateLimit( $namespace ) { - // bug 17961: make a title with the longest possible URL in this namespace + // T19961: make a title with the longest possible URL in this namespace $title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" ); $this->limit = [ diff --git a/maintenance/migrateUserGroup.php b/maintenance/migrateUserGroup.php index 396be1d7ea..597a876df9 100644 --- a/maintenance/migrateUserGroup.php +++ b/maintenance/migrateUserGroup.php @@ -79,7 +79,7 @@ class MigrateUserGroup extends Maintenance { $affected += $dbw->affectedRows(); $this->commitTransaction( $dbw, __METHOD__ ); - // Clear cache for the affected users (bug 40340) + // Clear cache for the affected users (T42340) if ( $affected > 0 ) { // XXX: This also invalidates cache of unaffected users that // were in the new group and not in the group. diff --git a/maintenance/mssql/tables.sql b/maintenance/mssql/tables.sql index 1c633beb77..78f067173b 100644 --- a/maintenance/mssql/tables.sql +++ b/maintenance/mssql/tables.sql @@ -301,7 +301,7 @@ CREATE TABLE /*_*/categorylinks ( -- conversion algorithm is run. We store this so that we can update -- collations without reparsing all pages. -- Note: If you change the length of this field, you also need to change - -- code in LinksUpdate.php. See bug 25254. + -- code in LinksUpdate.php. See T27254. cl_sortkey_prefix varbinary(255) NOT NULL default 0x, -- This isn't really used at present. Provided for an optional @@ -528,7 +528,7 @@ CREATE TABLE /*_*/ipblocks ( -- Size chosen to allow IPv6 -- FIXME: these fields were originally blank for single-IP blocks, -- but now they are populated. No migration was ever done. They - -- should be fixed to be blank again for such blocks (bug 49504). + -- should be fixed to be blank again for such blocks (T51504). ipb_range_start varchar(255) NOT NULL, ipb_range_end varchar(255) NOT NULL, diff --git a/maintenance/populateRevisionLength.php b/maintenance/populateRevisionLength.php index 5e44fafd9f..a9457c2a1c 100644 --- a/maintenance/populateRevisionLength.php +++ b/maintenance/populateRevisionLength.php @@ -136,7 +136,7 @@ class PopulateRevisionLength extends LoggedUpdateMaintenance { $content = $rev->getContent(); if ( !$content ) { - # This should not happen, but sometimes does (bug 20757) + # This should not happen, but sometimes does (T22757) $id = $row->$idCol; $this->output( "Content of $table $id unavailable!\n" ); diff --git a/maintenance/populateRevisionSha1.php b/maintenance/populateRevisionSha1.php index 095c26693b..fb97e910f8 100644 --- a/maintenance/populateRevisionSha1.php +++ b/maintenance/populateRevisionSha1.php @@ -156,10 +156,10 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { } catch ( Exception $e ) { $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" ); - return false; // bug 22624? + return false; // T24624? } if ( !is_string( $text ) ) { - # This should not happen, but sometimes does (bug 20757) + # This should not happen, but sometimes does (T22757) $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" ); return false; @@ -185,11 +185,11 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { } catch ( Exception $e ) { $this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" ); - return false; // bug 22624? + return false; // T24624? } $text = $rev->getSerializedData(); if ( !is_string( $text ) ) { - # This should not happen, but sometimes does (bug 20757) + # This should not happen, but sometimes does (T22757) $this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" ); return false; diff --git a/maintenance/rebuildFileCache.php b/maintenance/rebuildFileCache.php index d073282d96..3520279fc3 100644 --- a/maintenance/rebuildFileCache.php +++ b/maintenance/rebuildFileCache.php @@ -140,7 +140,7 @@ class RebuildFileCache extends Maintenance { MediaWiki\suppressWarnings(); // header notices // Cache ?action=view - $wgRequestTime = microtime( true ); # bug 22852 + $wgRequestTime = microtime( true ); # T24852 ob_start(); $article->view(); $context->getOutput()->output(); @@ -148,7 +148,7 @@ class RebuildFileCache extends Maintenance { $viewHtml = ob_get_clean(); $viewCache->saveToFileCache( $viewHtml ); // Cache ?action=history - $wgRequestTime = microtime( true ); # bug 22852 + $wgRequestTime = microtime( true ); # T24852 ob_start(); Action::factory( 'history', $article, $context )->show(); $context->getOutput()->output(); diff --git a/maintenance/sql.php b/maintenance/sql.php index cc976ed97f..e42a8efad0 100644 --- a/maintenance/sql.php +++ b/maintenance/sql.php @@ -137,7 +137,7 @@ class MwSql extends Maintenance { } if ( $historyFile ) { # Delimiter is eated by streamStatementEnd, we add it - # up in the history (bug 37020) + # up in the history (T39020) readline_add_history( $wholeLine . ';' ); readline_write_history( $historyFile ); } diff --git a/maintenance/sqlite/archives/initial-indexes.sql b/maintenance/sqlite/archives/initial-indexes.sql index f322a03cf8..2d0c9eea9c 100644 --- a/maintenance/sqlite/archives/initial-indexes.sql +++ b/maintenance/sqlite/archives/initial-indexes.sql @@ -3,7 +3,7 @@ -- Unique indexes need to be handled with INSERT SELECT since just running -- the CREATE INDEX statement will fail if there are duplicate values. -- --- Ignore duplicates, several tables will have them (e.g. bug 16966) but in +-- Ignore duplicates, several tables will have them (e.g. T18966) but in -- most cases it's harmless to discard them. -------------------------------------------------------------------------------- diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php deleted file mode 100644 index b444f31143..0000000000 --- a/maintenance/storage/fixBug20757.php +++ /dev/null @@ -1,349 +0,0 @@ -addDescription( 'Script to fix bug 20757 assuming that blob_tracking is intact' ); - $this->addOption( 'dry-run', 'Report only' ); - $this->addOption( 'start', 'old_id to start at', false, true ); - } - - function execute() { - $dbr = $this->getDB( DB_REPLICA ); - $dbw = $this->getDB( DB_MASTER ); - - $dryRun = $this->getOption( 'dry-run' ); - if ( $dryRun ) { - print "Dry run only.\n"; - } - - $startId = $this->getOption( 'start', 0 ); - $numGood = 0; - $numFixed = 0; - $numBad = 0; - - $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ ); - - // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function - $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))'; - - while ( true ) { - print "ID: $startId / $totalRevs\r"; - - $res = $dbr->select( - 'text', - [ 'old_id', 'old_flags', 'old_text' ], - [ - 'old_id > ' . intval( $startId ), - 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'', - "$lowerLeft = 'o:15:\"historyblobstub\"'", - ], - __METHOD__, - [ - 'ORDER BY' => 'old_id', - 'LIMIT' => $this->batchSize, - ] - ); - - if ( !$res->numRows() ) { - break; - } - - $secondaryIds = []; - $stubs = []; - - foreach ( $res as $row ) { - $startId = $row->old_id; - - // Basic sanity checks - $obj = unserialize( $row->old_text ); - if ( $obj === false ) { - print "{$row->old_id}: unrecoverable: cannot unserialize\n"; - ++$numBad; - continue; - } - - if ( !is_object( $obj ) ) { - print "{$row->old_id}: unrecoverable: unserialized to type " . - gettype( $obj ) . ", possible double-serialization\n"; - ++$numBad; - continue; - } - - if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) { - print "{$row->old_id}: unrecoverable: unexpected object class " . - get_class( $obj ) . "\n"; - ++$numBad; - continue; - } - - // Process flags - $flags = explode( ',', $row->old_flags ); - if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) { - $legacyEncoding = false; - } else { - $legacyEncoding = true; - } - - // Queue the stub for future batch processing - $id = intval( $obj->mOldId ); - $secondaryIds[] = $id; - $stubs[$row->old_id] = [ - 'legacyEncoding' => $legacyEncoding, - 'secondaryId' => $id, - 'hash' => $obj->mHash, - ]; - } - - $secondaryIds = array_unique( $secondaryIds ); - - if ( !count( $secondaryIds ) ) { - continue; - } - - // Run the batch query on blob_tracking - $res = $dbr->select( - 'blob_tracking', - '*', - [ - 'bt_text_id' => $secondaryIds, - ], - __METHOD__ - ); - $trackedBlobs = []; - foreach ( $res as $row ) { - $trackedBlobs[$row->bt_text_id] = $row; - } - - // Process the stubs - foreach ( $stubs as $primaryId => $stub ) { - $secondaryId = $stub['secondaryId']; - if ( !isset( $trackedBlobs[$secondaryId] ) ) { - // No tracked blob. Work out what went wrong - $secondaryRow = $dbr->selectRow( - 'text', - [ 'old_flags', 'old_text' ], - [ 'old_id' => $secondaryId ], - __METHOD__ - ); - if ( !$secondaryRow ) { - print "$primaryId: unrecoverable: secondary row is missing\n"; - ++$numBad; - } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) { - // Not broken yet, and not in the tracked clusters so it won't get - // broken by the current RCT run. - ++$numGood; - } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) { - print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n"; - ++$numBad; - } else { - print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n"; - ++$numBad; - } - unset( $stubs[$primaryId] ); - continue; - } - $trackRow = $trackedBlobs[$secondaryId]; - - // Check that the specified text really is available in the tracked source row - $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}"; - $text = ExternalStore::fetchFromURL( $url ); - if ( $text === false ) { - print "$primaryId: unrecoverable: source text missing\n"; - ++$numBad; - unset( $stubs[$primaryId] ); - continue; - } - if ( md5( $text ) !== $stub['hash'] ) { - print "$primaryId: unrecoverable: content hashes do not match\n"; - ++$numBad; - unset( $stubs[$primaryId] ); - continue; - } - - // Find the page_id and rev_id - // The page is probably the same as the page of the secondary row - $pageId = intval( $trackRow->bt_page ); - if ( !$pageId ) { - $revId = $pageId = 0; - } else { - $revId = $this->findTextIdInPage( $pageId, $primaryId ); - if ( !$revId ) { - // Actually an orphan - $pageId = $revId = 0; - } - } - - $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8'; - - if ( !$dryRun ) { - // Reset the text row to point to the original copy - $this->beginTransaction( $dbw, __METHOD__ ); - $dbw->update( - 'text', - // SET - [ - 'old_flags' => $newFlags, - 'old_text' => $url - ], - // WHERE - [ 'old_id' => $primaryId ], - __METHOD__ - ); - - // Add a blob_tracking row so that the new reference can be recompressed - // without needing to run trackBlobs.php again - $dbw->insert( 'blob_tracking', - [ - 'bt_page' => $pageId, - 'bt_rev_id' => $revId, - 'bt_text_id' => $primaryId, - 'bt_cluster' => $trackRow->bt_cluster, - 'bt_blob_id' => $trackRow->bt_blob_id, - 'bt_cgz_hash' => $stub['hash'], - 'bt_new_url' => null, - 'bt_moved' => 0, - ], - __METHOD__ - ); - $this->commitTransaction( $dbw, __METHOD__ ); - $this->waitForSlaves(); - } - - print "$primaryId: resolved to $url\n"; - ++$numFixed; - } - } - - print "\n"; - print "Fixed: $numFixed\n"; - print "Unrecoverable: $numBad\n"; - print "Good stubs: $numGood\n"; - } - - function waitForSlaves() { - static $iteration = 0; - ++$iteration; - if ( ++$iteration > 50 == 0 ) { - wfWaitForSlaves(); - $iteration = 0; - } - } - - function findTextIdInPage( $pageId, $textId ) { - $ids = $this->getRevTextMap( $pageId ); - if ( !isset( $ids[$textId] ) ) { - return null; - } else { - return $ids[$textId]; - } - } - - function getRevTextMap( $pageId ) { - if ( !isset( $this->mapCache[$pageId] ) ) { - // Limit cache size - while ( $this->mapCacheSize > $this->maxMapCacheSize ) { - $key = key( $this->mapCache ); - $this->mapCacheSize -= count( $this->mapCache[$key] ); - unset( $this->mapCache[$key] ); - } - - $dbr = $this->getDB( DB_REPLICA ); - $map = []; - $res = $dbr->select( 'revision', - [ 'rev_id', 'rev_text_id' ], - [ 'rev_page' => $pageId ], - __METHOD__ - ); - foreach ( $res as $row ) { - $map[$row->rev_text_id] = $row->rev_id; - } - $this->mapCache[$pageId] = $map; - $this->mapCacheSize += count( $map ); - } - - return $this->mapCache[$pageId]; - } - - /** - * This is based on part of HistoryBlobStub::getText(). - * Determine if the text can be retrieved from the row in the normal way. - * @param array $stub - * @param stdClass $secondaryRow - * @return bool - */ - function isUnbrokenStub( $stub, $secondaryRow ) { - $flags = explode( ',', $secondaryRow->old_flags ); - $text = $secondaryRow->old_text; - if ( in_array( 'external', $flags ) ) { - $url = $text; - MediaWiki\suppressWarnings(); - list( /* $proto */, $path ) = explode( '://', $url, 2 ); - MediaWiki\restoreWarnings(); - - if ( $path == "" ) { - return false; - } - $text = ExternalStore::fetchFromURL( $url ); - } - if ( !in_array( 'object', $flags ) ) { - return false; - } - - if ( in_array( 'gzip', $flags ) ) { - $obj = unserialize( gzinflate( $text ) ); - } else { - $obj = unserialize( $text ); - } - - if ( !is_object( $obj ) ) { - // Correct for old double-serialization bug. - $obj = unserialize( $obj ); - } - - if ( !is_object( $obj ) ) { - return false; - } - - $obj->uncompress(); - $text = $obj->getItem( $stub['hash'] ); - - return $text !== false; - } -} - -$maintClass = 'FixBug20757'; -require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/storage/fixT22757.php b/maintenance/storage/fixT22757.php new file mode 100644 index 0000000000..e8bd23d4d3 --- /dev/null +++ b/maintenance/storage/fixT22757.php @@ -0,0 +1,349 @@ +addDescription( 'Script to fix T22757 assuming that blob_tracking is intact' ); + $this->addOption( 'dry-run', 'Report only' ); + $this->addOption( 'start', 'old_id to start at', false, true ); + } + + function execute() { + $dbr = $this->getDB( DB_REPLICA ); + $dbw = $this->getDB( DB_MASTER ); + + $dryRun = $this->getOption( 'dry-run' ); + if ( $dryRun ) { + print "Dry run only.\n"; + } + + $startId = $this->getOption( 'start', 0 ); + $numGood = 0; + $numFixed = 0; + $numBad = 0; + + $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ ); + + // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function + $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))'; + + while ( true ) { + print "ID: $startId / $totalRevs\r"; + + $res = $dbr->select( + 'text', + [ 'old_id', 'old_flags', 'old_text' ], + [ + 'old_id > ' . intval( $startId ), + 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'', + "$lowerLeft = 'o:15:\"historyblobstub\"'", + ], + __METHOD__, + [ + 'ORDER BY' => 'old_id', + 'LIMIT' => $this->batchSize, + ] + ); + + if ( !$res->numRows() ) { + break; + } + + $secondaryIds = []; + $stubs = []; + + foreach ( $res as $row ) { + $startId = $row->old_id; + + // Basic sanity checks + $obj = unserialize( $row->old_text ); + if ( $obj === false ) { + print "{$row->old_id}: unrecoverable: cannot unserialize\n"; + ++$numBad; + continue; + } + + if ( !is_object( $obj ) ) { + print "{$row->old_id}: unrecoverable: unserialized to type " . + gettype( $obj ) . ", possible double-serialization\n"; + ++$numBad; + continue; + } + + if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) { + print "{$row->old_id}: unrecoverable: unexpected object class " . + get_class( $obj ) . "\n"; + ++$numBad; + continue; + } + + // Process flags + $flags = explode( ',', $row->old_flags ); + if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) { + $legacyEncoding = false; + } else { + $legacyEncoding = true; + } + + // Queue the stub for future batch processing + $id = intval( $obj->mOldId ); + $secondaryIds[] = $id; + $stubs[$row->old_id] = [ + 'legacyEncoding' => $legacyEncoding, + 'secondaryId' => $id, + 'hash' => $obj->mHash, + ]; + } + + $secondaryIds = array_unique( $secondaryIds ); + + if ( !count( $secondaryIds ) ) { + continue; + } + + // Run the batch query on blob_tracking + $res = $dbr->select( + 'blob_tracking', + '*', + [ + 'bt_text_id' => $secondaryIds, + ], + __METHOD__ + ); + $trackedBlobs = []; + foreach ( $res as $row ) { + $trackedBlobs[$row->bt_text_id] = $row; + } + + // Process the stubs + foreach ( $stubs as $primaryId => $stub ) { + $secondaryId = $stub['secondaryId']; + if ( !isset( $trackedBlobs[$secondaryId] ) ) { + // No tracked blob. Work out what went wrong + $secondaryRow = $dbr->selectRow( + 'text', + [ 'old_flags', 'old_text' ], + [ 'old_id' => $secondaryId ], + __METHOD__ + ); + if ( !$secondaryRow ) { + print "$primaryId: unrecoverable: secondary row is missing\n"; + ++$numBad; + } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) { + // Not broken yet, and not in the tracked clusters so it won't get + // broken by the current RCT run. + ++$numGood; + } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) { + print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n"; + ++$numBad; + } else { + print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n"; + ++$numBad; + } + unset( $stubs[$primaryId] ); + continue; + } + $trackRow = $trackedBlobs[$secondaryId]; + + // Check that the specified text really is available in the tracked source row + $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}"; + $text = ExternalStore::fetchFromURL( $url ); + if ( $text === false ) { + print "$primaryId: unrecoverable: source text missing\n"; + ++$numBad; + unset( $stubs[$primaryId] ); + continue; + } + if ( md5( $text ) !== $stub['hash'] ) { + print "$primaryId: unrecoverable: content hashes do not match\n"; + ++$numBad; + unset( $stubs[$primaryId] ); + continue; + } + + // Find the page_id and rev_id + // The page is probably the same as the page of the secondary row + $pageId = intval( $trackRow->bt_page ); + if ( !$pageId ) { + $revId = $pageId = 0; + } else { + $revId = $this->findTextIdInPage( $pageId, $primaryId ); + if ( !$revId ) { + // Actually an orphan + $pageId = $revId = 0; + } + } + + $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8'; + + if ( !$dryRun ) { + // Reset the text row to point to the original copy + $this->beginTransaction( $dbw, __METHOD__ ); + $dbw->update( + 'text', + // SET + [ + 'old_flags' => $newFlags, + 'old_text' => $url + ], + // WHERE + [ 'old_id' => $primaryId ], + __METHOD__ + ); + + // Add a blob_tracking row so that the new reference can be recompressed + // without needing to run trackBlobs.php again + $dbw->insert( 'blob_tracking', + [ + 'bt_page' => $pageId, + 'bt_rev_id' => $revId, + 'bt_text_id' => $primaryId, + 'bt_cluster' => $trackRow->bt_cluster, + 'bt_blob_id' => $trackRow->bt_blob_id, + 'bt_cgz_hash' => $stub['hash'], + 'bt_new_url' => null, + 'bt_moved' => 0, + ], + __METHOD__ + ); + $this->commitTransaction( $dbw, __METHOD__ ); + $this->waitForSlaves(); + } + + print "$primaryId: resolved to $url\n"; + ++$numFixed; + } + } + + print "\n"; + print "Fixed: $numFixed\n"; + print "Unrecoverable: $numBad\n"; + print "Good stubs: $numGood\n"; + } + + function waitForSlaves() { + static $iteration = 0; + ++$iteration; + if ( ++$iteration > 50 == 0 ) { + wfWaitForSlaves(); + $iteration = 0; + } + } + + function findTextIdInPage( $pageId, $textId ) { + $ids = $this->getRevTextMap( $pageId ); + if ( !isset( $ids[$textId] ) ) { + return null; + } else { + return $ids[$textId]; + } + } + + function getRevTextMap( $pageId ) { + if ( !isset( $this->mapCache[$pageId] ) ) { + // Limit cache size + while ( $this->mapCacheSize > $this->maxMapCacheSize ) { + $key = key( $this->mapCache ); + $this->mapCacheSize -= count( $this->mapCache[$key] ); + unset( $this->mapCache[$key] ); + } + + $dbr = $this->getDB( DB_REPLICA ); + $map = []; + $res = $dbr->select( 'revision', + [ 'rev_id', 'rev_text_id' ], + [ 'rev_page' => $pageId ], + __METHOD__ + ); + foreach ( $res as $row ) { + $map[$row->rev_text_id] = $row->rev_id; + } + $this->mapCache[$pageId] = $map; + $this->mapCacheSize += count( $map ); + } + + return $this->mapCache[$pageId]; + } + + /** + * This is based on part of HistoryBlobStub::getText(). + * Determine if the text can be retrieved from the row in the normal way. + * @param array $stub + * @param stdClass $secondaryRow + * @return bool + */ + function isUnbrokenStub( $stub, $secondaryRow ) { + $flags = explode( ',', $secondaryRow->old_flags ); + $text = $secondaryRow->old_text; + if ( in_array( 'external', $flags ) ) { + $url = $text; + MediaWiki\suppressWarnings(); + list( /* $proto */, $path ) = explode( '://', $url, 2 ); + MediaWiki\restoreWarnings(); + + if ( $path == "" ) { + return false; + } + $text = ExternalStore::fetchFromURL( $url ); + } + if ( !in_array( 'object', $flags ) ) { + return false; + } + + if ( in_array( 'gzip', $flags ) ) { + $obj = unserialize( gzinflate( $text ) ); + } else { + $obj = unserialize( $text ); + } + + if ( !is_object( $obj ) ) { + // Correct for old double-serialization bug. + $obj = unserialize( $obj ); + } + + if ( !is_object( $obj ) ) { + return false; + } + + $obj->uncompress(); + $text = $obj->getItem( $stub['hash'] ); + + return $text !== false; + } +} + +$maintClass = 'FixT22757'; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php index a2dc376800..4f22843056 100644 --- a/maintenance/storage/trackBlobs.php +++ b/maintenance/storage/trackBlobs.php @@ -69,7 +69,7 @@ class TrackBlobs { echo "Doing integrity check...\n"; $dbr = wfGetDB( DB_REPLICA ); - // Scan for HistoryBlobStub objects in the text table (bug 20757) + // Scan for HistoryBlobStub objects in the text table (T22757) $exists = $dbr->selectField( 'text', 1, 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' . @@ -84,7 +84,7 @@ class TrackBlobs { exit( 1 ); } - // Scan the archive table for HistoryBlobStub objects or external flags (bug 22624) + // Scan the archive table for HistoryBlobStub objects or external flags (T24624) $flags = $dbr->selectField( 'archive', 'ar_flags', 'ar_flags LIKE \'%external%\' OR (' . 'ar_flags LIKE \'%object%\' ' . diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 892f799d54..44922a4084 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -594,7 +594,7 @@ CREATE TABLE /*_*/categorylinks ( -- conversion algorithm is run. We store this so that we can update -- collations without reparsing all pages. -- Note: If you change the length of this field, you also need to change - -- code in LinksUpdate.php. See bug 25254. + -- code in LinksUpdate.php. See T27254. cl_sortkey_prefix varchar(255) binary NOT NULL default '', -- This isn't really used at present. Provided for an optional @@ -816,7 +816,7 @@ CREATE TABLE /*_*/ipblocks ( -- Size chosen to allow IPv6 -- FIXME: these fields were originally blank for single-IP blocks, -- but now they are populated. No migration was ever done. They - -- should be fixed to be blank again for such blocks (bug 49504). + -- should be fixed to be blank again for such blocks (T51504). ipb_range_start tinyblob NOT NULL, ipb_range_end tinyblob NOT NULL, diff --git a/maintenance/update.php b/maintenance/update.php index a672e294e7..d96cecd6be 100755 --- a/maintenance/update.php +++ b/maintenance/update.php @@ -112,7 +112,7 @@ class UpdateMediaWiki extends Maintenance { } $lang = Language::factory( 'en' ); - // Set global language to ensure localised errors are in English (bug 20633) + // Set global language to ensure localised errors are in English (T22633) RequestContext::getMain()->setLanguage( $lang ); $wgLang = $lang; // BackCompat @@ -203,7 +203,7 @@ class UpdateMediaWiki extends Maintenance { # Don't try to access the database # This needs to be disabled early since extensions will try to use the l10n - # cache from $wgExtensionFunctions (bug 20471) + # cache from $wgExtensionFunctions (T22471) $wgLocalisationCacheConf = [ 'class' => 'LocalisationCache', 'storeClass' => 'LCStoreNull',