Merge "maintenance: Replace implicit Bugzilla bug numbers with Phab ones"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 28 Feb 2017 00:37:38 +0000 (00:37 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Tue, 28 Feb 2017 00:37:39 +0000 (00:37 +0000)
22 files changed:
autoload.php
maintenance/archives/patch-archive-ar_id.sql
maintenance/archives/patch-categorylinks-better-collation.sql
maintenance/archives/patch-externallinks-el_id.sql
maintenance/archives/patch-up_property.sql
maintenance/benchmarks/bench_utf8_title_check.php
maintenance/cleanupImages.php
maintenance/cleanupTitles.php
maintenance/fixDoubleRedirects.php
maintenance/generateSitemap.php
maintenance/migrateUserGroup.php
maintenance/mssql/tables.sql
maintenance/populateRevisionLength.php
maintenance/populateRevisionSha1.php
maintenance/rebuildFileCache.php
maintenance/sql.php
maintenance/sqlite/archives/initial-indexes.sql
maintenance/storage/fixBug20757.php [deleted file]
maintenance/storage/fixT22757.php [new file with mode: 0644]
maintenance/storage/trackBlobs.php
maintenance/tables.sql
maintenance/update.php

index e56b681..aa4e544 100644 (file)
@@ -487,10 +487,10 @@ $wgAutoloadLocalClasses = [
        'FindHooks' => __DIR__ . '/maintenance/findHooks.php',
        'FindMissingFiles' => __DIR__ . '/maintenance/findMissingFiles.php',
        'FindOrphanedFiles' => __DIR__ . '/maintenance/findOrphanedFiles.php',
-       'FixBug20757' => __DIR__ . '/maintenance/storage/fixBug20757.php',
        'FixDefaultJsonContentPages' => __DIR__ . '/maintenance/fixDefaultJsonContentPages.php',
        'FixDoubleRedirects' => __DIR__ . '/maintenance/fixDoubleRedirects.php',
        'FixExtLinksProtocolRelative' => __DIR__ . '/maintenance/fixExtLinksProtocolRelative.php',
+       'FixT22757' => __DIR__ . '/maintenance/storage/fixT22757.php',
        'FixTimestamps' => __DIR__ . '/maintenance/fixTimestamps.php',
        'FixUserRegistration' => __DIR__ . '/maintenance/fixUserRegistration.php',
        'ForeignAPIFile' => __DIR__ . '/includes/filerepo/file/ForeignAPIFile.php',
index ddd1d7b..08287cd 100644 (file)
@@ -1,7 +1,7 @@
 --
 -- patch-archive-ar_id.sql
 --
--- Bug 39675. Add archive.ar_id.
+-- T41675. Add archive.ar_id.
 
 ALTER TABLE /*$wgDBprefix*/archive
     ADD COLUMN ar_id int unsigned NOT NULL AUTO_INCREMENT FIRST,
index f5ff1f1..f8b6340 100644 (file)
@@ -1,11 +1,11 @@
 --
 -- patch-categorylinks-better-collation.sql
 --
--- Bugs 164, 1211, 23682.  This is the second version of this patch; the
+-- T2164, T3211, T25682.  This is the second version of this patch; the
 -- changes are also incorporated into patch-categorylinks-better-collation2.sql,
 -- for the benefit of trunk users who applied the original.
 --
--- Due to bug 25254, the length limit of 255 bytes for cl_sortkey_prefix
+-- Due to T27254, the length limit of 255 bytes for cl_sortkey_prefix
 -- is also enforced in php. If you change the length of that field, make
 -- sure to also change the check in LinksUpdate.php.
 ALTER TABLE /*$wgDBprefix*/categorylinks
index d4b51b5..ded8454 100644 (file)
@@ -1,7 +1,7 @@
 --
 -- patch-extenallinks-el_id.sql
 --
--- Bug 15441. Add externallinks.el_id.
+-- T17441. Add externallinks.el_id.
 
 ALTER TABLE /*$wgDBprefix*/externallinks
     ADD COLUMN el_id int unsigned NOT NULL AUTO_INCREMENT FIRST,
index 742841e..c516aaf 100644 (file)
@@ -1,4 +1,4 @@
--- Increase the length of up_property from 32 -> 255 bytes. Bug 19408
+-- Increase the length of up_property from 32 -> 255 bytes. T21408
 
 ALTER TABLE /*_*/user_properties
        MODIFY up_property varbinary(255);
index c92a720..b2f7e96 100644 (file)
@@ -41,7 +41,7 @@ class BenchUtf8TitleCheck extends Benchmarker {
                        "United States of America", // 7bit ASCII
                        "S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
                        "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
-                       // This comes from bug 36839
+                       // This comes from T38839
                        "Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
                        . "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
                        . "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
index c8f393d..e0da027 100644 (file)
@@ -146,7 +146,7 @@ class ImageCleanup extends TableCleanup {
                 * if the target title exists in the image table, or if both the
                 * original and target titles exist in the page table, append
                 * increasing version numbers until the target title exists in
-                * neither.  (See also bug 16916.)
+                * neither.  (See also T18916.)
                 */
                $version = 0;
                $final = $new;
index 650fae0..ccc6406 100644 (file)
@@ -138,14 +138,14 @@ class TitleCleanup extends TableCleanup {
                                $prior = $title->getDBkey();
                        }
 
-                       # Old cleanupTitles could move articles there. See bug 23147.
+                       # Old cleanupTitles could move articles there. See T25147.
                        $ns = $row->page_namespace;
                        if ( $ns < 0 ) {
                                $ns = 0;
                        }
 
                        # Namespace which no longer exists. Put the page in the main namespace
-                       # since we don't have any idea of the old namespace name. See bug 68501.
+                       # since we don't have any idea of the old namespace name. See T70501.
                        if ( !MWNamespace::exists( $ns ) ) {
                                $ns = 0;
                        }
index 1d6f31d..79f75ef 100644 (file)
@@ -72,7 +72,7 @@ class FixDoubleRedirects extends Maintenance {
                        'rd_from = pa.page_id',
                        'rd_namespace = pb.page_namespace',
                        'rd_title = pb.page_title',
-                       'rd_interwiki IS NULL OR rd_interwiki = ' . $dbr->addQuotes( '' ), // bug 40352
+                       'rd_interwiki IS NULL OR rd_interwiki = ' . $dbr->addQuotes( '' ), // T42352
                        'pb.page_is_redirect' => 1,
                ];
 
index 87af5b8..fb00bed 100644 (file)
@@ -523,7 +523,7 @@ class GenerateSitemap extends Maintenance {
        function fileEntry( $url, $date, $priority ) {
                return
                        "\t<url>\n" .
-                       // bug 34666: $url may contain bad characters such as ampersands.
+                       // T36666: $url may contain bad characters such as ampersands.
                        "\t\t<loc>" . htmlspecialchars( $url ) . "</loc>\n" .
                        "\t\t<lastmod>$date</lastmod>\n" .
                        "\t\t<priority>$priority</priority>\n" .
@@ -545,7 +545,7 @@ class GenerateSitemap extends Maintenance {
         * @param int $namespace
         */
        function generateLimit( $namespace ) {
-               // bug 17961: make a title with the longest possible URL in this namespace
+               // T19961: make a title with the longest possible URL in this namespace
                $title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );
 
                $this->limit = [
index 396be1d..597a876 100644 (file)
@@ -79,7 +79,7 @@ class MigrateUserGroup extends Maintenance {
                        $affected += $dbw->affectedRows();
                        $this->commitTransaction( $dbw, __METHOD__ );
 
-                       // Clear cache for the affected users (bug 40340)
+                       // Clear cache for the affected users (T42340)
                        if ( $affected > 0 ) {
                                // XXX: This also invalidates cache of unaffected users that
                                // were in the new group and not in the group.
index 1c633be..78f0671 100644 (file)
@@ -301,7 +301,7 @@ CREATE TABLE /*_*/categorylinks (
   -- conversion algorithm is run.  We store this so that we can update
   -- collations without reparsing all pages.
   -- Note: If you change the length of this field, you also need to change
-  -- code in LinksUpdate.php. See bug 25254.
+  -- code in LinksUpdate.php. See T27254.
   cl_sortkey_prefix varbinary(255) NOT NULL default 0x,
 
   -- This isn't really used at present. Provided for an optional
@@ -528,7 +528,7 @@ CREATE TABLE /*_*/ipblocks (
   -- Size chosen to allow IPv6
   -- FIXME: these fields were originally blank for single-IP blocks,
   -- but now they are populated. No migration was ever done. They
-  -- should be fixed to be blank again for such blocks (bug 49504).
+  -- should be fixed to be blank again for such blocks (T51504).
   ipb_range_start varchar(255) NOT NULL,
   ipb_range_end varchar(255) NOT NULL,
 
index 5e44faf..a9457c2 100644 (file)
@@ -136,7 +136,7 @@ class PopulateRevisionLength extends LoggedUpdateMaintenance {
 
                $content = $rev->getContent();
                if ( !$content ) {
-                       # This should not happen, but sometimes does (bug 20757)
+                       # This should not happen, but sometimes does (T22757)
                        $id = $row->$idCol;
                        $this->output( "Content of $table $id unavailable!\n" );
 
index 095c266..fb97e91 100644 (file)
@@ -156,10 +156,10 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
                } catch ( Exception $e ) {
                        $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
 
-                       return false; // bug 22624?
+                       return false; // T24624?
                }
                if ( !is_string( $text ) ) {
-                       # This should not happen, but sometimes does (bug 20757)
+                       # This should not happen, but sometimes does (T22757)
                        $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
 
                        return false;
@@ -185,11 +185,11 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
                } catch ( Exception $e ) {
                        $this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
 
-                       return false; // bug 22624?
+                       return false; // T24624?
                }
                $text = $rev->getSerializedData();
                if ( !is_string( $text ) ) {
-                       # This should not happen, but sometimes does (bug 20757)
+                       # This should not happen, but sometimes does (T22757)
                        $this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
 
                        return false;
index d073282..3520279 100644 (file)
@@ -140,7 +140,7 @@ class RebuildFileCache extends Maintenance {
 
                                        MediaWiki\suppressWarnings(); // header notices
                                        // Cache ?action=view
-                                       $wgRequestTime = microtime( true ); # bug 22852
+                                       $wgRequestTime = microtime( true ); # T24852
                                        ob_start();
                                        $article->view();
                                        $context->getOutput()->output();
@@ -148,7 +148,7 @@ class RebuildFileCache extends Maintenance {
                                        $viewHtml = ob_get_clean();
                                        $viewCache->saveToFileCache( $viewHtml );
                                        // Cache ?action=history
-                                       $wgRequestTime = microtime( true ); # bug 22852
+                                       $wgRequestTime = microtime( true ); # T24852
                                        ob_start();
                                        Action::factory( 'history', $article, $context )->show();
                                        $context->getOutput()->output();
index cc976ed..e42a8ef 100644 (file)
@@ -137,7 +137,7 @@ class MwSql extends Maintenance {
                        }
                        if ( $historyFile ) {
                                # Delimiter is eated by streamStatementEnd, we add it
-                               # up in the history (bug 37020)
+                               # up in the history (T39020)
                                readline_add_history( $wholeLine . ';' );
                                readline_write_history( $historyFile );
                        }
index f322a03..2d0c9ee 100644 (file)
@@ -3,7 +3,7 @@
 -- Unique indexes need to be handled with INSERT SELECT since just running
 -- the CREATE INDEX statement will fail if there are duplicate values.
 --
--- Ignore duplicates, several tables will have them (e.g. bug 16966) but in
+-- Ignore duplicates, several tables will have them (e.g. T18966) but in
 -- most cases it's harmless to discard them.
 
 --------------------------------------------------------------------------------
diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php
deleted file mode 100644 (file)
index b444f31..0000000
+++ /dev/null
@@ -1,349 +0,0 @@
-<?php
-/**
- * Script to fix bug 20757.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance ExternalStorage
- */
-
-require_once __DIR__ . '/../Maintenance.php';
-
-/**
- * Maintenance script to fix bug 20757.
- *
- * @ingroup Maintenance ExternalStorage
- */
-class FixBug20757 extends Maintenance {
-       public $batchSize = 10000;
-       public $mapCache = [];
-       public $mapCacheSize = 0;
-       public $maxMapCacheSize = 1000000;
-
-       function __construct() {
-               parent::__construct();
-               $this->addDescription( 'Script to fix bug 20757 assuming that blob_tracking is intact' );
-               $this->addOption( 'dry-run', 'Report only' );
-               $this->addOption( 'start', 'old_id to start at', false, true );
-       }
-
-       function execute() {
-               $dbr = $this->getDB( DB_REPLICA );
-               $dbw = $this->getDB( DB_MASTER );
-
-               $dryRun = $this->getOption( 'dry-run' );
-               if ( $dryRun ) {
-                       print "Dry run only.\n";
-               }
-
-               $startId = $this->getOption( 'start', 0 );
-               $numGood = 0;
-               $numFixed = 0;
-               $numBad = 0;
-
-               $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
-
-               // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
-               $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
-
-               while ( true ) {
-                       print "ID: $startId / $totalRevs\r";
-
-                       $res = $dbr->select(
-                               'text',
-                               [ 'old_id', 'old_flags', 'old_text' ],
-                               [
-                                       'old_id > ' . intval( $startId ),
-                                       'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
-                                       "$lowerLeft = 'o:15:\"historyblobstub\"'",
-                               ],
-                               __METHOD__,
-                               [
-                                       'ORDER BY' => 'old_id',
-                                       'LIMIT' => $this->batchSize,
-                               ]
-                       );
-
-                       if ( !$res->numRows() ) {
-                               break;
-                       }
-
-                       $secondaryIds = [];
-                       $stubs = [];
-
-                       foreach ( $res as $row ) {
-                               $startId = $row->old_id;
-
-                               // Basic sanity checks
-                               $obj = unserialize( $row->old_text );
-                               if ( $obj === false ) {
-                                       print "{$row->old_id}: unrecoverable: cannot unserialize\n";
-                                       ++$numBad;
-                                       continue;
-                               }
-
-                               if ( !is_object( $obj ) ) {
-                                       print "{$row->old_id}: unrecoverable: unserialized to type " .
-                                               gettype( $obj ) . ", possible double-serialization\n";
-                                       ++$numBad;
-                                       continue;
-                               }
-
-                               if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
-                                       print "{$row->old_id}: unrecoverable: unexpected object class " .
-                                               get_class( $obj ) . "\n";
-                                       ++$numBad;
-                                       continue;
-                               }
-
-                               // Process flags
-                               $flags = explode( ',', $row->old_flags );
-                               if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
-                                       $legacyEncoding = false;
-                               } else {
-                                       $legacyEncoding = true;
-                               }
-
-                               // Queue the stub for future batch processing
-                               $id = intval( $obj->mOldId );
-                               $secondaryIds[] = $id;
-                               $stubs[$row->old_id] = [
-                                       'legacyEncoding' => $legacyEncoding,
-                                       'secondaryId' => $id,
-                                       'hash' => $obj->mHash,
-                               ];
-                       }
-
-                       $secondaryIds = array_unique( $secondaryIds );
-
-                       if ( !count( $secondaryIds ) ) {
-                               continue;
-                       }
-
-                       // Run the batch query on blob_tracking
-                       $res = $dbr->select(
-                               'blob_tracking',
-                               '*',
-                               [
-                                       'bt_text_id' => $secondaryIds,
-                               ],
-                               __METHOD__
-                       );
-                       $trackedBlobs = [];
-                       foreach ( $res as $row ) {
-                               $trackedBlobs[$row->bt_text_id] = $row;
-                       }
-
-                       // Process the stubs
-                       foreach ( $stubs as $primaryId => $stub ) {
-                               $secondaryId = $stub['secondaryId'];
-                               if ( !isset( $trackedBlobs[$secondaryId] ) ) {
-                                       // No tracked blob. Work out what went wrong
-                                       $secondaryRow = $dbr->selectRow(
-                                               'text',
-                                               [ 'old_flags', 'old_text' ],
-                                               [ 'old_id' => $secondaryId ],
-                                               __METHOD__
-                                       );
-                                       if ( !$secondaryRow ) {
-                                               print "$primaryId: unrecoverable: secondary row is missing\n";
-                                               ++$numBad;
-                                       } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
-                                               // Not broken yet, and not in the tracked clusters so it won't get
-                                               // broken by the current RCT run.
-                                               ++$numGood;
-                                       } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
-                                               print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
-                                               ++$numBad;
-                                       } else {
-                                               print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
-                                               ++$numBad;
-                                       }
-                                       unset( $stubs[$primaryId] );
-                                       continue;
-                               }
-                               $trackRow = $trackedBlobs[$secondaryId];
-
-                               // Check that the specified text really is available in the tracked source row
-                               $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
-                               $text = ExternalStore::fetchFromURL( $url );
-                               if ( $text === false ) {
-                                       print "$primaryId: unrecoverable: source text missing\n";
-                                       ++$numBad;
-                                       unset( $stubs[$primaryId] );
-                                       continue;
-                               }
-                               if ( md5( $text ) !== $stub['hash'] ) {
-                                       print "$primaryId: unrecoverable: content hashes do not match\n";
-                                       ++$numBad;
-                                       unset( $stubs[$primaryId] );
-                                       continue;
-                               }
-
-                               // Find the page_id and rev_id
-                               // The page is probably the same as the page of the secondary row
-                               $pageId = intval( $trackRow->bt_page );
-                               if ( !$pageId ) {
-                                       $revId = $pageId = 0;
-                               } else {
-                                       $revId = $this->findTextIdInPage( $pageId, $primaryId );
-                                       if ( !$revId ) {
-                                               // Actually an orphan
-                                               $pageId = $revId = 0;
-                                       }
-                               }
-
-                               $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
-
-                               if ( !$dryRun ) {
-                                       // Reset the text row to point to the original copy
-                                       $this->beginTransaction( $dbw, __METHOD__ );
-                                       $dbw->update(
-                                               'text',
-                                               // SET
-                                               [
-                                                       'old_flags' => $newFlags,
-                                                       'old_text' => $url
-                                               ],
-                                               // WHERE
-                                               [ 'old_id' => $primaryId ],
-                                               __METHOD__
-                                       );
-
-                                       // Add a blob_tracking row so that the new reference can be recompressed
-                                       // without needing to run trackBlobs.php again
-                                       $dbw->insert( 'blob_tracking',
-                                               [
-                                                       'bt_page' => $pageId,
-                                                       'bt_rev_id' => $revId,
-                                                       'bt_text_id' => $primaryId,
-                                                       'bt_cluster' => $trackRow->bt_cluster,
-                                                       'bt_blob_id' => $trackRow->bt_blob_id,
-                                                       'bt_cgz_hash' => $stub['hash'],
-                                                       'bt_new_url' => null,
-                                                       'bt_moved' => 0,
-                                               ],
-                                               __METHOD__
-                                       );
-                                       $this->commitTransaction( $dbw, __METHOD__ );
-                                       $this->waitForSlaves();
-                               }
-
-                               print "$primaryId: resolved to $url\n";
-                               ++$numFixed;
-                       }
-               }
-
-               print "\n";
-               print "Fixed: $numFixed\n";
-               print "Unrecoverable: $numBad\n";
-               print "Good stubs: $numGood\n";
-       }
-
-       function waitForSlaves() {
-               static $iteration = 0;
-               ++$iteration;
-               if ( ++$iteration > 50 == 0 ) {
-                       wfWaitForSlaves();
-                       $iteration = 0;
-               }
-       }
-
-       function findTextIdInPage( $pageId, $textId ) {
-               $ids = $this->getRevTextMap( $pageId );
-               if ( !isset( $ids[$textId] ) ) {
-                       return null;
-               } else {
-                       return $ids[$textId];
-               }
-       }
-
-       function getRevTextMap( $pageId ) {
-               if ( !isset( $this->mapCache[$pageId] ) ) {
-                       // Limit cache size
-                       while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
-                               $key = key( $this->mapCache );
-                               $this->mapCacheSize -= count( $this->mapCache[$key] );
-                               unset( $this->mapCache[$key] );
-                       }
-
-                       $dbr = $this->getDB( DB_REPLICA );
-                       $map = [];
-                       $res = $dbr->select( 'revision',
-                               [ 'rev_id', 'rev_text_id' ],
-                               [ 'rev_page' => $pageId ],
-                               __METHOD__
-                       );
-                       foreach ( $res as $row ) {
-                               $map[$row->rev_text_id] = $row->rev_id;
-                       }
-                       $this->mapCache[$pageId] = $map;
-                       $this->mapCacheSize += count( $map );
-               }
-
-               return $this->mapCache[$pageId];
-       }
-
-       /**
-        * This is based on part of HistoryBlobStub::getText().
-        * Determine if the text can be retrieved from the row in the normal way.
-        * @param array $stub
-        * @param stdClass $secondaryRow
-        * @return bool
-        */
-       function isUnbrokenStub( $stub, $secondaryRow ) {
-               $flags = explode( ',', $secondaryRow->old_flags );
-               $text = $secondaryRow->old_text;
-               if ( in_array( 'external', $flags ) ) {
-                       $url = $text;
-                       MediaWiki\suppressWarnings();
-                       list( /* $proto */, $path ) = explode( '://', $url, 2 );
-                       MediaWiki\restoreWarnings();
-
-                       if ( $path == "" ) {
-                               return false;
-                       }
-                       $text = ExternalStore::fetchFromURL( $url );
-               }
-               if ( !in_array( 'object', $flags ) ) {
-                       return false;
-               }
-
-               if ( in_array( 'gzip', $flags ) ) {
-                       $obj = unserialize( gzinflate( $text ) );
-               } else {
-                       $obj = unserialize( $text );
-               }
-
-               if ( !is_object( $obj ) ) {
-                       // Correct for old double-serialization bug.
-                       $obj = unserialize( $obj );
-               }
-
-               if ( !is_object( $obj ) ) {
-                       return false;
-               }
-
-               $obj->uncompress();
-               $text = $obj->getItem( $stub['hash'] );
-
-               return $text !== false;
-       }
-}
-
-$maintClass = 'FixBug20757';
-require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/storage/fixT22757.php b/maintenance/storage/fixT22757.php
new file mode 100644 (file)
index 0000000..e8bd23d
--- /dev/null
@@ -0,0 +1,349 @@
+<?php
+/**
+ * Script to fix T22757.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance ExternalStorage
+ */
+
+require_once __DIR__ . '/../Maintenance.php';
+
+/**
+ * Maintenance script to fix T22757.
+ *
+ * @ingroup Maintenance ExternalStorage
+ */
+class FixT22757 extends Maintenance {
+       public $batchSize = 10000;
+       public $mapCache = [];
+       public $mapCacheSize = 0;
+       public $maxMapCacheSize = 1000000;
+
+       /**
+        * Set the script description and register its two command-line
+        * options, 'dry-run' and 'start'.
+        */
+       function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Script to fix T22757 assuming that blob_tracking is intact' );
+               $this->addOption( 'dry-run', 'Report only' );
+               // NOTE(review): the trailing false/true presumably mean "not required"
+               // and "takes a value" - confirm against Maintenance::addOption().
+               $this->addOption( 'start', 'old_id to start at', false, true );
+       }
+
+       /**
+        * Scan the text table in batches for rows whose old_text is a serialized
+        * HistoryBlobStub that is not flagged as external, and try to repoint
+        * each one at the blob recorded for its secondary text ID in
+        * blob_tracking.
+        *
+        * For every candidate stub the method:
+        *  - sanity-checks the unserialized object,
+        *  - looks up the stub's secondary text ID in blob_tracking,
+        *  - fetches the item from the tracked cluster/blob and compares its
+        *    md5 with the stub hash,
+        *  - unless 'dry-run' is set, rewrites the text row to the DB:// URL and
+        *    inserts a matching blob_tracking row inside one transaction.
+        *
+        * Progress, per-row results and final counters are printed to stdout.
+        */
+       function execute() {
+               $dbr = $this->getDB( DB_REPLICA );
+               $dbw = $this->getDB( DB_MASTER );
+
+               $dryRun = $this->getOption( 'dry-run' );
+               if ( $dryRun ) {
+                       print "Dry run only.\n";
+               }
+
+               $startId = $this->getOption( 'start', 0 );
+               $numGood = 0;
+               $numFixed = 0;
+               $numBad = 0;
+
+               // Highest old_id; only used for the progress line printed per batch
+               $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
+
+               // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
+               $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
+
+               while ( true ) {
+                       print "ID: $startId / $totalRevs\r";
+
+                       // Next batch of candidate stub rows with old_id above the cursor
+                       $res = $dbr->select(
+                               'text',
+                               [ 'old_id', 'old_flags', 'old_text' ],
+                               [
+                                       'old_id > ' . intval( $startId ),
+                                       'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
+                                       "$lowerLeft = 'o:15:\"historyblobstub\"'",
+                               ],
+                               __METHOD__,
+                               [
+                                       'ORDER BY' => 'old_id',
+                                       'LIMIT' => $this->batchSize,
+                               ]
+                       );
+
+                       // No candidate rows left: the scan is finished
+                       if ( !$res->numRows() ) {
+                               break;
+                       }
+
+                       $secondaryIds = [];
+                       $stubs = [];
+
+                       foreach ( $res as $row ) {
+                               // Advance the cursor so the next batch starts after this row
+                               $startId = $row->old_id;
+
+                               // Basic sanity checks
+                               $obj = unserialize( $row->old_text );
+                               if ( $obj === false ) {
+                                       print "{$row->old_id}: unrecoverable: cannot unserialize\n";
+                                       ++$numBad;
+                                       continue;
+                               }
+
+                               if ( !is_object( $obj ) ) {
+                                       print "{$row->old_id}: unrecoverable: unserialized to type " .
+                                               gettype( $obj ) . ", possible double-serialization\n";
+                                       ++$numBad;
+                                       continue;
+                               }
+
+                               if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
+                                       print "{$row->old_id}: unrecoverable: unexpected object class " .
+                                               get_class( $obj ) . "\n";
+                                       ++$numBad;
+                                       continue;
+                               }
+
+                               // Process flags
+                               $flags = explode( ',', $row->old_flags );
+                               if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
+                                       $legacyEncoding = false;
+                               } else {
+                                       $legacyEncoding = true;
+                               }
+
+                               // Queue the stub for future batch processing
+                               $id = intval( $obj->mOldId );
+                               $secondaryIds[] = $id;
+                               $stubs[$row->old_id] = [
+                                       'legacyEncoding' => $legacyEncoding,
+                                       'secondaryId' => $id,
+                                       'hash' => $obj->mHash,
+                               ];
+                       }
+
+                       $secondaryIds = array_unique( $secondaryIds );
+
+                       // Every row of this batch failed the sanity checks; go to the next batch
+                       if ( !count( $secondaryIds ) ) {
+                               continue;
+                       }
+
+                       // Run the batch query on blob_tracking
+                       $res = $dbr->select(
+                               'blob_tracking',
+                               '*',
+                               [
+                                       'bt_text_id' => $secondaryIds,
+                               ],
+                               __METHOD__
+                       );
+                       // Index the tracking rows by text ID; a later row for the same
+                       // text ID overwrites an earlier one
+                       $trackedBlobs = [];
+                       foreach ( $res as $row ) {
+                               $trackedBlobs[$row->bt_text_id] = $row;
+                       }
+
+                       // Process the stubs
+                       foreach ( $stubs as $primaryId => $stub ) {
+                               $secondaryId = $stub['secondaryId'];
+                               if ( !isset( $trackedBlobs[$secondaryId] ) ) {
+                                       // No tracked blob. Work out what went wrong
+                                       $secondaryRow = $dbr->selectRow(
+                                               'text',
+                                               [ 'old_flags', 'old_text' ],
+                                               [ 'old_id' => $secondaryId ],
+                                               __METHOD__
+                                       );
+                                       if ( !$secondaryRow ) {
+                                               print "$primaryId: unrecoverable: secondary row is missing\n";
+                                               ++$numBad;
+                                       } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
+                                               // Not broken yet, and not in the tracked clusters so it won't get
+                                               // broken by the current RCT run.
+                                               ++$numGood;
+                                       } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
+                                               print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
+                                               ++$numBad;
+                                       } else {
+                                               print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
+                                               ++$numBad;
+                                       }
+                                       unset( $stubs[$primaryId] );
+                                       continue;
+                               }
+                               $trackRow = $trackedBlobs[$secondaryId];
+
+                               // Check that the specified text really is available in the tracked source row
+                               $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
+                               $text = ExternalStore::fetchFromURL( $url );
+                               if ( $text === false ) {
+                                       print "$primaryId: unrecoverable: source text missing\n";
+                                       ++$numBad;
+                                       unset( $stubs[$primaryId] );
+                                       continue;
+                               }
+                               if ( md5( $text ) !== $stub['hash'] ) {
+                                       print "$primaryId: unrecoverable: content hashes do not match\n";
+                                       ++$numBad;
+                                       unset( $stubs[$primaryId] );
+                                       continue;
+                               }
+
+                               // Find the page_id and rev_id
+                               // The page is probably the same as the page of the secondary row
+                               $pageId = intval( $trackRow->bt_page );
+                               if ( !$pageId ) {
+                                       $revId = $pageId = 0;
+                               } else {
+                                       $revId = $this->findTextIdInPage( $pageId, $primaryId );
+                                       if ( !$revId ) {
+                                               // Actually an orphan
+                                               $pageId = $revId = 0;
+                                       }
+                               }
+
+                               // The rewritten row is always external; keep utf-8 unless the stub
+                               // row lacked both the 'utf-8' and 'utf8' flags
+                               $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
+
+                               if ( !$dryRun ) {
+                                       // Reset the text row to point to the original copy
+                                       $this->beginTransaction( $dbw, __METHOD__ );
+                                       $dbw->update(
+                                               'text',
+                                               // SET
+                                               [
+                                                       'old_flags' => $newFlags,
+                                                       'old_text' => $url
+                                               ],
+                                               // WHERE
+                                               [ 'old_id' => $primaryId ],
+                                               __METHOD__
+                                       );
+
+                                       // Add a blob_tracking row so that the new reference can be recompressed
+                                       // without needing to run trackBlobs.php again
+                                       $dbw->insert( 'blob_tracking',
+                                               [
+                                                       'bt_page' => $pageId,
+                                                       'bt_rev_id' => $revId,
+                                                       'bt_text_id' => $primaryId,
+                                                       'bt_cluster' => $trackRow->bt_cluster,
+                                                       'bt_blob_id' => $trackRow->bt_blob_id,
+                                                       'bt_cgz_hash' => $stub['hash'],
+                                                       'bt_new_url' => null,
+                                                       'bt_moved' => 0,
+                                               ],
+                                               __METHOD__
+                                       );
+                                       $this->commitTransaction( $dbw, __METHOD__ );
+                                       $this->waitForSlaves();
+                               }
+
+                               // Reported and counted in dry-run mode as well
+                               print "$primaryId: resolved to $url\n";
+                               ++$numFixed;
+                       }
+               }
+
+               print "\n";
+               print "Fixed: $numFixed\n";
+               print "Unrecoverable: $numBad\n";
+               print "Good stubs: $numGood\n";
+       }
+
+       /**
+        * Throttled replication wait: calls wfWaitForSlaves() once per 50
+        * invocations instead of after every single row that gets fixed.
+        *
+        * The call counter is a function-level static, so it persists between
+        * calls and is reset to zero each time the wait is performed.
+        */
+       function waitForSlaves() {
+               static $iteration = 0;
+               // Increment exactly once per call and compare directly. Do not mix
+               // ">" and "==" in this condition: ">" binds tighter than "==" in PHP,
+               // so "++$iteration > 50 == 0" is true for every count up to 50.
+               if ( ++$iteration >= 50 ) {
+                       wfWaitForSlaves();
+                       $iteration = 0;
+               }
+       }
+
+       /**
+        * Look up the revision ID that uses a given text ID within one page.
+        *
+        * @param int $pageId Page whose revision/text map is consulted
+        * @param int $textId Text row ID to look for
+        * @return mixed The rev_id stored for $textId in the page's map, or null
+        *   when the map has no entry for it
+        */
+       function findTextIdInPage( $pageId, $textId ) {
+               $revIdByTextId = $this->getRevTextMap( $pageId );
+
+               return isset( $revIdByTextId[$textId] ) ? $revIdByTextId[$textId] : null;
+       }
+
+       /**
+        * Get the rev_text_id => rev_id map built from all revision rows of a
+        * page, loading it from the replica DB on first use and caching it in
+        * $this->mapCache.
+        *
+        * Before a new map is loaded, cached maps are dropped for as long as the
+        * running entry count ($this->mapCacheSize) exceeds
+        * $this->maxMapCacheSize.
+        *
+        * @param int $pageId
+        * @return array Map of rev_text_id to rev_id
+        */
+       function getRevTextMap( $pageId ) {
+               // Cache hit: nothing to load
+               if ( isset( $this->mapCache[$pageId] ) ) {
+                       return $this->mapCache[$pageId];
+               }
+
+               // Limit cache size
+               while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
+                       $evictKey = key( $this->mapCache );
+                       $this->mapCacheSize -= count( $this->mapCache[$evictKey] );
+                       unset( $this->mapCache[$evictKey] );
+               }
+
+               $rows = $this->getDB( DB_REPLICA )->select(
+                       'revision',
+                       [ 'rev_id', 'rev_text_id' ],
+                       [ 'rev_page' => $pageId ],
+                       __METHOD__
+               );
+
+               $revIdByTextId = [];
+               foreach ( $rows as $revRow ) {
+                       $revIdByTextId[$revRow->rev_text_id] = $revRow->rev_id;
+               }
+
+               // Store the map and account for its entries in the running total
+               $this->mapCache[$pageId] = $revIdByTextId;
+               $this->mapCacheSize += count( $revIdByTextId );
+
+               return $revIdByTextId;
+       }
+
+       /**
+        * This is based on part of HistoryBlobStub::getText().
+        * Determine if the text can be retrieved from the row in the normal way.
+        *
+        * Returns false as soon as any step fails (malformed external URL,
+        * failed external fetch, missing 'object' flag, failed inflate, or data
+        * that does not unserialize to an object), so that false or non-string
+        * values are never passed on to unserialize().
+        *
+        * @param array $stub Stub info; only the 'hash' key is read here
+        * @param stdClass $secondaryRow Row providing old_flags and old_text
+        * @return bool True if the item named by the stub hash can be read from
+        *   the blob stored in the secondary row
+        */
+       function isUnbrokenStub( $stub, $secondaryRow ) {
+               $flags = explode( ',', $secondaryRow->old_flags );
+               $text = $secondaryRow->old_text;
+               if ( in_array( 'external', $flags ) ) {
+                       $url = $text;
+                       // A usable URL has a non-empty part after "://"
+                       $parts = explode( '://', $url, 2 );
+                       if ( count( $parts ) < 2 || $parts[1] == "" ) {
+                               return false;
+                       }
+                       $text = ExternalStore::fetchFromURL( $url );
+                       if ( $text === false ) {
+                               // The external fetch failed; there is nothing to decode
+                               return false;
+                       }
+               }
+               if ( !in_array( 'object', $flags ) ) {
+                       return false;
+               }
+
+               if ( in_array( 'gzip', $flags ) ) {
+                       $text = gzinflate( $text );
+                       if ( $text === false ) {
+                               // Corrupt compressed data
+                               return false;
+                       }
+               }
+               $obj = unserialize( $text );
+
+               if ( is_string( $obj ) ) {
+                       // Correct for old double-serialization bug.
+                       $obj = unserialize( $obj );
+               }
+
+               if ( !is_object( $obj ) ) {
+                       return false;
+               }
+
+               $obj->uncompress();
+               $text = $obj->getItem( $stub['hash'] );
+
+               return $text !== false;
+       }
+}
+
+// Name this script's class in $maintClass before including the runner.
+// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file -
+// presumably it comes from the Maintenance.php include above; confirm.
+$maintClass = 'FixT22757';
+require_once RUN_MAINTENANCE_IF_MAIN;
index a2dc376..4f22843 100644 (file)
@@ -69,7 +69,7 @@ class TrackBlobs {
                echo "Doing integrity check...\n";
                $dbr = wfGetDB( DB_REPLICA );
 
-               // Scan for HistoryBlobStub objects in the text table (bug 20757)
+               // Scan for HistoryBlobStub objects in the text table (T22757)
 
                $exists = $dbr->selectField( 'text', 1,
                        'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
@@ -84,7 +84,7 @@ class TrackBlobs {
                        exit( 1 );
                }
 
-               // Scan the archive table for HistoryBlobStub objects or external flags (bug 22624)
+               // Scan the archive table for HistoryBlobStub objects or external flags (T24624)
                $flags = $dbr->selectField( 'archive', 'ar_flags',
                        'ar_flags LIKE \'%external%\' OR (' .
                        'ar_flags LIKE \'%object%\' ' .
index 892f799..44922a4 100644 (file)
@@ -594,7 +594,7 @@ CREATE TABLE /*_*/categorylinks (
   -- conversion algorithm is run.  We store this so that we can update
   -- collations without reparsing all pages.
   -- Note: If you change the length of this field, you also need to change
-  -- code in LinksUpdate.php. See bug 25254.
+  -- code in LinksUpdate.php. See T27254.
   cl_sortkey_prefix varchar(255) binary NOT NULL default '',
 
   -- This isn't really used at present. Provided for an optional
@@ -816,7 +816,7 @@ CREATE TABLE /*_*/ipblocks (
   -- Size chosen to allow IPv6
   -- FIXME: these fields were originally blank for single-IP blocks,
   -- but now they are populated. No migration was ever done. They
-  -- should be fixed to be blank again for such blocks (bug 49504).
+  -- should be fixed to be blank again for such blocks (T51504).
   ipb_range_start tinyblob NOT NULL,
   ipb_range_end tinyblob NOT NULL,
 
index a672e29..d96cecd 100755 (executable)
@@ -112,7 +112,7 @@ class UpdateMediaWiki extends Maintenance {
                }
 
                $lang = Language::factory( 'en' );
-               // Set global language to ensure localised errors are in English (bug 20633)
+               // Set global language to ensure localised errors are in English (T22633)
                RequestContext::getMain()->setLanguage( $lang );
                $wgLang = $lang; // BackCompat
 
@@ -203,7 +203,7 @@ class UpdateMediaWiki extends Maintenance {
 
                # Don't try to access the database
                # This needs to be disabled early since extensions will try to use the l10n
-               # cache from $wgExtensionFunctions (bug 20471)
+               # cache from $wgExtensionFunctions (T22471)
                $wgLocalisationCacheConf = [
                        'class' => 'LocalisationCache',
                        'storeClass' => 'LCStoreNull',