Assignment in loop conditions sucks
[lhc/web/wiklou.git] / maintenance / storage / fixBug20757.php
index 614e180..73c21d5 100644 (file)
@@ -12,6 +12,7 @@ class FixBug20757 extends Maintenance {
                parent::__construct();
                $this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
                $this->addOption( 'dry-run', 'Report only' );
+               $this->addOption( 'start', 'old_id to start at', false, true );
        }
        
        function execute() {
@@ -23,25 +24,36 @@ class FixBug20757 extends Maintenance {
                        print "Dry run only.\n";
                }
 
-               $startId = 0;
+               $startId = $this->getOption( 'start', 0 );
                $numGood = 0;
                $numFixed = 0;
                $numBad = 0;
 
                $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
 
+               if ( $dbr->getType() == 'mysql'
+                       && version_compare( $dbr->getServerVersion(), '4.1.0', '>=' ) )
+               {
+                       // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
+                       $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
+               } else {
+                       // No CONVERT() in MySQL 4.0
+                       $lowerLeft = 'LOWER(LEFT(old_text,22))';
+               }
+
                while ( true ) {
                        print "ID: $startId / $totalRevs\r";
 
                        $res = $dbr->select(
                                'text',
                                array( 'old_id', 'old_flags', 'old_text' ),
-                               array( 
+                               array(
                                        'old_id > ' . intval( $startId ),
-                                       'old_flags' => 'object'
+                                       'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
+                                       "$lowerLeft = 'o:15:\"historyblobstub\"'",
                                ),
                                __METHOD__,
-                               array( 
+                               array(
                                        'ORDER BY' => 'old_id',
                                        'LIMIT' => $this->batchSize,
                                )
@@ -66,20 +78,12 @@ class FixBug20757 extends Maintenance {
                                }
 
                                if ( !is_object( $obj ) ) {
-                                       print "{$row->old_id}: unrecoverable: unserialized to type " . 
+                                       print "{$row->old_id}: unrecoverable: unserialized to type " .
                                                gettype( $obj ) . ", possible double-serialization\n";
                                        ++$numBad;
                                        continue;
                                }
 
-                               // Check if it really is broken
-                               $text = Revision::getRevisionText( $row );
-                               if ( $text !== false ) {
-                                       // Not broken yet
-                                       ++$numGood;
-                                       continue;
-                               }
-
                                if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
                                        print "{$row->old_id}: unrecoverable: unexpected object class " .
                                                get_class( $obj ) . "\n";
@@ -87,10 +91,19 @@ class FixBug20757 extends Maintenance {
                                        continue;
                                }
 
+                               // Process flags
+                               $flags = explode( ',', $row->old_flags );
+                               if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
+                                       $legacyEncoding = false;
+                               } else {
+                                       $legacyEncoding = true;
+                               }
+
                                // Queue the stub for future batch processing
                                $id = intval( $obj->mOldId );
                                $secondaryIds[] = $id;
                                $stubs[$row->old_id] = array(
+                                       'legacyEncoding' => $legacyEncoding,
                                        'secondaryId' => $id,
                                        'hash' => $obj->mHash,
                                );
@@ -108,7 +121,6 @@ class FixBug20757 extends Maintenance {
                                '*',
                                array(
                                        'bt_text_id' => $secondaryIds,
-                                       'bt_moved' => 1,
                                ),
                                __METHOD__
                        );
@@ -118,24 +130,30 @@ class FixBug20757 extends Maintenance {
                        }
 
                        // Process the stubs
-                       $stubsToFix = array();
                        foreach ( $stubs as $primaryId => $stub ) {
                                $secondaryId = $stub['secondaryId'];
                                if ( !isset( $trackedBlobs[$secondaryId] ) ) {
-                                       $secondaryRow = $dbr->selectRow( 
-                                               'text', 
+                                       // No tracked blob. Work out what went wrong
+                                       $secondaryRow = $dbr->selectRow(
+                                               'text',
                                                array( 'old_flags', 'old_text' ),
-                                               array( 'old_id' => $secondaryId ), 
+                                               array( 'old_id' => $secondaryId ),
                                                __METHOD__
                                        );
                                        if ( !$secondaryRow ) {
                                                print "$primaryId: unrecoverable: secondary row is missing\n";
+                                               ++$numBad;
+                                       } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
+                                               // Not broken yet, and not in the tracked clusters so it won't get 
+                                               // broken by the current RCT run.
+                                               ++$numGood;
                                        } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
                                                print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
+                                               ++$numBad;
                                        } else {
                                                print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
+                                               ++$numBad;
                                        }
-                                       ++$numBad;
                                        unset( $stubs[$primaryId] );
                                        continue;
                                }
@@ -159,17 +177,19 @@ class FixBug20757 extends Maintenance {
 
                                // Find the page_id and rev_id
                                // The page is probably the same as the page of the secondary row
-                               $pageId = $this->bt_page;
-                               if ( $pageId === null ) {
-                                       $revId = null;
+                               $pageId = intval( $trackRow->bt_page );
+                               if ( !$pageId ) {
+                                       $revId = $pageId = 0;
                                } else {
                                        $revId = $this->findTextIdInPage( $pageId, $primaryId );
-                                       if ( $revId === null ) {
+                                       if ( !$revId ) {
                                                // Actually an orphan
-                                               $pageId = null;
+                                               $pageId = $revId = 0;
                                        }
                                }
 
+                               $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
+
                                if ( !$dryRun ) {
                                        // Reset the text row to point to the original copy
                                        $dbw->begin();
@@ -177,7 +197,7 @@ class FixBug20757 extends Maintenance {
                                                'text',
                                                // SET
                                                array(
-                                                       'old_flags' => 'external', // use legacy encoding
+                                                       'old_flags' => $newFlags,
                                                        'old_text' => $url
                                                ),
                                                // WHERE
@@ -189,7 +209,7 @@ class FixBug20757 extends Maintenance {
                                        // without needing to run trackBlobs.php again
                                        $dbw->insert( 'blob_tracking',
                                                array(
-                                                       'bt_page' => $trackRow->bt_page,
+                                                       'bt_page' => $pageId,
                                                        'bt_rev_id' => $revId,
                                                        'bt_text_id' => $primaryId,
                                                        'bt_cluster' => $trackRow->bt_cluster,
@@ -212,7 +232,7 @@ class FixBug20757 extends Maintenance {
                print "\n";
                print "Fixed: $numFixed\n";
                print "Unrecoverable: $numBad\n";
-               print "Not yet broken: $numGood\n";
+               print "Good stubs: $numGood\n";
        }
 
        function waitForSlaves() {
@@ -244,7 +264,7 @@ class FixBug20757 extends Maintenance {
 
                        $dbr = wfGetDB( DB_SLAVE );
                        $map = array();
-                       $res = $dbr->select( 'revision', 
+                       $res = $dbr->select( 'revision',
                                array( 'rev_id', 'rev_text_id' ),
                                array( 'rev_page' => $pageId ),
                                __METHOD__
@@ -258,6 +278,44 @@ class FixBug20757 extends Maintenance {
                return $this->mapCache[$pageId];
        }
 
+       /**
+        * Determine if the stub text can be retrieved from the secondary row in the normal way.
+        * Based on part of HistoryBlobStub::getText(), but unusable data yields false, not a fatal.
+        * @param array $stub Stub info; only the 'hash' element is read
+        * @param object $secondaryRow Text row providing old_flags and old_text
+        * @return bool True if the secondary blob yields an item for the stub's hash
+        */
+       function isUnbrokenStub( $stub, $secondaryRow ) {
+               $flags = explode( ',', $secondaryRow->old_flags );
+               // Rows without the object flag can never resolve, so skip any external fetch
+               if ( !in_array( 'object', $flags, true ) ) {
+                       return false;
+               }
+               $text = $secondaryRow->old_text;
+               if ( in_array( 'external', $flags, true ) ) {
+                       // A usable URL has a non-empty path after the "://" separator
+                       $parts = explode( '://', $text, 2 );
+                       if ( count( $parts ) < 2 || $parts[1] === '' ) {
+                               return false;
+                       }
+                       $text = ExternalStore::fetchFromUrl( $text );
+               }
+               if ( is_string( $text ) && in_array( 'gzip', $flags, true ) ) {
+                       $text = gzinflate( $text );
+               }
+               $obj = is_string( $text ) ? unserialize( $text ) : false;
+               if ( is_string( $obj ) ) {
+                       // Correct for old double-serialization bug.
+                       $obj = unserialize( $obj );
+               }
+               // A corrupt row may unserialize to something that is not a usable history blob
+               if ( !is_object( $obj ) || !method_exists( $obj, 'uncompress' )
+                       || !method_exists( $obj, 'getItem' ) ) {
+                       return false;
+               }
+               $obj->uncompress();
+               return $obj->getItem( $stub['hash'] ) !== false;
+       }
 }
 
 $maintClass = 'FixBug20757';