In fixBug20757.php:
author Tim Starling <tstarling@users.mediawiki.org>
Mon, 22 Feb 2010 02:33:44 +0000 (02:33 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Mon, 22 Feb 2010 02:33:44 +0000 (02:33 +0000)
* Don't use buildLike() just yet, for easy backporting to wmf-deployment.
* Don't try to fetch old_flags=object,external rows
* Skip rows with the wrong class using a MySQL condition since we really don't care about them
* Propagate old_flags to the new pointer row. This could be necessary if moveToExternal.php was run after compressOld.php from MW>1.5. This never actually happened at Wikimedia.
* Don't skip rows with bt_moved=0, we need to resolve these stubs if RCT is to continue.
* Fix isUnbrokenStub() so that it does not overwrite its input; this fixes a minor progress message issue.
* Tested locally.

In resolveStubs.php:
* Fixed two bugs in the condition intended to limit the rows acted on to HistoryBlobStub: the string we compared against was not all in lower case, and the character set was binary, making LOWER() do nothing.
* Resolve stubs with old_flags='object,utf-8'; analysis showed that we have some of these on enwiki.
* Tested locally.

In trackBlobs.php:
* Fixed a bug causing incorrect values to be inserted into bt_cgz_hash. There was no impact on RCT or any other script since bt_cgz_hash is unused at present. Tested locally.

maintenance/storage/fixBug20757.php
maintenance/storage/resolveStubs.php
maintenance/storage/trackBlobs.php

index 8182b4e..922d472 100644 (file)
@@ -39,7 +39,8 @@ class FixBug20757 extends Maintenance {
                                array( 'old_id', 'old_flags', 'old_text' ),
                                array( 
                                        'old_id > ' . intval( $startId ),
-                                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString )
+                                       'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
+                                       'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
                                ),
                                __METHOD__,
                                array( 
@@ -80,10 +81,19 @@ class FixBug20757 extends Maintenance {
                                        continue;
                                }
 
+                               // Process flags
+                               $flags = explode( ',', $row->old_flags );
+                               if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
+                                       $legacyEncoding = false;
+                               } else {
+                                       $legacyEncoding = true;
+                               }
+
                                // Queue the stub for future batch processing
                                $id = intval( $obj->mOldId );
                                $secondaryIds[] = $id;
                                $stubs[$row->old_id] = array(
+                                       'legacyEncoding' => $legacyEncoding,
                                        'secondaryId' => $id,
                                        'hash' => $obj->mHash,
                                );
@@ -101,7 +111,6 @@ class FixBug20757 extends Maintenance {
                                '*',
                                array(
                                        'bt_text_id' => $secondaryIds,
-                                       'bt_moved' => 1,
                                ),
                                __METHOD__
                        );
@@ -170,6 +179,8 @@ class FixBug20757 extends Maintenance {
                                        }
                                }
 
+                               $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
+
                                if ( !$dryRun ) {
                                        // Reset the text row to point to the original copy
                                        $dbw->begin();
@@ -177,7 +188,7 @@ class FixBug20757 extends Maintenance {
                                                'text',
                                                // SET
                                                array(
-                                                       'old_flags' => 'external', // use legacy encoding
+                                                       'old_flags' => $newFlags,
                                                        'old_text' => $url
                                                ),
                                                // WHERE
@@ -264,22 +275,23 @@ class FixBug20757 extends Maintenance {
         */
        function isUnbrokenStub( $stub, $secondaryRow ) {
                $flags = explode( ',', $secondaryRow->old_flags );
+               $text = $secondaryRow->old_text;
                if( in_array( 'external', $flags ) ) {
-                       $url = $secondaryRow->old_text;
+                       $url = $text;
                        @list( /* $proto */ , $path ) = explode( '://', $url, 2 );
                        if ( $path == "" ) {
                                return false;
                        }
-                       $secondaryRow->old_text = ExternalStore::fetchFromUrl( $url );
+                       $text = ExternalStore::fetchFromUrl( $url );
                }
                if( !in_array( 'object', $flags ) ) {
                        return false;
                }
 
                if( in_array( 'gzip', $flags ) ) {
-                       $obj = unserialize( gzinflate( $secondaryRow->old_text ) );
+                       $obj = unserialize( gzinflate( $text ) );
                } else {
-                       $obj = unserialize( $secondaryRow->old_text );
+                       $obj = unserialize( $text );
                }
 
                if( !is_object( $obj ) ) {
index 0e51f0b..346151e 100644 (file)
@@ -35,11 +35,9 @@ function resolveStubs() {
                
                $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ),
                        "old_id>=$start AND old_id<=$end " .
-                       # Using a more restrictive flag set for now, until I do some more analysis -- TS
-                       #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
-                       
-                       "AND old_flags='object' " .
-                       "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname );
+                       "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
+                       'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', 
+                       $fname );
                while ( $row = $dbr->fetchObject( $res ) ) {
                        resolveStub( $row->old_id, $row->old_text, $row->old_flags );
                }
index 5f25e39..63327d5 100644 (file)
@@ -73,7 +73,7 @@ class TrackBlobs {
                return array(
                        'cluster' => $m[1],
                        'id' => intval( $m[2] ),
-                       'hash' => isset( $m[3] ) ? $m[2] : null
+                       'hash' => isset( $m[3] ) ? $m[3] : null
                );
        }