From 980d06840476638e379ca35da707d3a9084e2949 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Mon, 22 Feb 2010 02:33:44 +0000 Subject: [PATCH] In fixBug20757.php: * Don't use buildLike() just yet, for easy backporting to wmf-deployment. * Don't try to fetch old_flags=object,external rows * Skip rows with the wrong class using a MySQL condition since we really don't care about them * Propagate old_flags to the new pointer row. This could be necessary if moveToExternal.php was run after compressOld.php from MW>1.5. This never actually happened at Wikimedia. * Don't skip rows with bt_moved=0, we need to resolve these stubs if RCT is to continue. * Fix isUnbrokenStub() to not overwrite its input, fixes minor progress message issue * Tested locally. In resolveStubs.php: * Fixed two bugs in the condition intended to limit the rows acted on to HistoryBlobStub: the string we compared against was not all in lower case, and the character set was binary, making LOWER() do nothing. * Resolve stubs with old_flags='object,utf-8', analysis showed that we have some of these on enwiki * Tested locally. In trackBlobs.php: * Fixed a bug causing incorrect values to be inserted into bt_cgz_hash. There was no impact on RCT or any other script since bt_cgz_hash is unused at present. Tested locally. --- maintenance/storage/fixBug20757.php | 26 +++++++++++++++++++------- maintenance/storage/resolveStubs.php | 8 +++----- maintenance/storage/trackBlobs.php | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php index 8182b4e4f1..922d47259a 100644 --- a/maintenance/storage/fixBug20757.php +++ b/maintenance/storage/fixBug20757.php @@ -39,7 +39,8 @@ class FixBug20757 extends Maintenance { array( 'old_id', 'old_flags', 'old_text' ), array( 'old_id > ' . intval( $startId ), - 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString ) + 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'', + 'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', ), __METHOD__, array( @@ -80,10 +81,19 @@ class FixBug20757 extends Maintenance { continue; } + // Process flags + $flags = explode( ',', $row->old_flags ); + if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) { + $legacyEncoding = false; + } else { + $legacyEncoding = true; + } + // Queue the stub for future batch processing $id = intval( $obj->mOldId ); $secondaryIds[] = $id; $stubs[$row->old_id] = array( + 'legacyEncoding' => $legacyEncoding, 'secondaryId' => $id, 'hash' => $obj->mHash, ); @@ -101,7 +111,6 @@ class FixBug20757 extends Maintenance { '*', array( 'bt_text_id' => $secondaryIds, - 'bt_moved' => 1, ), __METHOD__ ); @@ -170,6 +179,8 @@ class FixBug20757 extends Maintenance { } } + $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8'; + if ( !$dryRun ) { // Reset the text row to point to the original copy $dbw->begin(); @@ -177,7 +188,7 @@ class FixBug20757 extends Maintenance { 'text', // SET array( - 'old_flags' => 'external', // use legacy encoding + 'old_flags' => $newFlags, 'old_text' => $url ), // WHERE @@ -264,22 +275,23 @@ class FixBug20757 extends Maintenance { */ function isUnbrokenStub( $stub, $secondaryRow ) { $flags = explode( ',', $secondaryRow->old_flags ); + $text = $secondaryRow->old_text; if( in_array( 'external', $flags ) ) { - $url = $secondaryRow->old_text; + $url = $text; @list( /* $proto */ , $path ) = explode( '://', $url, 2 ); if ( $path == "" ) { return false; } - $secondaryRow->old_text = ExternalStore::fetchFromUrl( $url ); + $text = ExternalStore::fetchFromUrl( $url ); } if( !in_array( 'object', $flags ) ) { return false; } if( in_array( 'gzip', $flags ) ) { - $obj = unserialize( gzinflate( $secondaryRow->old_text ) ); + $obj = unserialize( gzinflate( $text ) ); } else { - $obj = unserialize( $secondaryRow->old_text ); + $obj = unserialize( $text ); } if( !is_object( $obj ) ) { diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php index 0e51f0b098..346151e941 100644 --- a/maintenance/storage/resolveStubs.php +++ b/maintenance/storage/resolveStubs.php @@ -35,11 +35,9 @@ function resolveStubs() { $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ), "old_id>=$start AND old_id<=$end " . - # Using a more restrictive flag set for now, until I do some more analysis -- TS - #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". - - "AND old_flags='object' " . - "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname ); + "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". + 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + $fname ); while ( $row = $dbr->fetchObject( $res ) ) { resolveStub( $row->old_id, $row->old_text, $row->old_flags ); } diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php index 5f25e390ea..63327d5389 100644 --- a/maintenance/storage/trackBlobs.php +++ b/maintenance/storage/trackBlobs.php @@ -73,7 +73,7 @@ class TrackBlobs { return array( 'cluster' => $m[1], 'id' => intval( $m[2] ), - 'hash' => isset( $m[3] ) ? $m[2] : null + 'hash' => isset( $m[3] ) ? $m[3] : null ); } -- 2.20.1