parent::__construct();
$this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
$this->addOption( 'dry-run', 'Report only' );
+ $this->addOption( 'start', 'old_id to start at', false, true );
}
function execute() {
print "Dry run only.\n";
}
- $startId = 0;
+ $startId = $this->getOption( 'start', 0 );
$numGood = 0;
$numFixed = 0;
$numBad = 0;
$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
+ if ( $dbr->getType() == 'mysql'
+ && version_compare( $dbr->getServerVersion(), '4.1.0', '>=' ) )
+ {
+ // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
+ $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
+ } else {
+ // No CONVERT() in MySQL 4.0
+ $lowerLeft = 'LOWER(LEFT(old_text,22))';
+ }
+
while ( true ) {
print "ID: $startId / $totalRevs\r";
$res = $dbr->select(
'text',
array( 'old_id', 'old_flags', 'old_text' ),
- array(
+ array(
'old_id > ' . intval( $startId ),
- 'old_flags' => 'object'
+ 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
+ "$lowerLeft = 'o:15:\"historyblobstub\"'",
),
__METHOD__,
- array(
+ array(
'ORDER BY' => 'old_id',
'LIMIT' => $this->batchSize,
)
}
if ( !is_object( $obj ) ) {
- print "{$row->old_id}: unrecoverable: unserialized to type " .
+ print "{$row->old_id}: unrecoverable: unserialized to type " .
gettype( $obj ) . ", possible double-serialization\n";
++$numBad;
continue;
}
- // Check if it really is broken
- $text = Revision::getRevisionText( $row );
- if ( $text !== false ) {
- // Not broken yet
- ++$numGood;
- continue;
- }
-
if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
print "{$row->old_id}: unrecoverable: unexpected object class " .
get_class( $obj ) . "\n";
continue;
}
+ // Process flags
+ $flags = explode( ',', $row->old_flags );
+ if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
+ $legacyEncoding = false;
+ } else {
+ $legacyEncoding = true;
+ }
+
// Queue the stub for future batch processing
$id = intval( $obj->mOldId );
$secondaryIds[] = $id;
$stubs[$row->old_id] = array(
+ 'legacyEncoding' => $legacyEncoding,
'secondaryId' => $id,
'hash' => $obj->mHash,
);
'*',
array(
'bt_text_id' => $secondaryIds,
- 'bt_moved' => 1,
),
__METHOD__
);
}
// Process the stubs
- $stubsToFix = array();
foreach ( $stubs as $primaryId => $stub ) {
$secondaryId = $stub['secondaryId'];
if ( !isset( $trackedBlobs[$secondaryId] ) ) {
- $secondaryRow = $dbr->selectRow(
- 'text',
+ // No tracked blob. Work out what went wrong
+ $secondaryRow = $dbr->selectRow(
+ 'text',
array( 'old_flags', 'old_text' ),
- array( 'old_id' => $secondaryId ),
+ array( 'old_id' => $secondaryId ),
__METHOD__
);
if ( !$secondaryRow ) {
print "$primaryId: unrecoverable: secondary row is missing\n";
+ ++$numBad;
+ } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
+ // Not broken yet, and not in the tracked clusters so it won't get
+ // broken by the current RCT run.
+ ++$numGood;
} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
+ ++$numBad;
} else {
print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
+ ++$numBad;
}
- ++$numBad;
unset( $stubs[$primaryId] );
continue;
}
// Find the page_id and rev_id
// The page is probably the same as the page of the secondary row
- $pageId = $this->bt_page;
- if ( $pageId === null ) {
- $revId = null;
+ $pageId = intval( $trackRow->bt_page );
+ if ( !$pageId ) {
+ $revId = $pageId = 0;
} else {
$revId = $this->findTextIdInPage( $pageId, $primaryId );
- if ( $revId === null ) {
+ if ( !$revId ) {
// Actually an orphan
- $pageId = null;
+ $pageId = $revId = 0;
}
}
+ $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
+
if ( !$dryRun ) {
// Reset the text row to point to the original copy
$dbw->begin();
'text',
// SET
array(
- 'old_flags' => 'external', // use legacy encoding
+ 'old_flags' => $newFlags,
'old_text' => $url
),
// WHERE
// without needing to run trackBlobs.php again
$dbw->insert( 'blob_tracking',
array(
- 'bt_page' => $trackRow->bt_page,
+ 'bt_page' => $pageId,
'bt_rev_id' => $revId,
'bt_text_id' => $primaryId,
'bt_cluster' => $trackRow->bt_cluster,
print "\n";
print "Fixed: $numFixed\n";
print "Unrecoverable: $numBad\n";
- print "Not yet broken: $numGood\n";
+ print "Good stubs: $numGood\n";
}
function waitForSlaves() {
$dbr = wfGetDB( DB_SLAVE );
$map = array();
- $res = $dbr->select( 'revision',
+ $res = $dbr->select( 'revision',
array( 'rev_id', 'rev_text_id' ),
array( 'rev_page' => $pageId ),
__METHOD__
return $this->mapCache[$pageId];
}
+ /**
+  * Determine if the text a stub points at can still be retrieved from its
+  * secondary text row in the normal way.
+  *
+  * This is based on part of HistoryBlobStub::getText().
+  *
+  * @param $stub Array: stub information; only the 'hash' element is read here
+  * @param $secondaryRow Object: text row providing old_flags and old_text
+  * @return Boolean: true if the item named by $stub['hash'] could be loaded
+  *   from the row, false in every other case
+  */
+ function isUnbrokenStub( $stub, $secondaryRow ) {
+ 	$flags = explode( ',', $secondaryRow->old_flags );
+ 	$text = $secondaryRow->old_text;
+
+ 	if ( in_array( 'external', $flags, true ) ) {
+ 		$url = $text;
+ 		// The URL is only usable if something follows the "://" separator
+ 		$parts = explode( '://', $url, 2 );
+ 		if ( count( $parts ) < 2 || $parts[1] === '' ) {
+ 			return false;
+ 		}
+ 		$text = ExternalStore::fetchFromUrl( $url );
+ 	}
+
+ 	if ( !in_array( 'object', $flags, true ) ) {
+ 		return false;
+ 	}
+
+ 	if ( !is_string( $text ) ) {
+ 		// Nothing usable to unserialize, e.g. the external fetch did not
+ 		// hand back a string
+ 		return false;
+ 	}
+
+ 	if ( in_array( 'gzip', $flags, true ) ) {
+ 		$text = gzinflate( $text );
+ 		if ( $text === false ) {
+ 			// Compressed data is corrupt
+ 			return false;
+ 		}
+ 	}
+
+ 	$obj = unserialize( $text );
+
+ 	if ( is_string( $obj ) ) {
+ 		// Correct for old double-serialization bug. Only a string can be a
+ 		// second serialized layer; anything else is left for the check below.
+ 		$obj = unserialize( $obj );
+ 	}
+
+ 	// Reject non-objects, and objects that lack the methods called below
+ 	// (such as an incomplete class), instead of dying with a fatal error.
+ 	if ( !is_object( $obj )
+ 		|| !method_exists( $obj, 'uncompress' )
+ 		|| !method_exists( $obj, 'getItem' ) )
+ 	{
+ 		return false;
+ 	}
+
+ 	$obj->uncompress();
+ 	$text = $obj->getItem( $stub['hash'] );
+ 	return $text !== false;
+ }
}
$maintClass = 'FixBug20757';