3 require_once( dirname( __FILE__
) . '/../Maintenance.php' );
/**
 * Maintenance script that repairs `text` rows affected by bug 20757.
 *
 * It walks the `text` table in batches, picks out rows flagged 'object' whose
 * serialized HistoryBlobStub can no longer be turned into revision text, looks
 * the stub's target row up in `blob_tracking`, verifies the externally stored
 * text against the MD5 hash kept in the stub, and then rewrites the broken row
 * so that it points straight at the external-store URL.
 */
class FixBug20757 extends Maintenance {
	/** Number of `text` rows fetched per batch query. */
	public $batchSize = 10000;

	/** Cache of per-page maps: page ID => array( rev_text_id => rev_id ). */
	public $mapCache = array();

	/** Total number of rev_text_id => rev_id entries currently held in $mapCache. */
	public $mapCacheSize = 0;

	/** Entry count above which cached page maps are evicted before loading a new one. */
	public $maxMapCacheSize = 1000000;

	/**
	 * Sets the script description and registers the 'dry-run' option.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
		$this->addOption( 'dry-run', 'Report only' );
	}

	/**
	 * Scans the `text` table in ascending old_id order and repairs broken stubs.
	 *
	 * One line is printed per unrecoverable or resolved row, followed by a
	 * summary of the fixed / unrecoverable / not-yet-broken counts. With the
	 * 'dry-run' option no database writes are performed.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			print "Dry run only.\n";
		}

		$startId = 0;
		$numGood = 0;
		$numFixed = 0;
		$numBad = 0;

		$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );

		while ( true ) {
			// Progress indicator; "\r" keeps it on a single terminal line
			print "ID: $startId / $totalRevs\r";

			$res = $dbr->select(
				'text',
				array( 'old_id', 'old_flags', 'old_text' ),
				array(
					'old_id > ' . intval( $startId ),
					'old_flags' => 'object'
				),
				__METHOD__,
				array(
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize,
				)
			);

			if ( !$res->numRows() ) {
				break;
			}

			$secondaryIds = array();
			$stubs = array();

			foreach ( $res as $row ) {
				// Remember how far we got so the next batch resumes after this row
				$startId = $row->old_id;

				// Basic sanity checks
				// NOTE(review): unserialize() runs on whatever is stored in old_text;
				// this is only safe as long as the text table content is trusted.
				$obj = unserialize( $row->old_text );
				if ( $obj === false ) {
					print "{$row->old_id}: unrecoverable: cannot unserialize\n";
					++$numBad;
					continue;
				}

				if ( !is_object( $obj ) ) {
					print "{$row->old_id}: unrecoverable: unserialized to type " .
						gettype( $obj ) . ", possible double-serialization\n";
					++$numBad;
					continue;
				}

				// Check if it really is broken
				$text = Revision::getRevisionText( $row );
				if ( $text !== false ) {
					// The text can still be loaded, so there is nothing to repair
					++$numGood;
					continue;
				}

				if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
					print "{$row->old_id}: unrecoverable: unexpected object class " .
						get_class( $obj ) . "\n";
					++$numBad;
					continue;
				}

				// Queue the stub for future batch processing
				$id = intval( $obj->mOldId );
				$secondaryIds[] = $id;
				$stubs[$row->old_id] = array(
					'secondaryId' => $id,
					'hash' => $obj->mHash,
				);
			}

			$secondaryIds = array_unique( $secondaryIds );

			if ( !count( $secondaryIds ) ) {
				continue;
			}

			// Run the batch query on blob_tracking
			$res = $dbr->select(
				'blob_tracking',
				'*',
				array(
					'bt_text_id' => $secondaryIds,
				),
				__METHOD__
			);

			$trackedBlobs = array();
			foreach ( $res as $row ) {
				$trackedBlobs[$row->bt_text_id] = $row;
			}

			// Process the stubs
			foreach ( $stubs as $primaryId => $stub ) {
				$secondaryId = $stub['secondaryId'];
				if ( !isset( $trackedBlobs[$secondaryId] ) ) {
					// No tracking row for the target; look at the target row itself
					// to report why the stub cannot be recovered
					$secondaryRow = $dbr->selectRow(
						'text',
						array( 'old_flags', 'old_text' ),
						array( 'old_id' => $secondaryId ),
						__METHOD__
					);
					if ( !$secondaryRow ) {
						print "$primaryId: unrecoverable: secondary row is missing\n";
					} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
						print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
					} else {
						print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
					}
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}
				$trackRow = $trackedBlobs[$secondaryId];

				// Check that the specified text really is available in the tracked source row
				$url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
				$text = ExternalStore::fetchFromURL( $url );
				if ( $text === false ) {
					print "$primaryId: unrecoverable: source text missing\n";
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}
				if ( md5( $text ) !== $stub['hash'] ) {
					print "$primaryId: unrecoverable: content hashes do not match\n";
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}

				// Find the page_id and rev_id
				// The page is probably the same as the page of the secondary row,
				// so take it from the tracking row of the secondary text ID
				$pageId = $trackRow->bt_page;
				$revId = null;
				if ( $pageId !== null ) {
					// Stays null when no revision of that page uses this text row,
					// i.e. the row is actually an orphan
					$revId = $this->findTextIdInPage( $pageId, $primaryId );
				}

				if ( !$dryRun ) {
					// Reset the text row to point to the original copy
					$dbw->begin();
					$dbw->update(
						'text',
						// SET
						array(
							'old_flags' => 'external', // use legacy encoding
							'old_text' => $url
						),
						// WHERE
						array( 'old_id' => $primaryId ),
						__METHOD__
					);

					// Add a blob_tracking row so that the new reference can be recompressed
					// without needing to run trackBlobs.php again
					$dbw->insert( 'blob_tracking',
						array(
							'bt_page' => $trackRow->bt_page,
							'bt_rev_id' => $revId,
							'bt_text_id' => $primaryId,
							'bt_cluster' => $trackRow->bt_cluster,
							'bt_blob_id' => $trackRow->bt_blob_id,
							'bt_cgz_hash' => $stub['hash'],
							'bt_new_url' => null,
						),
						__METHOD__
					);
					$dbw->commit();

					$this->waitForSlaves();
				}

				print "$primaryId: resolved to $url\n";
				++$numFixed;
			}
		}

		// Move off the "\r" progress line before printing the summary
		print "\n";
		print "Fixed: $numFixed\n";
		print "Unrecoverable: $numBad\n";
		print "Not yet broken: $numGood\n";
	}

	/**
	 * Throttles writes: on every 50th call, calls wfWaitForSlaves( 5 ).
	 */
	public function waitForSlaves() {
		static $iteration = 0;

		// Modulo, not a comparison: "> 50 == 0" would wait on each of the first
		// 50 calls and never afterwards
		if ( ++$iteration % 50 == 0 ) {
			wfWaitForSlaves( 5 );
		}
	}

	/**
	 * Finds the revision of a page that uses a given text row.
	 *
	 * @param $pageId Page ID whose revisions are searched
	 * @param $textId Text row ID (rev_text_id) to look for
	 * @return The matching rev_id, or null if no revision of the page uses it
	 */
	public function findTextIdInPage( $pageId, $textId ) {
		$ids = $this->getRevTextMap( $pageId );
		if ( !isset( $ids[$textId] ) ) {
			return null;
		}
		return $ids[$textId];
	}

	/**
	 * Returns the rev_text_id => rev_id map for a page, loading it on first use.
	 *
	 * Loaded maps are kept in $mapCache. Before a new map is loaded, the oldest
	 * cached maps are dropped while the cached entry count exceeds
	 * $maxMapCacheSize.
	 *
	 * @param $pageId Page ID (rev_page) to load the map for
	 * @return Array mapping rev_text_id to rev_id
	 */
	public function getRevTextMap( $pageId ) {
		if ( !isset( $this->mapCache[$pageId] ) ) {
			// Limit cache size: evict from the front (oldest insertion) first.
			// The emptiness guard prevents spinning if the counter ever drifts.
			while ( $this->mapCacheSize > $this->maxMapCacheSize && count( $this->mapCache ) ) {
				reset( $this->mapCache );
				$key = key( $this->mapCache );
				$this->mapCacheSize -= count( $this->mapCache[$key] );
				unset( $this->mapCache[$key] );
			}

			$dbr = wfGetDB( DB_SLAVE );
			$map = array();
			$res = $dbr->select( 'revision',
				array( 'rev_id', 'rev_text_id' ),
				array( 'rev_page' => $pageId ),
				__METHOD__
			);
			foreach ( $res as $row ) {
				$map[$row->rev_text_id] = $row->rev_id;
			}
			$this->mapCache[$pageId] = $map;
			$this->mapCacheSize += count( $map );
		}
		return $this->mapCache[$pageId];
	}
}
// Name the class defined in this file, then pull in the file that the
// DO_MAINTENANCE constant points to (presumably the runner that instantiates
// $maintClass and calls execute() - confirm against Maintenance.php).
$maintClass = 'FixBug20757';
require_once DO_MAINTENANCE;