\r
//----------------------------------------------------------------------------------\r
\r
-function checkError( $msg, $ids ) {\r
- global $oldIdMap;\r
- if ( is_array( $ids ) && count( $ids ) == 1 ) {\r
- $ids = reset( $ids );\r
- }\r
- if ( is_array( $ids ) ) {\r
- $revIds = array();\r
- foreach ( $ids as $id ) {\r
- $revIds = array_merge( $revIds, array_keys( $oldIdMap, $id ) );\r
- }\r
- print "$msg in text rows " . implode( ', ', $ids ) . \r
- ", revisions " . implode( ', ', $revIds ) . "\n";\r
- } else {\r
- $id = $ids;\r
- $revIds = array_keys( $oldIdMap, $id );\r
- if ( count( $revIds ) == 1 ) {\r
- print "$msg in old_id $id, rev_id {$revIds[0]}\n";\r
- } else {\r
- print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";\r
- }\r
- }\r
-}\r
-\r
function checkStorage() {\r
- global $oldIdMap;\r
+ global $oldIdMap, $brokenRevisions;\r
\r
$fname = 'checkStorage';\r
$dbr =& wfGetDB( DB_SLAVE );\r
$objectStats = array();\r
$knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );\r
$dbStore = null;\r
+ $brokenRevisions = array();\r
\r
for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {\r
$chunkEnd = $chunkStart + $chunkSize - 1;\r
- //print "$chunkStart to $chunkEnd of $maxRevId\n";\r
+ //print "$chunkStart of $maxRevId\n";\r
\r
// Fetch revision rows\r
$oldIdMap = array();\r
}\r
$dbr->freeResult( $res );\r
\r
+ if ( !count( $oldIdMap ) ) {\r
+ continue;\r
+ }\r
+\r
// Fetch old_flags\r
$missingTextRows = array_flip( $oldIdMap );\r
$externalRevs = array();\r
\r
// Check for unrecognised flags\r
if ( count( array_diff( $flagArray, $knownFlags ) ) ) {\r
- print_r( array_diff( $flagArray, $knownFlags ) );\r
checkError( "Warning: invalid flags field \"$flags\"", $id );\r
}\r
}\r
continue;\r
}\r
\r
- $className = $matches[2];\r
+ $className = strtolower( $matches[2] );\r
if ( strlen( $className ) != $matches[1] ) {\r
checkError( "Error: invalid object header, wrong class name length", $oldId );\r
continue;\r
checkError( "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",\r
$concatBlobs[$row->old_id] );\r
}\r
- } elseif ( substr( $row->header, 0, strlen( CONCAT_HEADER ) ) != CONCAT_HEADER ) {\r
+ } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {\r
checkError( "Error: Incorrect object header for concat bulk row {$row->old_id}", \r
$concatBlobs[$row->old_id] );\r
} # else good\r
// next chunk\r
}\r
\r
- print "\n\nFlag statistics:\n";\r
+ print "\n\n" . count( $brokenRevisions ) . " broken revisions\n";\r
+\r
+ print "\nFlag statistics:\n";\r
$total = array_sum( $flagStats );\r
foreach ( $flagStats as $flag => $count ) {\r
printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );\r
}\r
\r
\r
+function checkError( $msg, $ids ) { // Print $msg for the given text row id(s) and record the matching revisions as broken
+ global $oldIdMap, $brokenRevisions;
+ if ( is_array( $ids ) && count( $ids ) == 1 ) { // A one-element array is reported like a single id
+ $ids = reset( $ids );
+ }
+ if ( is_array( $ids ) ) {
+ $revIds = array();
+ foreach ( $ids as $id ) {
+ $revIds = array_merge( $revIds, array_keys( $oldIdMap, $id ) ); // Every $oldIdMap key whose value equals this id
+ }
+ print "$msg in text rows " . implode( ', ', $ids ) . 
+ ", revisions " . implode( ', ', $revIds ) . "\n";
+ } else {
+ $id = $ids;
+ $revIds = array_keys( $oldIdMap, $id ); // $oldIdMap keys are what the messages report as revision ids
+ if ( count( $revIds ) == 1 ) {
+ print "$msg in old_id $id, rev_id {$revIds[0]}\n";
+ } else {
+ print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
+ }
+ }
+ $brokenRevisions = $brokenRevisions + array_flip( $revIds ); // Union keyed by revision id, so a revision reported twice is stored once
+}
+\r
function checkExternalConcatBlobs( $externalConcatBlobs ) {\r
static $dbStore = null;\r
$fname = 'checkExternalConcatBlobs';\r
array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ), \r
array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );\r
while ( $row = $extDb->fetchObject( $res ) ) {\r
- if ( $row->header != CONCAT_HEADER ) {\r
- checkError( "Error: invalid header on target of two-part ES URL", \r
- $oldIds[$row->blob_id] );\r
- } \r
+ if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {\r
+ checkError( "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL", \r
+ $oldIds[$row->blob_id] );\r
+ }\r
unset( $oldIds[$row->blob_id] );\r
\r
}\r
\r
// Print errors for missing blobs rows\r
foreach ( $oldIds as $blobId => $oldIds ) {\r
- checkError( "Error: missing target $blobId for two-part ES URL", $oldIds );\r
+ checkError( "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds );\r
}\r
}\r
}\r