follow up r62353 Make ApiBase::requireOnlyOneParameter() accept parameters that are...
[lhc/web/wiklou.git] / maintenance / storage / trackBlobs.php
index 653a8c3..5f25e39 100644 (file)
@@ -6,10 +6,13 @@ require( dirname( __FILE__ ) .'/../commandLine.inc' );
 if ( count( $args ) < 1 ) {
        echo "Usage: php trackBlobs.php <cluster> [... <cluster>]\n";
        echo "Adds blobs from a given ES cluster to the blob_tracking table\n";
+       echo "Automatically deletes the tracking table and starts from the start again when restarted.\n";
+
        exit( 1 );
 }
 $tracker = new TrackBlobs( $args );
-$tracker->trackBlobs();
+$tracker->run();
+echo "All done.\n";
 
 class TrackBlobs {
        var $clusters, $textClause;
@@ -31,7 +34,7 @@ class TrackBlobs {
                }
        }
 
-       function trackBlobs() {
+       function run() {
                $this->initTrackingTable();
                $this->trackRevisions();
                $this->trackOrphanText();
@@ -42,9 +45,11 @@ class TrackBlobs {
 
        function initTrackingTable() {
                $dbw = wfGetDB( DB_MASTER );
-               if ( !$dbw->tableExists( 'blob_tracking' ) ) {
-                       $dbw->sourceFile( dirname( __FILE__ ) . '/blob_tracking.sql' );
+               if ( $dbw->tableExists( 'blob_tracking' ) ) {
+                       $dbw->query( 'DROP TABLE ' . $dbw->tableName( 'blob_tracking' ) );
+                       $dbw->query( 'DROP TABLE ' . $dbw->tableName( 'blob_orphans' ) );
                }
+               $dbw->sourceFile( dirname( __FILE__ ) . '/blob_tracking.sql' );
        }
 
        function getTextClause() {
@@ -55,7 +60,7 @@ class TrackBlobs {
                                if ( $this->textClause != '' ) {
                                        $this->textClause .= ' OR ';
                                }
-                               $this->textClause .= 'old_text LIKE ' . $dbr->addQuotes( $dbr->escapeLike( "DB://$cluster/" ) . '%' );
+                               $this->textClause .= 'old_text' . $dbr->buildLike( "DB://$cluster/", $dbr->anyString() );
                        }
                }
                return $this->textClause;
@@ -94,7 +99,7 @@ class TrackBlobs {
                                        'rev_id > ' . $dbr->addQuotes( $startId ),
                                        'rev_text_id=old_id',
                                        $textClause,
-                                       "old_flags LIKE '%external%'",
+                                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
                                ),
                                __METHOD__,
                                array(
@@ -170,7 +175,7 @@ class TrackBlobs {
                                array( 
                                        'old_id>' . $dbr->addQuotes( $startId ),
                                        $textClause,
-                                       "old_flags LIKE '%external%'",
+                                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
                                        'bt_text_id IS NULL'
                                ),
                                __METHOD__,
@@ -240,24 +245,37 @@ class TrackBlobs {
                        return;
                }
 
-               # Wait until the blob_tracking table is available in the slave
                $dbw = wfGetDB( DB_MASTER );
-               $dbr = wfGetDB( DB_SLAVE );
-               $pos = $dbw->getMasterPos();
-               $dbr->masterPosWait( $pos, 100000 );
 
                foreach ( $this->clusters as $cluster ) {
                        echo "Searching for orphan blobs in $cluster...\n";
                        $lb = wfGetLBFactory()->getExternalLB( $cluster );
-                       $extDB = $lb->getConnection( DB_SLAVE );
+                       try {
+                               $extDB = $lb->getConnection( DB_SLAVE );
+                       } catch ( DBConnectionError $e ) {
+                               if ( strpos( $e->error, 'Unknown database' ) !== false ) {
+                                       echo "No database on $cluster\n";
+                               } else {
+                                       echo "Error on $cluster: " . $e->getMessage() . "\n";
+                               }
+                               continue;
+                       }
+                       $table = $extDB->getLBInfo( 'blobs table' );
+                       if ( is_null( $table ) ) {
+                               $table = 'blobs';
+                       }
+                       if ( !$extDB->tableExists( $table ) ) {
+                               echo "No blobs table on cluster $cluster\n";
+                               continue;
+                       }
                        $startId = 0;
                        $batchesDone = 0;
                        $actualBlobs = gmp_init( 0 );
-                       $endId = $extDB->selectField( 'blobs', 'MAX(blob_id)', false, __METHOD__ );
+                       $endId = $extDB->selectField( $table, 'MAX(blob_id)', false, __METHOD__ );
 
                        // Build a bitmap of actual blob rows
                        while ( true ) {
-                               $res = $extDB->select( 'blobs',
+                               $res = $extDB->select( $table,
                                        array( 'blob_id' ), 
                                        array( 'blob_id > ' . $extDB->addQuotes( $startId ) ),
                                        __METHOD__,
@@ -287,6 +305,7 @@ class TrackBlobs {
                        // Traverse the orphan list
                        $insertBatch = array();
                        $id = 0;
+                       $numOrphans = 0;
                        while ( true ) {
                                $id = gmp_scan1( $orphans, $id );
                                if ( $id == -1 ) {
@@ -296,12 +315,18 @@ class TrackBlobs {
                                        'bo_cluster' => $cluster,
                                        'bo_blob_id' => $id
                                );
+                               if ( count( $insertBatch ) > $this->batchSize ) {
+                                       $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+                                       $insertBatch = array();
+                               }
+
                                ++$id;
+                               ++$numOrphans;
                        }
-
-                       // Insert the batch
-                       echo "Found " . count( $insertBatch ) . " orphan(s) in $cluster\n";
-                       $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+                       if ( $insertBatch ) {
+                               $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+                       }
+                       echo "Found $numOrphans orphan(s) in $cluster\n";
                }
        }
 }