Update populateRevisionLength maintenance script to populate archive.ar_len
authorReedy <reedy@wikimedia.org>
Sat, 16 Mar 2013 01:37:27 +0000 (01:37 +0000)
committerChad Horohoe <chadh@wikimedia.org>
Mon, 28 Oct 2013 18:44:59 +0000 (11:44 -0700)
Bug: 24538
Change-Id: I0f685aa5ea48468279475b487a627ff73c70d5cd

RELEASE-NOTES-1.22
maintenance/populateRevisionLength.php

index bf0c67b..92699be 100644 (file)
@@ -260,6 +260,8 @@ production.
 * A hidden, empty input field was added to the edit form, and any edit that fills
   it in will be rejected. This prevents against the simplest form of spambots.
   Previously in the "SimpleAntiSpam" extension by Ryan Schmidt.
+* populateRevisionLength.php maintenance script updated to also populate
+  archive.ar_len field.
 
 === Bug fixes in 1.22 ===
 * Disable Special:PasswordReset when $wgEnableEmail is false. Previously one
index 3c69125..4acc579 100644 (file)
@@ -1,6 +1,7 @@
 <?php
 /**
- * Populates the rev_len field for old revisions created before MW 1.10.
+ * Populates the rev_len and ar_len fields for old revisions created
+ * before MW 1.10.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 require_once __DIR__ . '/Maintenance.php';
 
 /**
- * Maintenance script that populates the rev_len field for old revisions
- * created before MW 1.10.
+ * Maintenance script that populates the rev_len and ar_len fields
+ * for old revisions created before MW 1.10.
  *
  * @ingroup Maintenance
  */
 class PopulateRevisionLength extends LoggedUpdateMaintenance {
        public function __construct() {
                parent::__construct();
-               $this->mDescription = "Populates the rev_len field";
+               $this->mDescription = "Populates the rev_len and ar_len fields";
                $this->setBatchSize( 200 );
        }
 
        protected function getUpdateKey() {
-               return 'populate rev_len';
-       }
-
-       protected function updateSkippedMessage() {
-               return 'rev_len column of revision table already populated.';
+               return 'populate rev_len and ar_len';
        }
 
        public function doDBUpdates() {
                $db = $this->getDB( DB_MASTER );
                if ( !$db->tableExists( 'revision' ) ) {
                        $this->error( "revision table does not exist", true );
+               } elseif ( !$db->tableExists( 'archive' ) ) {
+                       $this->error( "archive table does not exist", true );
                } elseif ( !$db->fieldExists( 'revision', 'rev_len', __METHOD__ ) ) {
                        $this->output( "rev_len column does not exist\n\n", true );
                        return false;
                }
 
                $this->output( "Populating rev_len column\n" );
+               $rev = $this->doLenUpdates( 'revision', 'rev_id', 'rev' );
+
+               $this->output( "Populating ar_len column\n" );
+               $ar = $this->doLenUpdates( 'archive', 'ar_id', 'ar' );
+
+               $this->output( "rev_len and ar_len population complete [$rev revision rows, $ar archive rows].\n" );
+               return true;
+       }
 
-               $start = $db->selectField( 'revision', 'MIN(rev_id)', false, __METHOD__ );
-               $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
+       /**
+        * @param $table
+        * @param $idCol
+        * @param $prefix
+        * @return int
+        */
+       protected function doLenUpdates( $table, $idCol, $prefix ) {
+               $db = $this->getDB( DB_MASTER );
+               $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
+               $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
                if ( !$start || !$end ) {
-                       $this->output( "...revision table seems to be empty.\n" );
-                       return true;
+                       $this->output( "...$table table seems to be empty.\n" );
+                       return 0;
                }
 
                # Do remaining chunks
                $blockStart = intval( $start );
                $blockEnd = intval( $start ) + $this->mBatchSize - 1;
                $count = 0;
-               $missing = 0;
                $fields = Revision::selectFields();
                while ( $blockStart <= $end ) {
                        $this->output( "...doing rev_id from $blockStart to $blockEnd\n" );
                        $res = $db->select(
-                               'revision',
+                               $table,
                                $fields,
                                array(
-                                       "rev_id >= $blockStart",
-                                       "rev_id <= $blockEnd",
-                                       "rev_len IS NULL"
+                                       "$idCol >= $blockStart",
+                                       "$idCol <= $blockEnd",
+                                       "{$prefix}_len IS NULL"
                                ),
                                __METHOD__
                        );
+
+                       $db->begin( __METHOD__ );
                        # Go through and update rev_len from these rows.
                        foreach ( $res as $row ) {
-                               $rev = new Revision( $row );
-                               $content = $rev->getContent();
-                               if ( !$content ) {
-                                       # This should not happen, but sometimes does (bug 20757)
-                                       $this->output( "Content of revision {$row->rev_id} unavailable!\n" );
-                                       $missing++;
-                               }
-                               else {
-                                       # Update the row...
-                                       $db->update( 'revision',
-                                                        array( 'rev_len' => $content->getSize() ),
-                                                        array( 'rev_id' => $row->rev_id ),
-                                                        __METHOD__ );
+                               if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
                                        $count++;
                                }
                        }
+                       $db->commit( __METHOD__ );
+
                        $blockStart += $this->mBatchSize;
                        $blockEnd += $this->mBatchSize;
                        wfWaitForSlaves();
                }
 
-               $this->output( "rev_len population complete ... {$count} rows changed ({$missing} missing)\n" );
+               return $count;
+       }
+
+       /**
+        * @param $row
+        * @param $table
+        * @param $idCol
+        * @param $prefix
+        * @return bool
+        */
+       protected function upgradeRow( $row, $table, $idCol, $prefix ) {
+               $db = $this->getDB( DB_MASTER );
+
+               $rev = ( $table === 'archive' )
+                       ? Revision::newFromArchiveRow( $row )
+                       : new Revision( $row );
+
+               $content = $rev->getContent();
+               if ( !$content ) {
+                       # This should not happen, but sometimes does (bug 20757)
+                       $id = $row->$idCol;
+                       $this->output( "Content of $table $id unavailable!\n" );
+                       return false;
+               }
+
+               # Update the row...
+               $db->update( $table,
+                       array( "{$prefix}_len" => $content->getSize() ),
+                       array( $idCol => $row->$idCol ),
+                       __METHOD__
+               );
+
                return true;
        }
 }