/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @ingroup Maintenance
 */
22 use MediaWiki\MediaWikiServices
;
23 use MediaWiki\Revision\SlotRecord
;
24 use MediaWiki\Storage\BlobStore
;
25 use MediaWiki\Storage\NameTableStore
;
26 use MediaWiki\Storage\SqlBlobStore
;
27 use Wikimedia\Assert\Assert
;
28 use Wikimedia\Rdbms\IDatabase
;
29 use Wikimedia\Rdbms\IResultWrapper
;
31 require_once __DIR__
. '/Maintenance.php';
/**
 * Populate the content and slot tables.
 *
 * Walks the revision and/or archive table in batches of revision IDs. For every
 * row that has no slot row yet, a content row is inserted (or an existing one is
 * reused when `--reuse-content` is given) and a main-slot row pointing at that
 * content row is written.
 */
class PopulateContentTables extends Maintenance {

	/** @var IDatabase Connection used for all writes; set by initServices() */
	private $dbw;

	/** @var NameTableStore */
	private $contentModelStore;

	/** @var BlobStore */
	private $blobStore;

	/** @var int ID acquired from the slot role store for SlotRecord::MAIN */
	private $mainRoleId;

	/** @var array|null Map "{$modelId}:{$address}" to content_id */
	private $contentRowMap = null;

	/** @var int Number of slot rows inserted for the table currently being populated */
	private $count = 0;

	/** @var int Number of slot rows inserted over the whole run */
	private $totalCount = 0;

	/**
	 * Register the script description, its command line options and the
	 * default batch size.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Populate content and slot tables' );
		$this->addOption( 'table', 'revision or archive table, or `all` to populate both', false,
			true );
		$this->addOption( 'reuse-content',
			'Reuse content table rows when the address and model are the same. '
			. 'This will increase the script\'s time and memory usage, perhaps significantly.',
			false, false );
		$this->addOption( 'start-revision', 'The rev_id to start at', false, true );
		$this->addOption( 'start-archive', 'The ar_rev_id to start at', false, true );
		$this->setBatchSize( 500 );
	}

	/**
	 * Fetch the database connection and the services used by the rest of the
	 * script. Must be called before any table is populated.
	 */
	private function initServices() {
		$services = MediaWikiServices::getInstance();

		$this->dbw = $this->getDB( DB_MASTER );
		$this->contentModelStore = $services->getContentModelStore();
		$this->blobStore = $services->getBlobStore();
		$this->mainRoleId = $services->getSlotRoleStore()->acquireId( SlotRecord::MAIN );
	}

	/**
	 * Script entry point: populate every requested table.
	 *
	 * @return bool False if the schema migration stage does not allow writing
	 *  the new tables, true once all requested tables have been processed.
	 */
	public function execute() {
		global $wgMultiContentRevisionSchemaMigrationStage;

		$t0 = microtime( true );

		if ( ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) === 0 ) {
			$this->writeln(
				'...cannot update while \$wgMultiContentRevisionSchemaMigrationStage '
				. 'does not have the SCHEMA_COMPAT_WRITE_NEW bit set.'
			);
			return false;
		}

		$this->initServices();

		if ( $this->getOption( 'reuse-content', false ) ) {
			$this->loadContentMap();
		}

		foreach ( $this->getTables() as $table ) {
			$this->populateTable( $table );
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Done. Processed $this->totalCount rows in $elapsed seconds" );
		return true;
	}

	/**
	 * Resolve the `--table` option into the list of tables to populate.
	 * Aborts via fatalError() when the option has an unknown value.
	 *
	 * @return string[]
	 */
	private function getTables() {
		$table = $this->getOption( 'table', 'all' );
		$validTableOptions = [ 'all', 'revision', 'archive' ];

		// Strict comparison: only the exact option strings are acceptable.
		if ( !in_array( $table, $validTableOptions, true ) ) {
			$this->fatalError( 'Invalid table. Must be either `revision` or `archive` or `all`' );
		}

		if ( $table === 'all' ) {
			return [ 'revision', 'archive' ];
		}

		return [ $table ];
	}

	/**
	 * Load all existing content rows into $this->contentRowMap, reading the
	 * content table in batches ordered by content_id.
	 */
	private function loadContentMap() {
		$t0 = microtime( true );
		$this->writeln( "Loading existing content table rows..." );
		$this->contentRowMap = [];
		$dbr = $this->getDB( DB_REPLICA );
		// Highest content_id seen so far; false until the first batch was read.
		$from = false;
		while ( true ) {
			$res = $dbr->select(
				'content',
				[ 'content_id', 'content_address', 'content_model' ],
				$from ? 'content_id > ' . (int)$from : '',
				__METHOD__,
				[ 'ORDER BY' => 'content_id', 'LIMIT' => $this->getBatchSize() ]
			);
			if ( !$res || !$res->numRows() ) {
				break;
			}
			foreach ( $res as $row ) {
				$from = $row->content_id;
				$this->contentRowMap["{$row->content_model}:{$row->content_address}"] = $row->content_id;
			}
		}
		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Loaded " . count( $this->contentRowMap ) . " rows in $elapsed seconds" );
	}

	/**
	 * Populate content and slot rows for all rows of one table that do not
	 * have a slot row yet, working through the ID range in batches.
	 *
	 * @param string $table Either 'revision' or 'archive'
	 */
	private function populateTable( $table ) {
		$t0 = microtime( true );
		// The per-table counter starts over for each table; totalCount keeps the sum.
		$this->count = 0;
		$this->writeln( "Populating $table..." );

		// Both branches alias their columns to the same names, so the batch
		// processing below does not need to know which table it works on.
		if ( $table === 'revision' ) {
			$idField = 'rev_id';
			$tables = [ 'revision', 'slots', 'page' ];
			$fields = [
				'rev_id',
				'len' => 'rev_len',
				'sha1' => 'rev_sha1',
				'text_id' => 'rev_text_id',
				'content_model' => 'rev_content_model',
				'namespace' => 'page_namespace',
				'title' => 'page_title',
			];
			$joins = [
				'slots' => [ 'LEFT JOIN', 'rev_id=slot_revision_id' ],
				'page' => [ 'LEFT JOIN', 'rev_page=page_id' ],
			];
			$startOption = 'start-revision';
		} else {
			$idField = 'ar_rev_id';
			$tables = [ 'archive', 'slots' ];
			$fields = [
				'rev_id' => 'ar_rev_id',
				'len' => 'ar_len',
				'sha1' => 'ar_sha1',
				'text_id' => 'ar_text_id',
				'content_model' => 'ar_content_model',
				'namespace' => 'ar_namespace',
				'title' => 'ar_title',
			];
			$joins = [
				'slots' => [ 'LEFT JOIN', 'ar_rev_id=slot_revision_id' ],
			];
			$startOption = 'start-archive';
		}

		if ( !$this->dbw->fieldExists( $table, $fields['text_id'], __METHOD__ ) ) {
			$this->writeln( "No need to populate, $table.{$fields['text_id']} field does not exist" );
			return;
		}

		$minmax = $this->dbw->selectRow(
			$table,
			[ 'min' => "MIN( $idField )", 'max' => "MAX( $idField )" ],
			'',
			__METHOD__
		);
		// Only touch the row if the query actually produced one; writing a
		// property onto a failed (false) result must not happen.
		if ( $minmax && $this->hasOption( $startOption ) ) {
			$minmax->min = (int)$this->getOption( $startOption );
		}
		if ( !$minmax || !is_numeric( $minmax->min ) || !is_numeric( $minmax->max ) ) {
			// No rows? Use an empty range so the loop below does not run.
			$minmax = (object)[ 'min' => 1, 'max' => 0 ];
		}

		// Work with integers from here on; the values end up in SQL conditions.
		$minId = (int)$minmax->min;
		$maxId = (int)$minmax->max;
		$batchSize = $this->getBatchSize();

		for ( $startId = $minId; $startId <= $maxId; $startId += $batchSize ) {
			$endId = min( $startId + $batchSize - 1, $maxId );
			$rows = $this->dbw->select(
				$tables,
				$fields,
				[
					"$idField >= $startId",
					"$idField <= $endId",
					// Only rows without a slot row still need populating.
					'slot_revision_id IS NULL',
				],
				__METHOD__,
				[ 'ORDER BY' => 'rev_id' ],
				$joins
			);
			if ( $rows->numRows() !== 0 ) {
				$this->populateContentTablesForRowBatch( $rows, $startId, $table );
			}

			$elapsed = microtime( true ) - $t0;
			$this->writeln(
				"... $table processed up to revision id $endId of $maxId"
				. " ($this->count rows in $elapsed seconds)"
			);
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Done populating $table table. Processed $this->count rows in $elapsed seconds" );
	}

	/**
	 * Insert the content and slot rows for one batch inside a single
	 * transaction. On any exception the transaction is rolled back and the
	 * script is aborted via fatalError().
	 *
	 * @param IResultWrapper $rows Rows selected by populateTable()
	 * @param int $startId First ID of the batch, used in the error message only
	 * @param string $table Name of the source table, used in the error message only
	 */
	private function populateContentTablesForRowBatch( IResultWrapper $rows, $startId, $table ) {
		$this->beginTransaction( $this->dbw, __METHOD__ );

		if ( $this->contentRowMap === null ) {
			// Not reusing content rows across batches: use a map local to this batch.
			$map = [];
		} else {
			// Reusing content rows: update the shared map in place.
			$map = &$this->contentRowMap;
		}
		$contentKeys = [];

		try {
			// Step 1: Figure out content rows needing insertion.
			$contentRows = [];
			foreach ( $rows as $row ) {
				$revisionId = $row->rev_id;

				Assert::invariant( $revisionId !== null, 'rev_id must not be null' );

				$model = $this->getContentModel( $row );
				$modelId = $this->contentModelStore->acquireId( $model );
				$address = SqlBlobStore::makeAddressFromTextId( $row->text_id );

				$key = "{$modelId}:{$address}";
				$contentKeys[$revisionId] = $key;

				if ( !isset( $map[$key] ) ) {
					$this->fillMissingFields( $row, $model, $address );

					// Placeholder until the real content_id is read back in step 2;
					// it also keeps a later row with the same key from queueing
					// a second content row.
					$map[$key] = false;
					$contentRows[] = [
						'content_size' => (int)$row->len,
						'content_sha1' => $row->sha1,
						'content_model' => $modelId,
						'content_address' => $address,
					];
				}
			}

			// Step 2: Insert them, then read them back in for use in the next step.
			if ( $contentRows ) {
				$id = $this->dbw->selectField( 'content', 'MAX(content_id)', '', __METHOD__ );
				$this->dbw->insert( 'content', $contentRows, __METHOD__ );
				$res = $this->dbw->select(
					'content',
					[ 'content_id', 'content_model', 'content_address' ],
					'content_id > ' . (int)$id,
					__METHOD__
				);
				foreach ( $res as $row ) {
					$key = $row->content_model . ':' . $row->content_address;
					$map[$key] = $row->content_id;
				}
			}

			// Step 3: Insert the slot rows.
			$slotRows = [];
			foreach ( $rows as $row ) {
				$revisionId = $row->rev_id;
				$contentId = $map[$contentKeys[$revisionId]] ?? false;
				if ( $contentId === false ) {
					throw new \RuntimeException( "Content row for $revisionId not found after content insert" );
				}
				$slotRows[] = [
					'slot_revision_id' => $revisionId,
					'slot_role_id' => $this->mainRoleId,
					'slot_content_id' => $contentId,
					// There's no way to really know the previous revision, so assume no inheriting.
					// rev_parent_id can get changed on undeletions, and deletions can screw up
					// rev_timestamp ordering.
					'slot_origin' => $revisionId,
				];
			}
			$this->dbw->insert( 'slots', $slotRows, __METHOD__ );
			$this->count += count( $slotRows );
			$this->totalCount += count( $slotRows );
		} catch ( \Exception $e ) {
			$this->rollbackTransaction( $this->dbw, __METHOD__ );
			$this->fatalError( "Failed to populate content table $table row batch starting at $startId "
				. "due to exception: " . $e->__toString() );
		}

		$this->commitTransaction( $this->dbw, __METHOD__ );
	}

	/**
	 * Determine the content model of a row: the stored model if the row has
	 * one, otherwise the default model for the row's title.
	 *
	 * @param \stdClass $row Row with content_model, namespace and title fields
	 * @return string
	 */
	private function getContentModel( $row ) {
		if ( isset( $row->content_model ) ) {
			return $row->content_model;
		}

		$title = Title::makeTitle( $row->namespace, $row->title );

		return ContentHandler::getDefaultModelFor( $title );
	}

	/**
	 * Output a message followed by a newline.
	 *
	 * @param string $msg
	 */
	private function writeln( $msg ) {
		$this->output( "$msg\n" );
	}

	/**
	 * Compute any missing fields in $row.
	 * The way the missing values are computed must correspond to the way this is done in SlotRecord.
	 *
	 * @param object $row to be modified
	 * @param string $model
	 * @param string $address
	 */
	private function fillMissingFields( $row, $model, $address ) {
		if ( !isset( $row->content_model ) ) {
			// just for completeness
			$row->content_model = $model;
		}

		if ( isset( $row->len ) && isset( $row->sha1 ) && $row->sha1 !== '' ) {
			// Size and hash are both known, so the blob does not have to be loaded.
			return;
		}

		$blob = $this->blobStore->getBlob( $address );

		if ( !isset( $row->len ) ) {
			// NOTE: The nominal size of the content may not be the length of the raw blob.
			$handler = ContentHandler::getForModelID( $model );
			$content = $handler->unserializeContent( $blob );

			$row->len = $content->getSize();
		}

		if ( !isset( $row->sha1 ) || $row->sha1 === '' ) {
			$row->sha1 = SlotRecord::base36Sha1( $blob );
		}
	}
}

// Name the class to run, then hand over to the maintenance bootstrap.
$maintClass = 'PopulateContentTables';
require_once RUN_MAINTENANCE_IF_MAIN;