From f1e450564ffecf9a337d88708c1d77929719f26d Mon Sep 17 00:00:00 2001
From: Amir Sarabadani
Date: Wed, 6 Jun 2018 00:59:11 +0200
Subject: [PATCH] Add maintenance to populate change_tag_def table and ct_tag_id field

Bug: T193871
Change-Id: I5e863ffcfad5f2b66fb8d50666494acae3480d1a
---
 autoload.php                                  |   1 +
 maintenance/populateChangeTagDef.php          | 225 ++++++++++++++
 .../maintenance/populateChangeTagDefTest.php  | 286 ++++++++++++++++++
 3 files changed, 512 insertions(+)
 create mode 100644 maintenance/populateChangeTagDef.php
 create mode 100644 tests/phpunit/maintenance/populateChangeTagDefTest.php

diff --git a/autoload.php b/autoload.php
index 67cd3b9943..76b41d8a8c 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1100,6 +1100,7 @@ $wgAutoloadLocalClasses = [
 	'PopulateArchiveRevId' => __DIR__ . '/maintenance/populateArchiveRevId.php',
 	'PopulateBacklinkNamespace' => __DIR__ . '/maintenance/populateBacklinkNamespace.php',
 	'PopulateCategory' => __DIR__ . '/maintenance/populateCategory.php',
+	'PopulateChangeTagDef' => __DIR__ . '/maintenance/populateChangeTagDef.php',
 	'PopulateContentModel' => __DIR__ . '/maintenance/populateContentModel.php',
 	'PopulateExternallinksIndex60' => __DIR__ . '/maintenance/populateExternallinksIndex60.php',
 	'PopulateFilearchiveSha1' => __DIR__ . '/maintenance/populateFilearchiveSha1.php',
diff --git a/maintenance/populateChangeTagDef.php b/maintenance/populateChangeTagDef.php
new file mode 100644
index 0000000000..c815d8dba4
--- /dev/null
+++ b/maintenance/populateChangeTagDef.php
@@ -0,0 +1,225 @@
+addDescription( 'Populate and improve accuracy of change_tag_def statistics' );
+		$this->addOption( 'dry-run', 'Print debug info instead of actually updating' );
+		$this->setBatchSize( 1000 );
+		$this->addOption(
+			'sleep',
+			'Sleep time (in seconds) between every batch',
+			false,
+			true
+		);
+		$this->lbFactory = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+	}
+
+	/**
+	 * Run the step that matches $wgChangeTagsSchemaMigrationStage.
+	 *
+	 * Below MIGRATION_NEW, counts are taken from change_tag.ct_tag and ct_tag_id is
+	 * backpopulated; otherwise counts are taken from change_tag.ct_tag_id.
+	 */
+	public function execute() {
+		global $wgChangeTagsSchemaMigrationStage;
+		$this->setBatchSize( $this->getOption( 'batch-size', $this->getBatchSize() ) );
+
+		$this->countDown( 5 );
+		if ( $wgChangeTagsSchemaMigrationStage < MIGRATION_NEW ) {
+			$this->updateCountTag();
+			$this->backpopulateChangeTagId();
+		} else {
+			$this->updateCountTagId();
+		}
+
+		// TODO: Implement
+		// $this->cleanZeroCountRows();
+	}
+
+	/**
+	 * Refresh change_tag_def.ctd_count from change_tag rows grouped by ct_tag_id.
+	 *
+	 * With --dry-run the planned updates are printed and nothing is written.
+	 */
+	private function updateCountTagId() {
+		$dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+
+		// This query can be pretty expensive, don't run it on master
+		$res = $dbr->select(
+			'change_tag',
+			[ 'ct_tag_id', 'hitcount' => 'count(*)' ],
+			[],
+			__METHOD__,
+			[ 'GROUP BY' => 'ct_tag_id' ]
+		);
+
+		$dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
+
+		foreach ( $res as $row ) {
+			// Skip the group of rows that has no tag id
+			if ( !$row->ct_tag_id ) {
+				continue;
+			}
+
+			if ( $this->hasOption( 'dry-run' ) ) {
+				// $row is an object, which implode() cannot join; print its fields explicitly
+				$this->output(
+					'This row will be updated: ' . $row->ct_tag_id . ', ' . $row->hitcount . "\n"
+				);
+				continue;
+			}
+
+			$dbw->update(
+				'change_tag_def',
+				[ 'ctd_count' => $row->hitcount ],
+				[ 'ctd_id' => $row->ct_tag_id ],
+				__METHOD__
+			);
+		}
+		$this->lbFactory->waitForReplication();
+	}
+
+	/**
+	 * Refresh per-tag counts from change_tag rows grouped by ct_tag, inserting a
+	 * change_tag_def row (ctd_user_defined = 0) for names that have none yet.
+	 *
+	 * With --dry-run the planned updates are printed and nothing is written.
+	 */
+	private function updateCountTag() {
+		$dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+
+		// This query can be pretty expensive, don't run it on master
+		$res = $dbr->select(
+			'change_tag',
+			[ 'ct_tag', 'hitcount' => 'count(*)' ],
+			[],
+			__METHOD__,
+			[ 'GROUP BY' => 'ct_tag' ]
+		);
+
+		$dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
+
+		foreach ( $res as $row ) {
+			// Hygiene check
+			if ( !$row->ct_tag ) {
+				continue;
+			}
+
+			if ( $this->hasOption( 'dry-run' ) ) {
+				$this->output(
+					'This row will be updated: ' . $row->ct_tag . ', ' . $row->hitcount . "\n"
+				);
+				continue;
+			}
+
+			$dbw->upsert(
+				'change_tag_def',
+				[
+					'ctd_name' => $row->ct_tag,
+					'ctd_user_defined' => 0,
+					'ctd_count' => $row->hitcount
+				],
+				[ 'ctd_name' ],
+				[ 'ctd_count' => $row->hitcount ],
+				__METHOD__
+			);
+		}
+		$this->lbFactory->waitForReplication();
+	}
+
+	/**
+	 * Backpopulate change_tag.ct_tag_id for every tag listed in change_tag_def.
+	 */
+	private function backpopulateChangeTagId() {
+		$dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+		$changeTagDefs = $dbr->select(
+			'change_tag_def',
+			[ 'ctd_name', 'ctd_id' ],
+			[],
+			__METHOD__
+		);
+
+		foreach ( $changeTagDefs as $row ) {
+			$this->backpopulateChangeTagPerTag( $row->ctd_name, $row->ctd_id );
+		}
+	}
+
+	/**
+	 * Set ct_tag_id, in batches, on the change_tag rows of one tag that lack it.
+	 *
+	 * @param string $tagName ct_tag value of the rows to update
+	 * @param int|string $tagId ctd_id to store in ct_tag_id
+	 */
+	private function backpopulateChangeTagPerTag( $tagName, $tagId ) {
+		$dbr = $this->lbFactory->getMainLB()->getConnection( DB_REPLICA );
+		$dbw = $this->lbFactory->getMainLB()->getConnection( DB_MASTER );
+		$sleep = (int)$this->getOption( 'sleep', 10 );
+		$lastId = 0;
+		while ( true ) {
+			// Given that indexes might not be there, it's better to use replica
+			$ids = $dbr->selectFieldValues(
+				'change_tag',
+				'ct_id',
+				[ 'ct_tag' => $tagName, 'ct_tag_id' => null, 'ct_id > ' . $lastId ],
+				__METHOD__,
+				// $lastId must be the highest id of the batch, so the batch has to be sorted
+				[ 'ORDER BY' => 'ct_id', 'LIMIT' => $this->getBatchSize() ]
+			);
+
+			if ( !$ids ) {
+				break;
+			}
+			$lastId = (int)end( $ids );
+
+			if ( $this->hasOption( 'dry-run' ) ) {
+				$this->output(
+					"These ids will be changed to have \"{$tagId}\" as tag id: " . implode( ', ', $ids ) . "\n"
+				);
+				continue;
+			}
+
+			$dbw->update(
+				'change_tag',
+				[ 'ct_tag_id' => $tagId ],
+				[ 'ct_id' => $ids ],
+				__METHOD__
+			);
+
+			$this->lbFactory->waitForReplication();
+			if ( $sleep > 0 ) {
+				sleep( $sleep );
+			}
+		}
+	}
+
+}
+
+$maintClass = PopulateChangeTagDef::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/tests/phpunit/maintenance/populateChangeTagDefTest.php b/tests/phpunit/maintenance/populateChangeTagDefTest.php
new file mode 100644
index 0000000000..719b46bcb0
--- /dev/null
+++ b/tests/phpunit/maintenance/populateChangeTagDefTest.php
@@ -0,0 +1,286 @@
+tablesUsed = [ 'change_tag', 'change_tag_def' ];
+
+		$this->cleanChangeTagTables();
+		$this->insertChangeTagData();
+	}
+
+	private function cleanChangeTagTables() {
+		wfGetDB( DB_MASTER )->delete( 'change_tag', '*' );
+		wfGetDB( DB_MASTER )->delete( 'change_tag_def', '*' );
+	}
+
+	private function insertChangeTagData() {
+		$changeTags = [];
+
+		$changeTags[] = [
+			'ct_rc_id' => 1234,
+			'ct_tag' => 'One Tag',
+		];
+
+		$changeTags[] = [
+			'ct_rc_id' => 1235,
+			'ct_tag' => 'Two Tags',
+		];
+
+		$changeTags[] = [
+			'ct_log_id' => 1236,
+			'ct_tag' => 'Two Tags',
+		];
+
+		$changeTags[] = [
+			'ct_rev_id' => 1237,
+			'ct_tag' => 'Three Tags',
+		];
+
+		$changeTags[] = [
+			'ct_rc_id' => 1238,
+			'ct_tag' => 'Three Tags',
+		];
+
+		$changeTags[] = [
+			'ct_log_id' => 1239,
+			'ct_tag' => 'Three Tags',
+		];
+
+		wfGetDB( DB_MASTER )->insert( 'change_tag', $changeTags );
+	}
+
+	public function testRun() {
+		$this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_WRITE_BOTH );
+		$this->maintenance->loadWithArgv( [ '--sleep', '0' ] );
+
+		$this->maintenance->execute();
+
+		$changeTagDefRows = [
+			(object)[
+				'ctd_name' => 'One Tag',
+				'ctd_count' => 1,
+			],
+			(object)[
+				'ctd_name' => 'Two Tags',
+				'ctd_count' => 2,
+			],
+			(object)[
+				'ctd_name' => 'Three Tags',
+				'ctd_count' => 3,
+			],
+		];
+
+		$actualChangeTagDefs = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag_def' ],
+			[ 'ctd_name', 'ctd_count' ],
+			[],
+			__METHOD__,
+			[ 'ORDER BY' => 'ctd_count' ]
+		);
+
+		$this->assertEquals( $changeTagDefRows, iterator_to_array( $actualChangeTagDefs, false ) );
+
+		// Check if change_tag is also backpopulated
+		$actualChangeTags = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag', 'change_tag_def' ],
+			[ 'ct_tag', 'ct_tag_id', 'ctd_count' ],
+			[],
+			__METHOD__,
+			[],
+			[ 'change_tag_def' => [ 'LEFT JOIN', 'ct_tag_id=ctd_id' ] ]
+		);
+		$mapping = [
+			'One Tag' => 1,
+			'Two Tags' => 2,
+			'Three Tags' => 3
+		];
+		foreach ( $actualChangeTags as $row ) {
+			$this->assertNotNull( $row->ct_tag_id );
+			$this->assertEquals( $mapping[$row->ct_tag], $row->ctd_count );
+		}
+	}
+
+	public function testRunUpdateHitCountMigrationNew() {
+		$this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_NEW );
+		$changeTagDefBadRows = [
+			[
+				'ctd_name' => 'One Tag',
+				'ctd_user_defined' => 0,
+				'ctd_count' => 50,
+			],
+			[
+				'ctd_name' => 'Two Tags',
+				'ctd_user_defined' => 0,
+				'ctd_count' => 4,
+			],
+			[
+				'ctd_name' => 'Three Tags',
+				'ctd_user_defined' => 0,
+				'ctd_count' => 3,
+			],
+		];
+		wfGetDB( DB_MASTER )->insert(
+			'change_tag_def',
+			$changeTagDefBadRows
+		);
+
+		$mapping = [
+			'One Tag' => 1,
+			'Two Tags' => 2,
+			'Three Tags' => 3
+		];
+		foreach ( $mapping as $tagName => $tagId ) {
+			wfGetDB( DB_MASTER )->update(
+				'change_tag',
+				[ 'ct_tag_id' => $tagId ],
+				[ 'ct_tag' => $tagName ]
+			);
+		}
+
+		$this->maintenance->loadWithArgv( [ '--sleep', '0' ] );
+
+		$this->maintenance->execute();
+
+		$changeTagDefRows = [
+			(object)[
+				'ctd_name' => 'One Tag',
+				'ctd_count' => 1,
+			],
+			(object)[
+				'ctd_name' => 'Two Tags',
+				'ctd_count' => 2,
+			],
+			(object)[
+				'ctd_name' => 'Three Tags',
+				'ctd_count' => 3,
+			],
+		];
+
+		$actualChangeTagDefs = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag_def' ],
+			[ 'ctd_name', 'ctd_count' ],
+			[],
+			__METHOD__,
+			[ 'ORDER BY' => 'ctd_count' ]
+		);
+
+		$this->assertEquals( $changeTagDefRows, iterator_to_array( $actualChangeTagDefs, false ) );
+	}
+
+	public function testRunUpdateHitCountMigrationWriteBoth() {
+		$this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_WRITE_BOTH );
+		$changeTagDefBadRows = [
+			[
+				'ctd_name' => 'One Tag',
+				'ctd_user_defined' => 0,
+				'ctd_count' => 50,
+			],
+			[
+				'ctd_name' => 'Two Tags',
+				'ctd_user_defined' => 0,
+				'ctd_count' => 4,
+			],
+			[
+				'ctd_name' => 'Three Tags',
+				'ctd_user_defined' => 0,
+				'ctd_count' => 3,
+			],
+		];
+		wfGetDB( DB_MASTER )->insert(
+			'change_tag_def',
+			$changeTagDefBadRows
+		);
+
+		$this->maintenance->loadWithArgv( [ '--sleep', '0' ] );
+
+		$this->maintenance->execute();
+
+		$changeTagDefRows = [
+			(object)[
+				'ctd_name' => 'One Tag',
+				'ctd_count' => 1,
+			],
+			(object)[
+				'ctd_name' => 'Two Tags',
+				'ctd_count' => 2,
+			],
+			(object)[
+				'ctd_name' => 'Three Tags',
+				'ctd_count' => 3,
+			],
+		];
+
+		$actualChangeTagDefs = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag_def' ],
+			[ 'ctd_name', 'ctd_count' ],
+			[],
+			__METHOD__,
+			[ 'ORDER BY' => 'ctd_count' ]
+		);
+
+		$this->assertEquals( $changeTagDefRows, iterator_to_array( $actualChangeTagDefs, false ) );
+	}
+
+	public function testDryRunMigrationNew() {
+		$this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_NEW );
+		$this->maintenance->loadWithArgv( [ '--dry-run', '--sleep', '0' ] );
+
+		$this->maintenance->execute();
+
+		$actualChangeTagDefs = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag_def' ],
+			[ 'ctd_id', 'ctd_name' ]
+		);
+
+		$this->assertEquals( [], iterator_to_array( $actualChangeTagDefs, false ) );
+
+		$actualChangeTags = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag' ],
+			[ 'ct_tag_id', 'ct_tag' ]
+		);
+
+		foreach ( $actualChangeTags as $row ) {
+			$this->assertNull( $row->ct_tag_id );
+			$this->assertNotNull( $row->ct_tag );
+		}
+	}
+
+	public function testDryRunMigrationWriteBoth() {
+		$this->setMwGlobals( 'wgChangeTagsSchemaMigrationStage', MIGRATION_WRITE_BOTH );
+		$this->maintenance->loadWithArgv( [ '--dry-run', '--sleep', '0' ] );
+
+		$this->maintenance->execute();
+
+		$actualChangeTagDefs = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag_def' ],
+			[ 'ctd_id', 'ctd_name' ]
+		);
+
+		$this->assertEquals( [], iterator_to_array( $actualChangeTagDefs, false ) );
+
+		$actualChangeTags = wfGetDB( DB_REPLICA )->select(
+			[ 'change_tag' ],
+			[ 'ct_tag_id', 'ct_tag' ]
+		);
+
+		foreach ( $actualChangeTags as $row ) {
+			$this->assertNull( $row->ct_tag_id );
+			$this->assertNotNull( $row->ct_tag );
+		}
+	}
+
+}
-- 
2.20.1