Needed for selective updates of pages using a particular feature.
Intended to be run in production, so needs to scale.
Bug: T149723
Change-Id: If20fb1f91de8d4227def5b07d6d52b91161ed3fd
* Article::doEditContent() was marked as deprecated, to be removed in 1.30
or later.
* ContentHandler::runLegacyHooks() was removed.
+* refreshLinks.php can now be limited to a particular category with --category=...
+ or to a tracking category with --tracking-category=...
== Compatibility ==
'TitlePrefixSearch' => __DIR__ . '/includes/PrefixSearch.php',
'TitleValue' => __DIR__ . '/includes/title/TitleValue.php',
'TrackBlobs' => __DIR__ . '/maintenance/storage/trackBlobs.php',
+ 'TrackingCategories' => __DIR__ . '/includes/TrackingCategories.php',
'TraditionalImageGallery' => __DIR__ . '/includes/gallery/TraditionalImageGallery.php',
'TransactionProfiler' => __DIR__ . '/includes/libs/rdbms/TransactionProfiler.php',
'TransformParameterError' => __DIR__ . '/includes/media/MediaTransformOutput.php',
--- /dev/null
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Categories
+ */
+
+/**
+ * This class performs some operations related to tracking categories, such as creating
+ * a list of all such categories.
+ *
+ * The list is assembled from the core message keys declared below, the
+ * 'TrackingCategories' extension attribute, the deprecated 'TrackingCategories'
+ * configuration setting and, where enabled, the magic link tracking categories.
+ */
+class TrackingCategories {
+ /** @var Config Configuration read for 'TrackingCategories' and 'EnableMagicLinks' */
+ private $config;
+
+ /**
+ * Message keys of the tracking categories that exist in core
+ *
+ * @var string[]
+ */
+ private static $coreTrackingCategories = [
+ 'index-category',
+ 'noindex-category',
+ 'duplicate-args-category',
+ 'expensive-parserfunction-category',
+ 'post-expand-template-argument-category',
+ 'post-expand-template-inclusion-category',
+ 'hidden-category-category',
+ 'broken-file-category',
+ 'node-count-exceeded-category',
+ 'expansion-depth-exceeded-category',
+ 'restricted-displaytitle-ignored',
+ 'deprecated-self-close-category',
+ ];
+
+ /**
+ * @param Config $config Configuration to read the tracking category settings from
+ */
+ public function __construct( Config $config ) {
+ $this->config = $config;
+ }
+
+ /**
+ * Collect all tracking category message keys and resolve each one to the
+ * category title(s) it stands for.
+ *
+ * Every message is taken in the content language. A message whose raw text
+ * contains '{{' is rendered once for each valid namespace, so it can yield
+ * several categories (the resulting list is not de-duplicated). A rendered
+ * value of "-" marks the tracking category as disabled and yields no category.
+ * Message keys that do not form a valid title in the MediaWiki namespace are
+ * left out of the result entirely.
+ *
+ * @return array Map of message key => [ 'msg' => Title, 'cats' => Title[] ], where
+ *   'msg' is the title of the message in the MediaWiki namespace and 'cats' holds
+ *   the category titles the message resolves to (possibly empty)
+ */
+ public function getTrackingCategories() {
+ $categories = array_merge(
+ self::$coreTrackingCategories,
+ ExtensionRegistry::getInstance()->getAttribute( 'TrackingCategories' ),
+ $this->config->get( 'TrackingCategories' ) // deprecated
+ );
+
+ // Only include magic link tracking categories if they are enabled
+ $enableMagicLinks = $this->config->get( 'EnableMagicLinks' );
+ if ( $enableMagicLinks['ISBN'] ) {
+ $categories[] = 'magiclink-tracking-isbn';
+ }
+ if ( $enableMagicLinks['RFC'] ) {
+ $categories[] = 'magiclink-tracking-rfc';
+ }
+ if ( $enableMagicLinks['PMID'] ) {
+ $categories[] = 'magiclink-tracking-pmid';
+ }
+
+ $trackingCategories = [];
+ foreach ( $categories as $catMsg ) {
+ /*
+ * Check if the tracking category varies by namespace.
+ * If it does, resolve the message once for every valid namespace so that
+ * all possible category names are collected; otherwise resolve it once.
+ */
+ $msgObj = wfMessage( $catMsg )->inContentLanguage();
+ $allCats = [];
+ $catMsgTitle = Title::makeTitleSafe( NS_MEDIAWIKI, $catMsg );
+ if ( !$catMsgTitle ) {
+ // The key cannot be turned into a message title, so skip it altogether
+ continue;
+ }
+
+ // Match things like {{NAMESPACE}} and {{NAMESPACENUMBER}}.
+ // False positives are ok, this is just an efficiency shortcut
+ if ( strpos( $msgObj->plain(), '{{' ) !== false ) {
+ $ns = MWNamespace::getValidNamespaces();
+ foreach ( $ns as $namesp ) {
+ $tempTitle = Title::makeTitleSafe( $namesp, $catMsg );
+ if ( !$tempTitle ) {
+ continue;
+ }
+ // Render the message with a title from this namespace set on it
+ $catName = $msgObj->title( $tempTitle )->text();
+ # Allow tracking categories to be disabled by setting them to "-"
+ if ( $catName !== '-' ) {
+ $catTitle = Title::makeTitleSafe( NS_CATEGORY, $catName );
+ if ( $catTitle ) {
+ $allCats[] = $catTitle;
+ }
+ }
+ }
+ } else {
+ $catName = $msgObj->text();
+ # Allow tracking categories to be disabled by setting them to "-"
+ if ( $catName !== '-' ) {
+ $catTitle = Title::makeTitleSafe( NS_CATEGORY, $catName );
+ if ( $catTitle ) {
+ $allCats[] = $catTitle;
+ }
+ }
+ }
+ // An entry is recorded even when no category title could be resolved
+ $trackingCategories[$catMsg] = [
+ 'cats' => $allCats,
+ 'msg' => $catMsgTitle,
+ ];
+ }
+
+ return $trackingCategories;
+ }
+}
* to TrackingCategories::$coreTrackingCategories, and extensions
* should add to "TrackingCategories" in their extension.json.
*
+ * @todo Migrate some code to TrackingCategories
+ *
* @param string $msg Message key
* @param Title $title title of the page which is being tracked
* @return bool Whether the addition was successful
parent::__construct( 'TrackingCategories' );
}
- /**
- * Tracking categories that exist in core
- *
- * @var array
- */
- private static $coreTrackingCategories = [
- 'index-category',
- 'noindex-category',
- 'duplicate-args-category',
- 'expensive-parserfunction-category',
- 'post-expand-template-argument-category',
- 'post-expand-template-inclusion-category',
- 'hidden-category-category',
- 'broken-file-category',
- 'node-count-exceeded-category',
- 'expansion-depth-exceeded-category',
- 'restricted-displaytitle-ignored',
- 'deprecated-self-close-category',
- ];
-
function execute( $par ) {
$this->setHeaders();
$this->outputHeader();
</tr></thead>"
);
- $trackingCategories = $this->prepareTrackingCategoriesData();
+ $trackingCategories = new TrackingCategories( $this->getConfig() );
+ $categoryList = $trackingCategories->getTrackingCategories();
$batch = new LinkBatch();
- foreach ( $trackingCategories as $catMsg => $data ) {
+ foreach ( $categoryList as $catMsg => $data ) {
$batch->addObj( $data['msg'] );
foreach ( $data['cats'] as $catTitle ) {
$batch->addObj( $catTitle );
}
$batch->execute();
- Hooks::run( 'SpecialTrackingCategories::preprocess', [ $this, $trackingCategories ] );
+ Hooks::run( 'SpecialTrackingCategories::preprocess', [ $this, $categoryList ] );
$linkRenderer = $this->getLinkRenderer();
- foreach ( $trackingCategories as $catMsg => $data ) {
+ foreach ( $categoryList as $catMsg => $data ) {
$allMsgs = [];
$catDesc = $catMsg . '-desc';
$this->getOutput()->addHTML( Html::closeElement( 'table' ) );
}
- /**
- * Read the global and extract title objects from the corresponding messages
- * @return array Array( 'msg' => Title, 'cats' => Title[] )
- */
- private function prepareTrackingCategoriesData() {
- $categories = array_merge(
- self::$coreTrackingCategories,
- ExtensionRegistry::getInstance()->getAttribute( 'TrackingCategories' ),
- $this->getConfig()->get( 'TrackingCategories' ) // deprecated
- );
-
- // Only show magic link tracking categories if they are enabled
- $enableMagicLinks = $this->getConfig()->get( 'EnableMagicLinks' );
- if ( $enableMagicLinks['ISBN'] ) {
- $categories[] = 'magiclink-tracking-isbn';
- }
- if ( $enableMagicLinks['RFC'] ) {
- $categories[] = 'magiclink-tracking-rfc';
- }
- if ( $enableMagicLinks['PMID'] ) {
- $categories[] = 'magiclink-tracking-pmid';
- }
-
- $trackingCategories = [];
- foreach ( $categories as $catMsg ) {
- /*
- * Check if the tracking category varies by namespace
- * Otherwise only pages in the current namespace will be displayed
- * If it does vary, show pages considering all namespaces
- */
- $msgObj = $this->msg( $catMsg )->inContentLanguage();
- $allCats = [];
- $catMsgTitle = Title::makeTitleSafe( NS_MEDIAWIKI, $catMsg );
- if ( !$catMsgTitle ) {
- continue;
- }
-
- // Match things like {{NAMESPACE}} and {{NAMESPACENUMBER}}.
- // False positives are ok, this is just an efficiency shortcut
- if ( strpos( $msgObj->plain(), '{{' ) !== false ) {
- $ns = MWNamespace::getValidNamespaces();
- foreach ( $ns as $namesp ) {
- $tempTitle = Title::makeTitleSafe( $namesp, $catMsg );
- if ( !$tempTitle ) {
- continue;
- }
- $catName = $msgObj->title( $tempTitle )->text();
- # Allow tracking categories to be disabled by setting them to "-"
- if ( $catName !== '-' ) {
- $catTitle = Title::makeTitleSafe( NS_CATEGORY, $catName );
- if ( $catTitle ) {
- $allCats[] = $catTitle;
- }
- }
- }
- } else {
- $catName = $msgObj->text();
- # Allow tracking categories to be disabled by setting them to "-"
- if ( $catName !== '-' ) {
- $catTitle = Title::makeTitleSafe( NS_CATEGORY, $catName );
- if ( $catTitle ) {
- $allCats[] = $catTitle;
- }
- }
- }
- $trackingCategories[$catMsg] = [
- 'cats' => $allCats,
- 'msg' => $catMsgTitle,
- ];
- }
-
- return $trackingCategories;
- }
-
protected function getGroupName() {
return 'pages';
}
* @ingroup Maintenance
*/
class RefreshLinks extends Maintenance {
+ const REPORTING_INTERVAL = 100;
+
/** @var int|bool */
protected $namespace = false;
$this->addOption( 'dfn-chunk-size', 'Maximum number of existent IDs to check per ' .
'query, default 100000', false, true );
$this->addOption( 'namespace', 'Only fix pages in this namespace', false, true );
+ $this->addOption( 'category', 'Only fix pages in this category', false, true );
+ $this->addOption( 'tracking-category', 'Only fix pages in this tracking category', false, true );
$this->addArg( 'start', 'Page_id to start from, default 1', false );
$this->setBatchSize( 100 );
}
} else {
$this->namespace = (int)$ns;
}
- if ( !$this->hasOption( 'dfn-only' ) ) {
+ if ( ( $category = $this->getOption( 'category', false ) ) !== false ) {
+ // --category takes a plain category name; validate it by building a Title
+ $title = Title::makeTitleSafe( NS_CATEGORY, $category );
+ if ( !$title ) {
+ $this->error( "'$category' is an invalid category name!\n", true );
+ }
+ // refreshCategory() is type-hinted to take a Title, so hand over the
+ // validated Title object rather than the raw option string
+ $this->refreshCategory( $title );
+ } elseif ( ( $category = $this->getOption( 'tracking-category', false ) ) !== false ) {
+ // --tracking-category takes a tracking category key, not a category name
+ $this->refreshTrackingCategory( $category );
+ } elseif ( !$this->hasOption( 'dfn-only' ) ) {
$new = $this->getOption( 'new-only', false );
$redir = $this->getOption( 'redirects-only', false );
$oldRedir = $this->getOption( 'old-redirects-only', false );
private function doRefreshLinks( $start, $newOnly = false,
$end = null, $redirectsOnly = false, $oldRedirectsOnly = false
) {
- $reportingInterval = 100;
$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
if ( $start === null ) {
$i = 0;
foreach ( $res as $row ) {
- if ( !( ++$i % $reportingInterval ) ) {
+ if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
$this->output( "$i\n" );
wfWaitForSlaves();
}
$i = 0;
foreach ( $res as $row ) {
- if ( !( ++$i % $reportingInterval ) ) {
+ if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
$this->output( "$i\n" );
wfWaitForSlaves();
}
for ( $id = $start; $id <= $end; $id++ ) {
- if ( !( $id % $reportingInterval ) ) {
+ if ( !( $id % self::REPORTING_INTERVAL ) ) {
$this->output( "$id\n" );
wfWaitForSlaves();
}
for ( $id = $start; $id <= $end; $id++ ) {
- if ( !( $id % $reportingInterval ) ) {
+ if ( !( $id % self::REPORTING_INTERVAL ) ) {
$this->output( "$id\n" );
wfWaitForSlaves();
}
* @param string $var Field name
* @param mixed $start First value to include or null
* @param mixed $end Last value to include or null
+ * @return string
*/
private static function intervalCond( IDatabase $db, $var, $start, $end ) {
if ( $start === null && $end === null ) {
return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}";
}
}
+
+ /**
+ * Refreshes links for the pages in every category that a tracking category
+ * key resolves to
+ *
+ * @param string $category Tracking category key, looked up via getPossibleCategories()
+ */
+ private function refreshTrackingCategory( $category ) {
+ $titles = $this->getPossibleCategories( $category );
+
+ if ( !$titles ) {
+ // Only report the problem; error() is called without the die flag here,
+ // and the loop below simply has nothing to iterate over
+ $this->error( "Tracking category '$category' is disabled\n" );
+ }
+
+ foreach ( $titles as $title ) {
+ $this->refreshCategory( $title );
+ }
+ }
+
+ /**
+ * Refreshes links for every page that is a member of the given category
+ *
+ * Members are read from a replica in batches of $this->mBatchSize rows,
+ * ordered by (cl_timestamp, cl_from); each batch resumes right after the
+ * last row handled by the previous one. If $this->namespace is set, only
+ * pages in that namespace are processed.
+ *
+ * @param Title $category Category whose member pages should be refreshed
+ */
+ private function refreshCategory( Title $category ) {
+ $this->output( "Refreshing pages in category '{$category->getText()}'...\n" );
+
+ $dbr = $this->getDB( DB_REPLICA );
+ $conds = [
+ 'page_id=cl_from',
+ 'cl_to' => $category->getDBkey(),
+ ];
+ if ( $this->namespace !== false ) {
+ $conds['page_namespace'] = $this->namespace;
+ }
+
+ $i = 0;
+ // Paging position: timestamp and page id of the last row processed so far
+ $timestamp = '';
+ $lastId = 0;
+ do {
+ // Keep the raw timestamp and its quoted SQL form in separate variables
+ $quotedTimestamp = $dbr->addQuotes( $timestamp );
+ // Cast so that only an integer literal is ever interpolated into the SQL
+ $lastId = (int)$lastId;
+ $finalConds = $conds;
+ $finalConds[] = "(cl_timestamp > $quotedTimestamp OR " .
+ "(cl_timestamp = $quotedTimestamp AND cl_from > $lastId))";
+ $res = $dbr->select( [ 'page', 'categorylinks' ],
+ [ 'page_id', 'cl_timestamp' ],
+ $finalConds,
+ __METHOD__,
+ [
+ 'ORDER BY' => [ 'cl_timestamp', 'cl_from' ],
+ 'LIMIT' => $this->mBatchSize,
+ ]
+ );
+
+ foreach ( $res as $row ) {
+ // Report progress and wait for replication at a fixed interval
+ if ( !( ++$i % self::REPORTING_INTERVAL ) ) {
+ $this->output( "$i\n" );
+ wfWaitForSlaves();
+ }
+ $lastId = $row->page_id;
+ $timestamp = $row->cl_timestamp;
+ self::fixLinksFromArticle( $row->page_id );
+ }
+
+ // A batch shorter than the limit means there are no further rows
+ } while ( $res->numRows() == $this->mBatchSize );
+ }
+
+ /**
+ * Looks up the category titles that a tracking category key can resolve to
+ *
+ * An unknown key is reported through error() with the die flag set.
+ *
+ * @param string $categoryKey Tracking category key, as used to index the
+ *   result of TrackingCategories::getTrackingCategories()
+ * @return Title[]
+ */
+ private function getPossibleCategories( $categoryKey ) {
+ $tracker = new TrackingCategories( $this->getConfig() );
+ $known = $tracker->getTrackingCategories();
+ if ( !isset( $known[$categoryKey] ) ) {
+ $this->error( "Unknown tracking category {$categoryKey}\n", true );
+ // Only reached if error() returns despite the die flag
+ return null;
+ }
+ return $known[$categoryKey]['cats'];
+ }
}
$maintClass = 'RefreshLinks';