/**
 * Check for articles to fix after adding/deleting namespaces
 *
 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */
// Load the Maintenance base class that NamespaceDupes extends.
require_once __DIR__ . '/Maintenance.php';

use MediaWiki\Linker\LinkTarget;
use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\IResultWrapper;
use Wikimedia\Rdbms\IMaintainableDatabase;
/**
 * Maintenance script that checks for articles to fix after
 * adding/deleting namespaces.
 *
 * Pages and link-table rows whose title starts with "<Prefix>:" but which are
 * stored in the main (or Talk) namespace are reported and, with --fix, moved
 * or merged into the namespace that the prefix now designates.
 *
 * @ingroup Maintenance
 */
class NamespaceDupes extends Maintenance {

	/**
	 * NOTE(review): this property is not referenced by any method of this
	 * class; it is kept so the declaration documented here stays available.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $db;

	/** @var int Number of conflicting pages for which an action could be taken */
	private $resolvablePages = 0;
	/** @var int Number of conflicting pages found */
	private $totalPages = 0;

	/** @var int Number of conflicting link rows with a valid destination */
	private $resolvableLinks = 0;
	/** @var int Number of conflicting link rows found */
	private $totalLinks = 0;

	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );
		$this->addOption( 'fix', 'Attempt to automatically fix errors' );
		$this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
			"the correct title" );
		$this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
			"<text> appended after the article name", false, true );
		$this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
			"<text> prepended before the article name", false, true );
		$this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
			"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
			"the colon will be replaced with a hyphen.",
			false, true );
		$this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
			"specify the namespace ID of the destination.", false, true );
		$this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
			"begin with a conflicting prefix will be renamed, for example " .
			"Talk:File:Foo -> File_Talk:Foo" );
	}

	/**
	 * Script entry point: normalise the command-line options, run either the
	 * single-prefix check or the check of all namespaces, and print a verdict.
	 */
	public function execute() {
		$options = [
			'fix' => $this->hasOption( 'fix' ),
			'merge' => $this->hasOption( 'merge' ),
			'add-suffix' => $this->getOption( 'add-suffix', '' ),
			'add-prefix' => $this->getOption( 'add-prefix', '' ),
			'move-talk' => $this->hasOption( 'move-talk' ),
			'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
			'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ),
		];

		if ( $options['source-pseudo-namespace'] !== '' ) {
			$retval = $this->checkPrefix( $options );
		} else {
			$retval = $this->checkAll( $options );
		}

		if ( $retval ) {
			$this->output( "\nLooks good!\n" );
		} else {
			$this->output( "\nOh noeees\n" );
		}
	}

	/**
	 * Check all namespaces
	 *
	 * Builds a map of every known prefix (interwiki prefixes, canonical and
	 * localised namespace names, aliases and their case variants) to a
	 * namespace index, then checks pages and link tables for each of them.
	 *
	 * @param array $options Associative array of validated command-line options
	 *
	 * @return bool True if every conflicting page found was resolvable
	 */
	private function checkAll( $options ) {
		$contLang = MediaWikiServices::getInstance()->getContentLanguage();
		$spaces = [];

		// List interwikis first, so they'll be overridden
		// by any conflicting local namespaces.
		foreach ( $this->getInterwikiList() as $prefix ) {
			$name = $contLang->ucfirst( $prefix );
			$spaces[$name] = 0;
		}

		// Now pull in all canonical and alias namespaces...
		foreach (
			MediaWikiServices::getInstance()->getNamespaceInfo()->getCanonicalNamespaces()
			as $ns => $name
		) {
			// This includes $wgExtraNamespaces
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $contLang->getNamespaces() as $ns => $name ) {
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $this->getConfig()->get( 'NamespaceAliases' ) as $name => $ns ) {
			$spaces[$name] = $ns;
		}
		foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
			$spaces[$name] = $ns;
		}

		// We'll need to check for lowercase keys as well,
		// since we're doing case-sensitive searches in the db.
		$capitalLinks = $this->getConfig()->get( 'CapitalLinks' );
		foreach ( $spaces as $name => $ns ) {
			$moreNames = [];
			$moreNames[] = $contLang->uc( $name );
			$moreNames[] = $contLang->ucfirst( $contLang->lc( $name ) );
			$moreNames[] = $contLang->ucwords( $name );
			$moreNames[] = $contLang->ucwords( $contLang->lc( $name ) );
			$moreNames[] = $contLang->ucwordbreaks( $name );
			$moreNames[] = $contLang->ucwordbreaks( $contLang->lc( $name ) );
			if ( !$capitalLinks ) {
				// A by-value foreach walks a snapshot of the array, so the
				// entries appended here are not themselves revisited.
				foreach ( $moreNames as $altName ) {
					$moreNames[] = $contLang->lcfirst( $altName );
				}
				$moreNames[] = $contLang->lcfirst( $name );
			}
			foreach ( array_unique( $moreNames ) as $altName ) {
				if ( $altName !== $name ) {
					$spaces[$altName] = $ns;
				}
			}
		}

		// Sort by namespace index, and if there are two with the same index,
		// break the tie by sorting by name
		$origSpaces = $spaces;
		uksort( $spaces, static function ( $a, $b ) use ( $origSpaces ) {
			return $origSpaces[$a] <=> $origSpaces[$b]
				?: $a <=> $b;
		} );

		$ok = true;
		foreach ( $spaces as $name => $ns ) {
			// checkNamespace() comes first so it runs even after a failure
			$ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
		}

		$this->output( "{$this->totalPages} pages to fix, " .
			"{$this->resolvablePages} were resolvable.\n\n" );

		foreach ( $spaces as $name => $ns ) {
			if ( $ns != 0 ) {
				/* Fix up link destinations for non-interwiki links only.
				 *
				 * For example if a page has [[Foo:Bar]] and then a Foo namespace
				 * is introduced, pagelinks needs to be updated to have
				 * page_namespace = NS_FOO.
				 *
				 * If instead an interwiki prefix was introduced called "Foo",
				 * the link should instead be moved to the iwlinks table. If a new
				 * language is introduced called "Foo", or if there is a pagelink
				 * [[fr:Bar]] when interlanguage magic links are turned on, the
				 * link would have to be moved to the langlinks table. Let's put
				 * those cases in the too-hard basket for now. The consequences are
				 * not especially severe.
				 * @fixme Handle interwiki links, and pagelinks to Category:, File:
				 * which probably need reparsing.
				 */

				$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
				$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

				// The redirect table has interwiki links randomly mixed in, we
				// need to filter those out. For example [[w:Foo:Bar]] would
				// have rd_interwiki=w and rd_namespace=0, which would match the
				// query for a conflicting namespace "Foo" if filtering wasn't done.
				$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
					[ 'rd_interwiki' => null ] );
				$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
					[ 'rd_interwiki' => '' ] );
			}
		}

		$this->output( "{$this->totalLinks} links to fix, " .
			"{$this->resolvableLinks} were resolvable.\n" );

		return $ok;
	}

	/**
	 * Get the interwiki list
	 *
	 * @return string[] The iw_prefix value of every row returned by the interwiki lookup
	 */
	private function getInterwikiList() {
		$result = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

		return array_column( $result, 'iw_prefix' );
	}

	/**
	 * Check a given prefix and try to move it into the given destination namespace
	 *
	 * For every page returned by getTargetList() an action is chosen (abort,
	 * move or merge) and logged; the action is only carried out when the
	 * 'fix' option is set.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name
	 * @param array $options Associative array of validated command-line options
	 * @return bool True if no page had to be left unresolved
	 */
	private function checkNamespace( $ns, $name, $options ) {
		$targets = $this->getTargetList( $ns, $name, $options );
		$count = $targets->numRows();
		$this->totalPages += $count;
		if ( $count == 0 ) {
			return true;
		}

		$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';

		$ok = true;
		foreach ( $targets as $row ) {
			// Find the new title and determine the action to take

			$newTitle = $this->getDestinationTitle(
				$ns, $name, $row->page_namespace, $row->page_title );
			$logStatus = false;
			if ( !$newTitle ) {
				$logStatus = 'invalid title';
				$action = 'abort';
			} elseif ( $newTitle->exists() ) {
				if ( $options['merge'] ) {
					// canMerge() fills in $logStatus when it refuses
					if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
						$action = 'merge';
					} else {
						$action = 'abort';
					}
				} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
					$action = 'abort';
					$logStatus = 'dest title exists and --add-prefix not specified';
				} else {
					$newTitle = $this->getAlternateTitle( $newTitle, $options );
					if ( !$newTitle ) {
						$action = 'abort';
						$logStatus = 'alternate title is invalid';
					} elseif ( $newTitle->exists() ) {
						$action = 'abort';
						$logStatus = 'title conflict';
					} else {
						$action = 'move';
						$logStatus = 'alternate';
					}
				}
			} else {
				$action = 'move';
				$logStatus = 'no conflict';
			}

			// Take the action or log a dry run message

			$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
			$pageOK = true;

			switch ( $action ) {
				case 'abort':
					$this->output( "$logTitle *** $logStatus\n" );
					$pageOK = false;
					break;
				case 'move':
					$this->output( "$logTitle -> " .
						$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->movePage( $row->page_id, $newTitle );
					}
					break;
				case 'merge':
					$this->output( "$logTitle => " .
						$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->mergePage( $row, $newTitle );
					}
					break;
			}

			if ( $pageOK ) {
				$this->resolvablePages++;
			} else {
				$ok = false;
			}
		}

		return $ok;
	}

	/**
	 * Check and repair the destination fields in a link table
	 *
	 * Rows are processed in batches ordered by (title, from); the last row of
	 * a batch supplies the lower bound for the next one.
	 *
	 * @param string $table The link table name
	 * @param string $fieldPrefix The field prefix in the link table
	 * @param int $ns Destination namespace id
	 * @param string $name
	 * @param array $options Associative array of validated command-line options
	 * @param array $extraConds Extra conditions for the SQL query
	 */
	private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
		$extraConds = []
	) {
		$dbw = $this->getDB( DB_MASTER );

		$batchConds = [];
		$fromField = "{$fieldPrefix}_from";
		$namespaceField = "{$fieldPrefix}_namespace";
		$titleField = "{$fieldPrefix}_title";
		$batchSize = 500;
		while ( true ) {
			$res = $dbw->select(
				$table,
				[ $fromField, $namespaceField, $titleField ],
				array_merge( $batchConds, $extraConds, [
					$namespaceField => 0,
					$titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
				] ),
				__METHOD__,
				[
					'ORDER BY' => [ $titleField, $fromField ],
					'LIMIT' => $batchSize
				]
			);

			if ( $res->numRows() == 0 ) {
				break;
			}
			foreach ( $res as $row ) {
				$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
					"dbk={$row->$titleField}";
				$destTitle = $this->getDestinationTitle(
					$ns, $name, $row->$namespaceField, $row->$titleField );
				$this->totalLinks++;
				if ( !$destTitle ) {
					$this->output( "$table $logTitle *** INVALID\n" );
					continue;
				}
				$this->resolvableLinks++;
				if ( !$options['fix'] ) {
					$this->output( "$table $logTitle -> " .
						$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
					continue;
				}

				$dbw->update( $table,
					// SET
					[
						$namespaceField => $destTitle->getNamespace(),
						$titleField => $destTitle->getDBkey()
					],
					// WHERE
					[
						$namespaceField => 0,
						$titleField => $row->$titleField,
						$fromField => $row->$fromField
					],
					__METHOD__,
					// NOTE(review): IGNORE keeps a row that would duplicate an
					// already-correct link from aborting the run -- confirm
					// against the table's unique indexes.
					[ 'IGNORE' ]
				);
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . "\n" );
			}

			// $row still holds the last row of this batch; continue after it.
			$encLastTitle = $dbw->addQuotes( $row->$titleField );
			$encLastFrom = $dbw->addQuotes( $row->$fromField );

			$batchConds = [
				"$titleField > $encLastTitle " .
				"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];

			// NOTE(review): pause for replication between batches, the usual
			// practice for batched maintenance writes -- confirm it suits the
			// target MediaWiki version.
			MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
		}
	}

	/**
	 * Move the given pseudo-namespace, either replacing the colon with a hyphen
	 * (useful for pseudo-namespaces that conflict with interwiki links) or move
	 * them to another namespace if specified.
	 * @param array $options Associative array of validated command-line options
	 * @return bool The result of checkNamespace() for the requested prefix
	 */
	private function checkPrefix( $options ) {
		$prefix = $options['source-pseudo-namespace'];
		$ns = $options['dest-namespace'];
		$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

		return $this->checkNamespace( $ns, $prefix, $options );
	}

	/**
	 * Find pages in main and talk namespaces that have a prefix of the new
	 * namespace so we know titles that will need migrating
	 *
	 * The Talk namespace is only searched when 'move-talk' is set and $ns is a
	 * subject namespace.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name Prefix that is being made a namespace
	 * @param array $options Associative array of validated command-line options
	 *
	 * @return IResultWrapper Rows with page_id, page_title and page_namespace
	 */
	private function getTargetList( $ns, $name, $options ) {
		$dbw = $this->getDB( DB_MASTER );

		if (
			$options['move-talk'] &&
			MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns )
		) {
			$checkNamespaces = [ NS_MAIN, NS_TALK ];
		} else {
			$checkNamespaces = NS_MAIN;
		}

		return $dbw->select( 'page',
			[
				'page_id',
				'page_title',
				'page_namespace',
			],
			[
				'page_namespace' => $checkNamespaces,
				'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
			],
			__METHOD__
		);
	}

	/**
	 * Get the preferred destination title for a given target page.
	 * @param int $ns The destination namespace ID
	 * @param string $name The conflicting prefix
	 * @param int $sourceNs The source namespace
	 * @param string $sourceDbk The source DB key (i.e. page_title)
	 * @return Title|false False if no valid title that can exist could be built
	 */
	private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
		// Strip the leading "<name>:" from the stored key
		$dbk = substr( $sourceDbk, strlen( "$name:" ) );
		if ( $ns == 0 ) {
			// An interwiki; try an alternate encoding with '-' for ':'
			$dbk = "$name-" . $dbk;
		}
		$destNS = $ns;
		$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
		// Loose comparison on purpose: $sourceNs may arrive as a string from a DB row
		if ( $sourceNs == NS_TALK && $nsInfo->isSubject( $ns ) ) {
			// This is an associated talk page moved with the --move-talk feature.
			$destNS = $nsInfo->getTalk( $destNS );
		}
		$newTitle = Title::makeTitleSafe( $destNS, $dbk );
		if ( !$newTitle || !$newTitle->canExist() ) {
			return false;
		}

		return $newTitle;
	}

	/**
	 * Get an alternative title to move a page to. This is used if the
	 * preferred destination title already exists.
	 *
	 * The configured prefix/suffix is applied repeatedly, each round building
	 * on the previous candidate, until an unused title is found. If applying
	 * them no longer changes the DB key, the (existing) candidate is returned
	 * so that the caller can report the conflict instead of looping forever.
	 *
	 * @param LinkTarget $linkTarget
	 * @param array $options Associative array of validated command-line options
	 * @return Title|false False if neither option is set or a candidate is invalid
	 */
	private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
		$prefix = $options['add-prefix'];
		$suffix = $options['add-suffix'];
		if ( $prefix == '' && $suffix == '' ) {
			return false;
		}

		$current = $linkTarget;
		while ( true ) {
			$dbk = $prefix . $current->getDBkey() . $suffix;
			$title = Title::makeTitleSafe( $current->getNamespace(), $dbk );
			if ( !$title ) {
				return false;
			}
			if ( !$title->exists() || $title->getDBkey() === $current->getDBkey() ) {
				return $title;
			}
			$current = $title;
		}
	}

	/**
	 * Move a page by rewriting its namespace and title in the page table, and
	 * update the *_from_namespace fields of the link tables to match.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $newLinkTarget The new title link target
	 * @return bool Always true
	 */
	private function movePage( $id, LinkTarget $newLinkTarget ) {
		$dbw = $this->getDB( DB_MASTER );

		$dbw->update( 'page',
			[
				"page_namespace" => $newLinkTarget->getNamespace(),
				"page_title" => $newLinkTarget->getDBkey(),
			],
			[
				"page_id" => $id,
			],
			__METHOD__
		);

		// Update *_from_namespace in links tables
		$fromNamespaceTables = [
			'pagelinks' => 'pl',
			'templatelinks' => 'tl',
			'imagelinks' => 'il',
		];
		foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
			$dbw->update( $table,
				// SET
				[ "{$fieldPrefix}_from_namespace" => $newLinkTarget->getNamespace() ],
				// WHERE
				[ "{$fieldPrefix}_from" => $id ],
				__METHOD__
			);
		}

		return true;
	}

	/**
	 * Determine if we can merge a page.
	 * We check if an inaccessible revision would become the latest and
	 * deny the merge if so -- it's theoretically possible to update the
	 * latest revision, but opens a can of worms -- search engine updates,
	 * recentchanges review, etc.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $linkTarget The new link target
	 * @param string &$logStatus This is set to the log status message on failure
	 * @return bool
	 */
	private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
		$latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
		$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );
		if ( !$latestDest || !$latestSource ) {
			// Either lookup can come back empty; refuse rather than fail on null
			$logStatus = 'cannot merge since the latest revision could not be loaded';
			return false;
		}
		if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
			$logStatus = 'cannot merge since source is later';
			return false;
		}

		return true;
	}

	/**
	 * Merge page histories
	 *
	 * Reassigns every revision of the source page to the destination page,
	 * deletes the source page row, then runs a LinksDeletionUpdate for the
	 * source page.
	 *
	 * @param stdClass $row Page row
	 * @param Title $newTitle The new title
	 * @return bool Always true
	 */
	private function mergePage( $row, Title $newTitle ) {
		$dbw = $this->getDB( DB_MASTER );

		$id = $row->page_id;

		// Construct the WikiPage object we will need later, while the
		// page_id still exists. Note that this cannot use makeTitleSafe(),
		// we are deliberately constructing an invalid title.
		$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$sourceTitle->resetArticleID( $id );
		$wikiPage = new WikiPage( $sourceTitle );
		$wikiPage->loadPageData( 'fromdbmaster' );

		$destId = $newTitle->getArticleID();
		$this->beginTransaction( $dbw, __METHOD__ );
		$dbw->update( 'revision',
			// SET
			[ 'rev_page' => $destId ],
			// WHERE
			[ 'rev_page' => $id ],
			__METHOD__
		);

		$dbw->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

		$this->commitTransaction( $dbw, __METHOD__ );

		/* Call LinksDeletionUpdate to delete outgoing links from the old title,
		 * and update category counts.
		 *
		 * Calling external code with a fake broken Title is a fairly dubious
		 * idea. It's necessary because it's quite a lot of code to duplicate,
		 * but that also makes it fragile since it would be easy for someone to
		 * accidentally introduce an assumption of title validity to the code we
		 * call here.
		 */
		DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
		DeferredUpdates::doUpdates();

		return true;
	}
}
// Name the class to run, then include the file named by RUN_MAINTENANCE_IF_MAIN
// (presumably the runner that instantiates $maintClass -- defined outside this file).
$maintClass = NamespaceDupes::class;
require_once RUN_MAINTENANCE_IF_MAIN;