--- /dev/null
+<?php
+/**
+ * Obligatory redundant license notice. Exception to the GPL's "keep intact all
+ * the notices" clause with respect to this notice is hereby granted.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+use MediaWiki\MediaWikiServices;
+use Wikimedia\Rdbms\IDatabase;
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script to rename titles affected by changes to Unicode (or
+ * otherwise to Language::ucfirst).
+ *
+ * @ingroup Maintenance
+ */
+class UppercaseTitlesForUnicodeTransition extends Maintenance {
+
+ /** @var bool */
+ private $run = false;
+
+ /** @var array */
+ private $charmap = [];
+
+ /** @var User */
+ private $user;
+
+ /** @var string */
+ private $reason = 'Uppercasing title for Unicode upgrade';
+
+ /** @var string[] */
+ private $tags = [];
+
+ /** @var array */
+ private $seenUsers = [];
+
+ /** @var array|null */
+ private $namespaces = null;
+
+ /** @var string|null */
+ private $prefix = null, $suffix = null;
+
+ /** @var int|null */
+ private $prefixNs = null;
+
+ /** @var string[]|null */
+ private $tables = null;
+
+ public function __construct() {
+ parent::__construct();
+ $this->addDescription(
+ "Rename titles when changing behavior of Language::ucfirst().\n"
+ . "\n"
+ . "This script skips User and User_talk pages for registered users, as renaming of users "
+ . "is too complex to try to implement here. Use something like Extension:Renameuser to "
+ . "clean those up; this script can provide a list of user names affected."
+ );
+ $this->addOption(
+ 'charmap', 'Character map generated by maintenance/language/generateUcfirstOverrides.php',
+ true, true
+ );
+ $this->addOption(
+ 'user', 'System user to use to do the renames. Default is "Maintenance script".', false, true
+ );
+ $this->addOption(
+ 'steal',
+ 'If the username specified by --user exists, specify this to force conversion to a system user.'
+ );
+ $this->addOption(
+ 'run', 'If not specified, the script will not actually perform any moves (i.e. it will dry-run).'
+ );
+ $this->addOption(
+ 'prefix', 'When the new title already exists, add this prefix.', false, true
+ );
+ $this->addOption(
+ 'suffix', 'When the new title already exists, add this suffix.', false, true
+ );
+ $this->addOption( 'reason', 'Reason to use when moving pages.', false, true );
+ $this->addOption( 'tag', 'Change tag to apply when moving pages.', false, true );
+ $this->addOption( 'tables', 'Comma-separated list of database tables to process.', false, true );
+ $this->addOption(
+ 'userlist', 'Filename to which to output usernames needing rename.', false, true
+ );
+ $this->setBatchSize( 1000 );
+ }
+
+ public function execute() {
+ $this->run = $this->getOption( 'run', false );
+
+ if ( $this->run ) {
+ $username = $this->getOption( 'user', 'Maintenance script' );
+ $steal = $this->getOption( 'steal', false );
+ $this->user = User::newSystemUser( $username, [ 'steal' => $steal ] );
+ if ( !$this->user ) {
+ $user = User::newFromName( $username );
+ if ( !$steal && $user && $user->isLoggedIn() ) {
+ $this->fatalError( "User $username already exists.\n"
+ . "Use --steal if you really want to steal it from the human who currently owns it."
+ );
+ }
+ $this->fatalError( "Could not obtain system user $username." );
+ }
+ }
+
+ $tables = $this->getOption( 'tables' );
+ if ( $tables !== null ) {
+ $this->tables = explode( ',', $tables );
+ }
+
+ $prefix = $this->getOption( 'prefix' );
+ if ( $prefix !== null ) {
+ $title = Title::newFromText( $prefix . 'X' );
+ if ( !$title || substr( $title->getDBkey(), -1 ) !== 'X' ) {
+ $this->fatalError( 'Invalid --prefix.' );
+ }
+ if ( $title->getNamespace() <= NS_MAIN || $title->isExternal() ) {
+ $this->fatalError( 'Invalid --prefix. It must not be in namespace 0 and must not be external' );
+ }
+ $this->prefixNs = $title->getNamespace();
+ $this->prefix = substr( $title->getText(), 0, -1 );
+ }
+ $this->suffix = $this->getOption( 'suffix' );
+
+ $this->reason = $this->getOption( 'reason' ) ?: $this->reason;
+ $this->tags = (array)$this->getOption( 'tag', null );
+
+ $charmapFile = $this->getOption( 'charmap' );
+ if ( !file_exists( $charmapFile ) ) {
+ $this->fatalError( "Charmap file $charmapFile does not exist." );
+ }
+ if ( !is_file( $charmapFile ) || !is_readable( $charmapFile ) ) {
+ $this->fatalError( "Charmap file $charmapFile is not readable." );
+ }
+ $this->charmap = require $charmapFile;
+ if ( !is_array( $this->charmap ) ) {
+ $this->fatalError( "Charmap file $charmapFile did not return a PHP array." );
+ }
+ $this->charmap = array_filter(
+ $this->charmap,
+ function ( $v, $k ) {
+ if ( mb_strlen( $k ) !== 1 ) {
+ $this->error( "Ignoring mapping from multi-character key '$k' to '$v'" );
+ return false;
+ }
+ return $k !== $v;
+ },
+ ARRAY_FILTER_USE_BOTH
+ );
+ if ( !$this->charmap ) {
+ $this->fatalError( "Charmap file $charmapFile did not contain any usable character mappings." );
+ }
+
+ $db = $this->getDB( $this->run ? DB_MASTER : DB_REPLICA );
+ $this->processTable( $db, true, 'page', 'page_namespace', 'page_title', [ 'page_id' ] );
+ $this->processTable( $db, true, 'image', NS_FILE, 'img_name', [] );
+ $this->processTable(
+ $db, false, 'archive', 'ar_namespace', 'ar_title', [ 'ar_timestamp', 'ar_id' ]
+ );
+ $this->processTable( $db, false, 'filearchive', NS_FILE, 'fa_name', [ 'fa_timestamp', 'fa_id' ] );
+ $this->processTable( $db, false, 'logging', 'log_namespace', 'log_title', [ 'log_id' ] );
+ $this->processTable( $db, false, 'redirect', 'rd_namespace', 'rd_title', [ 'rd_from' ] );
+ $this->processTable( $db, false, 'protected_titles', 'pt_namespace', 'pt_title', [] );
+ $this->processUsers( $db );
+ }
+
+ /**
+ * Get batched LIKE conditions from the charmap
+ * @param IDatabase $db Database handle
+ * @param string $field Field name
+ * @param int $batchSize Size of the batches
+ * @return array
+ */
+ private function getLikeBatches( IDatabase $db, $field, $batchSize = 100 ) {
+ $ret = [];
+ $likes = [];
+ foreach ( $this->charmap as $from => $to ) {
+ $likes[] = $field . $db->buildLike( $from, $db->anyString() );
+ if ( count( $likes ) >= $batchSize ) {
+ $ret[] = $db->makeList( $likes, $db::LIST_OR );
+ $likes = [];
+ }
+ }
+ if ( $likes ) {
+ $ret[] = $db->makeList( $likes, $db::LIST_OR );
+ }
+ return $ret;
+ }
+
+ /**
+ * Get the list of namespaces to operate on
+ *
+ * We only care about namespaces where we can move pages and titles are
+ * capitalized.
+ *
+ * @return int[]
+ */
+ private function getNamespaces() {
+ if ( $this->namespaces === null ) {
+ $nsinfo = MediaWikiServices::getInstance()->getNamespaceInfo();
+ $this->namespaces = array_filter(
+ array_keys( $nsinfo->getCanonicalNamespaces() ),
+ function ( $ns ) use ( $nsinfo ) {
+ return $nsinfo->isMovable( $ns ) && $nsinfo->isCapitalized( $ns );
+ }
+ );
+ usort( $this->namespaces, function ( $ns1, $ns2 ) use ( $nsinfo ) {
+ if ( $ns1 === $ns2 ) {
+ return 0;
+ }
+
+ $s1 = $nsinfo->getSubject( $ns1 );
+ $s2 = $nsinfo->getSubject( $ns2 );
+
+ // Order by subject namespace number first
+ if ( $s1 !== $s2 ) {
+ return $s1 < $s2 ? -1 : 1;
+ }
+
+ // Second, put subject namespaces before non-subject namespaces
+ if ( $s1 === $ns1 ) {
+ return -1;
+ }
+ if ( $s2 === $ns2 ) {
+ return 1;
+ }
+
+ // Don't care about the relative order if there are somehow
+ // multiple non-subject namespaces for a namespace.
+ return 0;
+ } );
+ }
+
+ return $this->namespaces;
+ }
+
+ /**
+ * Check if a ns+title is a registered user's page
+ * @param IDatabase $db Database handle
+ * @param int $ns
+ * @param string $title
+ * @return bool
+ */
+ private function isUserPage( IDatabase $db, $ns, $title ) {
+ if ( $ns !== NS_USER && $ns !== NS_USER_TALK ) {
+ return false;
+ }
+
+ list( $base ) = explode( '/', $title, 2 );
+ if ( !isset( $this->seenUsers[$base] ) ) {
+ // Can't use User directly because it might uppercase the name
+ $this->seenUsers[$base] = (bool)$db->selectField(
+ 'user',
+ 'user_id',
+ [ 'user_name' => strtr( $base, '_', ' ' ) ],
+ __METHOD__
+ );
+ }
+ return $this->seenUsers[$base];
+ }
+
+ /**
+ * Munge a target title, if necessary
+ * @param IDatabase $db Database handle
+ * @param Title $oldTitle
+ * @param Title &$newTitle
+ * @return bool If $newTitle is (now) ok
+ */
+ private function mungeTitle( IDatabase $db, Title $oldTitle, Title &$newTitle ) {
+ $nt = $newTitle->getPrefixedText();
+
+ $munge = false;
+ if ( $this->isUserPage( $db, $newTitle->getNamespace(), $newTitle->getText() ) ) {
+ $munge = 'Target title\'s user exists';
+ } else {
+ $mp = new MovePage( $oldTitle, $newTitle );
+ $status = $mp->isValidMove();
+ if ( !$status->isOK() && $status->hasMessage( 'articleexists' ) ) {
+ $munge = 'Target title exists';
+ }
+ }
+ if ( !$munge ) {
+ return true;
+ };
+
+ if ( $this->prefix !== null ) {
+ $newTitle = Title::makeTitle(
+ $this->prefixNs,
+ $this->prefix . $oldTitle->getPrefixedText() . ( $this->suffix ?? '' )
+ );
+ } elseif ( $this->suffix !== null ) {
+ $newTitle = Title::makeTitle( $newTitle->getNamespace(), $newTitle->getText() . $this->suffix );
+ } else {
+ $this->error(
+ "Cannot move {$oldTitle->getPrefixedText()} → $nt: "
+ . "$munge and no --prefix or --suffix was given"
+ );
+ return false;
+ }
+
+ if ( !$newTitle->isValid() ) {
+ $this->error(
+ "Cannot move {$oldTitle->getPrefixedText()} → $nt: "
+ . "$munge and munged title '{$newTitle->getPrefixedText()}' is not valid"
+ );
+ return false;
+ }
+ if ( $newTitle->exists() ) {
+ $this->error(
+ "Cannot move {$oldTitle->getPrefixedText()} → $nt: "
+ . "$munge and munged title '{$newTitle->getPrefixedText()}' also exists"
+ );
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Use MovePage to move a title
+ * @param IDatabase $db Database handle
+ * @param int $ns
+ * @param string $title
+ * @return bool|null True on success, false on error, null if skipped
+ */
+ private function doMove( IDatabase $db, $ns, $title ) {
+ $char = mb_substr( $title, 0, 1 );
+ if ( !array_key_exists( $char, $this->charmap ) ) {
+ $this->error(
+ "Query returned NS$ns $title, which does not begin with a character in the charmap."
+ );
+ return false;
+ }
+
+ if ( $this->isUserPage( $db, $ns, $title ) ) {
+ $this->output( "... Skipping user page NS$ns $title\n" );
+ return null;
+ }
+
+ $oldTitle = Title::makeTitle( $ns, $title );
+ $newTitle = Title::makeTitle( $ns, $this->charmap[$char] . mb_substr( $title, 1 ) );
+ if ( !$this->mungeTitle( $db, $oldTitle, $newTitle ) ) {
+ return false;
+ }
+
+ $mp = new MovePage( $oldTitle, $newTitle );
+ $status = $mp->isValidMove();
+ if ( !$status->isOK() ) {
+ $this->error(
+ "Invalid move {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()}: "
+ . $status->getMessage( false, false, 'en' )->useDatabase( false )->plain()
+ );
+ return false;
+ }
+
+ if ( !$this->run ) {
+ $this->output(
+ "Would rename {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()}\n"
+ );
+ return true;
+ }
+
+ $status = $mp->move( $this->user, $this->reason, false, $this->tags );
+ if ( !$status->isOK() ) {
+ $this->error(
+ "Move {$oldTitle->getPrefixedText()} → {$newTitle->getPrefixedText()} failed: "
+ . $status->getMessage( false, false, 'en' )->useDatabase( false )->plain()
+ );
+ }
+ return $status->isOK();
+ }
+
+ /**
+ * Directly update a database row
+ * @param IDatabase $db Database handle
+ * @param string $table
+ * @param string|int $nsField
+ * @param string $titleField
+ * @return bool|null True on success, false on error, null if skipped
+ */
+ private function doUpdate( IDatabase $db, $table, $nsField, $titleField, $row ) {
+ $ns = is_int( $nsField ) ? $nsField : (int)$row->$nsField;
+ $title = $row->$titleField;
+
+ $char = mb_substr( $title, 0, 1 );
+ if ( !array_key_exists( $char, $this->charmap ) ) {
+ $r = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
+ $this->error(
+ "Query returned $r, but title does not begin with a character in the charmap."
+ );
+ return false;
+ }
+
+ if ( $this->isUserPage( $db, $ns, $title ) ) {
+ $this->output( "... Skipping user page NS$ns $title\n" );
+ return null;
+ }
+
+ $oldTitle = Title::makeTitle( $ns, $title );
+ $newTitle = Title::makeTitle( $ns, $this->charmap[$char] . mb_substr( $title, 1 ) );
+ if ( !$this->mungeTitle( $db, $oldTitle, $newTitle ) ) {
+ return false;
+ }
+
+ if ( $this->run ) {
+ $db->update(
+ $table,
+ array_merge(
+ is_int( $nsField ) ? [] : [ $nsField => $newTitle->getNamespace() ],
+ [ $titleField => $newTitle->getDBkey() ]
+ ),
+ (array)$row,
+ __METHOD__
+ );
+ } else {
+ $r = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
+ $this->output( "Would set $r to {$newTitle->getPrefixedText()}\n" );
+ }
+
+ return true;
+ }
+
+ /**
+ * Rename entries in other tables
+ * @param IDatabase $db Database handle
+ * @param bool $doMove Whether to use MovePage or direct table manipulation
+ * @param string $table
+ * @param string|int $nsField
+ * @param string $titleField
+ * @param string[] $pkFields Additional fields to match a unique index
+ * starting with $nsField and $titleField.
+ */
+ private function processTable( IDatabase $db, $doMove, $table, $nsField, $titleField, $pkFields ) {
+ if ( $this->tables !== null && !in_array( $table, $this->tables, true ) ) {
+ $this->output( "Skipping table `$table`, not in --tables.\n" );
+ return;
+ }
+
+ $batchSize = $this->getBatchSize();
+ $namespaces = $this->getNamespaces();
+ $likes = $this->getLikeBatches( $db, $titleField );
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+
+ if ( is_int( $nsField ) ) {
+ $namespaces = array_intersect( $namespaces, [ $nsField ] );
+ }
+
+ if ( !$namespaces ) {
+ $this->output( "Skipping table `$table`, no valid namespaces.\n" );
+ return;
+ }
+
+ $this->output( "Processing table `$table`...\n" );
+
+ $selectFields = array_merge(
+ is_int( $nsField ) ? [] : [ $nsField ],
+ [ $titleField ],
+ $pkFields
+ );
+ $contFields = array_reverse( array_merge( [ $titleField ], $pkFields ) );
+
+ $lastReplicationWait = 0.0;
+ $count = 0;
+ $errors = 0;
+ foreach ( $namespaces as $ns ) {
+ foreach ( $likes as $like ) {
+ $cont = [];
+ do {
+ $res = $db->select(
+ $table,
+ $selectFields,
+ array_merge( [ "$nsField = $ns", $like ], $cont ),
+ __METHOD__,
+ [ 'ORDER BY' => array_merge( [ $titleField ], $pkFields ), 'LIMIT' => $batchSize ]
+ );
+ $cont = [];
+ foreach ( $res as $row ) {
+ $cont = '';
+ foreach ( $contFields as $field ) {
+ $v = $db->addQuotes( $row->$field );
+ if ( $cont === '' ) {
+ $cont = "$field > $v";
+ } else {
+ $cont = "$field > $v OR $field = $v AND ($cont)";
+ }
+ }
+ $cont = [ $cont ];
+
+ if ( $doMove ) {
+ $ns = is_int( $nsField ) ? $nsField : (int)$row->$nsField;
+ $ret = $this->doMove( $db, $ns, $row->$titleField );
+ } else {
+ $ret = $this->doUpdate( $db, $table, $nsField, $titleField, $row );
+ }
+ if ( $ret === true ) {
+ $count++;
+ } elseif ( $ret === false ) {
+ $errors++;
+ }
+ }
+
+ if ( $this->run ) {
+ $r = $cont ? json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE ) : '<end>';
+ $this->output( "... $table: $count renames, $errors errors at $r\n" );
+ $lbFactory->waitForReplication(
+ [ 'timeout' => 30, 'ifWritesSince' => $lastReplicationWait ]
+ );
+ $lastReplicationWait = microtime( true );
+ }
+ } while ( $cont );
+ }
+ }
+
+ $this->output( "Done processing table `$table`.\n" );
+ }
+
+ /**
+ * List users needing renaming
+ * @param IDatabase $db Database handle
+ */
+ private function processUsers( IDatabase $db ) {
+ $userlistFile = $this->getOption( 'userlist' );
+ if ( $userlistFile === null ) {
+ $this->output( "Not generating user list, --userlist was not specified.\n" );
+ return;
+ }
+
+ $fh = fopen( $userlistFile, 'wb' );
+ if ( !$fh ) {
+ $this->error( "Could not open user list file $userlistFile" );
+ return;
+ }
+
+ $this->output( "Generating user list...\n" );
+ $count = 0;
+ $batchSize = $this->getBatchSize();
+ foreach ( $this->getLikeBatches( $db, 'user_name' ) as $like ) {
+ $cont = [];
+ while ( true ) {
+ $names = $db->selectFieldValues(
+ 'user',
+ 'user_name',
+ array_merge( [ $like ], $cont ),
+ __METHOD__,
+ [ 'ORDER BY' => 'user_name', 'LIMIT' => $batchSize ]
+ );
+ if ( !$names ) {
+ break;
+ }
+
+ $last = end( $names );
+ $cont = [ 'user_name > ' . $db->addQuotes( $last ) ];
+ foreach ( $names as $name ) {
+ $char = mb_substr( $name, 0, 1 );
+ if ( !array_key_exists( $char, $this->charmap ) ) {
+ $this->error(
+ "Query returned $name, but user name does not begin with a character in the charmap."
+ );
+ continue;
+ }
+ $newName = $this->charmap[$char] . mb_substr( $name, 1 );
+ fprintf( $fh, "%s\t%s\n", $name, $newName );
+ $count++;
+ }
+ $this->output( "... at $last, $count names so far\n" );
+ }
+ }
+
+ if ( !fclose( $fh ) ) {
+ $this->error( "fclose on $userlistFile failed" );
+ }
+ $this->output( "User list output to $userlistFile, $count users need renaming.\n" );
+ }
+}
+
+$maintClass = UppercaseTitlesForUnicodeTransition::class;
+require_once RUN_MAINTENANCE_IF_MAIN;