From 96bd79b4a36a7216dce4ad8b5915d592ba1dff8b Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Wed, 25 Oct 2017 15:26:53 -0400 Subject: [PATCH] Avoid DB rows with usable names but ID = 0 by introducing "interwiki" usernames Importing revisions in MediaWiki has long been weird: if the username on the imported revision exists locally it's automatically attributed to the local user, while if the name does not exist locally we wind up with revision table rows with rev_user = 0 and rev_user_text being a valid name that someone might later create. "Global" blocks too create rows with ipb_by = 0 and ipb_by_text being a valid name. The upcoming actor table change, as things currently stand, would regularize that a bit by automatically attributing those imported revisions to the newly-created user. But that's not necessarily what we actually want to happen. And it would certainly confuse CentralAuth's attempt to detect its own global blocks. Thus, this patch introduces "interwiki" usernames that aren't valid for local use, of the format "iw>Example".[1] Linker will interpret these names and generate an appropriate interwiki link in history pages and the like, as if from wikitext like `[[iw:User:Example]]`. Imports for nonexistent local users (and optionally for existing local users too) will credit the edit to such an interwiki name. There is also a new hook, 'ImportHandleUnknownUser', to allow extensions such as CentralAuth to create local users as their edits are imported. Block will no longer accept usable-but-nonexistent names for 'byText' or ->setBlocker(). CentralAuth's global blocks will be submitted with an interwiki username (see Ieae5d24f9). Wikis that have imported edits or CentralAuth global blocks should run the new maintenance/cleanupUsersWithNoId.php maintenance script. This isn't done by update.php because (1) it needs an interwiki prefix to use and (2) the updater can't know whether to pass the `--assign` flag. 
[1]: '>' was used instead of the more usual ':' because WMF wikis have many existing usernames containing colons. Bug: T9240 Bug: T20209 Bug: T111605 Change-Id: I5401941c06102e8faa813910519d55482dff36cb Depends-On: Ieae5d24f9098c1977447c50a8d4e2cab58a24d9f --- RELEASE-NOTES-1.31 | 13 +- autoload.php | 1 + docs/hooks.txt | 5 + includes/Block.php | 12 +- includes/DefaultSettings.php | 1 + includes/Linker.php | 38 +++- includes/api/ApiImport.php | 11 + includes/api/i18n/en.json | 2 + includes/api/i18n/qqq.json | 2 + includes/import/WikiImporter.php | 53 ++++- includes/specials/SpecialImport.php | 42 ++++ languages/i18n/en.json | 3 + languages/i18n/qqq.json | 3 + maintenance/cleanupUsersWithNoId.php | 212 +++++++++++++++++++ maintenance/importDump.php | 12 ++ tests/phpunit/includes/BlockTest.php | 14 +- tests/phpunit/includes/import/ImportTest.php | 101 +++++++++ 17 files changed, 504 insertions(+), 21 deletions(-) create mode 100644 maintenance/cleanupUsersWithNoId.php diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31 index c24d76146a..b32e3e755e 100644 --- a/RELEASE-NOTES-1.31 +++ b/RELEASE-NOTES-1.31 @@ -15,6 +15,8 @@ production. possible for fallback images such as png. * (T44246) $wgFilterLogTypes will no longer ignore 'patrol' when user does not have the right to mark things patrolled. +* Wikis that contain imported revisions or CentralAuth global blocks should run + maintenance/cleanupUsersWithNoId.php. === New features in 1.31 === * Wikimedia\Rdbms\IDatabase->select() and similar methods now support @@ -22,6 +24,13 @@ production. * As a first pass in standardizing dialog boxes across the MediaWiki product, Html class now provides helper methods for messageBox, successBox, errorBox and warningBox generation. +* (T9240) Imports will now record unknown (and, optionally, known) usernames in + a format like "iw>Example". 
+* (T20209) Linker (used on history pages, log pages, and so on) will display + usernames formed like "iw>Example" as interwiki links, as if by wikitext like + [[iw:User:Example|iw>Example]]. +* (T111605) The 'ImportHandleUnknownUser' hook allows extensions to auto-create + users during an import. === External library changes in 1.31 === @@ -107,7 +116,9 @@ changes to languages because of Phabricator reports. * Passing a ParserOptions object to OutputPage::parserOptions() is deprecated. * Browser support for Opera 12 and older was removed. Opera 15+ continues at Grade A support. -* … +* The Block class will no longer accept usable-but-missing usernames for + 'byText' or ->setBlocker(). Callers should either ensure the blocker exists + locally or use a new interwiki-format username like "iw>Example". == Compatibility == MediaWiki 1.31 requires PHP 5.5.9 or later. There is experimental support for diff --git a/autoload.php b/autoload.php index 51daceddaf..2661fd7ed3 100644 --- a/autoload.php +++ b/autoload.php @@ -264,6 +264,7 @@ $wgAutoloadLocalClasses = [ 'CleanupPreferences' => __DIR__ . '/maintenance/cleanupPreferences.php', 'CleanupRemovedModules' => __DIR__ . '/maintenance/cleanupRemovedModules.php', 'CleanupSpam' => __DIR__ . '/maintenance/cleanupSpam.php', + 'CleanupUsersWithNoId' => __DIR__ . '/maintenance/cleanupUsersWithNoId.php', 'ClearInterwikiCache' => __DIR__ . '/maintenance/clearInterwikiCache.php', 'ClearUserWatchlistJob' => __DIR__ . '/includes/jobqueue/jobs/ClearUserWatchlistJob.php', 'CliInstaller' => __DIR__ . '/includes/installer/CliInstaller.php', diff --git a/docs/hooks.txt b/docs/hooks.txt index 6c1597f3de..685a182a55 100644 --- a/docs/hooks.txt +++ b/docs/hooks.txt @@ -1840,6 +1840,11 @@ $revisionInfo: Array of revision information Return false to stop further processing of the tag $reader: XMLReader object +'ImportHandleUnknownUser': When a user doesn't exist locally, this hook is called +to give extensions an opportunity to auto-create it. 
If the auto-creation is +successful, return false. +$name: User name + 'ImportHandleUploadXMLTag': When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader: XMLReader object diff --git a/includes/Block.php b/includes/Block.php index d1e78bb6cf..0999ad2063 100644 --- a/includes/Block.php +++ b/includes/Block.php @@ -1479,9 +1479,19 @@ class Block { /** * Set the user who implemented (or will implement) this block - * @param User|string $user Local User object or username string for foreign users + * @param User|string $user Local User object or username string */ public function setBlocker( $user ) { + if ( is_string( $user ) ) { + $user = User::newFromName( $user, false ); + } + + if ( $user->isAnon() && User::isUsableName( $user->getName() ) ) { + throw new InvalidArgumentException( + 'Blocker must be a local user or a name that cannot be a local user' + ); + } + $this->blocker = $user; } diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index dcbcb6ec27..7448cfcf34 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -4852,6 +4852,7 @@ $wgReservedUsernames = [ 'msg:double-redirect-fixer', // Automatic double redirect fix 'msg:usermessage-editor', // Default user for leaving user messages 'msg:proxyblocker', // For $wgProxyList and Special:Blockme (removed in 1.22) + 'msg:sorbs', // For $wgEnableDnsBlacklist etc. 
'msg:spambot_username', // Used by cleanupSpam.php 'msg:autochange-username', // Used by anon category RC entries (parser functions, Lua & purges) ]; diff --git a/includes/Linker.php b/includes/Linker.php index 403b10a149..a0332cf615 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -892,10 +892,26 @@ class Linker { */ public static function userLink( $userId, $userName, $altUserName = false ) { $classes = 'mw-userlink'; + $page = null; if ( $userId == 0 ) { - $page = SpecialPage::getTitleFor( 'Contributions', $userName ); - if ( $altUserName === false ) { - $altUserName = IP::prettifyIP( $userName ); + $pos = strpos( $userName, '>' ); + if ( $pos !== false ) { + $iw = explode( ':', substr( $userName, 0, $pos ) ); + $firstIw = array_shift( $iw ); + $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup(); + if ( $interwikiLookup->isValidInterwiki( $firstIw ) ) { + $title = MWNamespace::getCanonicalName( NS_USER ) . ':' . substr( $userName, $pos + 1 ); + if ( $iw ) { + $title = join( ':', $iw ) . ':' . $title; + } + $page = Title::makeTitle( NS_MAIN, $title, '', $firstIw ); + } + $classes .= ' mw-extuserlink'; + } else { + $page = SpecialPage::getTitleFor( 'Contributions', $userName ); + if ( $altUserName === false ) { + $altUserName = IP::prettifyIP( $userName ); + } } $classes .= ' mw-anonuserlink'; // Separate link class for anons (T45179) } else { @@ -903,11 +919,12 @@ class Linker { } // Wrap the output with tags for directionality isolation - return self::link( - $page, - '' . htmlspecialchars( $altUserName !== false ? $altUserName : $userName ) . '', - [ 'class' => $classes ] - ); + $linkText = + '' . htmlspecialchars( $altUserName !== false ? $altUserName : $userName ) . ''; + + return $page + ? 
self::link( $page, $linkText, [ 'class' => $classes ] ) + : Html::rawElement( 'span', [ 'class' => $classes ], $linkText ); } /** @@ -931,6 +948,11 @@ class Linker { $blockable = !( $flags & self::TOOL_LINKS_NOBLOCK ); $addEmailLink = $flags & self::TOOL_LINKS_EMAIL && $userId; + if ( $userId == 0 && strpos( $userText, '>' ) !== false ) { + // No tools for an external user + return ''; + } + $items = []; if ( $talkable ) { $items[] = self::userTalkLink( $userId, $userText ); diff --git a/includes/api/ApiImport.php b/includes/api/ApiImport.php index b46f0b1e51..a0f0a8dbdb 100644 --- a/includes/api/ApiImport.php +++ b/includes/api/ApiImport.php @@ -53,12 +53,18 @@ class ApiImport extends ApiBase { $params['fullhistory'], $params['templates'] ); + $usernamePrefix = $params['interwikisource']; } else { $isUpload = true; if ( !$user->isAllowed( 'importupload' ) ) { $this->dieWithError( 'apierror-cantimport-upload' ); } $source = ImportStreamSource::newFromUpload( 'xml' ); + $usernamePrefix = (string)$params['interwikiprefix']; + if ( $usernamePrefix === '' ) { + $encParamName = $this->encodeParamName( 'interwikiprefix' ); + $this->dieWithError( [ 'apierror-missingparam', $encParamName ] ); + } } if ( !$source->isOK() ) { $this->dieStatus( $source ); @@ -81,6 +87,7 @@ class ApiImport extends ApiBase { $this->dieStatus( $statusRootPage ); } } + $importer->setUsernamePrefix( $usernamePrefix, $params['assignknownusers'] ); $reporter = new ApiImportReporter( $importer, $isUpload, @@ -141,6 +148,9 @@ class ApiImport extends ApiBase { 'xml' => [ ApiBase::PARAM_TYPE => 'upload', ], + 'interwikiprefix' => [ + ApiBase::PARAM_TYPE => 'string', + ], 'interwikisource' => [ ApiBase::PARAM_TYPE => $this->getAllowedImportSources(), ], @@ -150,6 +160,7 @@ class ApiImport extends ApiBase { 'namespace' => [ ApiBase::PARAM_TYPE => 'namespace' ], + 'assignknownusers' => false, 'rootpage' => null, 'tags' => [ ApiBase::PARAM_TYPE => 'tags', diff --git a/includes/api/i18n/en.json 
b/includes/api/i18n/en.json index 85f17debc2..91c3e185b0 100644 --- a/includes/api/i18n/en.json +++ b/includes/api/i18n/en.json @@ -250,6 +250,8 @@ "apihelp-import-extended-description": "Note that the HTTP POST must be done as a file upload (i.e. using multipart/form-data) when sending a file for the xml parameter.", "apihelp-import-param-summary": "Log entry import summary.", "apihelp-import-param-xml": "Uploaded XML file.", + "apihelp-import-param-interwikiprefix": "For uploaded imports: interwiki prefix to apply to unknown user names (and known users if $1assignknownusers is set).", + "apihelp-import-param-assignknownusers": "Assign edits to local users where the named user exists locally.", "apihelp-import-param-interwikisource": "For interwiki imports: wiki to import from.", "apihelp-import-param-interwikipage": "For interwiki imports: page to import.", "apihelp-import-param-fullhistory": "For interwiki imports: import the full history, not just the current version.", diff --git a/includes/api/i18n/qqq.json b/includes/api/i18n/qqq.json index 3bdf7c6d1d..47afdc12b9 100644 --- a/includes/api/i18n/qqq.json +++ b/includes/api/i18n/qqq.json @@ -240,6 +240,8 @@ "apihelp-import-extended-description": "{{doc-apihelp-extended-description|import}}", "apihelp-import-param-summary": "{{doc-apihelp-param|import|summary|info=The parameter being documented here provides the summary used on the log messages about the import. 
The phrase \"Import summary\" here is grammatically equivalent to a phrase such as \"science book\", not \"eat food\".}}", "apihelp-import-param-xml": "{{doc-apihelp-param|import|xml}}", + "apihelp-import-param-interwikiprefix": "{{doc-apihelp-param|import|interwikiprefix}}", + "apihelp-import-param-assignknownusers": "{{doc-apihelp-param|import|assignknownusers}}", "apihelp-import-param-interwikisource": "{{doc-apihelp-param|import|interwikisource}}", "apihelp-import-param-interwikipage": "{{doc-apihelp-param|import|interwikipage}}", "apihelp-import-param-fullhistory": "{{doc-apihelp-param|import|fullhistory}}", diff --git a/includes/import/WikiImporter.php b/includes/import/WikiImporter.php index a1f7e0c002..bffc1a9b14 100644 --- a/includes/import/WikiImporter.php +++ b/includes/import/WikiImporter.php @@ -47,6 +47,9 @@ class WikiImporter { private $countableCache = []; /** @var bool */ private $disableStatisticsUpdate = false; + private $usernamePrefix = 'imported'; + private $assignKnownUsers = false; + private $triedCreations = []; /** * Creates an ImportXMLReader drawing from the source provided @@ -311,6 +314,16 @@ class WikiImporter { $this->mImportUploads = $import; } + /** + * @since 1.31 + * @param string $usernamePrefix Prefix to apply to unknown (and possibly also known) usernames + * @param bool $assignKnownUsers Whether to assign edits to local users where the named user exists locally (if false, the prefix is applied to known usernames too) + */ + public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) { + $this->usernamePrefix = rtrim( (string)$usernamePrefix, ':>' ); + $this->assignKnownUsers = (bool)$assignKnownUsers; + } + /** * Statistics update can cause a lot of time * @since 1.29 @@ -716,9 +729,9 @@ class WikiImporter { } if ( !isset( $logInfo['contributor']['username'] ) ) { - $revision->setUsername( 'Unknown user' ); + $revision->setUsername( $this->usernamePrefix . 
'>Unknown user' ); } else { - $revision->setUsername( $logInfo['contributor']['username'] ); + $revision->setUsername( $this->prefixUsername( $logInfo['contributor']['username'] ) ); } return $this->logItemCallback( $revision ); @@ -911,9 +924,9 @@ class WikiImporter { if ( isset( $revisionInfo['contributor']['ip'] ) ) { $revision->setUserIP( $revisionInfo['contributor']['ip'] ); } elseif ( isset( $revisionInfo['contributor']['username'] ) ) { - $revision->setUsername( $revisionInfo['contributor']['username'] ); + $revision->setUsername( $this->prefixUsername( $revisionInfo['contributor']['username'] ) ); } else { - $revision->setUsername( 'Unknown user' ); + $revision->setUsername( $this->usernamePrefix . '>Unknown user' ); } if ( isset( $revisionInfo['sha1'] ) ) { $revision->setSha1Base36( $revisionInfo['sha1'] ); @@ -1020,13 +1033,43 @@ class WikiImporter { $revision->setUserIP( $uploadInfo['contributor']['ip'] ); } if ( isset( $uploadInfo['contributor']['username'] ) ) { - $revision->setUsername( $uploadInfo['contributor']['username'] ); + $revision->setUsername( $this->prefixUsername( $uploadInfo['contributor']['username'] ) ); } $revision->setNoUpdates( $this->mNoUpdates ); return call_user_func( $this->mUploadCallback, $revision ); } + /** + * Add an interwiki prefix to the username, if appropriate + * @since 1.31 + * @param string $name Name being imported + * @return string Name, possibly with the prefix prepended. + */ + protected function prefixUsername( $name ) { + if ( !User::isUsableName( $name ) ) { + return $name; + } + + if ( $this->assignKnownUsers ) { + if ( User::idFromName( $name ) ) { + return $name; + } + + // See if any extension wants to create it. + if ( !isset( $this->triedCreations[$name] ) ) { + $this->triedCreations[$name] = true; + if ( !Hooks::run( 'ImportHandleUnknownUser', [ $name ] ) && + User::idFromName( $name, User::READ_LATEST ) + ) { + return $name; + } + } + } + + return substr( $this->usernamePrefix . '>' . 
$name, 0, 255 ); + } + /** * @return array */ diff --git a/includes/specials/SpecialImport.php b/includes/specials/SpecialImport.php index 9ce52ef013..ab5d4d7238 100644 --- a/includes/specials/SpecialImport.php +++ b/includes/specials/SpecialImport.php @@ -43,6 +43,8 @@ class SpecialImport extends SpecialPage { private $includeTemplates = false; private $pageLinkDepth; private $importSources; + private $assignKnownUsers; + private $usernamePrefix; public function __construct() { parent::__construct( 'Import', 'import' ); @@ -110,6 +112,7 @@ class SpecialImport extends SpecialPage { $isUpload = false; $request = $this->getRequest(); $this->sourceName = $request->getVal( "source" ); + $this->assignKnownUsers = $request->getCheck( 'assignKnownUsers' ); $this->logcomment = $request->getText( 'log-comment' ); $this->pageLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' ) == 0 @@ -130,6 +133,7 @@ class SpecialImport extends SpecialPage { $source = Status::newFatal( 'import-token-mismatch' ); } elseif ( $this->sourceName === 'upload' ) { $isUpload = true; + $this->usernamePrefix = $this->fullInterwikiPrefix = $request->getVal( 'usernamePrefix' ); if ( $user->isAllowed( 'importupload' ) ) { $source = ImportStreamSource::newFromUpload( "xmlimport" ); } else { @@ -169,6 +173,10 @@ class SpecialImport extends SpecialPage { $source = Status::newFatal( "importunknownsource" ); } + if ( (string)$this->fullInterwikiPrefix === '' ) { + $source->fatal( 'importnoprefix' ); + } + $out = $this->getOutput(); if ( !$source->isGood() ) { $out->addWikiText( "

\n" . @@ -192,6 +200,7 @@ class SpecialImport extends SpecialPage { return; } } + $importer->setUsernamePrefix( $this->fullInterwikiPrefix, $this->assignKnownUsers ); $out->addWikiMsg( "importstart" ); @@ -336,6 +345,28 @@ class SpecialImport extends SpecialPage { Html::input( 'xmlimport', '', 'file', [ 'id' => 'xmlimport' ] ) . ' ' . " + + " . + Xml::label( $this->msg( 'import-upload-username-prefix' )->text(), + 'mw-import-usernamePrefix' ) . + " + " . + Xml::input( 'usernamePrefix', 50, + $this->usernamePrefix, + [ 'id' => 'usernamePrefix', 'type' => 'text' ] ) . ' ' . + " + + + + " . + Xml::checkLabel( + $this->msg( 'import-assign-known-users' )->text(), + 'assignKnownUsers', + 'assignKnownUsers', + $this->assignKnownUsers + ) . + " + " . Xml::label( $this->msg( 'import-comment' )->text(), 'mw-import-comment' ) . @@ -489,6 +520,17 @@ class SpecialImport extends SpecialPage { ) . " + + + " . + Xml::checkLabel( + $this->msg( 'import-assign-known-users' )->text(), + 'assignKnownUsers', + 'assignKnownUsers', + $this->assignKnownUsers + ) . + " + $importDepth " . 
diff --git a/languages/i18n/en.json b/languages/i18n/en.json index 1fecca0e3a..764d254569 100644 --- a/languages/i18n/en.json +++ b/languages/i18n/en.json @@ -2771,6 +2771,8 @@ "import-mapping-namespace": "Import to a namespace:", "import-mapping-subpage": "Import as subpages of the following page:", "import-upload-filename": "Filename:", + "import-upload-username-prefix": "Interwiki prefix:", + "import-assign-known-users": "Assign edits to local users where the named user exists locally", "import-comment": "Comment:", "importtext": "Please export the file from the source wiki using the [[Special:Export|export utility]].\nSave it to your computer and upload it here.", "importstart": "Importing pages...", @@ -2779,6 +2781,7 @@ "imported-log-entries": "Imported $1 {{PLURAL:$1|log entry|log entries}}.", "importfailed": "Import failed: $1", "importunknownsource": "Unknown import source type", + "importnoprefix": "No interwiki prefix was supplied", "importcantopen": "Could not open import file", "importbadinterwiki": "Bad interwiki link", "importsuccess": "Import finished!", diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 83c8a93f08..d8eb61b2fe 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -2966,6 +2966,8 @@ "import-mapping-namespace": "Used as label for the second of three radio buttons in Import form on [[Special:Import]]. The radio button is followed by a drop-down list from which the user can select a namespace.\n\nSee also:\n* {{msg-mw|Import-mapping-default}}\n* {{msg-mw|Import-mapping-subpage}}", "import-mapping-subpage": "Used as label for the third of three radio buttons in Import form on [[Special:Import]]. The radio button is followed by a text box in which the user can type a page name. 
The imported pages will be created as subpages of the entered page name.\n\nSee also:\n* {{msg-mw|Import-mapping-default}}\n* {{msg-mw|Import-mapping-namespace}}", "import-upload-filename": "Used on [[Special:Import]] as label for upload of an XML file containing the pages to import.\n{{Identical|Filename}}", + "import-upload-username-prefix": "Used as label for input box in [[Special:Import]].", + "import-assign-known-users": "Used as label for checkbox in [[Special:Import]].", "import-comment": "Used as label for input box in [[Special:Import]].\n\nSee also:\n* {{msg-mw|Import-interwiki-history}}\n* {{msg-mw|Import-interwiki-templates}}\n* {{msg-mw|Import-interwiki-namespace}}\n* {{msg-mw|Import-interwiki-rootpage}}\n* {{msg-mw|Import-interwiki-submit}}\n{{Identical|Comment}}", "importtext": "Used in the Import form on [[Special:Import]].", "importstart": "Used in [[Special:Import]].\n\nSee also:\n* {{msg-mw|Importsuccess}}\n* {{msg-mw|Importfailed}}", @@ -2974,6 +2976,7 @@ "imported-log-entries": "Used as success message. Parameters:\n* $1 - number of log items\nSee also:\n* {{msg-mw|Importnopages}} - fatal error message", "importfailed": "Used as error message in [[Special:Import]]. Parameters:\n* $1 - import source\nSee also:\n* {{msg-mw|Importstart}}\n* {{msg-mw|Importsuccess}}", "importunknownsource": "Used as error message in [[Special:Import]].\n\nSee also:\n* {{msg-mw|import-token-mismatch}}\n* {{msg-mw|import-invalid-interwiki}}\n* {{msg-mw|Importunknownsource}}", + "importnoprefix": "Used as error message in [[Special:Import]]. 
Usually this error means that import via upload was attempted and the {{msg-mw|import-upload-username-prefix}} field was left empty.", "importcantopen": "Used as error message when importing from file or from URL.", "importbadinterwiki": "Used as error message when importing from interwiki.\n\nSee also:\n* {{msg-mw|Import-noarticle}}\n* {{msg-mw|Importbadinterwiki}}", "importsuccess": "Used in [[Special:Import]].\n\nSee also:\n* {{msg-mw|Importstart}}\n* {{msg-mw|Importfailed}}", diff --git a/maintenance/cleanupUsersWithNoId.php b/maintenance/cleanupUsersWithNoId.php new file mode 100644 index 0000000000..74167d1355 --- /dev/null +++ b/maintenance/cleanupUsersWithNoId.php @@ -0,0 +1,212 @@ +addDescription( 'Cleans up tables that have valid usernames with no user ID' ); + $this->addOption( 'prefix', 'Interwiki prefix to apply to the usernames', true, true, 'p' ); + $this->addOption( 'table', 'Only clean up this table', false, true ); + $this->addOption( 'assign', 'Assign edits to existing local users if they exist', false, false ); + $this->setBatchSize( 100 ); + } + + protected function getUpdateKey() { + return __CLASS__; + } + + protected function doDBUpdates() { + $this->prefix = $this->getOption( 'prefix' ); + $this->table = $this->getOption( 'table', null ); + $this->assign = $this->getOption( 'assign' ); + + $this->cleanup( + 'revision', 'rev_id', 'rev_user', 'rev_user_text', + [ 'rev_user' => 0 ], [ 'rev_timestamp', 'rev_id' ] + ); + $this->cleanup( + 'archive', 'ar_id', 'ar_user', 'ar_user_text', + [], [ 'ar_id' ] + ); + $this->cleanup( + 'logging', 'log_id', 'log_user', 'log_user_text', + [ 'log_user' => 0 ], [ 'log_timestamp', 'log_id' ] + ); + $this->cleanup( + 'image', 'img_name', 'img_user', 'img_user_text', + [ 'img_user' => 0 ], [ 'img_timestamp', 'img_name' ] + ); + $this->cleanup( + 'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', + [], [ 'oi_name', 'oi_timestamp' ] + ); + $this->cleanup( + 'filearchive', 'fa_id', 'fa_user', 
'fa_user_text', + [], [ 'fa_id' ] + ); + $this->cleanup( + 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', + [], [ 'ipb_id' ] + ); + $this->cleanup( + 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', + [], [ 'rc_id' ] + ); + + return true; + } + + /** + * Calculate a "next" condition and progress display string + * @param IDatabase $dbw + * @param string[] $indexFields Fields in the index being ordered by + * @param object $row Database row + * @return array [ string $next, string $display ] + */ + private function makeNextCond( $dbw, $indexFields, $row ) { + $next = ''; + $display = []; + for ( $i = count( $indexFields ) - 1; $i >= 0; $i-- ) { + $field = $indexFields[$i]; + $display[] = $field . '=' . $row->$field; + $value = $dbw->addQuotes( $row->$field ); + if ( $next === '' ) { + $next = "$field > $value"; + } else { + $next = "$field > $value OR $field = $value AND ($next)"; + } + } + $display = join( ' ', array_reverse( $display ) ); + return [ $next, $display ]; + } + + /** + * Cleanup a table + * + * @param string $table Table to migrate + * @param string|string[] $primaryKey Primary key of the table. 
+ * @param string $idField User ID field name + * @param string $nameField User name field name + * @param array $conds Query conditions + * @param string[] $orderby Fields to order by + */ + protected function cleanup( + $table, $primaryKey, $idField, $nameField, array $conds, array $orderby + ) { + if ( $this->table !== null && $this->table !== $table ) { + return; + } + + $primaryKey = (array)$primaryKey; + $pkFilter = array_flip( $primaryKey ); + $this->output( + "Beginning cleanup of $table\n" + ); + + $dbw = $this->getDB( DB_MASTER ); + $next = '1=1'; + $countAssigned = 0; + $countPrefixed = 0; + while ( true ) { + // Fetch the rows needing update + $res = $dbw->select( + $table, + array_merge( $primaryKey, [ $idField, $nameField ], $orderby ), + array_merge( $conds, [ $next ] ), + __METHOD__, + [ + 'ORDER BY' => $orderby, + 'LIMIT' => $this->mBatchSize, + ] + ); + if ( !$res->numRows() ) { + break; + } + + // Update the existing rows + foreach ( $res as $row ) { + $name = $row->$nameField; + if ( $row->$idField || !User::isUsableName( $name ) ) { + continue; + } + + $id = 0; + if ( $this->assign ) { + $id = (int)User::idFromName( $name ); + if ( !$id ) { + // See if any extension wants to create it. + if ( !isset( $this->triedCreations[$name] ) ) { + $this->triedCreations[$name] = true; + if ( !Hooks::run( 'ImportHandleUnknownUser', [ $name ] ) ) { + $id = (int)User::idFromName( $name, User::READ_LATEST ); + } + } + } + } + if ( $id ) { + $set = [ $idField => $id ]; + $counter = &$countAssigned; + } else { + $set = [ $nameField => substr( $this->prefix . '>' . $name, 0, 255 ) ]; + $counter = &$countPrefixed; + } + + $dbw->update( + $table, + $set, + array_intersect_key( (array)$row, $pkFilter ) + [ + $idField => 0, + $nameField => $name, + ], + __METHOD__ + ); + $counter += $dbw->affectedRows(); + } + + list( $next, $display ) = $this->makeNextCond( $dbw, $orderby, $row ); + $this->output( "... 
$display\n" ); + wfWaitForSlaves(); + } + + $this->output( + "Completed cleanup, assigned $countAssigned and prefixed $countPrefixed row(s)\n" + ); + } +} + +$maintClass = "CleanupUsersWithNoId"; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/importDump.php b/maintenance/importDump.php index cf0e7d83ca..b6bbc2a6d6 100644 --- a/maintenance/importDump.php +++ b/maintenance/importDump.php @@ -82,6 +82,12 @@ TEXT ); $this->addOption( 'image-base-path', 'Import files from a specified path', false, true ); $this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true ); + $this->addOption( 'username-interwiki', 'Use interwiki usernames with this prefix', false, true ); + $this->addOption( 'no-local-users', + 'Treat all usernames as interwiki. ' . + 'The default is to assign edits to local users where they exist.', + false, false + ); $this->addArg( 'file', 'Dump file to import [else use stdin]', false ); } @@ -295,6 +301,12 @@ TEXT if ( $this->hasOption( 'no-updates' ) ) { $importer->setNoUpdates( true ); } + if ( $this->hasOption( 'username-prefix' ) ) { + $importer->setUsernamePrefix( + $this->getOption( 'username-prefix' ), + !$this->hasOption( 'no-local-users' ) + ); + } if ( $this->hasOption( 'rootpage' ) ) { $statusRootPage = $importer->setTargetRootPage( $this->getOption( 'rootpage' ) ); if ( !$statusRootPage->isGood() ) { diff --git a/tests/phpunit/includes/BlockTest.php b/tests/phpunit/includes/BlockTest.php index c422b515cc..70715e26bd 100644 --- a/tests/phpunit/includes/BlockTest.php +++ b/tests/phpunit/includes/BlockTest.php @@ -160,7 +160,7 @@ class BlockTest extends MediaWikiLangTestCase { 'enableAutoblock' => true, 'hideName' => true, 'blockEmail' => true, - 'byText' => 'MetaWikiUser', + 'byText' => 'm>MetaWikiUser', ]; $block = new Block( $blockOptions ); $block->insert(); @@ -214,7 +214,7 @@ class BlockTest extends MediaWikiLangTestCase { 'enableAutoblock' => true, 'hideName' => true, 'blockEmail' => true, - 
'byText' => 'MetaWikiUser', + 'byText' => 'Meta>MetaWikiUser', ]; $block = new Block( $blockOptions ); @@ -230,8 +230,9 @@ class BlockTest extends MediaWikiLangTestCase { 'Correct blockee name' ); $this->assertEquals( $userId, $block->getTarget()->getId(), 'Correct blockee id' ); - $this->assertEquals( 'MetaWikiUser', $block->getBlocker(), 'Correct blocker name' ); - $this->assertEquals( 'MetaWikiUser', $block->getByName(), 'Correct blocker name' ); + $this->assertEquals( 'Meta>MetaWikiUser', $block->getBlocker()->getName(), + 'Correct blocker name' ); + $this->assertEquals( 'Meta>MetaWikiUser', $block->getByName(), 'Correct blocker name' ); $this->assertEquals( 0, $block->getBy(), 'Correct blocker id' ); } @@ -282,6 +283,7 @@ class BlockTest extends MediaWikiLangTestCase { ], ]; + $blocker = $this->getTestUser()->getUser(); foreach ( $blockList as $insBlock ) { $target = $insBlock['target']; @@ -293,7 +295,7 @@ class BlockTest extends MediaWikiLangTestCase { $block = new Block(); $block->setTarget( $target ); - $block->setBlocker( 'testblocker@global' ); + $block->setBlocker( $blocker ); $block->mReason = $insBlock['desc']; $block->mExpiry = 'infinity'; $block->prevents( 'createaccount', $insBlock['ACDisable'] ); @@ -425,7 +427,7 @@ class BlockTest extends MediaWikiLangTestCase { 'reason' => 'test system block', 'timestamp' => wfTimestampNow(), 'expiry' => $this->db->getInfinity(), - 'byText' => 'MetaWikiUser', + 'byText' => 'MediaWiki default', 'systemBlock' => 'test', 'enableAutoblock' => true, ]; diff --git a/tests/phpunit/includes/import/ImportTest.php b/tests/phpunit/includes/import/ImportTest.php index 53d91c6593..505653d41c 100644 --- a/tests/phpunit/includes/import/ImportTest.php +++ b/tests/phpunit/includes/import/ImportTest.php @@ -220,4 +220,105 @@ EOF // @codingStandardsIgnoreEnd } + /** + * @dataProvider provideUnknownUserHandling + * @param bool $assign + * @param bool $create + */ + public function testUnknownUserHandling( $assign, $create ) { + 
$hookId = -99; + $this->setMwGlobals( 'wgHooks', [ + 'ImportHandleUnknownUser' => [ function ( $name ) use ( $assign, $create, &$hookId ) { + if ( !$assign ) { + $this->fail( 'ImportHandleUnknownUser was called unexpectedly' ); + } + + $this->assertEquals( 'UserDoesNotExist', $name ); + if ( $create ) { + $user = User::createNew( $name ); + $this->assertNotNull( $user ); + $hookId = $user->getId(); + return false; + } + return true; + } ] + ] ); + + $user = $this->getTestUser()->getUser(); + + $n = ( $assign ? 1 : 0 ) + ( $create ? 2 : 0 ); + + // @codingStandardsIgnoreStart Generic.Files.LineLength + $source = $this->getDataSource( << + + TestImportPage + 0 + 14 + + 15 + 2016-01-01T0$n:00:00Z + + UserDoesNotExist + 1 + + wikitext + text/x-wiki + foo + 1e6gpc3ehk0mu2jqu8cg42g009s796b + + + 16 + 2016-01-01T0$n:00:01Z + + {$user->getName()} + {$user->getId()} + + wikitext + text/x-wiki + bar + bjhlo6dxh5wivnszm93u4b78fheiy4t + + + +EOF + ); + // @codingStandardsIgnoreEnd + + $importer = new WikiImporter( $source, MediaWikiServices::getInstance()->getMainConfig() ); + $importer->setUsernamePrefix( 'Xxx', $assign ); + $importer->doImport(); + + $db = wfGetDB( DB_MASTER ); + + $row = $db->selectRow( + 'revision', + [ 'rev_user', 'rev_user_text' ], + [ 'rev_timestamp' => "201601010{$n}0000" ], + __METHOD__ + ); + $this->assertSame( + $assign && $create ? 'UserDoesNotExist' : 'Xxx>UserDoesNotExist', + $row->rev_user_text + ); + $this->assertSame( $assign && $create ? $hookId : 0, (int)$row->rev_user ); + + $row = $db->selectRow( + 'revision', + [ 'rev_user', 'rev_user_text' ], + [ 'rev_timestamp' => "201601010{$n}0001" ], + __METHOD__ + ); + $this->assertSame( ( $assign ? '' : 'Xxx>' ) . $user->getName(), $row->rev_user_text ); + $this->assertSame( $assign ? 
$user->getId() : 0, (int)$row->rev_user ); + } + + public static function provideUnknownUserHandling() { + return [ + 'no assign' => [ false, false ], + 'assign, no create' => [ true, false ], + 'assign, create' => [ true, true ], + ]; + } + } -- 2.20.1