private $config;
/** @var ImportTitleFactory */
private $importTitleFactory;
+ /** @var array */
+ private $countableCache = array();
/**
* Creates an ImportXMLReader drawing from the source provided
- * @param ImportStreamSource $source
+ * @param ImportSource $source
* @param Config $config
*/
- function __construct( ImportStreamSource $source, Config $config = null ) {
+ function __construct( ImportSource $source, Config $config = null ) {
$this->reader = new XMLReader();
if ( !$config ) {
wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
}
// Default callbacks
+ $this->setPageCallback( array( $this, 'beforeImportPage' ) );
$this->setRevisionCallback( array( $this, "importRevision" ) );
$this->setUploadCallback( array( $this, 'importUpload' ) );
$this->setLogItemCallback( array( $this, 'importLogItem' ) );
$this->mImportUploads = $import;
}
+ /**
+ * Default per-page callback (registered through setPageCallback() in the
+ * constructor). Records whether the target page currently counts as an
+ * article, keyed by its prefixed title, so that finishImportPage() can
+ * compare that value with the page's state after the import.
+ * @param array $titleAndForeignTitle Two-element array, with Title object at
+ * index 0 and ForeignTitle object at index 1
+ * @return bool Always true
+ */
+ public function beforeImportPage( $titleAndForeignTitle ) {
+ $targetTitle = $titleAndForeignTitle[0];
+ $wikiPage = WikiPage::factory( $targetTitle );
+ // Same key format that finishImportPage() uses for its lookup
+ $cacheKey = 'title_' . $targetTitle->getPrefixedText();
+ $this->countableCache[$cacheKey] = $wikiPage->isCountable();
+ return true;
+ }
+
/**
* Default per-revision callback, performs the import.
* @param WikiRevision $revision
*/
public function finishImportPage( $title, $foreignTitle, $revCount,
$sRevCount, $pageInfo ) {
+
+ // Update article count statistics (T42009)
+ // The normal counting logic in WikiPage->doEditUpdates() is designed for
+ // one-revision-at-a-time editing, not bulk imports. In this situation it
+ // suffers from issues of slave lag. We let WikiPage handle the total page
+ // and revision count, and we implement our own custom logic for the
+ // article (content page) count.
+ $page = WikiPage::factory( $title );
+ $page->loadPageData( 'fromdbmaster' );
+ $content = $page->getContent();
+ $countKey = 'title_' . $title->getPrefixedText();
+
+ // Two guards before computing the delta:
+ // - The "before" value only exists if beforeImportPage() ran for this
+ // title (the default page callback can be replaced), so test the cache
+ // entry itself instead of reading a possibly-undefined index.
+ // - prepareContentForEdit() requires a Content object, so skip the
+ // adjustment when getContent() yields null (presumably when no current
+ // revision could be loaded, e.g. nothing was actually imported).
+ if ( $content !== null && isset( $this->countableCache[$countKey] ) ) {
+ $editInfo = $page->prepareContentForEdit( $content );
+ $countable = $page->isCountable( $editInfo );
+ $oldcountable = $this->countableCache[$countKey];
+ if ( $countable != $oldcountable ) {
+ // Queue a +1 / -1 adjustment of the article count
+ DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
+ 'articles' => ( (int)$countable - (int)$oldcountable )
+ ) ) );
+ }
+ }
+
$args = func_get_args();
return Hooks::run( 'AfterImportPage', $args );
}
$keepReading = $this->reader->read();
$skip = false;
- while ( $keepReading ) {
- $tag = $this->reader->name;
- $type = $this->reader->nodeType;
-
- if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
- // Do nothing
- } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
- break;
- } elseif ( $tag == 'siteinfo' ) {
- $this->handleSiteInfo();
- } elseif ( $tag == 'page' ) {
- $this->handlePage();
- } elseif ( $tag == 'logitem' ) {
- $this->handleLogItem();
- } elseif ( $tag != '#text' ) {
- $this->warn( "Unhandled top-level XML tag $tag" );
-
- $skip = true;
- }
+ $rethrow = null;
+ try {
+ while ( $keepReading ) {
+ $tag = $this->reader->name;
+ $type = $this->reader->nodeType;
+
+ if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+ // Do nothing
+ } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
+ break;
+ } elseif ( $tag == 'siteinfo' ) {
+ $this->handleSiteInfo();
+ } elseif ( $tag == 'page' ) {
+ $this->handlePage();
+ } elseif ( $tag == 'logitem' ) {
+ $this->handleLogItem();
+ } elseif ( $tag != '#text' ) {
+ $this->warn( "Unhandled top-level XML tag $tag" );
+
+ $skip = true;
+ }
- if ( $skip ) {
- $keepReading = $this->reader->next();
- $skip = false;
- $this->debug( "Skip" );
- } else {
- $keepReading = $this->reader->read();
+ if ( $skip ) {
+ $keepReading = $this->reader->next();
+ $skip = false;
+ $this->debug( "Skip" );
+ } else {
+ $keepReading = $this->reader->read();
+ }
}
+ } catch ( Exception $ex ) {
+ $rethrow = $ex;
}
+ // finally
libxml_disable_entity_loader( $oldDisable );
+ $this->reader->close();
+
+ if ( $rethrow ) {
+ throw $rethrow;
+ }
+
return true;
}
private $mPosition;
/**
- * @param ImportStreamSource $source
+ * @param ImportSource $source
* @return string
*/
- static function registerSource( ImportStreamSource $source ) {
+ static function registerSource( ImportSource $source ) {
$id = wfRandomString();
self::$sourceRegistrations[$id] = $source;
$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
return false;
}
- $oldcountable = $page->isCountable();
}
# @todo FIXME: Use original rev_id optionally (better for backups)
if ( $changed !== false && !$this->mNoUpdates ) {
wfDebug( __METHOD__ . ": running updates\n" );
+ // countable/oldcountable stuff is handled in WikiImporter::finishImportPage
$page->doEditUpdates(
$revision,
$userObj,
- array( 'created' => $created, 'oldcountable' => $oldcountable )
+ array( 'created' => $created, 'oldcountable' => 'no-change' )
);
}
wfDebug( __METHOD__ . ": Successful\n" );
return true;
} else {
- wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
+ wfDebug( __METHOD__ . ': failed: ' . $status->getHTML() . "\n" );
return false;
}
}
}
+/**
+ * Source interface for XML import.
+ *
+ * Abstracts where the XML input comes from: a consumer pulls the input
+ * piece by piece with readChunk() and uses atEnd() to detect exhaustion.
+ * Implemented in this file by ImportStringSource and ImportStreamSource.
+ */
+interface ImportSource {
+
+ /**
+ * Indicates whether the end of the input has been reached.
+ * Implementations must guarantee that this returns true after a finite
+ * number of calls to readChunk(), so read loops terminate.
+ *
+ * @return bool True if there is no more input, false otherwise.
+ */
+ function atEnd();
+
+ /**
+ * Return the next chunk of the input, as a (possibly empty) string.
+ * The result is tied to atEnd():
+ * - if atEnd() returns false, readChunk() will return a string;
+ * - if atEnd() returns true, readChunk() will return false.
+ * An empty string therefore does not signal end of input; only false does.
+ *
+ * @return bool|string The chunk, or false once the end of input is reached
+ */
+ function readChunk();
+}
+
/**
* Used for importing XML dumps where the content of the dump is in a string.
* This class is inefficient, and should only be used for small dumps.
*
* @ingroup SpecialPage
*/
-class ImportStringSource {
+class ImportStringSource implements ImportSource {
function __construct( $string ) {
$this->mString = $string;
$this->mRead = false;
* Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
* @ingroup SpecialPage
*/
-class ImportStreamSource {
+class ImportStreamSource implements ImportSource {
function __construct( $handle ) {
$this->mHandle = $handle;
}
* @return Revision|null
*/
public function getOldestRevision() {
- wfProfileIn( __METHOD__ );
// Try using the slave database first, then try the master
$continue = 2;
}
}
- wfProfileOut( __METHOD__ );
return $row ? Revision::newFromRow( $row ) : null;
}
* @return array Array of authors, duplicates not removed
*/
public function getLastNAuthors( $num, $revLatest = 0 ) {
- wfProfileIn( __METHOD__ );
// First try the slave
// If that doesn't have the latest revision, try the master
$continue = 2;
);
if ( !$res ) {
- wfProfileOut( __METHOD__ );
return array();
}
$authors[] = $row->rev_user_text;
}
- wfProfileOut( __METHOD__ );
return $authors;
}
* @return ParserOutput|bool ParserOutput or false if the revision was not found
*/
public function getParserOutput( ParserOptions $parserOptions, $oldid = null ) {
- wfProfileIn( __METHOD__ );
$useParserCache = $this->isParserCacheUsed( $parserOptions, $oldid );
wfDebug( __METHOD__ . ': using parser cache: ' . ( $useParserCache ? 'yes' : 'no' ) . "\n" );
if ( $useParserCache ) {
$parserOutput = ParserCache::singleton()->get( $this, $parserOptions );
if ( $parserOutput !== false ) {
- wfProfileOut( __METHOD__ );
return $parserOutput;
}
}
$pool = new PoolWorkArticleView( $this, $parserOptions, $oldid, $useParserCache );
$pool->execute();
- wfProfileOut( __METHOD__ );
-
return $pool->getParserOutput();
}
* @return int The newly created page_id key, or false if the title already existed
*/
public function insertOn( $dbw ) {
- wfProfileIn( __METHOD__ );
$page_id = $dbw->nextSequenceValue( 'page_page_id_seq' );
$dbw->insert( 'page', array(
$this->mId = $newid;
$this->mTitle->resetArticleID( $newid );
}
- wfProfileOut( __METHOD__ );
return $affected ? $newid : false;
}
) {
global $wgContentHandlerUseDB;
- wfProfileIn( __METHOD__ );
-
$content = $revision->getContent();
$len = $content ? $content->getSize() : 0;
$rt = $content ? $content->getUltimateRedirectTarget() : null;
$this->mLatest, $revision->getContentModel() );
}
- wfProfileOut( __METHOD__ );
return $result;
}
return true;
}
- wfProfileIn( __METHOD__ );
if ( $isRedirect ) {
$this->insertRedirectEntry( $redirectTitle );
} else {
if ( $this->getTitle()->getNamespace() == NS_FILE ) {
RepoGroup::singleton()->getLocalRepo()->invalidateImageRedirect( $this->getTitle() );
}
- wfProfileOut( __METHOD__ );
return ( $dbw->affectedRows() != 0 );
}
* @return bool
*/
public function updateIfNewerOn( $dbw, $revision ) {
- wfProfileIn( __METHOD__ );
$row = $dbw->selectRow(
array( 'revision', 'page' ),
if ( $row ) {
if ( wfTimestamp( TS_MW, $row->rev_timestamp ) >= $revision->getTimestamp() ) {
- wfProfileOut( __METHOD__ );
return false;
}
$prev = $row->rev_id;
$ret = $this->updateRevisionOn( $dbw, $revision, $prev, $lastRevIsRedirect );
- wfProfileOut( __METHOD__ );
return $ret;
}
*/
public function replaceSectionContent( $sectionId, Content $sectionContent, $sectionTitle = '',
$edittime = null ) {
- wfProfileIn( __METHOD__ );
$baseRevId = null;
if ( $edittime && $sectionId !== 'new' ) {
}
}
- wfProfileOut( __METHOD__ );
return $this->replaceSectionAtRev( $sectionId, $sectionContent, $sectionTitle, $baseRevId );
}
public function replaceSectionAtRev( $sectionId, Content $sectionContent,
$sectionTitle = '', $baseRevId = null
) {
- wfProfileIn( __METHOD__ );
if ( strval( $sectionId ) === '' ) {
// Whole-page edit; let the whole text through
$newContent = $sectionContent;
} else {
if ( !$this->supportsSections() ) {
- wfProfileOut( __METHOD__ );
throw new MWException( "sections not supported for content model " .
$this->getContentHandler()->getModelID() );
}
if ( !$rev ) {
wfDebug( __METHOD__ . " asked for bogus section (page: " .
$this->getId() . "; section: $sectionId)\n" );
- wfProfileOut( __METHOD__ );
return null;
}
if ( !$oldContent ) {
wfDebug( __METHOD__ . ": no page text\n" );
- wfProfileOut( __METHOD__ );
return null;
}
$newContent = $oldContent->replaceSection( $sectionId, $sectionContent, $sectionTitle );
}
- wfProfileOut( __METHOD__ );
return $newContent;
}
throw new MWException( 'Something is trying to edit an article with an empty title' );
}
- wfProfileIn( __METHOD__ );
-
if ( !$content->getContentHandler()->canBeUsedOn( $this->getTitle() ) ) {
- wfProfileOut( __METHOD__ );
return Status::newFatal( 'content-not-allowed-here',
ContentHandler::getLocalizedName( $content->getModel() ),
$this->getTitle()->getPrefixedText() );
$status->fatal( 'edit-hook-aborted' );
}
- wfProfileOut( __METHOD__ );
return $status;
}
wfDebug( __METHOD__ . ": EDIT_UPDATE specified but article doesn't exist\n" );
$status->fatal( 'edit-gone-missing' );
- wfProfileOut( __METHOD__ );
return $status;
} elseif ( !$old_content ) {
// Sanity check for bug 37225
- wfProfileOut( __METHOD__ );
throw new MWException( "Could not find text for current revision {$oldid}." );
}
if ( !$status->isOK() ) {
$dbw->rollback( __METHOD__ );
- wfProfileOut( __METHOD__ );
return $status;
}
$revisionId = $revision->insertOn( $dbw );
$dbw->rollback( __METHOD__ );
- wfProfileOut( __METHOD__ );
return $status;
}
}
}
$user->incEditCount();
- } catch ( MWException $e ) {
+ } catch ( Exception $e ) {
$dbw->rollback( __METHOD__ );
// Question: Would it perhaps be better if this method turned all
// exceptions into $status's?
if ( !$status->isOK() ) {
$dbw->rollback( __METHOD__ );
- wfProfileOut( __METHOD__ );
return $status;
}
$dbw->rollback( __METHOD__ );
$status->fatal( 'edit-already-exists' );
- wfProfileOut( __METHOD__ );
return $status;
}
}
$user->incEditCount();
- } catch ( MWException $e ) {
+ } catch ( Exception $e ) {
$dbw->rollback( __METHOD__ );
throw $e;
}
$user->addAutopromoteOnceGroups( 'onEdit' );
} );
- wfProfileOut( __METHOD__ );
return $status;
}
* Returns a stdClass with source, pst and output members
*
* @param Content $content
- * @param int|null $revid
+ * @param Revision|int|null $revision Revision object. For backwards compatibility, a
+ * revision ID is also accepted, but this is deprecated.
* @param User|null $user
* @param string|null $serialFormat
* @param bool $useCache Check shared prepared edit cache
* @since 1.21
*/
public function prepareContentForEdit(
- Content $content, $revid = null, User $user = null, $serialFormat = null, $useCache = true
+ Content $content, $revision = null, User $user = null, $serialFormat = null, $useCache = true
) {
- global $wgContLang, $wgUser;
+ global $wgContLang, $wgUser, $wgAjaxEditStash;
+
+ if ( is_object( $revision ) ) {
+ $revid = $revision->getId();
+ } else {
+ $revid = $revision;
+ // This code path is deprecated, and nothing is known to
+ // use it, so performance here shouldn't be a worry.
+ if ( $revid !== null ) {
+ $revision = Revision::newFromId( $revid, Revision::READ_LATEST );
+ } else {
+ $revision = null;
+ }
+ }
$user = is_null( $user ) ? $wgUser : $user;
//XXX: check $user->getId() here???
}
// The edit may have already been prepared via api.php?action=stashedit
- $cachedEdit = $useCache
+ $cachedEdit = $useCache && $wgAjaxEditStash
? ApiStashEdit::checkCache( $this->getTitle(), $content, $user )
: false;
if ( $cachedEdit ) {
$edit->output = $cachedEdit->output;
} else {
+ if ( $revision ) {
+ // We get here if vary-revision is set. This means that this page references
+ // itself (such as via self-transclusion). In this case, we need to make sure
+ // that any such self-references refer to the newly-saved revision, and not
+ // to the previous one, which could otherwise happen due to slave lag.
+ $oldCallback = $edit->popts->setCurrentRevisionCallback(
+ function ( $title, $parser = false ) use ( $revision, &$oldCallback ) {
+ if ( $title->equals( $revision->getTitle() ) ) {
+ return $revision;
+ } else {
+ return call_user_func(
+ $oldCallback,
+ $title,
+ $parser
+ );
+ }
+ }
+ );
+ }
$edit->output = $edit->pstContent
? $edit->pstContent->getParserOutput( $this->mTitle, $revid, $edit->popts )
: null;
* - changed: boolean, whether the revision changed the content (default true)
* - created: boolean, whether the revision created the page (default false)
* - moved: boolean, whether the page was moved (default false)
- * - oldcountable: boolean or null (default null):
+ * - oldcountable: boolean, null, or string 'no-change' (default null):
* - boolean: whether the page was counted as an article before that
* revision, only used if changed is true and created is false
- * - null: don't change the article count
+ * - null: if created is false, don't update the article count; if created
+ * is true, do update the article count
+ * - 'no-change': don't update the article count, ever
*/
public function doEditUpdates( Revision $revision, User $user, array $options = array() ) {
global $wgEnableParserCache;
- wfProfileIn( __METHOD__ );
-
$options += array(
'changed' => true,
'created' => false,
// already pre-save transformed once.
if ( !$this->mPreparedEdit || $this->mPreparedEdit->output->getFlag( 'vary-revision' ) ) {
wfDebug( __METHOD__ . ": No prepared edit or vary-revision is set...\n" );
- $editInfo = $this->prepareContentForEdit( $content, $revision->getId(), $user );
+ $editInfo = $this->prepareContentForEdit( $content, $revision, $user );
} else {
wfDebug( __METHOD__ . ": No vary-revision, using prepared edit...\n" );
$editInfo = $this->mPreparedEdit;
Hooks::run( 'ArticleEditUpdates', array( &$this, &$editInfo, $options['changed'] ) );
if ( Hooks::run( 'ArticleEditUpdatesDeleteFromRecentchanges', array( &$this ) ) ) {
- if ( 0 == mt_rand( 0, 99 ) ) {
- // Flush old entries from the `recentchanges` table; we do this on
- // random requests so as to avoid an increase in writes for no good reason
- RecentChange::purgeExpiredChanges();
- }
+ // Flush old entries from the `recentchanges` table
+ JobQueueGroup::singleton()->push( RecentChangesUpdateJob::newPurgeJob() );
}
if ( !$this->exists() ) {
- wfProfileOut( __METHOD__ );
return;
}
$title = $this->mTitle->getPrefixedDBkey();
$shortTitle = $this->mTitle->getDBkey();
- if ( !$options['changed'] && !$options['moved'] ) {
+ if ( $options['oldcountable'] === 'no-change' ||
+ ( !$options['changed'] && !$options['moved'] )
+ ) {
$good = 0;
} elseif ( $options['created'] ) {
$good = (int)$this->isCountable( $editInfo );
self::onArticleEdit( $this->mTitle );
}
- wfProfileOut( __METHOD__ );
}
/**
public function doQuickEditContent( Content $content, User $user, $comment = '', $minor = false,
$serialFormat = null
) {
- wfProfileIn( __METHOD__ );
$serialized = $content->serialize( $serialFormat );
Hooks::run( 'NewRevisionFromEditComplete', array( $this, $revision, false, $user ) );
- wfProfileOut( __METHOD__ );
}
/**
// Get the last edit not by this guy...
// Note: these may not be public values
- $user = intval( $current->getRawUser() );
- $user_text = $dbw->addQuotes( $current->getRawUserText() );
+ $user = intval( $current->getUser( Revision::RAW ) );
+ $user_text = $dbw->addQuotes( $current->getUserText( Revision::RAW ) );
$s = $dbw->selectRow( 'revision',
array( 'rev_id', 'rev_timestamp', 'rev_deleted' ),
array( 'rev_page' => $current->getPage(),
*
* @param Title $title
*/
- public static function onArticleCreate( $title ) {
+ public static function onArticleCreate( Title $title ) {
// Update existence markers on article/talk tabs...
$other = $title->getOtherPage();
*
* @param Title $title
*/
- public static function onArticleDelete( $title ) {
+ public static function onArticleDelete( Title $title ) {
// Update existence markers on article/talk tabs...
$other = $title->getOtherPage();
* Purge caches on page update etc
*
* @param Title $title
- * @todo Verify that $title is always a Title object (and never false or
- * null), add Title hint to parameter $title.
*/
- public static function onArticleEdit( $title ) {
+ public static function onArticleEdit( Title $title ) {
// Invalidate caches of articles which include this page
DeferredUpdates::addHTMLCacheUpdate( $title, 'templatelinks' );