private $reader = null;
private $foreignNamespaces = null;
private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
- private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
+ private $mSiteInfoCallback, $mPageOutCallback;
private $mNoticeCallback, $mDebug;
private $mImportUploads, $mImageBasePath;
private $mNoUpdates = false;
* Creates a WikiImporter drawing from the source provided
* @param ImportSource $source
* @param Config $config
+ * @throws Exception
*/
function __construct( ImportSource $source, Config $config = null ) {
+ if ( !class_exists( 'XMLReader' ) ) {
+ throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
+ }
+
$this->reader = new XMLReader();
if ( !$config ) {
wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
}
$id = UploadSourceAdapter::registerSource( $source );
+
+ // Enable the entity loader, as it is needed for loading external URLs via
+ // XMLReader::open (T86036)
+ $oldDisable = libxml_disable_entity_loader( false );
if ( defined( 'LIBXML_PARSEHUGE' ) ) {
- $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
+ $status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
} else {
- $this->reader->open( "uploadsource://$id" );
+ $status = $this->reader->open( "uploadsource://$id" );
}
+ if ( !$status ) {
+ $error = libxml_get_last_error();
+ libxml_disable_entity_loader( $oldDisable );
+ throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
+ $error->message );
+ }
+ libxml_disable_entity_loader( $oldDisable );
// Default callbacks
$this->setPageCallback( array( $this, 'beforeImportPage' ) );
public function setTargetNamespace( $namespace ) {
if ( is_null( $namespace ) ) {
// Don't override namespaces
- $this->mTargetNamespace = null;
$this->setImportTitleFactory( new NaiveImportTitleFactory() );
return true;
} elseif (
MWNamespace::exists( intval( $namespace ) )
) {
$namespace = intval( $namespace );
- $this->mTargetNamespace = $namespace;
$this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
return true;
} else {
// No rootpage
$this->setImportTitleFactory( new NaiveImportTitleFactory() );
} elseif ( $rootpage !== '' ) {
- $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
- $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace )
- ? $this->mTargetNamespace
- : NS_MAIN
- );
+ $rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
+ $title = Title::newFromText( $rootpage );
if ( !$title || $title->isExternal() ) {
$status->fatal( 'import-rootpage-invalid' );
$page = WikiPage::factory( $title );
$page->loadPageData( 'fromdbmaster' );
$content = $page->getContent();
- $editInfo = $page->prepareContentForEdit( $content );
- $countKey = 'title_' . $title->getPrefixedText();
- $countable = $page->isCountable( $editInfo );
- if ( array_key_exists( $countKey, $this->countableCache ) &&
- $countable != $this->countableCache[ $countKey ] ) {
- DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
- 'articles' => ( (int)$countable - (int)$this->countableCache[ $countKey ] )
- ) ) );
+ if ( $content === null ) {
+ wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
+ ' because WikiPage::getContent() returned null' );
+ } else {
+ $editInfo = $page->prepareContentForEdit( $content );
+ $countKey = 'title_' . $title->getPrefixedText();
+ $countable = $page->isCountable( $editInfo );
+ if ( array_key_exists( $countKey, $this->countableCache ) &&
+ $countable != $this->countableCache[$countKey] ) {
+ DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
+ 'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
+ ) ) );
+ }
}
$args = func_get_args();
$oldDisable = libxml_disable_entity_loader( true );
$this->reader->read();
- if ( $this->reader->name != 'mediawiki' ) {
+ if ( $this->reader->localName != 'mediawiki' ) {
libxml_disable_entity_loader( $oldDisable );
throw new MWException( "Expected <mediawiki> tag, got " .
- $this->reader->name );
+ $this->reader->localName );
}
$this->debug( "<mediawiki> tag is correct." );
$rethrow = null;
try {
while ( $keepReading ) {
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
$type = $this->reader->nodeType;
if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
while ( $this->reader->read() ) {
if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
- $this->reader->name == 'siteinfo' ) {
+ $this->reader->localName == 'siteinfo' ) {
break;
}
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
if ( $tag == 'namespace' ) {
- $this->foreignNamespaces[ $this->nodeAttribute( 'key' ) ] =
+ $this->foreignNamespaces[$this->nodeAttribute( 'key' )] =
$this->nodeContents();
} elseif ( in_array( $tag, $normalFields ) ) {
$siteInfo[$tag] = $this->nodeContents();
while ( $this->reader->read() ) {
if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
- $this->reader->name == 'logitem' ) {
+ $this->reader->localName == 'logitem' ) {
break;
}
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
if ( !Hooks::run( 'ImportHandleLogItemXMLTag', array(
$this, $logInfo
while ( $skip ? $this->reader->next() : $this->reader->read() ) {
if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
- $this->reader->name == 'page' ) {
+ $this->reader->localName == 'page' ) {
break;
}
$skip = false;
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
if ( $badTitle ) {
// The title is invalid, bail out of this page
$title = $this->processTitle( $pageInfo['title'],
isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );
- if ( !$title ) {
+ // $title is either an array of two titles or false.
+ if ( is_array( $title ) ) {
+ $this->pageCallback( $title );
+ list( $pageInfo['_title'], $foreignTitle ) = $title;
+ } else {
$badTitle = true;
$skip = true;
}
-
- $this->pageCallback( $title );
- list( $pageInfo['_title'], $foreignTitle ) = $title;
}
if ( $title ) {
}
}
- $this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
+ // @note $pageInfo['_title'] is only set if a valid $title is processed
+ // above with no error. If we have a valid $title, then pageCallback is
+ // called above, $pageInfo['_title'] is set and we do pageOutCallback here.
+ // If $pageInfo['_title'] is not set, then $foreignTitle is also not
+ // set since they both come from $title above.
+ if ( array_key_exists( '_title', $pageInfo ) ) {
+ $this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
$pageInfo['revisionCount'],
$pageInfo['successfulRevisionCount'],
$pageInfo );
+ }
}
/**
while ( $skip ? $this->reader->next() : $this->reader->read() ) {
if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
- $this->reader->name == 'revision' ) {
+ $this->reader->localName == 'revision' ) {
break;
}
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
if ( !Hooks::run( 'ImportHandleRevisionXMLTag', array(
$this, $pageInfo, $revisionInfo
while ( $skip ? $this->reader->next() : $this->reader->read() ) {
if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
- $this->reader->name == 'upload' ) {
+ $this->reader->localName == 'upload' ) {
break;
}
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
if ( !Hooks::run( 'ImportHandleUploadXMLTag', array(
$this, $pageInfo
while ( $this->reader->read() ) {
if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
- $this->reader->name == 'contributor' ) {
+ $this->reader->localName == 'contributor' ) {
break;
}
- $tag = $this->reader->name;
+ $tag = $this->reader->localName;
if ( in_array( $tag, $fields ) ) {
$info[$tag] = $this->nodeContents();
}
// avoid memory leak...?
- $linkCache = LinkCache::singleton();
- $linkCache->clear();
+ Title::clearCaches();
$page = WikiPage::factory( $this->title );
$page->loadPageData( 'fromdbmaster' );
'page' => $pageId,
'content_model' => $this->getModel(),
'content_format' => $this->getFormat(),
- //XXX: just set 'content' => $this->getContent()?
+ // XXX: just set 'content' => $this->getContent()?
'text' => $this->getContent()->serialize( $this->getFormat() ),
'comment' => $this->getComment(),
'user' => $userId,
'log_namespace' => $this->getTitle()->getNamespace(),
'log_title' => $this->getTitle()->getDBkey(),
'log_comment' => $this->getComment(),
- #'log_user_text' => $this->user_text,
+ # 'log_user_text' => $this->user_text,
'log_params' => $this->params ),
__METHOD__
);
'log_action' => $this->action,
'log_timestamp' => $dbw->timestamp( $this->timestamp ),
'log_user' => User::idFromName( $this->user_text ),
- #'log_user_text' => $this->user_text,
+ # 'log_user_text' => $this->user_text,
'log_namespace' => $this->getTitle()->getNamespace(),
'log_title' => $this->getTitle()->getDBkey(),
'log_comment' => $this->getComment(),
* @return Status
*/
static function newFromFile( $filename ) {
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$file = fopen( $filename, 'rt' );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
if ( !$file ) {
return Status::newFatal( "importcantopen" );
}