<?php
/**
* MediaWiki page data importer
- * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
+ *
+ * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
* http://www.mediawiki.org/
*
* This program is free software; you can redistribute it and/or modify
*/
/**
- *
+ * Represents a single imported item (a page revision, log item or file upload) and carries the methods that import it.
* @ingroup SpecialPage
*/
class WikiRevision {
} elseif( $changed ) {
wfDebug( __METHOD__ . ": running onArticleEdit\n" );
- Article::onArticleEdit( $this->title, 'skiptransclusions' ); // leave templatelinks for editUpdates()
+ Article::onArticleEdit( $this->title );
wfDebug( __METHOD__ . ": running edit updates\n" );
$article->editUpdates(
$this->timestamp . "\n" );
return false;
}
- $log_id = $dbw->nextSequenceValue( 'log_log_id_seq' );
+ $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
$data = array(
'log_id' => $log_id,
'log_type' => $this->type,
$resultDetails = array( 'internal' => $status->getWikiText() );
*/
- // @fixme upload() uses $wgUser, which is wrong here
+ // @todo Fixme: upload() uses $wgUser, which is wrong here
// it may also create a page without our desire, also wrong potentially.
// and, it will record a *current* upload, but we might want an archive version here
$file = wfLocalFile( $this->getTitle() );
if( !$file ) {
- var_dump( $file );
wfDebug( "IMPORT: Bad file. :(\n" );
return false;
}
return false;
}
- // @fixme!
+ // @todo Fixme!
$src = $this->getSrc();
$data = Http::get( $src );
if( !$data ) {
}
-/**
- * implements Special:Import
- * @ingroup SpecialPage
- */
-class WikiImporter {
- var $mDebug = false;
- var $mSource = null;
- var $mPageCallback = null;
- var $mPageOutCallback = null;
- var $mRevisionCallback = null;
- var $mLogItemCallback = null;
- var $mUploadCallback = null;
- var $mTargetNamespace = null;
- var $mXmlNamespace = false;
- var $lastfield;
- var $tagStack = array();
-
- function __construct( $source ) {
- $this->setRevisionCallback( array( $this, "importRevision" ) );
- $this->setUploadCallback( array( $this, "importUpload" ) );
- $this->setLogItemCallback( array( $this, "importLogItem" ) );
- $this->mSource = $source;
- }
-
- function throwXmlError( $err ) {
- $this->debug( "FAILURE: $err" );
- wfDebug( "WikiImporter XML error: $err\n" );
- }
-
- function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
- if( preg_match( '/www.mediawiki.org/',$prefix ) ) {
- $prefix = str_replace( '/','\/',$prefix );
- $this->mXmlNamespace='/^'.$prefix.':/';
- }
- }
-
- function stripXmlNamespace($name) {
- if( $this->mXmlNamespace ) {
- return(preg_replace($this->mXmlNamespace,'',$name,1));
- }
- else {
- return($name);
- }
- }
-
- # --------------
-
- function doImport() {
- if( empty( $this->mSource ) ) {
- return new WikiErrorMsg( "importnotext" );
- }
-
- $parser = xml_parser_create_ns( "UTF-8" );
-
- # case folding violates XML standard, turn it off
- xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
-
- xml_set_object( $parser, $this );
- xml_set_element_handler( $parser, "in_start", "" );
- xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );
-
- $offset = 0; // for context extraction on error reporting
- do {
- $chunk = $this->mSource->readChunk();
- if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
- wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
- return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
- }
- $offset += strlen( $chunk );
- } while( $chunk !== false && !$this->mSource->atEnd() );
- xml_parser_free( $parser );
-
- return true;
- }
-
- function debug( $data ) {
- if( $this->mDebug ) {
- wfDebug( "IMPORT: $data\n" );
- }
- }
-
- function notice( $data ) {
- global $wgCommandLineMode;
- if( $wgCommandLineMode ) {
- print "$data\n";
- } else {
- global $wgOut;
- $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
- }
- }
-
- /**
- * Set debug mode...
- */
- function setDebug( $debug ) {
- $this->mDebug = $debug;
- }
-
- /**
- * Sets the action to perform as each new page in the stream is reached.
- * @param $callback callback
- * @return callback
- */
- function setPageCallback( $callback ) {
- $previous = $this->mPageCallback;
- $this->mPageCallback = $callback;
- return $previous;
- }
-
- /**
- * Sets the action to perform as each page in the stream is completed.
- * Callback accepts the page title (as a Title object), a second object
- * with the original title form (in case it's been overridden into a
- * local namespace), and a count of revisions.
- *
- * @param $callback callback
- * @return callback
- */
- function setPageOutCallback( $callback ) {
- $previous = $this->mPageOutCallback;
- $this->mPageOutCallback = $callback;
- return $previous;
- }
-
- /**
- * Sets the action to perform as each page revision is reached.
- * @param $callback callback
- * @return callback
- */
- function setRevisionCallback( $callback ) {
- $previous = $this->mRevisionCallback;
- $this->mRevisionCallback = $callback;
- return $previous;
- }
-
- /**
- * Sets the action to perform as each file upload version is reached.
- * @param $callback callback
- * @return callback
- */
- function setUploadCallback( $callback ) {
- $previous = $this->mUploadCallback;
- $this->mUploadCallback = $callback;
- return $previous;
- }
-
- /**
- * Sets the action to perform as each log item reached.
- * @param $callback callback
- * @return callback
- */
- function setLogItemCallback( $callback ) {
- $previous = $this->mLogItemCallback;
- $this->mLogItemCallback = $callback;
- return $previous;
- }
-
- /**
- * Set a target namespace to override the defaults
- */
- function setTargetNamespace( $namespace ) {
- if( is_null( $namespace ) ) {
- // Don't override namespaces
- $this->mTargetNamespace = null;
- } elseif( $namespace >= 0 ) {
- // FIXME: Check for validity
- $this->mTargetNamespace = intval( $namespace );
- } else {
- return false;
- }
- }
-
- /**
- * Default per-revision callback, performs the import.
- * @param $revision WikiRevision
- * @private
- */
- function importRevision( $revision ) {
- $dbw = wfGetDB( DB_MASTER );
- return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
- }
-
- /**
- * Default per-revision callback, performs the import.
- * @param $revision WikiRevision
- * @private
- */
- function importLogItem( $rev ) {
- $dbw = wfGetDB( DB_MASTER );
- return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
- }
-
- /**
- * Dummy for now...
- */
- function importUpload( $revision ) {
- //$dbw = wfGetDB( DB_MASTER );
- //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
- return false;
- }
-
- /**
- * Alternate per-revision callback, for debugging.
- * @param $revision WikiRevision
- * @private
- */
- function debugRevisionHandler( &$revision ) {
- $this->debug( "Got revision:" );
- if( is_object( $revision->title ) ) {
- $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
- } else {
- $this->debug( "-- Title: <invalid>" );
- }
- $this->debug( "-- User: " . $revision->user_text );
- $this->debug( "-- Timestamp: " . $revision->timestamp );
- $this->debug( "-- Comment: " . $revision->comment );
- $this->debug( "-- Text: " . $revision->text );
- }
-
- /**
- * Notify the callback function when a new <page> is reached.
- * @param $title Title
- * @private
- */
- function pageCallback( $title ) {
- if( is_callable( $this->mPageCallback ) ) {
- call_user_func( $this->mPageCallback, $title );
- }
- }
-
- /**
- * Notify the callback function when a </page> is closed.
- * @param $title Title
- * @param $origTitle Title
- * @param $revisionCount int
- * @param $successCount Int: number of revisions for which callback returned true
- * @private
- */
- function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
- if( is_callable( $this->mPageOutCallback ) ) {
- call_user_func( $this->mPageOutCallback, $title, $origTitle,
- $revisionCount, $successCount );
- }
- }
-
- # XML parser callbacks from here out -- beware!
- function donothing( $parser, $x, $y="" ) {
- #$this->debug( "donothing" );
- }
-
- function in_start( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_start $name" );
- if( $name != "mediawiki" ) {
- return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
- }
- xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
- }
-
- function in_mediawiki( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_mediawiki $name" );
- if( $name == 'siteinfo' ) {
- xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
- } elseif( $name == 'page' ) {
- $this->push( $name );
- $this->workRevisionCount = 0;
- $this->workSuccessCount = 0;
- $this->uploadCount = 0;
- $this->uploadSuccessCount = 0;
- xml_set_element_handler( $parser, "in_page", "out_page" );
- } elseif( $name == 'logitem' ) {
- $this->push( $name );
- $this->workRevision = new WikiRevision;
- xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
- } else {
- return $this->throwXMLerror( "Expected <page>, got <$name>" );
- }
- }
- function out_mediawiki( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_mediawiki $name" );
- if( $name != "mediawiki" ) {
- return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
- }
- xml_set_element_handler( $parser, "donothing", "donothing" );
- }
-
-
- function in_siteinfo( $parser, $name, $attribs ) {
- // no-ops for now
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_siteinfo $name" );
- switch( $name ) {
- case "sitename":
- case "base":
- case "generator":
- case "case":
- case "namespaces":
- case "namespace":
- break;
- default:
- return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
- }
- }
-
- function out_siteinfo( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- if( $name == "siteinfo" ) {
- xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
- }
- }
-
-
- function in_page( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_page $name" );
- switch( $name ) {
- case "id":
- case "title":
- case "restrictions":
- $this->appendfield = $name;
- $this->appenddata = "";
- xml_set_element_handler( $parser, "in_nothing", "out_append" );
- xml_set_character_data_handler( $parser, "char_append" );
- break;
- case "revision":
- $this->push( "revision" );
- if( is_object( $this->pageTitle ) ) {
- $this->workRevision = new WikiRevision;
- $this->workRevision->setTitle( $this->pageTitle );
- $this->workRevisionCount++;
- } else {
- // Skipping items due to invalid page title
- $this->workRevision = null;
- }
- xml_set_element_handler( $parser, "in_revision", "out_revision" );
- break;
- case "upload":
- $this->push( "upload" );
- if( is_object( $this->pageTitle ) ) {
- $this->workRevision = new WikiRevision;
- $this->workRevision->setTitle( $this->pageTitle );
- $this->uploadCount++;
- } else {
- // Skipping items due to invalid page title
- $this->workRevision = null;
- }
- xml_set_element_handler( $parser, "in_upload", "out_upload" );
- break;
- default:
- return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
- }
- }
-
- function out_page( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_page $name" );
- $this->pop();
- if( $name != "page" ) {
- return $this->throwXMLerror( "Expected </page>, got </$name>" );
- }
- xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
-
- $this->pageOutCallback( $this->pageTitle, $this->origTitle,
- $this->workRevisionCount, $this->workSuccessCount );
-
- $this->workTitle = null;
- $this->workRevision = null;
- $this->workRevisionCount = 0;
- $this->workSuccessCount = 0;
- $this->pageTitle = null;
- $this->origTitle = null;
- }
-
- function in_nothing( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_nothing $name" );
- return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
- }
-
- function char_append( $parser, $data ) {
- $this->debug( "char_append '$data'" );
- $this->appenddata .= $data;
- }
-
- function out_append( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_append $name" );
- if( $name != $this->appendfield ) {
- return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
- }
-
- switch( $this->appendfield ) {
- case "title":
- $this->workTitle = $this->appenddata;
- $this->origTitle = Title::newFromText( $this->workTitle );
- if( !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle ) ) {
- $this->pageTitle = Title::makeTitle( $this->mTargetNamespace,
- $this->origTitle->getDBkey() );
- } else {
- $this->pageTitle = Title::newFromText( $this->workTitle );
- }
- if( is_null( $this->pageTitle ) ) {
- // Invalid page title? Ignore the page
- $this->notice( "Skipping invalid page title '$this->workTitle'" );
- } else {
- $this->pageCallback( $this->workTitle );
- }
- break;
- case "id":
- if ( $this->parentTag() == 'revision' || $this->parentTag() == 'logitem' ) {
- if( $this->workRevision )
- $this->workRevision->setID( $this->appenddata );
- }
- break;
- case "text":
- if( $this->workRevision )
- $this->workRevision->setText( $this->appenddata );
- break;
- case "username":
- if( $this->workRevision )
- $this->workRevision->setUsername( $this->appenddata );
- break;
- case "ip":
- if( $this->workRevision )
- $this->workRevision->setUserIP( $this->appenddata );
- break;
- case "timestamp":
- if( $this->workRevision )
- $this->workRevision->setTimestamp( $this->appenddata );
- break;
- case "comment":
- if( $this->workRevision )
- $this->workRevision->setComment( $this->appenddata );
- break;
- case "type":
- if( $this->workRevision )
- $this->workRevision->setType( $this->appenddata );
- break;
- case "action":
- if( $this->workRevision )
- $this->workRevision->setAction( $this->appenddata );
- break;
- case "logtitle":
- if( $this->workRevision )
- $this->workRevision->setTitle( Title::newFromText( $this->appenddata ) );
- break;
- case "params":
- if( $this->workRevision )
- $this->workRevision->setParams( $this->appenddata );
- break;
- case "minor":
- if( $this->workRevision )
- $this->workRevision->setMinor( true );
- break;
- case "filename":
- if( $this->workRevision )
- $this->workRevision->setFilename( $this->appenddata );
- break;
- case "src":
- if( $this->workRevision )
- $this->workRevision->setSrc( $this->appenddata );
- break;
- case "size":
- if( $this->workRevision )
- $this->workRevision->setSize( intval( $this->appenddata ) );
- break;
- default:
- $this->debug( "Bad append: {$this->appendfield}" );
- }
- $this->appendfield = "";
- $this->appenddata = "";
-
- $parent = $this->parentTag();
- xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
- xml_set_character_data_handler( $parser, "donothing" );
- }
-
- function in_revision( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_revision $name" );
- switch( $name ) {
- case "id":
- case "timestamp":
- case "comment":
- case "minor":
- case "text":
- $this->appendfield = $name;
- xml_set_element_handler( $parser, "in_nothing", "out_append" );
- xml_set_character_data_handler( $parser, "char_append" );
- break;
- case "contributor":
- $this->push( "contributor" );
- xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
- break;
- default:
- return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
- }
- }
-
- function out_revision( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_revision $name" );
- $this->pop();
- if( $name != "revision" ) {
- return $this->throwXMLerror( "Expected </revision>, got </$name>" );
- }
- xml_set_element_handler( $parser, "in_page", "out_page" );
-
- if( $this->workRevision ) {
- $ok = call_user_func_array( $this->mRevisionCallback,
- array( $this->workRevision, $this ) );
- if( $ok ) {
- $this->workSuccessCount++;
- }
- }
- }
-
- function in_logitem( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_logitem $name" );
- switch( $name ) {
- case "id":
- case "timestamp":
- case "comment":
- case "type":
- case "action":
- case "logtitle":
- case "params":
- $this->appendfield = $name;
- xml_set_element_handler( $parser, "in_nothing", "out_append" );
- xml_set_character_data_handler( $parser, "char_append" );
- break;
- case "contributor":
- $this->push( "contributor" );
- xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
- break;
- default:
- return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
- }
- }
-
- function out_logitem( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_logitem $name" );
- $this->pop();
- if( $name != "logitem" ) {
- return $this->throwXMLerror( "Expected </logitem>, got </$name>" );
- }
- xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
-
- if( $this->workRevision ) {
- $ok = call_user_func_array( $this->mLogItemCallback,
- array( $this->workRevision, $this ) );
- if( $ok ) {
- $this->workSuccessCount++;
- }
- }
- }
-
- function in_upload( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_upload $name" );
- switch( $name ) {
- case "timestamp":
- case "comment":
- case "text":
- case "filename":
- case "src":
- case "size":
- $this->appendfield = $name;
- xml_set_element_handler( $parser, "in_nothing", "out_append" );
- xml_set_character_data_handler( $parser, "char_append" );
- break;
- case "contributor":
- $this->push( "contributor" );
- xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
- break;
- default:
- return $this->throwXMLerror( "Element <$name> not allowed in an <upload>." );
- }
- }
-
- function out_upload( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_revision $name" );
- $this->pop();
- if( $name != "upload" ) {
- return $this->throwXMLerror( "Expected </upload>, got </$name>" );
- }
- xml_set_element_handler( $parser, "in_page", "out_page" );
-
- if( $this->workRevision ) {
- $ok = call_user_func_array( $this->mUploadCallback,
- array( $this->workRevision, $this ) );
- if( $ok ) {
- $this->workUploadSuccessCount++;
- }
- }
- }
-
- function in_contributor( $parser, $name, $attribs ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "in_contributor $name" );
- switch( $name ) {
- case "username":
- case "ip":
- case "id":
- $this->appendfield = $name;
- xml_set_element_handler( $parser, "in_nothing", "out_append" );
- xml_set_character_data_handler( $parser, "char_append" );
- break;
- default:
- $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
- }
- }
-
- function out_contributor( $parser, $name ) {
- $name = $this->stripXmlNamespace($name);
- $this->debug( "out_contributor $name" );
- $this->pop();
- if( $name != "contributor" ) {
- return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
- }
- $parent = $this->parentTag();
- xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
- }
-
- private function push( $name ) {
- array_push( $this->tagStack, $name );
- $this->debug( "PUSH $name" );
- }
-
- private function pop() {
- $name = array_pop( $this->tagStack );
- $this->debug( "POP $name" );
- return $name;
- }
-
- private function parentTag() {
- $name = $this->tagStack[count( $this->tagStack ) - 1];
- $this->debug( "PARENT $name" );
- return $name;
- }
-
-}
-
/**
* @todo document (e.g. one-sentence class description).
* @ingroup SpecialPage
static function newFromFile( $filename ) {
$file = @fopen( $filename, 'rt' );
if( !$file ) {
- return new WikiErrorMsg( "importcantopen" );
+ return Status::newFatal( "importcantopen" );
}
- return new ImportStreamSource( $file );
+ return Status::newGood( new ImportStreamSource( $file ) );
}
static function newFromUpload( $fieldname = "xmlimport" ) {
$upload =& $_FILES[$fieldname];
if( !isset( $upload ) || !$upload['name'] ) {
- return new WikiErrorMsg( 'importnofile' );
+ return Status::newFatal( 'importnofile' );
}
if( !empty( $upload['error'] ) ) {
switch($upload['error']){
case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
- return new WikiErrorMsg( 'importuploaderrorsize' );
+ return Status::newFatal( 'importuploaderrorsize' );
case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
- return new WikiErrorMsg( 'importuploaderrorsize' );
+ return Status::newFatal( 'importuploaderrorsize' );
case 3: # The uploaded file was only partially uploaded
- return new WikiErrorMsg( 'importuploaderrorpartial' );
- case 6: #Missing a temporary folder. Introduced in PHP 4.3.10 and PHP 5.0.3.
- return new WikiErrorMsg( 'importuploaderrortemp' );
- # case else: # Currently impossible
+ return Status::newFatal( 'importuploaderrorpartial' );
+ case 6: #Missing a temporary folder.
+ return Status::newFatal( 'importuploaderrortemp' );
+ # No default case: error codes other than 1, 2, 3 and 6 (e.g. 7, 8) are not reported here.
}
}
if( is_uploaded_file( $fname ) ) {
return ImportStreamSource::newFromFile( $fname );
} else {
- return new WikiErrorMsg( 'importnofile' );
+ return Status::newFatal( 'importnofile' );
}
}
fwrite( $file, $data );
fflush( $file );
fseek( $file, 0 );
- return new ImportStreamSource( $file );
+ return Status::newGood( new ImportStreamSource( $file ) );
} else {
- return new WikiErrorMsg( 'importcantopen' );
+ return Status::newFatal( 'importcantopen' );
}
}
- public static function newFromInterwiki( $interwiki, $page, $history=false ) {
+ public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
if( $page == '' ) {
- return new WikiErrorMsg( 'import-noarticle' );
+ return Status::newFatal( 'import-noarticle' );
}
$link = Title::newFromText( "$interwiki:Special:Export/$page" );
if( is_null( $link ) || $link->getInterwiki() == '' ) {
- return new WikiErrorMsg( 'importbadinterwiki' );
+ return Status::newFatal( 'importbadinterwiki' );
} else {
- $params = $history ? 'history=1' : '';
+ $params = array();
+ if ( $history ) $params['history'] = 1;
+ if ( $templates ) $params['templates'] = 1;
+ if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth;
$url = $link->getFullUrl( $params );
# For interwikis, use POST to avoid redirects.
return ImportStreamSource::newFromURL( $url, "POST" );