9c04111adc1bb1cfea905af151d42ee959112869
3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
23 * @subpackage SpecialPage
29 function wfSpecialImport( $page = '' ) {
30 global $wgUser, $wgOut, $wgRequest, $wgTitle, $wgImportSources;
31 global $wgImportTargetNamespace;
34 $namespace = $wgImportTargetNamespace;
38 if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
40 $namespace = $wgRequest->getIntOrNull( 'namespace' );
42 switch( $wgRequest->getVal( "source" ) ) {
45 if( $wgUser->isAllowed( 'importupload' ) ) {
46 $source = ImportStreamSource
::newFromUpload( "xmlimport" );
48 return $wgOut->permissionRequired( 'importupload' );
52 $interwiki = $wgRequest->getVal( 'interwiki' );
53 $history = $wgRequest->getCheck( 'interwikiHistory' );
54 $frompage = $wgRequest->getText( "frompage" );
55 $source = ImportStreamSource
::newFromInterwiki(
61 $source = new WikiErrorMsg( "importunknownsource" );
64 if( WikiError
::isError( $source ) ) {
65 $wgOut->addWikiText( wfEscapeWikiText( $source->getMessage() ) );
67 $wgOut->addWikiText( wfMsg( "importstart" ) );
69 $importer = new WikiImporter( $source );
70 if( !is_null( $namespace ) ) {
71 $importer->setTargetNamespace( $namespace );
73 $reporter = new ImportReporter( $importer, $isUpload, $interwiki );
76 $result = $importer->doImport();
79 if( WikiError
::isError( $result ) ) {
80 $wgOut->addWikiText( wfMsg( "importfailed",
81 wfEscapeWikiText( $result->getMessage() ) ) );
84 $wgOut->addWikiText( wfMsg( "importsuccess" ) );
89 $action = $wgTitle->escapeLocalUrl( 'action=submit' );
91 if( $wgUser->isAllowed( 'importupload' ) ) {
92 $wgOut->addWikiText( wfMsg( "importtext" ) );
95 <legend>" . wfMsgHtml('upload') . "</legend>
96 <form enctype='multipart/form-data' method='post' action=\"$action\">
97 <input type='hidden' name='action' value='submit' />
98 <input type='hidden' name='source' value='upload' />
99 <input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
100 <input type='file' name='xmlimport' value='' size='30' />
101 <input type='submit' value=\"" . wfMsgHtml( "uploadbtn" ) . "\" />
106 if( empty( $wgImportSources ) ) {
107 $wgOut->addWikiText( wfMsg( 'importnosources' ) );
111 if( !empty( $wgImportSources ) ) {
114 <legend>" . wfMsgHtml('importinterwiki') . "</legend>
115 <form method='post' action=\"$action\">" .
116 $wgOut->parse( wfMsg( 'import-interwiki-text' ) ) . "
117 <input type='hidden' name='action' value='submit' />
118 <input type='hidden' name='source' value='interwiki' />
122 <select name='interwiki'>" );
123 foreach( $wgImportSources as $prefix ) {
124 $iw = htmlspecialchars( $prefix );
125 $selected = ($interwiki === $prefix) ?
' selected="selected"' : '';
126 $wgOut->addHTML( "<option value=\"$iw\"$selected>$iw</option>\n" );
132 wfInput( 'frompage', 50, $frompage ) .
138 wfCheckLabel( wfMsg( 'import-interwiki-history' ),
139 'interwikiHistory', 'interwikiHistory', $history ) .
145 " . wfMsgHtml( 'import-interwiki-namespace' ) . " " .
146 HTMLnamespaceselector( $namespace, '' ) . "
152 wfSubmitButton( wfMsg( 'import-interwiki-submit' ) ) .
165 class ImportReporter
{
/**
 * Attach this reporter to an importer and record how the import was started.
 *
 * @param object $importer  Importer whose page-out callback is set to reportPage()
 * @param mixed  $upload    Stored as mIsUpload; reportPage() tests it to choose the log entry
 * @param mixed  $interwiki Stored as mInterwiki; reportPage() uses it to build the source link
 */
function __construct( $importer, $upload, $interwiki ) {
	$this->mInterwiki = $interwiki;
	$this->mIsUpload = $upload;
	$this->mPageCount = 0;

	$pageDone = array( $this, 'reportPage' );
	$importer->setPageOutCallback( $pageDone );
}
175 $wgOut->addHtml( "<ul>\n" );
178 function reportPage( $title, $origTitle, $revisionCount, $successCount ) {
179 global $wgOut, $wgLang, $wgContLang;
183 $localCount = $wgLang->formatNum( $successCount );
184 $contentCount = $wgContLang->formatNum( $successCount );
186 $wgOut->addHtml( "<li>" . Linker
::makeKnownLinkObj( $title ) .
188 wfMsgExt( 'import-revision-count', array( 'parsemag', 'escape' ), $localCount ) .
191 if( $successCount > 0 ) {
192 $log = new LogPage( 'import' );
193 if( $this->mIsUpload
) {
194 $detail = wfMsgForContent( 'import-logentry-upload-detail',
196 $log->addEntry( 'upload', $title, $detail );
198 $interwiki = '[[:' . $this->mInterwiki
. ':' .
199 $origTitle->getPrefixedText() . ']]';
200 $detail = wfMsgForContent( 'import-logentry-interwiki-detail',
201 $contentCount, $interwiki );
202 $log->addEntry( 'interwiki', $title, $detail );
205 $comment = $detail; // quick
206 $dbw = wfGetDB( DB_MASTER
);
207 $nullRevision = Revision
::newNullRevision(
208 $dbw, $title->getArticleId(), $comment, true );
209 $nullRevId = $nullRevision->insertOn( $dbw );
215 if( $this->mPageCount
== 0 ) {
216 $wgOut->addHtml( "<li>" . wfMsgHtml( 'importnopages' ) . "</li>\n" );
218 $wgOut->addHtml( "</ul>\n" );
225 * @subpackage SpecialPage
230 var $timestamp = "20010115000000";
/**
 * Set the title this revision belongs to.
 *
 * @param object|null $title  Title object for the page
 * @throws MWException when $title is null or is not an object
 */
function setTitle( $title ) {
	// Reject the two invalid cases first; each has its own message.
	if( is_null( $title ) ) {
		throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
	}
	if( !is_object( $title ) ) {
		throw new MWException( "WikiRevision given non-object title in import." );
	}
	$this->title = $title;
}
247 function setID( $id ) {
/**
 * Set the revision timestamp, converted with wfTimestamp( TS_MW, ... ).
 *
 * @param string $ts  Timestamp as found in the dump, e.g. 2003-08-05T18:30:02Z
 */
function setTimestamp( $ts ) {
	$converted = wfTimestamp( TS_MW, $ts );
	$this->timestamp = $converted;
}
/**
 * Record the author of this revision by user name.
 * Writes the same user_text field as setUserIP().
 *
 * @param string $user
 */
function setUsername( $user ) {
	$this->user_text = $user;
}
/**
 * Record the author of this revision by IP address.
 * Writes the same user_text field as setUsername().
 *
 * @param string $ip
 */
function setUserIP( $ip ) {
	$this->user_text = $ip;
}
264 function setText( $text ) {
/**
 * Set the edit summary of this revision.
 *
 * @param string $text
 */
function setComment( $text ) {
	$this->comment = $text;
}
/**
 * Set the minor-edit flag; the value is coerced to a boolean.
 *
 * @param mixed $minor
 */
function setMinor( $minor ) {
	$this->minor = $minor ? true : false;
}
276 function getTitle() {
/**
 * @return string  The stored timestamp field
 */
function getTimestamp() {
	$stamp = $this->timestamp;
	return $stamp;
}
289 return $this->user_text
;
/**
 * @return string  The stored edit summary
 */
function getComment() {
	$summary = $this->comment;
	return $summary;
}
300 function getMinor() {
304 function importOldRevision() {
305 $fname = "WikiImporter::importOldRevision";
306 $dbw =& wfGetDB( DB_MASTER
);
308 # Sneak a single revision into place
309 $user = User
::newFromName( $this->getUser() );
311 $userId = intval( $user->getId() );
312 $userText = $user->getName();
315 $userText = $this->getUser();
318 // avoid memory leak...?
319 $linkCache =& LinkCache
::singleton();
322 $article = new Article( $this->title
);
323 $pageId = $article->getId();
325 # must create the page...
326 $pageId = $article->insertOn( $dbw );
331 $prior = Revision
::loadFromTimestamp( $dbw, $this->title
, $this->timestamp
);
332 if( !is_null( $prior ) ) {
333 // FIXME: this could fail slightly for multiple matches :P
334 wfDebug( __METHOD__
. ": skipping existing revision for [[" .
335 $this->title
->getPrefixedText() . "]], timestamp " .
336 $this->timestamp
. "\n" );
341 # FIXME: Use original rev_id optionally
342 # FIXME: blah blah blah
344 #if( $numrows > 0 ) {
345 # return wfMsg( "importhistoryconflict" );
349 $revision = new Revision( array(
351 'text' => $this->getText(),
352 'comment' => $this->getComment(),
354 'user_text' => $userText,
355 'timestamp' => $this->timestamp
,
356 'minor_edit' => $this->minor
,
358 $revId = $revision->insertOn( $dbw );
359 $changed = $article->updateIfNewerOn( $dbw, $revision );
362 wfDebug( __METHOD__
. ": running onArticleCreate\n" );
363 Article
::onArticleCreate( $this->title
);
365 wfDebug( __METHOD__
. ": running create updates\n" );
366 $article->createUpdates( $revision );
368 } elseif( $changed ) {
369 wfDebug( __METHOD__
. ": running onArticleEdit\n" );
370 Article
::onArticleEdit( $this->title
);
372 wfDebug( __METHOD__
. ": running edit updates\n" );
373 $article->editUpdates(
389 * @subpackage SpecialPage
393 var $mPageCallback = null;
394 var $mPageOutCallback = null;
395 var $mRevisionCallback = null;
396 var $mTargetNamespace = null;
/**
 * Create an importer that reads from the given source and install
 * importRevision() as the default per-revision callback.
 *
 * Uses __construct rather than a method named after the class: PHP 7
 * deprecates class-named constructors and PHP 8 no longer treats them as
 * constructors, so the source would never be stored there.
 *
 * @param object $source  Import source; doImport() calls readChunk() and atEnd() on it
 */
function __construct( $source ) {
	// A plain $this is enough for a callback array; no reference is needed.
	$this->setRevisionCallback( array( $this, "importRevision" ) );
	$this->mSource = $source;
}

/**
 * Old class-named constructor, kept so existing explicit calls to
 * WikiImporter() (e.g. from a subclass) keep working. Forwards to __construct().
 *
 * @param object $source
 */
function WikiImporter( $source ) {
	$this->__construct( $source );
}
/**
 * Report an XML structure problem to both debug channels.
 * Only logs; nothing visible here raises an exception.
 *
 * @param string $err  Description of the problem
 */
function throwXmlError( $err ) {
	$failure = "FAILURE: $err";
	$this->debug( $failure );

	$logLine = "WikiImporter XML error: $err\n";
	wfDebug( $logLine );
}
411 function doImport() {
412 if( empty( $this->mSource
) ) {
413 return new WikiErrorMsg( "importnotext" );
416 $parser = xml_parser_create( "UTF-8" );
418 # case folding violates XML standard, turn it off
419 xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING
, false );
421 xml_set_object( $parser, $this );
422 xml_set_element_handler( $parser, "in_start", "" );
424 $offset = 0; // for context extraction on error reporting
426 $chunk = $this->mSource
->readChunk();
427 if( !xml_parse( $parser, $chunk, $this->mSource
->atEnd() ) ) {
428 wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
429 return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset );
431 $offset +
= strlen( $chunk );
432 } while( $chunk !== false && !$this->mSource
->atEnd() );
433 xml_parser_free( $parser );
/**
 * Debug hook for the importer. Currently a no-op: the wfDebug() call
 * below is commented out and can be re-enabled for verbose tracing.
 *
 * @param string $data  Message to trace
 */
function debug( $data ) {
	#wfDebug( "IMPORT: $data\n" );
}
442 function notice( $data ) {
443 global $wgCommandLineMode;
444 if( $wgCommandLineMode ) {
448 $wgOut->addHTML( "<li>$data</li>\n" );
453 * Sets the action to perform as each new page in the stream is reached.
454 * @param callable $callback
457 function setPageCallback( $callback ) {
458 $previous = $this->mPageCallback
;
459 $this->mPageCallback
= $callback;
464 * Sets the action to perform as each page in the stream is completed.
465 * Callback accepts the page title (as a Title object), a second object
466 * with the original title form (in case it's been overridden into a
467 * local namespace), and a count of revisions.
469 * @param callable $callback
472 function setPageOutCallback( $callback ) {
473 $previous = $this->mPageOutCallback
;
474 $this->mPageOutCallback
= $callback;
479 * Sets the action to perform as each page revision is reached.
480 * @param callable $callback
483 function setRevisionCallback( $callback ) {
484 $previous = $this->mRevisionCallback
;
485 $this->mRevisionCallback
= $callback;
490 * Set a target namespace to override the defaults
492 function setTargetNamespace( $namespace ) {
493 if( is_null( $namespace ) ) {
494 // Don't override namespaces
495 $this->mTargetNamespace
= null;
496 } elseif( $namespace >= 0 ) {
497 // FIXME: Check for validity
498 $this->mTargetNamespace
= intval( $namespace );
/**
 * Default per-revision callback, performs the import.
 * Runs the revision's importOldRevision() through the master
 * database handle's deadlockLoop().
 *
 * @param WikiRevision $revision
 * @return mixed  Whatever deadlockLoop() returns
 */
function importRevision( &$revision ) {
	$dbw =& wfGetDB( DB_MASTER );
	$task = array( &$revision, 'importOldRevision' );
	return $dbw->deadlockLoop( $task );
}
/**
 * Alternate per-revision callback, for debugging.
 * Sends the revision's fields to debug() instead of importing it.
 *
 * @param WikiRevision $revision
 */
function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	// A revision whose title is not an object is reported as invalid.
	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: "<invalid>";
	$this->debug( "-- Title: " . $titleText );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
/**
 * Notify the callback function when a new <page> is reached.
 * Does nothing when no callable has been registered.
 *
 * @param mixed $title  Passed unchanged to the registered page callback
 */
function pageCallback( $title ) {
	$handler = $this->mPageCallback;
	if( !is_callable( $handler ) ) {
		return;
	}
	call_user_func( $handler, $title );
}
/**
 * Notify the callback function when a </page> is closed.
 * Does nothing when no callable has been registered.
 *
 * @param Title $title
 * @param Title $origTitle
 * @param int $revisionCount
 * @param int $successCount number of revisions for which callback returned true
 */
function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
	$handler = $this->mPageOutCallback;
	if( !is_callable( $handler ) ) {
		return;
	}
	call_user_func( $handler, $title, $origTitle, $revisionCount, $successCount );
}
559 # XML parser callbacks from here out -- beware!
/**
 * Handler that ignores whatever the XML parser gives it. Installed as
 * both element and character-data handler when input should be discarded.
 *
 * @param resource $parser
 * @param string $x
 * @param mixed $y  Optional so the same method fits two- and three-argument handler slots
 */
function donothing( $parser, $x, $y="" ) {
	#$this->debug( "donothing" );
}
/**
 * Start-element handler for the document root: only <mediawiki> is
 * accepted, after which the in_mediawiki/out_mediawiki handlers take over.
 *
 * @param resource $parser
 * @param string $name     Element name
 * @param array  $attribs  Element attributes (unused)
 */
function in_start( $parser, $name, $attribs ) {
	$this->debug( "in_start $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		return;
	}
	return $this->throwXmlError( "Expected <mediawiki>, got <$name>" );
}
/**
 * Start-element handler directly inside <mediawiki>.
 * <siteinfo> and <page> switch to their own handler pairs; <page> also
 * resets the per-page revision counters. Anything else is reported as an error.
 *
 * @param resource $parser
 * @param string $name     Element name
 * @param array  $attribs  Element attributes (unused)
 */
function in_mediawiki( $parser, $name, $attribs ) {
	$this->debug( "in_mediawiki $name" );
	switch( $name ) {
	case 'siteinfo':
		xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
		break;
	case 'page':
		$this->workRevisionCount = 0;
		$this->workSuccessCount = 0;
		xml_set_element_handler( $parser, "in_page", "out_page" );
		break;
	default:
		return $this->throwXmlError( "Expected <page>, got <$name>" );
	}
}
/**
 * End-element handler at <mediawiki> level. Once </mediawiki> is seen,
 * the element handlers are replaced with donothing().
 *
 * @param resource $parser
 * @param string $name  Name of the element being closed
 */
function out_mediawiki( $parser, $name ) {
	$this->debug( "out_mediawiki $name" );
	if( $name == "mediawiki" ) {
		xml_set_element_handler( $parser, "donothing", "donothing" );
		return;
	}
	return $this->throwXmlError( "Expected </mediawiki>, got </$name>" );
}
593 function in_siteinfo( $parser, $name, $attribs ) {
595 $this->debug( "in_siteinfo $name" );
605 return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
/**
 * End-element handler inside <siteinfo>. Closing tags of child elements
 * are ignored; </siteinfo> itself restores the <mediawiki>-level handlers.
 *
 * @param resource $parser
 * @param string $name  Name of the element being closed
 */
function out_siteinfo( $parser, $name ) {
	if( $name != "siteinfo" ) {
		return;
	}
	xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
}
616 function in_page( $parser, $name, $attribs ) {
617 $this->debug( "in_page $name" );
622 $this->appendfield
= $name;
623 $this->appenddata
= "";
624 $this->parenttag
= "page";
625 xml_set_element_handler( $parser, "in_nothing", "out_append" );
626 xml_set_character_data_handler( $parser, "char_append" );
629 $this->workRevision
= new WikiRevision
;
630 $this->workRevision
->setTitle( $this->pageTitle
);
631 $this->workRevisionCount++
;
632 xml_set_element_handler( $parser, "in_revision", "out_revision" );
635 return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
/**
 * End-element handler for </page>: restores the <mediawiki>-level
 * handlers, fires the page-out callback with the finished page's titles
 * and revision counts, then clears all per-page working state.
 *
 * @param resource $parser
 * @param string $name  Name of the element being closed
 */
function out_page( $parser, $name ) {
	$this->debug( "out_page $name" );
	if( $name != "page" ) {
		return $this->throwXmlError( "Expected </page>, got </$name>" );
	}
	xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

	// Report the page before its working state is thrown away.
	$seen = $this->workRevisionCount;
	$imported = $this->workSuccessCount;
	$this->pageOutCallback( $this->pageTitle, $this->origTitle, $seen, $imported );

	// Reset everything that belongs to the page just closed.
	$this->workTitle = $this->workRevision = null;
	$this->workRevisionCount = $this->workSuccessCount = 0;
	$this->pageTitle = $this->origTitle = null;
}
/**
 * Start-element handler used while a text-only field is being read:
 * any child element is reported as an error.
 *
 * @param resource $parser
 * @param string $name     Element name
 * @param array  $attribs  Element attributes (unused)
 */
function in_nothing( $parser, $name, $attribs ) {
	$this->debug( "in_nothing $name" );
	$complaint = "No child elements allowed here; got <$name>";
	return $this->throwXmlError( $complaint );
}
/**
 * Character-data handler: appends each piece of text to the
 * appenddata buffer of the field currently being read.
 *
 * @param resource $parser
 * @param string $data  Piece of character data from the parser
 */
function char_append( $parser, $data ) {
	$this->debug( "char_append '$data'" );
	$this->appenddata = $this->appenddata . $data;
}
665 function out_append( $parser, $name ) {
666 $this->debug( "out_append $name" );
667 if( $name != $this->appendfield
) {
668 return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
670 xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
671 xml_set_character_data_handler( $parser, "donothing" );
673 switch( $this->appendfield
) {
675 $this->workTitle
= $this->appenddata
;
676 $this->origTitle
= Title
::newFromText( $this->workTitle
);
677 if( !is_null( $this->mTargetNamespace
) && !is_null( $this->origTitle
) ) {
678 $this->pageTitle
= Title
::makeTitle( $this->mTargetNamespace
,
679 $this->origTitle
->getDbKey() );
681 $this->pageTitle
= Title
::newFromText( $this->workTitle
);
683 $this->pageCallback( $this->workTitle
);
686 if ( $this->parenttag
== 'revision' ) {
687 $this->workRevision
->setID( $this->appenddata
);
691 $this->workRevision
->setText( $this->appenddata
);
694 $this->workRevision
->setUsername( $this->appenddata
);
697 $this->workRevision
->setUserIP( $this->appenddata
);
700 $this->workRevision
->setTimestamp( $this->appenddata
);
703 $this->workRevision
->setComment( $this->appenddata
);
706 $this->workRevision
->setMinor( true );
709 $this->debug( "Bad append: {$this->appendfield}" );
711 $this->appendfield
= "";
712 $this->appenddata
= "";
715 function in_revision( $parser, $name, $attribs ) {
716 $this->debug( "in_revision $name" );
723 $this->parenttag
= "revision";
724 $this->appendfield
= $name;
725 xml_set_element_handler( $parser, "in_nothing", "out_append" );
726 xml_set_character_data_handler( $parser, "char_append" );
729 xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
732 return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
736 function out_revision( $parser, $name ) {
737 $this->debug( "out_revision $name" );
738 if( $name != "revision" ) {
739 return $this->throwXMLerror( "Expected </revision>, got </$name>" );
741 xml_set_element_handler( $parser, "in_page", "out_page" );
743 $ok = call_user_func_array( $this->mRevisionCallback
,
744 array( &$this->workRevision
, &$this ) );
746 $this->workSuccessCount++
;
750 function in_contributor( $parser, $name, $attribs ) {
751 $this->debug( "in_contributor $name" );
756 $this->parenttag
= "contributor";
757 $this->appendfield
= $name;
758 xml_set_element_handler( $parser, "in_nothing", "out_append" );
759 xml_set_character_data_handler( $parser, "char_append" );
762 $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
/**
 * End-element handler for </contributor>: hands parsing back to the
 * <revision>-level handlers.
 *
 * @param resource $parser
 * @param string $name  Name of the element being closed
 */
function out_contributor( $parser, $name ) {
	$this->debug( "out_contributor $name" );
	if( $name == "contributor" ) {
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
		return;
	}
	return $this->throwXmlError( "Expected </contributor>, got </$name>" );
}
776 /** @package MediaWiki */
777 class ImportStringSource
{
/**
 * Wrap an in-memory string as an import source.
 *
 * Uses __construct rather than a method named after the class: PHP 7
 * deprecates class-named constructors and PHP 8 no longer treats them as
 * constructors, which would leave mString and mRead unset.
 *
 * @param string $string  Complete XML text to import
 */
function __construct( $string ) {
	$this->mString = $string;
	$this->mRead = false; // nothing has been handed out yet
}

/**
 * Old class-named constructor, kept so existing explicit calls keep
 * working. Forwards to __construct().
 *
 * @param string $string
 */
function ImportStringSource( $string ) {
	$this->__construct( $string );
}
787 function readChunk() {
788 if( $this->atEnd() ) {
792 return $this->mString
;
797 /** @package MediaWiki */
798 class ImportStreamSource
{
/**
 * Wrap an open stream handle as an import source.
 *
 * Uses __construct rather than a method named after the class: PHP 7
 * deprecates class-named constructors and PHP 8 no longer treats them as
 * constructors, which would leave mHandle unset.
 *
 * @param resource $handle  Open handle; readChunk() reads from it with fread()
 */
function __construct( $handle ) {
	$this->mHandle = $handle;
}

/**
 * Old class-named constructor, kept so existing explicit calls keep
 * working. Forwards to __construct().
 *
 * @param resource $handle
 */
function ImportStreamSource( $handle ) {
	$this->__construct( $handle );
}
804 return feof( $this->mHandle
);
/**
 * Read the next piece of the stream.
 *
 * @return string|false  Up to 32768 bytes from the handle, as returned by fread()
 */
function readChunk() {
	$chunkSize = 32768;
	return fread( $this->mHandle, $chunkSize );
}
811 function newFromFile( $filename ) {
812 $file = @fopen
( $filename, 'rt' );
814 return new WikiErrorMsg( "importcantopen" );
816 return new ImportStreamSource( $file );
/**
 * Build an import source from a file uploaded with the current request.
 *
 * Declared static because it is invoked as ImportStreamSource::newFromUpload()
 * and never uses $this; calling a non-static method that way is deprecated
 * in PHP 7 and an Error in PHP 8.
 *
 * @param string $fieldname  Name of the upload field in $_FILES
 * @return mixed  Result of newFromFile() on the uploaded temp file, or a
 *                WikiErrorMsg: 'importnofile' when nothing usable was sent or
 *                the temp file is not a genuine upload, 'importuploaderror'
 *                when PHP reported an upload error
 */
static function newFromUpload( $fieldname = "xmlimport" ) {
	// Test with isset()/empty() instead of binding a reference into $_FILES:
	// a reference would silently create a null entry for a missing field,
	// and a bare read of a missing 'name' key would raise a notice.
	if( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
		return new WikiErrorMsg( 'importnofile' );
	}
	$upload = $_FILES[$fieldname];

	if( !empty( $upload['error'] ) ) {
		return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
	}

	// Only open the temp file if PHP confirms it came from this upload.
	$fname = $upload['tmp_name'];
	if( !is_uploaded_file( $fname ) ) {
		return new WikiErrorMsg( 'importnofile' );
	}
	return ImportStreamSource::newFromFile( $fname );
}
836 function newFromURL( $url ) {
837 wfDebug( __METHOD__
. ": opening $url\n" );
838 # fopen-wrappers are normally turned off for security.
839 ini_set( "allow_url_fopen", true );
840 $ret = ImportStreamSource
::newFromFile( $url );
841 ini_set( "allow_url_fopen", false );
/**
 * Build an import source that fetches a page from another wiki through
 * that wiki's Special:Export.
 *
 * Declared static because it is invoked as ImportStreamSource::newFromInterwiki()
 * and never uses $this; calling a non-static method that way is deprecated
 * in PHP 7 and an Error in PHP 8.
 *
 * @param string $interwiki  Interwiki prefix of the source wiki
 * @param string $page       Title of the page to fetch
 * @param bool   $history    When true, 'history=1' is added to the export URL
 * @return mixed  Result of newFromURL(), or WikiErrorMsg 'importbadinterwiki'
 *                when the prefix does not resolve to an interwiki title
 */
static function newFromInterwiki( $interwiki, $page, $history=false ) {
	$link = Title::newFromText( "$interwiki:Special:Export/$page" );

	// An unparsable title, or one with no interwiki part, cannot point at another wiki.
	if( is_null( $link ) || $link->getInterwiki() == '' ) {
		return new WikiErrorMsg( 'importbadinterwiki' );
	}

	$params = $history ? 'history=1' : '';
	$url = $link->getFullUrl( $params );
	return ImportStreamSource::newFromURL( $url );
}