From: Brion Vibber Date: Tue, 5 Jul 2005 03:16:56 +0000 (+0000) Subject: Some code cleanup on Special:Import, add initial version of command-line X-Git-Tag: 1.5.0beta2~5 X-Git-Url: http://git.cyclocoop.org/%22.%28%24lien.?a=commitdiff_plain;h=6a9585b4aaed16740183ab3fd049da3016c6ecd9;p=lhc%2Fweb%2Fwiklou.git Some code cleanup on Special:Import, add initial version of command-line dump importer script (importDump.php). Can read from stdin or from file on command-line (file can be auto-decompressed if gzip and zlib support). Still needs some work, but basically functions. --- diff --git a/includes/SpecialExport.php b/includes/SpecialExport.php index b04edb0123..0504c05573 100644 --- a/includes/SpecialExport.php +++ b/includes/SpecialExport.php @@ -121,7 +121,7 @@ class WikiExporter { * * @param mixed $callback */ - function setRevCallback( $callback ) { + function setRevisionCallback( $callback ) { $this->revCallback = $callback; } diff --git a/includes/SpecialImport.php b/includes/SpecialImport.php index 820c08dcad..ae3ca83be5 100644 --- a/includes/SpecialImport.php +++ b/includes/SpecialImport.php @@ -38,36 +38,34 @@ function wfSpecialImport( $page = '' ) { ### if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') { - $importer = new WikiImporter(); - switch( $wgRequest->getVal( "source" ) ) { case "upload": if( $wgUser->isAllowed( 'importupload' ) ) { - $result = $importer->setupFromUpload( "xmlimport" ); + $source = ImportStreamSource::newFromUpload( "xmlimport" ); } else { return $wgOut->permissionRequired( 'importupload' ); } break; case "interwiki": - $result = $importer->setupFromInterwiki( + $source = ImportStreamSource::newFromInterwiki( $wgRequest->getVal( "interwiki" ), $wgRequest->getText( "frompage" ) ); break; default: - $result = new WikiError( "Unknown import source type" ); + $source = new WikiError( "Unknown import source type" ); } - if( WikiError::isError( $result ) ) { - $wgOut->addWikiText( wfEscapeWikiText( 
$result->getMessage() ) ); + if( WikiError::isError( $source ) ) { + $wgOut->addWikiText( wfEscapeWikiText( $source->getMessage() ) ); } else { - $importer->setRevisionHandler( "wfImportOldRevision" ); + $importer = new WikiImporter( $source ); $result = $importer->doImport(); if( WikiError::isError( $result ) ) { - $wgOut->addHTML( "

" . wfMsg( "importfailed", - htmlspecialchars( $result->getMessage() ) ) . "

" ); + $wgOut->addWikiText( wfMsg( "importfailed", + wfEscapeWikiText( $result->getMessage() ) ) ); } else { # Success! - $wgOut->addHTML( "

" . wfMsg( "importsuccess" ) . "

" ); + $wgOut->addWikiText( wfMsg( "importsuccess" ) ); } } } @@ -78,13 +76,13 @@ function wfSpecialImport( $page = '' ) { $wgOut->addWikiText( wfMsg( "importtext" ) ); $wgOut->addHTML( "
- " . wfMsg('upload') . " + " . wfMsgHtml('upload') . "
- +
" ); @@ -97,7 +95,7 @@ function wfSpecialImport( $page = '' ) { if( !empty( $wgImportSources ) ) { $wgOut->addHTML( "
- " . wfMsg('importinterwiki') . " + " . wfMsgHtml('importinterwiki') . "
@@ -117,11 +115,6 @@ function wfSpecialImport( $page = '' ) { } } -function wfImportOldRevision( &$revision ) { - $dbw =& wfGetDB( DB_MASTER ); - $dbw->deadlockLoop( array( &$revision, 'importOldRevision' ) ); -} - /** * * @package MediaWiki @@ -186,6 +179,13 @@ class WikiRevision { # Sneak a single revision into place $user = User::newFromName( $this->getUser() ); + if( $user ) { + $userId = IntVal( $user->getId() ); + $userText = $user->getName(); + } else { + $userId = 0; + $userText = $this->getUser(); + } $article = new Article( $this->title ); $pageId = $article->getId(); @@ -207,8 +207,8 @@ class WikiRevision { 'page' => $pageId, 'text' => $this->getText(), 'comment' => $this->getComment(), - 'user' => IntVal( $user->getId() ), - 'user_text' => $user->getName(), + 'user' => $userId, + 'user_text' => $userText, 'timestamp' => $this->timestamp, 'minor_edit' => 0 ) ); @@ -226,65 +226,20 @@ class WikiRevision { * @subpackage SpecialPage */ class WikiImporter { - var $mSource = NULL; - var $mRevisionHandler = NULL; + var $mSource = null; + var $mPageCallback = null; + var $mRevisionCallback = null; var $lastfield; - function WikiImporter() { - $this->setRevisionHandler( array( &$this, "defaultRevisionHandler" ) ); + function WikiImporter( $source ) { + $this->setRevisionCallback( array( &$this, "importRevision" ) ); + $this->mSource = $source; } function throwXmlError( $err ) { $this->debug( "FAILURE: $err" ); } - function setupFromFile( $filename ) { - $this->mSource = @file_get_contents( $filename ); - if( $this->mSource === false ) { - return new WikiError( "Couldn't open import file" ); - } - return true; - } - - function setupFromUpload( $fieldname = "xmlimport" ) { - global $wgOut; - - $upload =& $_FILES[$fieldname]; - - if( !isset( $upload ) ) { - return new WikiErrorMsg( 'importnofile' ); - } - if( !empty( $upload['error'] ) ) { - return new WikiErrorMsg( 'importuploaderror', $upload['error'] ); - } - $fname = $upload['tmp_name']; - if( is_uploaded_file( 
$fname ) ) { - return $this->setupFromFile( $fname ); - } else { - return new WikiErrorMsg( 'importnofile' ); - } - } - - function setupFromURL( $url ) { - # fopen-wrappers are normally turned off for security. - ini_set( "allow_url_fopen", true ); - $ret = $this->setupFromFile( $url ); - ini_set( "allow_url_fopen", false ); - return $ret; - } - - function setupFromInterwiki( $interwiki, $page ) { - $base = Title::getInterwikiLink( $interwiki ); - if( empty( $base ) ) { - return new WikiError( 'Bad interwiki link' ); - } else { - $import = wfUrlencode( "Special:Export/$page" ); - $url = str_replace( "$1", $import, $base ); - $this->notice( "Importing from $url" ); - return $this->setupFromURL( $url ); - } - } - # -------------- function doImport() { @@ -300,16 +255,19 @@ class WikiImporter { xml_set_object( $parser, &$this ); xml_set_element_handler( $parser, "in_start", "" ); - if( !xml_parse( $parser, $this->mSource, true ) ) { - return new WikiXmlError( $parser ); - } + do { + $chunk = $this->mSource->readChunk(); + if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) { + return new WikiXmlError( $parser ); + } + } while( $chunk !== false && !$this->mSource->atEnd() ); xml_parser_free( $parser ); return true; } function debug( $data ) { - #$this->notice( "DEBUG: $data\n" ); + #wfDebug( "IMPORT: $data\n" ); } function notice( $data ) { @@ -322,11 +280,44 @@ class WikiImporter { } } - function setRevisionHandler( $functionref ) { - $this->mRevisionHandler = $functionref; + /** + * Sets the action to perform as each new page in the stream is reached. + * @param callable $callback + * @return callable + */ + function setPageCallback( $callback ) { + $previous = $this->mPageCallback; + $this->mPageCallback = $callback; + return $previous; + } + + /** + * Sets the action to perform as each page revision is reached. 
+ * @param callable $callback + * @return callable + */ + function setRevisionCallback( $callback ) { + $previous = $this->mRevisionCallback; + $this->mRevisionCallback = $callback; + return $previous; + } + + /** + * Default per-revision callback, performs the import. + * @param WikiRevision $revision + * @access private + */ + function importRevision( &$revision ) { + $dbw =& wfGetDB( DB_MASTER ); + $dbw->deadlockLoop( array( &$revision, 'importOldRevision' ) ); } - - function defaultRevisionHandler( &$revision ) { + + /** + * Alternate per-revision callback, for debugging. + * @param WikiRevision $revision + * @access private + */ + function debugRevisionHandler( &$revision ) { $this->debug( "Got revision:" ); if( is_object( $revision->title ) ) { $this->debug( "-- Title: " . $revision->title->getPrefixedText() ); @@ -339,6 +330,16 @@ class WikiImporter { $this->debug( "-- Text: " . $revision->text ); } + /** + * Notify the callback function when a new <page> is reached. + * @param Title $title + * @access private + */ + function pageCallback( $title ) { + if( is_callable( $this->mPageCallback ) ) { + call_user_func( $this->mPageCallback, $title ); + } + } # XML parser callbacks from here out -- beware! 
@@ -356,10 +357,13 @@ class WikiImporter { function in_mediawiki( $parser, $name, $attribs ) { $this->debug( "in_mediawiki $name" ); - if( $name != "page" ) { + if( $name == 'siteinfo' ) { + xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" ); + } elseif( $name == 'page' ) { + xml_set_element_handler( $parser, "in_page", "out_page" ); + } else { return $this->throwXMLerror( "Expected <page>, got <$name>" ); } - xml_set_element_handler( $parser, "in_page", "out_page" ); } function out_mediawiki( $parser, $name ) { $this->debug( "out_mediawiki $name" ); @@ -368,6 +372,29 @@ class WikiImporter { } xml_set_element_handler( $parser, "donothing", "donothing" ); } + + + function in_siteinfo( $parser, $name, $attribs ) { + // no-ops for now + $this->debug( "in_siteinfo $name" ); + switch( $name ) { + case "sitename": + case "generator": + case "case": + case "namespaces": + case "namespace": + break; + default: + return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." ); + } + } + + function out_siteinfo( $parser, $name ) { + if( $name == "siteinfo" ) { + xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" ); + } + } + function in_page( $parser, $name, $attribs ) { $this->debug( "in_page $name" ); @@ -417,9 +444,11 @@ class WikiImporter { } xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" ); xml_set_character_data_handler( $parser, "donothing" ); + switch( $this->appendfield ) { case "title": $this->workTitle = $this->appenddata; + $this->pageCallback( $this->workTitle ); break; case "text": $this->workRevision->setText( $this->appenddata ); @@ -470,7 +499,9 @@ class WikiImporter { } xml_set_element_handler( $parser, "in_page", "out_page" ); - $out = call_user_func( $this->mRevisionHandler, &$this->workRevision, &$this ); + $out = call_user_func( $this->mRevisionCallback, + &$this->workRevision, + &$this ); if( !empty( $out ) ) { global $wgOut; $wgOut->addHTML( "
  • " . $out . "
  • \n" ); @@ -502,5 +533,85 @@ class WikiImporter { } +class ImportStringSource { + function ImportStringSource( $string ) { + $this->mString = $string; + $this->mRead = false; + } + + function atEnd() { + return $this->mRead; + } + + function readChunk() { + if( $this->atEnd() ) { + return false; + } else { + $this->mRead = true; + return $this->mString; + } + } +} + +class ImportStreamSource { + function ImportStreamSource( $handle ) { + $this->mHandle = $handle; + } + + function atEnd() { + return feof( $this->mHandle ); + } + + function readChunk() { + return fread( $this->mHandle, 32768 ); + } + + function newFromFile( $filename ) { + $file = @fopen( $filename, 'rt' ); + if( !$file ) { + return new WikiError( "Couldn't open import file" ); + } + return new ImportStreamSource( $file ); + } + + function newFromUpload( $fieldname = "xmlimport" ) { + global $wgOut; + + $upload =& $_FILES[$fieldname]; + + if( !isset( $upload ) ) { + return new WikiErrorMsg( 'importnofile' ); + } + if( !empty( $upload['error'] ) ) { + return new WikiErrorMsg( 'importuploaderror', $upload['error'] ); + } + $fname = $upload['tmp_name']; + if( is_uploaded_file( $fname ) ) { + return ImportStreamSource::newFromFile( $fname ); + } else { + return new WikiErrorMsg( 'importnofile' ); + } + } + + function newFromURL( $url ) { + # fopen-wrappers are normally turned off for security. 
+ ini_set( "allow_url_fopen", true ); + $ret = ImportStreamSource::newFromFile( $url ); + ini_set( "allow_url_fopen", false ); + return $ret; + } + + function newFromInterwiki( $interwiki, $page ) { + $base = Title::getInterwikiLink( $interwiki ); + if( empty( $base ) ) { + return new WikiError( 'Bad interwiki link' ); + } else { + $import = wfUrlencode( "Special:Export/$page" ); + $url = str_replace( "$1", $import, $base ); + return ImportStreamSource::newFromURL( $url ); + } + } +} + ?> diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php index bc4ce7a5f1..28ff27cb31 100644 --- a/maintenance/dumpBackup.php +++ b/maintenance/dumpBackup.php @@ -50,7 +50,7 @@ class BackupDumper { $db =& $this->backupDb(); $exporter = new WikiExporter( $db, $history, MW_EXPORT_STREAM ); $exporter->setPageCallback( array( &$this, 'reportPage' ) ); - $exporter->setRevCallback( array( &$this, 'revCount' ) ); + $exporter->setRevisionCallback( array( &$this, 'revCount' ) ); $exporter->openStream(); $exporter->allPages(); diff --git a/maintenance/importDump.php b/maintenance/importDump.php new file mode 100644 index 0000000000..f2f956aa06 --- /dev/null +++ b/maintenance/importDump.php @@ -0,0 +1,127 @@ + + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage Maintenance + */ + +$options = array( 'full', 'verbose', 'dry-run', 'preserve' ); + +require_once( 'commandLine.inc' ); +require_once( 'SpecialImport.php' ); + +class BackupReader { + var $reportingInterval = 100; + var $reporting = true; + var $pageCount = 0; + var $revCount = 0; + var $dryRun = false; + + function BackupReader() { + $this->stderr = fopen( "php://stderr", "wt" ); + } + + function reportPage( $page ) { + $this->pageCount++; + } + + function handleRevision( $rev ) { + $title = $rev->getTitle(); + $display = $title->getPrefixedText(); + $timestamp = $rev->getTimestamp(); + #echo "$display $timestamp\n"; + + $this->revCount++; + $this->report(); + + if( !$this->dryRun ) { + call_user_func( $this->importCallback, $rev ); + } + } + + function report( $final = false ) { + if( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) { + $this->showReport(); + } + } + + function showReport() { + if( $this->reporting ) { + $delta = wfTime() - $this->startTime; + if( $delta ) { + $rate = $this->pageCount / $delta; + $revrate = $this->revCount / $delta; + } else { + $rate = '-'; + $revrate = '-'; + } + $this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" ); + } + } + + function progress( $string ) { + fwrite( $this->stderr, $string . "\n" ); + } + + function importFromFile( $filename ) { + if( preg_match( '/\.gz$/', $filename ) ) { + $filename = 'compress.zlib://' . 
$filename; + } + $file = fopen( $filename, 'rt' ); + $this->importFromHandle( $file ); + } + + function importFromStdin() { + $file = fopen( 'php://stdin', 'rt' ); + $this->importFromHandle( $file ); + } + + function importFromHandle( $handle ) { + $this->startTime = wfTime(); + + $source = new ImportStreamSource( $handle ); + $importer = new WikiImporter( $source ); + + $importer->setPageCallback( array( &$this, 'reportPage' ) ); + $this->importCallback = $importer->setRevisionCallback( + array( &$this, 'handleRevision' ) ); + + $importer->doImport(); + } +} + +$reader = new BackupReader(); +if( isset( $options['quiet'] ) ) { + $reader->reporting = false; +} +if( isset( $options['report'] ) ) { + $reader->reportingInterval = IntVal( $options['report'] ); +} +if( isset( $options['dry-run'] ) ) { + $reader->dryRun = true; +} + +if( isset( $args[0] ) ) { + $reader->importFromFile( $args[0] ); +} else { + $reader->importFromStdin(); +} + +?> \ No newline at end of file