X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2FImport.php;h=b3ca0416359e41dfd9d5da1a75c9c7156a18f5f8;hb=0aa7ba71e0034b0a4a17722cdaa326d3fb76be01;hp=59fa58371c29cde85ad2a04eaae47e976b4231d9;hpb=a82e1acdb35289bba4f9e14e8a5fcc57f577f3ea;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Import.php b/includes/Import.php index 59fa58371c..b3ca041635 100644 --- a/includes/Import.php +++ b/includes/Import.php @@ -40,12 +40,14 @@ class WikiImporter { /** * Creates an ImportXMLReader drawing from the source provided - * @param string $source + * @param ImportStreamSource $source */ - function __construct( $source ) { + function __construct( ImportStreamSource $source ) { $this->reader = new XMLReader(); - stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' ); + if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) { + stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' ); + } $id = UploadSourceAdapter::registerSource( $source ); if ( defined( 'LIBXML_PARSEHUGE' ) ) { $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE ); @@ -218,7 +220,11 @@ class WikiImporter { $this->mTargetRootPage = null; } elseif ( $rootpage !== '' ) { $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes - $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN ); + $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) + ? $this->mTargetNamespace + : NS_MAIN + ); + if ( !$title || $title->isExternal() ) { $status->fatal( 'import-rootpage-invalid' ); } else { @@ -259,7 +265,7 @@ class WikiImporter { * @return bool */ public function importRevision( $revision ) { - if ( !$revision->getContent()->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) { + if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) { $this->notice( 'import-error-bad-location', $revision->getTitle()->getPrefixedText(), $revision->getID(), @@ -285,12 +291,12 @@ class WikiImporter { /** * Default per-revision callback, performs the import. - * @param WikiRevision $rev + * @param WikiRevision $revision * @return bool */ - public function importLogItem( $rev ) { + public function importLogItem( $revision ) { $dbw = wfGetDB( DB_MASTER ); - return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) ); + return $dbw->deadlockLoop( array( $revision, 'importLogItem' ) ); } /** @@ -310,7 +316,7 @@ class WikiImporter { * @param int $revCount * @param int $sRevCount * @param array $pageInfo - * @return + * @return bool */ public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) { $args = func_get_args(); @@ -387,6 +393,15 @@ class WikiImporter { } } + /** + * Retrieves the contents of the named attribute of the current element. + * @param string $attr The name of the attribute + * @return string The value of the attribute or an empty string if it is not set in the current element. + */ + public function nodeAttribute( $attr ) { + return $this->reader->getAttribute( $attr ); + } + /** * Shouldn't something like this be built-in to XMLReader? * Fetches text contents of the current element, assuming @@ -414,53 +429,12 @@ class WikiImporter { return ''; } - # -------------- - - /** Left in for debugging */ - private function dumpElement() { - static $lookup = null; - if ( !$lookup ) { - $xmlReaderConstants = array( - "NONE", - "ELEMENT", - "ATTRIBUTE", - "TEXT", - "CDATA", - "ENTITY_REF", - "ENTITY", - "PI", - "COMMENT", - "DOC", - "DOC_TYPE", - "DOC_FRAGMENT", - "NOTATION", - "WHITESPACE", - "SIGNIFICANT_WHITESPACE", - "END_ELEMENT", - "END_ENTITY", - "XML_DECLARATION", - ); - $lookup = array(); - - foreach ( $xmlReaderConstants as $name ) { - $lookup[constant( "XmlReader::$name" )] = $name; - } - } - - print var_dump( - $lookup[$this->reader->nodeType], - $this->reader->name, - $this->reader->value - ) . "\n\n"; - } - /** * Primary entry point * @throws MWException * @return bool */ public function doImport() { - // Calls to reader->read need to be wrapped in calls to // libxml_disable_entity_loader() to avoid local file // inclusion attacks (bug 46932). @@ -518,7 +492,7 @@ class WikiImporter { private function handleSiteInfo() { // Site info is useful, but not actually used for dump imports. // Includes a quick short-circuit to save performance. - if ( ! $this->mSiteInfoCallback ) { + if ( !$this->mSiteInfoCallback ) { $this->reader->next(); return true; } @@ -612,17 +586,28 @@ class WikiImporter { &$pageInfo ) ) ) { // Do nothing } elseif ( in_array( $tag, $normalFields ) ) { - $pageInfo[$tag] = $this->nodeContents(); - if ( $tag == 'title' ) { - $title = $this->processTitle( $pageInfo['title'] ); + // An XML snippet: + // + // 123 + // Page + // + // ... + // Because the redirect tag is built differently, we need special handling for that case. + if ( $tag == 'redirect' ) { + $pageInfo[$tag] = $this->nodeAttribute( 'title' ); + } else { + $pageInfo[$tag] = $this->nodeContents(); + if ( $tag == 'title' ) { + $title = $this->processTitle( $pageInfo['title'] ); - if ( !$title ) { - $badTitle = true; - $skip = true; - } + if ( !$title ) { + $badTitle = true; + $skip = true; + } - $this->pageCallback( $title ); - list( $pageInfo['_title'], $origTitle ) = $title; + $this->pageCallback( $title ); + list( $pageInfo['_title'], $origTitle ) = $title; + } } } elseif ( $tag == 'revision' ) { $this->handleRevision( $pageInfo ); @@ -690,9 +675,6 @@ class WikiImporter { if ( isset( $revisionInfo['id'] ) ) { $revision->setID( $revisionInfo['id'] ); } - if ( isset( $revisionInfo['text'] ) ) { - $revision->setText( $revisionInfo['text'] ); - } if ( isset( $revisionInfo['model'] ) ) { $revision->setModel( $revisionInfo['model'] ); } @@ -701,6 +683,14 @@ class WikiImporter { } $revision->setTitle( $pageInfo['_title'] ); + if ( isset( $revisionInfo['text'] ) ) { + $handler = $revision->getContentHandler(); + $text = $handler->importTransform( + $revisionInfo['text'], + $revision->getFormat() ); + + $revision->setText( $text ); + } if ( isset( $revisionInfo['timestamp'] ) ) { $revision->setTimestamp( $revisionInfo['timestamp'] ); } else { @@ -900,17 +890,23 @@ class WikiImporter { /** This is a horrible hack used to keep source compatibility */ class UploadSourceAdapter { - static $sourceRegistrations = array(); + /** @var array */ + private static $sourceRegistrations = array(); + /** @var string */ private $mSource; + + /** @var string */ private $mBuffer; + + /** @var int */ private $mPosition; /** - * @param string $source + * @param ImportStreamSource $source * @return string */ - static function registerSource( $source ) { + static function registerSource( ImportStreamSource $source ) { $id = wfRandomString(); self::$sourceRegistrations[$id] = $source; @@ -1013,59 +1009,88 @@ class UploadSourceAdapter { } } -class XMLReader2 extends XMLReader { - - /** - * @return bool|string - */ - function nodeContents() { - if ( $this->isEmptyElement ) { - return ""; - } - $buffer = ""; - while ( $this->read() ) { - switch ( $this->nodeType ) { - case XmlReader::TEXT: - case XmlReader::SIGNIFICANT_WHITESPACE: - $buffer .= $this->value; - break; - case XmlReader::END_ELEMENT: - return $buffer; - } - } - return $this->close(); - } -} - /** * @todo document (e.g. one-sentence class description). * @ingroup SpecialPage */ class WikiRevision { - var $importer = null; - - /** - * @var Title - */ - var $title = null; - var $id = 0; - var $timestamp = "20010115000000"; - var $user = 0; - var $user_text = ""; - var $model = null; - var $format = null; - var $text = ""; - var $content = null; - var $comment = ""; - var $minor = false; - var $type = ""; - var $action = ""; - var $params = ""; - var $fileSrc = ''; - var $sha1base36 = false; - var $isTemp = false; - var $archiveName = ''; - var $fileIsTemp; + /** @todo Unused? */ + private $importer = null; + + /** @var Title */ + public $title = null; + + /** @var int */ + private $id = 0; + + /** @var string */ + public $timestamp = "20010115000000"; + + /** + * @var int + * @todo Can't find any uses. Public, because that's suspicious. Get clarity. */ + public $user = 0; + + /** @var string */ + public $user_text = ""; + + /** @var string */ + protected $model = null; + + /** @var string */ + protected $format = null; + + /** @var string */ + public $text = ""; + + /** @var int */ + protected $size; + + /** @var Content */ + protected $content = null; + + /** @var ContentHandler */ + protected $contentHandler = null; + + /** @var string */ + public $comment = ""; + + /** @var bool */ + protected $minor = false; + + /** @var string */ + protected $type = ""; + + /** @var string */ + protected $action = ""; + + /** @var string */ + protected $params = ""; + + /** @var string */ + protected $fileSrc = ''; + + /** @var bool|string */ + protected $sha1base36 = false; + + /** + * @var bool + * @todo Unused? + */ + private $isTemp = false; + + /** @var string */ + protected $archiveName = ''; + + protected $filename; + + /** @var mixed */ + protected $src; + + /** @todo Unused? */ + private $fileIsTemp; + + /** @var bool */ private $mNoUpdates = false; /** @@ -1076,7 +1101,8 @@ class WikiRevision { if ( is_object( $title ) ) { $this->title = $title; } elseif ( is_null( $title ) ) { - throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." ); + throw new MWException( "WikiRevision given a null title in import. " + . "You may need to adjust \$wgLegalTitleChars." ); } else { throw new MWException( "WikiRevision given non-object title in import." ); } @@ -1257,18 +1283,24 @@ class WikiRevision { return $this->text; } + /** + * @return ContentHandler + */ + function getContentHandler() { + if ( is_null( $this->contentHandler ) ) { + $this->contentHandler = ContentHandler::getForModelID( $this->getModel() ); + } + + return $this->contentHandler; + } + /** * @return Content */ function getContent() { if ( is_null( $this->content ) ) { - $this->content = - ContentHandler::makeContent( - $this->text, - $this->getTitle(), - $this->getModel(), - $this->getFormat() - ); + $handler = $this->getContentHandler(); + $this->content = $handler->unserializeContent( $this->text, $this->getFormat() ); } return $this->content; @@ -1289,8 +1321,8 @@ class WikiRevision { * @return string */ function getFormat() { - if ( is_null( $this->model ) ) { - $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat(); + if ( is_null( $this->format ) ) { + $this->format = $this->getContentHandler()->getDefaultFormat(); } return $this->format; @@ -1406,6 +1438,7 @@ class WikiRevision { $linkCache->clear(); $page = WikiPage::factory( $this->title ); + $page->loadPageData( 'fromdbmaster' ); if ( !$page->exists() ) { # must create the page... $pageId = $page->insertOn( $dbw ); @@ -1438,7 +1471,8 @@ class WikiRevision { 'page' => $pageId, 'content_model' => $this->getModel(), 'content_format' => $this->getFormat(), - 'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()? + //XXX: just set 'content' => $this->getContent()? + 'text' => $this->getContent()->serialize( $this->getFormat() ), 'comment' => $this->getComment(), 'user' => $userId, 'user_text' => $userText, @@ -1450,15 +1484,16 @@ class WikiRevision { if ( $changed !== false && !$this->mNoUpdates ) { wfDebug( __METHOD__ . ": running updates\n" ); - $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) ); + $page->doEditUpdates( + $revision, + $userObj, + array( 'created' => $created, 'oldcountable' => $oldcountable ) + ); } return true; } - /** - * @return mixed - */ function importLogItem() { $dbw = wfGetDB( DB_MASTER ); # @todo FIXME: This will not record autoblocks @@ -1482,8 +1517,9 @@ class WikiRevision { ); // @todo FIXME: This could fail slightly for multiple matches :P if ( $prior ) { - wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " . - $this->timestamp . "\n" ); + wfDebug( __METHOD__ + . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " + . $this->timestamp . "\n" ); return; } $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' ); @@ -1680,13 +1716,18 @@ class ImportStreamSource { } if ( !empty( $upload['error'] ) ) { switch ( $upload['error'] ) { - case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini. + case 1: + # The uploaded file exceeds the upload_max_filesize directive in php.ini. return Status::newFatal( 'importuploaderrorsize' ); - case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form. + case 2: + # The uploaded file exceeds the MAX_FILE_SIZE directive that + # was specified in the HTML form. return Status::newFatal( 'importuploaderrorsize' ); - case 3: # The uploaded file was only partially uploaded + case 3: + # The uploaded file was only partially uploaded return Status::newFatal( 'importuploaderrorpartial' ); - case 6: #Missing a temporary folder. + case 6: + # Missing a temporary folder. return Status::newFatal( 'importuploaderrortemp' ); # case else: # Currently impossible } @@ -1731,7 +1772,9 @@ class ImportStreamSource { * @param int $pageLinkDepth * @return Status */ - public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) { + public static function newFromInterwiki( $interwiki, $page, $history = false, + $templates = false, $pageLinkDepth = 0 + ) { if ( $page == '' ) { return Status::newFatal( 'import-noarticle' ); }