Merge "Use interwiki cache directly to resolve transwiki import sources"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Thu, 5 Nov 2015 16:52:31 +0000 (16:52 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Thu, 5 Nov 2015 16:52:31 +0000 (16:52 +0000)
1  2 
includes/Import.php

diff --combined includes/Import.php
@@@ -265,7 -265,7 +265,7 @@@ class WikiImporter 
                        // No rootpage
                        $this->setImportTitleFactory( new NaiveImportTitleFactory() );
                } elseif ( $rootpage !== '' ) {
 -                      $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
 +                      $rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
                        $title = Title::newFromText( $rootpage );
  
                        if ( !$title || $title->isExternal() ) {
                while ( $this->reader->read() ) {
                        switch ( $this->reader->nodeType ) {
                        case XMLReader::TEXT:
 +                      case XMLReader::CDATA:
                        case XMLReader::SIGNIFICANT_WHITESPACE:
                                $buffer .= $this->reader->value;
                                break;
                                        $title = $this->processTitle( $pageInfo['title'],
                                                isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );
  
 -                                      if ( !$title ) {
 +                                      // $title is either an array of two titles or false.
 +                                      if ( is_array( $title ) ) {
 +                                              $this->pageCallback( $title );
 +                                              list( $pageInfo['_title'], $foreignTitle ) = $title;
 +                                      } else {
                                                $badTitle = true;
                                                $skip = true;
                                        }
 -
 -                                      $this->pageCallback( $title );
 -                                      list( $pageInfo['_title'], $foreignTitle ) = $title;
                                }
  
                                if ( $title ) {
                        }
                }
  
 -              $this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
 +              // @note $pageInfo is only set if a valid $title is processed above with
 +              //       no error. If we have a valid $title, then pageCallback is called
 +              //       above, $pageInfo['title'] is set and we do pageOutCallback here.
 +              //       If $pageInfo['_title'] is not set, then $foreignTitle is also not
 +              //       set since they both come from $title above.
 +              if ( array_key_exists( '_title', $pageInfo ) ) {
 +                      $this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
                                        $pageInfo['revisionCount'],
                                        $pageInfo['successfulRevisionCount'],
                                        $pageInfo );
 +              }
        }
  
        /**
@@@ -1607,20 -1598,6 +1607,20 @@@ class WikiRevision 
                        }
                }
  
 +              // Select previous version to make size diffs correct
 +              $prevId = $dbw->selectField( 'revision', 'rev_id',
 +                      array(
 +                              'rev_page' => $pageId,
 +                              'rev_timestamp <= ' . $dbw->timestamp( $this->timestamp ),
 +                      ),
 +                      __METHOD__,
 +                      array( 'ORDER BY' => array(
 +                                      'rev_timestamp DESC',
 +                                      'rev_id DESC', // timestamp is not unique per page
 +                              )
 +                      )
 +              );
 +
                # @todo FIXME: Use original rev_id optionally (better for backups)
                # Insert the row
                $revision = new Revision( array(
                        'page' => $pageId,
                        'content_model' => $this->getModel(),
                        'content_format' => $this->getFormat(),
 -                      //XXX: just set 'content' => $this->getContent()?
 +                      // XXX: just set 'content' => $this->getContent()?
                        'text' => $this->getContent()->serialize( $this->getFormat() ),
                        'comment' => $this->getComment(),
                        'user' => $userId,
                        'user_text' => $userText,
                        'timestamp' => $this->timestamp,
                        'minor_edit' => $this->minor,
 +                      'parent_id' => $prevId,
                        ) );
                $revision->insertOn( $dbw );
                $changed = $page->updateIfNewerOn( $dbw, $revision );
                                'log_namespace' => $this->getTitle()->getNamespace(),
                                'log_title' => $this->getTitle()->getDBkey(),
                                'log_comment' => $this->getComment(),
 -                              #'log_user_text' => $this->user_text,
 +                              # 'log_user_text' => $this->user_text,
                                'log_params' => $this->params ),
                        __METHOD__
                );
                        'log_action' => $this->action,
                        'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                        'log_user' => User::idFromName( $this->user_text ),
 -                      #'log_user_text' => $this->user_text,
 +                      # 'log_user_text' => $this->user_text,
                        'log_namespace' => $this->getTitle()->getNamespace(),
                        'log_title' => $this->getTitle()->getDBkey(),
                        'log_comment' => $this->getComment(),
@@@ -1964,23 -1940,38 +1964,38 @@@ class ImportStreamSource implements Imp
                if ( $page == '' ) {
                        return Status::newFatal( 'import-noarticle' );
                }
-               $link = Title::newFromText( "$interwiki:Special:Export/$page" );
-               if ( is_null( $link ) || !$link->isExternal() ) {
+               # Look up the first interwiki prefix, and let the foreign site handle
+               # subsequent interwiki prefixes
+               $firstIwPrefix = strtok( $interwiki, ':' );
+               $firstIw = Interwiki::fetch( $firstIwPrefix );
+               if ( !$firstIw ) {
                        return Status::newFatal( 'importbadinterwiki' );
-               } else {
-                       $params = array();
-                       if ( $history ) {
-                               $params['history'] = 1;
-                       }
-                       if ( $templates ) {
-                               $params['templates'] = 1;
-                       }
-                       if ( $pageLinkDepth ) {
-                               $params['pagelink-depth'] = $pageLinkDepth;
-                       }
-                       $url = $link->getFullURL( $params );
-                       # For interwikis, use POST to avoid redirects.
-                       return ImportStreamSource::newFromURL( $url, "POST" );
                }
+               $additionalIwPrefixes = strtok( '' );
+               if ( $additionalIwPrefixes ) {
+                       $additionalIwPrefixes .= ':';
+               }
+               # Have to do a DB-key replacement ourselves; otherwise spaces get
+               # URL-encoded to +, which is wrong in this case. Similar to logic in
+               # Title::getLocalURL
+               $link = $firstIw->getURL( strtr( "${additionalIwPrefixes}Special:Export/$page",
+                       ' ', '_' ) );
+               $params = array();
+               if ( $history ) {
+                       $params['history'] = 1;
+               }
+               if ( $templates ) {
+                       $params['templates'] = 1;
+               }
+               if ( $pageLinkDepth ) {
+                       $params['pagelink-depth'] = $pageLinkDepth;
+               }
+               $url = wfAppendQuery( $link, $params );
+               # For interwikis, use POST to avoid redirects.
+               return ImportStreamSource::newFromURL( $url, "POST" );
        }
  }