Merge "Revert "MediaWiki.php: Redirect non-standard title urls to canonical""
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 9 Nov 2016 01:27:38 +0000 (01:27 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 9 Nov 2016 01:27:38 +0000 (01:27 +0000)
1  2 
includes/MediaWiki.php

diff --combined includes/MediaWiki.php
@@@ -313,8 -313,6 +313,6 @@@ class MediaWiki 
         * - Normalise empty title:
         *   /wiki/ -> /wiki/Main
         *   /w/index.php?title= -> /wiki/Main
-        * - Normalise non-standard title urls:
-        *   /w/index.php?title=Foo_Bar -> /wiki/Foo_Bar
         * - Don't redirect anything with query parameters other than 'title' or 'action=view'.
         *
         * @param Title $title
  
                if ( $request->getVal( 'action', 'view' ) != 'view'
                        || $request->wasPosted()
+                       || ( $request->getVal( 'title' ) !== null
+                               && $title->getPrefixedDBkey() == $request->getVal( 'title' ) )
                        || count( $request->getValueNames( [ 'action', 'title' ] ) )
                        || !Hooks::run( 'TestCanonicalRedirect', [ $request, $title, $output ] )
                ) {
                }
                // Redirect to canonical url, make it a 301 to allow caching
                $targetUrl = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
-               if ( $targetUrl != $request->getFullRequestURL() ) {
-                       $output->setCdnMaxage( 1200 );
-                       $output->redirect( $targetUrl, '301' );
-                       return true;
-               }
-               // If there is no title, or the title is in a non-standard encoding, we demand
-               // a redirect. If cgi somehow changed the 'title' query to be non-standard while
-               // the url is standard, the server is misconfigured.
-               if ( $request->getVal( 'title' ) === null
-                       || $title->getPrefixedDBkey() != $request->getVal( 'title' )
-               ) {
+               if ( $targetUrl == $request->getFullRequestURL() ) {
                        $message = "Redirect loop detected!\n\n" .
                                "This means the wiki got confused about what page was " .
                                "requested; this sometimes happens when moving a wiki " .
                        }
                        throw new HttpError( 500, $message );
                }
-               return false;
+               $output->setSquidMaxage( 1200 );
+               $output->redirect( $targetUrl, '301' );
+               return true;
        }
  
        /**
         */
        public function run() {
                try {
 +                      $this->setDBProfilingAgent();
                        try {
                                $this->main();
                        } catch ( ErrorPageError $e ) {
                                $e->report(); // display the GUI error
                        }
                } catch ( Exception $e ) {
 +                      $context = $this->context;
 +                      $action = $context->getRequest()->getVal( 'action', 'view' );
 +                      if (
 +                              $e instanceof DBConnectionError &&
 +                              $context->hasTitle() &&
 +                              $context->getTitle()->canExist() &&
 +                              in_array( $action, [ 'view', 'history' ], true ) &&
 +                              HTMLFileCache::useFileCache( $this->context, HTMLFileCache::MODE_OUTAGE )
 +                      ) {
 +                              // Try to use any (even stale) file during outages...
 +                              $cache = new HTMLFileCache( $context->getTitle(), 'view' );
 +                              if ( $cache->isCached() ) {
 +                                      $cache->loadFromFileCache( $context, HTMLFileCache::MODE_OUTAGE );
 +                                      print MWExceptionRenderer::getHTML( $e );
 +                                      exit;
 +                              }
 +
 +                      }
 +
                        MWExceptionHandler::handleException( $e );
                }
  
                $this->doPostOutputShutdown( 'normal' );
        }
  
 +      private function setDBProfilingAgent() {
 +              $services = MediaWikiServices::getInstance();
 +              // Add a comment for easy SHOW PROCESSLIST interpretation
 +              $name = $this->context->getUser()->getName();
 +              $services->getDBLoadBalancerFactory()->setAgentName(
 +                      mb_strlen( $name ) > 15 ? mb_substr( $name, 0, 15 ) . '...' : $name
 +              );
 +      }
 +
        /**
         * @see MediaWiki::preOutputCommit()
 +       * @param callable $postCommitWork [default: null]
         * @since 1.26
         */
 -      public function doPreOutputCommit() {
 -              self::preOutputCommit( $this->context );
 +      public function doPreOutputCommit( callable $postCommitWork = null ) {
 +              self::preOutputCommit( $this->context, $postCommitWork );
        }
  
        /**
         * the user can receive a response (in case commit fails)
         *
         * @param IContextSource $context
 +       * @param callable $postCommitWork [default: null]
         * @since 1.27
         */
 -      public static function preOutputCommit( IContextSource $context ) {
 +      public static function preOutputCommit(
 +              IContextSource $context, callable $postCommitWork = null
 +      ) {
                // Either all DBs should commit or none
                ignore_user_abort( true );
  
                $config = $context->getConfig();
 -
 +              $request = $context->getRequest();
 +              $output = $context->getOutput();
                $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
 +
                // Commit all changes
                $lbFactory->commitMasterChanges(
                        __METHOD__,
                        // Abort if any transaction was too big
                        [ 'maxWriteDuration' => $config->get( 'MaxUserDBWriteDuration' ) ]
                );
 +              wfDebug( __METHOD__ . ': primary transaction round committed' );
  
 +              // Run updates that need to block the user or affect output (this is the last chance)
                DeferredUpdates::doUpdates( 'enqueue', DeferredUpdates::PRESEND );
                wfDebug( __METHOD__ . ': pre-send deferred updates completed' );
  
 -              // Record ChronologyProtector positions
 -              $lbFactory->shutdown();
 -              wfDebug( __METHOD__ . ': all transactions committed' );
 +              // Decide when clients block on ChronologyProtector DB position writes
 +              $urlDomainDistance = (
 +                      $request->wasPosted() &&
 +                      $output->getRedirect() &&
 +                      $lbFactory->hasOrMadeRecentMasterChanges( INF )
 +              ) ? self::getUrlDomainDistance( $output->getRedirect(), $context ) : false;
 +
 +              if ( $urlDomainDistance === 'local' || $urlDomainDistance === 'remote' ) {
 +                      // OutputPage::output() will be fast; $postCommitWork will not be useful for
 +                      // masking the latency of syncing DB positions across all datacenters synchronously.
 +                      // Instead, make use of the round-trip time (RTT) while the client follows the redirect.
 +                      $flags = $lbFactory::SHUTDOWN_CHRONPROT_ASYNC;
 +                      $cpPosTime = microtime( true );
 +                      // Client's next request should see 1+ positions with this DBMasterPos::asOf() time
 +                      if ( $urlDomainDistance === 'local' ) {
 +                              // Client will stay on this domain, so set an unobtrusive cookie
 +                              $expires = time() + ChronologyProtector::POSITION_TTL;
 +                              $options = [ 'prefix' => '' ];
 +                              $request->response()->setCookie( 'cpPosTime', $cpPosTime, $expires, $options );
 +                      } else {
 +                              // Cookies may not work across wiki domains, so use a URL parameter
 +                              $safeUrl = $lbFactory->appendPreShutdownTimeAsQuery(
 +                                      $output->getRedirect(),
 +                                      $cpPosTime
 +                              );
 +                              $output->redirect( $safeUrl );
 +                      }
 +              } else {
 +                      // OutputPage::output() is fairly slow; run it in $postCommitWork to mask
 +                      // the latency of syncing DB positions across all datacenters synchronously
 +                      $flags = $lbFactory::SHUTDOWN_CHRONPROT_SYNC;
 +                      if ( $lbFactory->hasOrMadeRecentMasterChanges( INF ) ) {
 +                              $cpPosTime = microtime( true );
 +                              // Set a cookie in case the DB position store cannot sync across datacenters.
 +                              // This will at least cover the common case of the user staying on the domain.
 +                              $expires = time() + ChronologyProtector::POSITION_TTL;
 +                              $options = [ 'prefix' => '' ];
 +                              $request->response()->setCookie( 'cpPosTime', $cpPosTime, $expires, $options );
 +                      }
 +              }
 +              // Record ChronologyProtector positions for DBs affected in this request at this point
 +              $lbFactory->shutdown( $flags, $postCommitWork );
 +              wfDebug( __METHOD__ . ': LBFactory shutdown completed' );
  
                // Set a cookie to tell all CDN edge nodes to "stick" the user to the DC that handles this
                // POST request (e.g. the "master" data center). Also have the user briefly bypass CDN so
                // ChronologyProtector works for cacheable URLs.
 -              $request = $context->getRequest();
                if ( $request->wasPosted() && $lbFactory->hasOrMadeRecentMasterChanges() ) {
                        $expires = time() + $config->get( 'DataCenterUpdateStickTTL' );
                        $options = [ 'prefix' => '' ];
                // also intimately related to the value of $wgCdnReboundPurgeDelay.
                if ( $lbFactory->laggedReplicaUsed() ) {
                        $maxAge = $config->get( 'CdnMaxageLagged' );
 -                      $context->getOutput()->lowerCdnMaxage( $maxAge );
 +                      $output->lowerCdnMaxage( $maxAge );
                        $request->response()->header( "X-Database-Lagged: true" );
                        wfDebugLog( 'replication', "Lagged DB used; CDN cache TTL limited to $maxAge seconds" );
                }
                // Avoid long-term cache pollution due to message cache rebuild timeouts (T133069)
                if ( MessageCache::singleton()->isDisabled() ) {
                        $maxAge = $config->get( 'CdnMaxageSubstitute' );
 -                      $context->getOutput()->lowerCdnMaxage( $maxAge );
 +                      $output->lowerCdnMaxage( $maxAge );
                        $request->response()->header( "X-Response-Substitute: true" );
                }
        }
  
 +      /**
 +       * @param string $url
 +       * @param IContextSource $context
 +       * @return string Either "local", "remote" if in the farm, "external" otherwise
 +       */
 +      private static function getUrlDomainDistance( $url, IContextSource $context ) {
 +              static $relevantKeys = [ 'host' => true, 'port' => true ];
 +
 +              $infoCandidate = wfParseUrl( $url );
 +              if ( $infoCandidate === false ) {
 +                      return 'external';
 +              }
 +
 +              $infoCandidate = array_intersect_key( $infoCandidate, $relevantKeys );
 +              $clusterHosts = array_merge(
 +                      // Local wiki host (the most common case)
 +                      [ $context->getConfig()->get( 'CanonicalServer' ) ],
 +                      // Any local/remote wiki virtual hosts for this wiki farm
 +                      $context->getConfig()->get( 'LocalVirtualHosts' )
 +              );
 +
 +              foreach ( $clusterHosts as $i => $clusterHost ) {
 +                      $parseUrl = wfParseUrl( $clusterHost );
 +                      if ( !$parseUrl ) {
 +                              continue;
 +                      }
 +                      $infoHost = array_intersect_key( $parseUrl, $relevantKeys );
 +                      if ( $infoCandidate === $infoHost ) {
 +                              return ( $i === 0 ) ? 'local' : 'remote';
 +                      }
 +              }
 +
 +              return 'external';
 +      }
 +
        /**
         * This function does work that can be done *after* the
         * user gets the HTTP response so they don't block on it
                // Show visible profiling data if enabled (which cannot be post-send)
                Profiler::instance()->logDataPageOutputOnly();
  
 -              $that = $this;
 -              $callback = function () use ( $that, $mode ) {
 +              $callback = function () use ( $mode ) {
                        try {
 -                              $that->restInPeace( $mode );
 +                              $this->restInPeace( $mode );
                        } catch ( Exception $e ) {
                                MWExceptionHandler::handleException( $e );
                        }
        private function main() {
                global $wgTitle;
  
 +              $output = $this->context->getOutput();
                $request = $this->context->getRequest();
  
                // Send Ajax requests to the Ajax dispatcher.
  
                        $dispatcher = new AjaxDispatcher( $this->config );
                        $dispatcher->performAction( $this->context->getUser() );
 +
                        return;
                }
  
                                // Setup dummy Title, otherwise OutputPage::redirect will fail
                                $title = Title::newFromText( 'REDIR', NS_MAIN );
                                $this->context->setTitle( $title );
 -                              $output = $this->context->getOutput();
                                // Since we only do this redir to change proto, always send a vary header
                                $output->addVaryHeader( 'X-Forwarded-Proto' );
                                $output->redirect( $redirUrl );
                                $output->output();
 +
                                return;
                        }
                }
  
 -              if ( $this->config->get( 'UseFileCache' ) && $title->getNamespace() >= 0 ) {
 -                      if ( HTMLFileCache::useFileCache( $this->context ) ) {
 -                              // Try low-level file cache hit
 -                              $cache = new HTMLFileCache( $title, $action );
 -                              if ( $cache->isCacheGood( /* Assume up to date */ ) ) {
 -                                      // Check incoming headers to see if client has this cached
 -                                      $timestamp = $cache->cacheTimestamp();
 -                                      if ( !$this->context->getOutput()->checkLastModified( $timestamp ) ) {
 -                                              $cache->loadFromFileCache( $this->context );
 -                                      }
 -                                      // Do any stats increment/watchlist stuff
 -                                      // Assume we're viewing the latest revision (this should always be the case with file cache)
 -                                      $this->context->getWikiPage()->doViewUpdates( $this->context->getUser() );
 -                                      // Tell OutputPage that output is taken care of
 -                                      $this->context->getOutput()->disable();
 -                                      return;
 +              if ( $title->canExist() && HTMLFileCache::useFileCache( $this->context ) ) {
 +                      // Try low-level file cache hit
 +                      $cache = new HTMLFileCache( $title, $action );
 +                      if ( $cache->isCacheGood( /* Assume up to date */ ) ) {
 +                              // Check incoming headers to see if client has this cached
 +                              $timestamp = $cache->cacheTimestamp();
 +                              if ( !$output->checkLastModified( $timestamp ) ) {
 +                                      $cache->loadFromFileCache( $this->context );
                                }
 +                              // Do any stats increment/watchlist stuff, assuming user is viewing the
 +                              // latest revision (which should always be the case for file cache)
 +                              $this->context->getWikiPage()->doViewUpdates( $this->context->getUser() );
 +                              // Tell OutputPage that output is taken care of
 +                              $output->disable();
 +
 +                              return;
                        }
                }
  
                // Actually do the work of the request and build up any output
                $this->performRequest();
  
 +              // GUI-ify and stash the page output in MediaWiki::doPreOutputCommit() while
 +              // ChronologyProtector synchronizes DB positions or slaves across all datacenters.
 +              $buffer = null;
 +              $outputWork = function () use ( $output, &$buffer ) {
 +                      if ( $buffer === null ) {
 +                              $buffer = $output->output( true );
 +                      }
 +
 +                      return $buffer;
 +              };
 +
                // Now commit any transactions, so that unreported errors after
                // output() don't roll back the whole DB transaction and so that
                // we avoid having both success and error text in the response
 -              $this->doPreOutputCommit();
 +              $this->doPreOutputCommit( $outputWork );
  
 -              // Output everything!
 -              $this->context->getOutput()->output();
 +              // Now send the actual output
 +              print $outputWork();
        }
  
        /**
  
                // Do any deferred jobs
                DeferredUpdates::doUpdates( 'enqueue' );
 +              DeferredUpdates::setImmediateMode( true );
  
                // Make sure any lazy jobs are pushed
                JobQueueGroup::pushLazyJobs();