Create GuzzleHttpRequest class as new default for HttpRequestFactory
authorBill Pirkle <bpirkle@wikimedia.org>
Fri, 17 Aug 2018 14:53:59 +0000 (09:53 -0500)
committerKunal Mehta <legoktm@member.fsf.org>
Mon, 10 Dec 2018 08:36:59 +0000 (00:36 -0800)
Create a GuzzleHttpRequest class using the external Guzzle
(docs.guzzlephp.org) library. This will be the new default request type,
but CurlHttpRequest and PhpHttpRequest remain available and accessible
via Http::$httpEngine.

Bug: T202110
Change-Id: Ie720be2628d7baf427b002847f103fd86ee4cff3
Depends-On: I143a6410d111e75f01dbbfd43f300e2e60247451

RELEASE-NOTES-1.33
autoload.php
composer.json
includes/http/GuzzleHttpRequest.php [new file with mode: 0644]
includes/http/HttpRequestFactory.php
includes/http/MWHttpRequest.php
tests/phpunit/includes/http/HttpTest.php

index baf3689..f68875b 100644 (file)
@@ -44,6 +44,11 @@ production.
 ==== New external libraries ====
 * Added wikimedia/password-blacklist 0.1.4.
 * …
+* Added guzzlehttp/guzzle 6.3.3 and dependents:
+  * guzzlehttp/promises 1.3.1
+  * guzzlehttp/psr7 1.5.0
+  * psr/http-message 1.0.1
+  * ralouphie/getallheaders 2.0.5
 
 ==== Changed external libraries ====
 * Updated wikimedia/xmp-reader from 0.6.0 to 0.6.1.
index 68b3305..c411948 100644 (file)
@@ -571,6 +571,7 @@ $wgAutoloadLocalClasses = [
        'GitInfo' => __DIR__ . '/includes/GitInfo.php',
        'GlobalDependency' => __DIR__ . '/includes/cache/CacheDependency.php',
        'GlobalVarConfig' => __DIR__ . '/includes/config/GlobalVarConfig.php',
+       'GuzzleHttpRequest' => __DIR__ . '/includes/http/GuzzleHttpRequest.php',
        'HHVMMakeRepo' => __DIR__ . '/maintenance/hhvm/makeRepo.php',
        'HTMLApiField' => __DIR__ . '/includes/htmlform/fields/HTMLApiField.php',
        'HTMLAutoCompleteSelectField' => __DIR__ . '/includes/htmlform/fields/HTMLAutoCompleteSelectField.php',
index bf3a101..08b0e01 100644 (file)
                "ext-json": "*",
                "ext-mbstring": "*",
                "ext-xml": "*",
+               "guzzlehttp/guzzle": "6.3.3",
+               "guzzlehttp/promises": "1.3.1",
+               "guzzlehttp/psr7": "1.5.0",
                "liuggio/statsd-php-client": "1.0.18",
                "oojs/oojs-ui": "0.29.6",
                "pear/mail": "1.4.1",
                "pear/mail_mime": "1.10.2",
                "pear/net_smtp": "1.8.0",
                "php": ">=5.6.99",
+               "psr/http-message": "1.0.1",
                "psr/log": "1.0.2",
+               "ralouphie/getallheaders": "2.0.5",
                "wikimedia/assert": "0.2.2",
                "wikimedia/at-ease": "1.2.0",
                "wikimedia/base-convert": "2.0.0",
diff --git a/includes/http/GuzzleHttpRequest.php b/includes/http/GuzzleHttpRequest.php
new file mode 100644 (file)
index 0000000..5654a71
--- /dev/null
@@ -0,0 +1,202 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Psr7\Request;
+
+/**
+ * MWHttpRequest implemented using the Guzzle library
+ *
+ * Differences from the CurlHttpRequest implementation:
+ *   1) the MWHttpRequest 'callback' option is unsupported.  Instead, use the 'sink' option to
+ *      send a filename/stream (see http://docs.guzzlephp.org/en/stable/request-options.html#sink)
+ *   2) callers may set a custom handler via the 'handler' option.
+ *      If this is not set, Guzzle will use curl (if available) or PHP streams (otherwise)
+ *   3) setting either sslVerifyHost or sslVerifyCert will enable both.  Guzzle does not allow
+ *      them to be set separately.
+ *
+ * @since 1.33
+ */
+class GuzzleHttpRequest extends MWHttpRequest {
+       const SUPPORTS_FILE_POSTS = true;
+
+       /** @var mixed Value of the 'handler' option, forwarded to Guzzle as-is; null if unset */
+       protected $handler = null;
+       /** @var mixed Value of the 'sink' option, forwarded to Guzzle as-is; null if unset */
+       protected $sink = null;
+       /** @var array Options handed to the Guzzle client; HTTP error statuses must not throw */
+       protected $guzzleOptions = [ 'http_errors' => false ];
+
+       /**
+        * @param string $url Url to use. If protocol-relative, will be expanded to an http:// URL
+        * @param array $options (optional) extra params to pass (see Http::request()).
+        *   In addition, the Guzzle-specific 'handler' and 'sink' keys are honoured.
+        * @param string $caller The method making this request, for profiling
+        * @param Profiler|null $profiler An instance of the profiler for profiling, or null
+        * @throws Exception
+        */
+       public function __construct(
+               $url, array $options = [], $caller = __METHOD__, $profiler = null
+       ) {
+               parent::__construct( $url, $options, $caller, $profiler );
+
+               if ( isset( $options['handler'] ) ) {
+                       $this->handler = $options['handler'];
+               }
+               if ( isset( $options['sink'] ) ) {
+                       $this->sink = $options['sink'];
+               }
+       }
+
+       /**
+        * Translate the request state into Guzzle options, send the request and
+        * record the response (or the failure) on this object.
+        *
+        * @see MWHttpRequest::execute
+        *
+        * @return Status
+        */
+       public function execute() {
+               $this->prepare();
+
+               if ( !$this->status->isOK() ) {
+                       return Status::wrap( $this->status ); // TODO B/C; move this to callers
+               }
+
+               if ( $this->proxy ) {
+                       $this->guzzleOptions['proxy'] = $this->proxy;
+               }
+
+               $this->guzzleOptions['timeout'] = $this->timeout;
+               $this->guzzleOptions['connect_timeout'] = $this->connectTimeout;
+               $this->guzzleOptions['version'] = '1.1';
+
+               if ( !$this->followRedirects ) {
+                       $this->guzzleOptions['allow_redirects'] = false;
+               } else {
+                       $this->guzzleOptions['allow_redirects'] = [
+                               'max' => $this->maxRedirects
+                       ];
+               }
+
+               if ( $this->method == 'POST' ) {
+                       $postData = $this->postData;
+                       if ( is_array( $postData ) ) {
+                               // Guzzle rejects an array given as 'body' with an
+                               // InvalidArgumentException (which is not a GuzzleException and
+                               // would escape the handlers below); form fields must be passed
+                               // as 'form_params' instead.
+                               $this->guzzleOptions['form_params'] = $postData;
+                       } else {
+                               $this->guzzleOptions['body'] = $postData;
+                       }
+
+                       // Suppress 'Expect: 100-continue' header, as some servers
+                       // will reject it with a 417 and Curl won't auto retry
+                       // with HTTP 1.0 fallback
+                       $this->guzzleOptions['expect'] = false;
+               }
+
+               $this->guzzleOptions['headers'] = $this->reqHeaders;
+
+               if ( $this->handler ) {
+                       $this->guzzleOptions['handler'] = $this->handler;
+               }
+
+               if ( $this->sink ) {
+                       $this->guzzleOptions['sink'] = $this->sink;
+               }
+
+               if ( $this->caInfo ) {
+                       $this->guzzleOptions['verify'] = $this->caInfo;
+               } elseif ( !$this->sslVerifyHost && !$this->sslVerifyCert ) {
+                       $this->guzzleOptions['verify'] = false;
+               }
+
+               // Open the profiling section before the request is sent so that it
+               // actually covers the network round trip.
+               if ( $this->profiler ) {
+                       $profileSection = $this->profiler->scopedProfileIn(
+                               __METHOD__ . '-' . $this->profileName
+                       );
+               }
+
+               try {
+                       $client = new Client( $this->guzzleOptions );
+                       $request = new Request( $this->method, $this->url );
+                       $response = $client->send( $request );
+                       $this->headerList = $response->getHeaders();
+                       $this->content = $response->getBody()->getContents();
+
+                       $this->respVersion = $response->getProtocolVersion();
+                       $this->respStatus = $response->getStatusCode() . ' ' . $response->getReasonPhrase();
+
+               } catch ( GuzzleHttp\Exception\ConnectException $e ) {
+                       // ConnectException is thrown for several reasons besides generic "timeout":
+                       //   Connection refused
+                       //   couldn't connect to host
+                       //   connection attempt failed
+                       //   Could not resolve IPv4 address for host
+                       //   Could not resolve IPv6 address for host
+                       if ( $this->usingCurl() ) {
+                               $handlerContext = $e->getHandlerContext();
+                               if ( isset( $handlerContext['errno'] ) &&
+                                       $handlerContext['errno'] == CURLE_OPERATION_TIMEOUTED
+                               ) {
+                                       $this->status->fatal( 'http-timed-out', $this->url );
+                               } else {
+                                       // The handler context may lack an 'error' entry; fall back to
+                                       // the exception message rather than reading a missing key.
+                                       $curlError = isset( $handlerContext['error'] )
+                                               ? $handlerContext['error']
+                                               : $e->getMessage();
+                                       $this->status->fatal( 'http-curl-error', $curlError );
+                               }
+                       } else {
+                               $this->status->fatal( 'http-request-error' );
+                       }
+               } catch ( GuzzleHttp\Exception\RequestException $e ) {
+                       if ( $this->usingCurl() ) {
+                               // Exceptions raised outside the curl handler itself (e.g. by
+                               // middleware) carry no curl 'error' entry in their context.
+                               $handlerContext = $e->getHandlerContext();
+                               $curlError = isset( $handlerContext['error'] )
+                                       ? $handlerContext['error']
+                                       : $e->getMessage();
+                               $this->status->fatal( 'http-curl-error', $curlError );
+                       } else {
+                               // Non-ideal, but the only way to identify connection timeout vs other conditions
+                               $needle = 'Connection timed out';
+                               if ( strpos( $e->getMessage(), $needle ) !== false ) {
+                                       $this->status->fatal( 'http-timed-out', $this->url );
+                               } else {
+                                       $this->status->fatal( 'http-request-error' );
+                               }
+                       }
+               } catch ( GuzzleHttp\Exception\GuzzleException $e ) {
+                       $this->status->fatal( 'http-internal-error' );
+               }
+
+               if ( $this->profiler ) {
+                       $this->profiler->scopedProfileOut( $profileSection );
+               }
+
+               $this->parseHeader();
+               $this->setStatus();
+
+               return Status::wrap( $this->status ); // TODO B/C; move this to callers
+       }
+
+       /**
+        * Whether the request is expected to go through curl: either the caller
+        * supplied a CurlHandler, or no handler was supplied and the curl
+        * extension is loaded.
+        *
+        * @return bool
+        */
+       protected function usingCurl() {
+               if ( $this->handler ) {
+                       return $this->handler instanceof GuzzleHttp\Handler\CurlHandler;
+               }
+
+               return extension_loaded( 'curl' );
+       }
+
+       /**
+        * Guzzle provides headers as an array.  Reprocess to match our expectations.  Guzzle will
+        * have already parsed and removed the status line (in EasyHandle::createResponse).
+        */
+       protected function parseHeader() {
+               // Failure without (valid) headers gets a response status of zero
+               if ( !$this->status->isOK() ) {
+                       $this->respStatus = '0 Error';
+               }
+
+               // Response header names are stored lowercased
+               foreach ( $this->headerList as $name => $values ) {
+                       $this->respHeaders[strtolower( $name )] = $values;
+               }
+
+               $this->parseCookies();
+       }
+}
index c5413b3..a3a14d0 100644 (file)
@@ -26,6 +26,7 @@ use MediaWiki\Logger\LoggerFactory;
 use MWHttpRequest;
 use PhpHttpRequest;
 use Profiler;
+use GuzzleHttpRequest;
 
 /**
  * Factory creating MWHttpRequest objects.
@@ -43,7 +44,7 @@ class HttpRequestFactory {
         */
        public function create( $url, array $options = [], $caller = __METHOD__ ) {
                if ( !Http::$httpEngine ) {
-                       Http::$httpEngine = function_exists( 'curl_init' ) ? 'curl' : 'php';
+                       Http::$httpEngine = 'guzzle';
                } elseif ( Http::$httpEngine == 'curl' && !function_exists( 'curl_init' ) ) {
                        throw new DomainException( __METHOD__ . ': curl (https://secure.php.net/curl) is not ' .
                           'installed, but Http::$httpEngine is set to "curl"' );
@@ -54,6 +55,8 @@ class HttpRequestFactory {
                }
 
                switch ( Http::$httpEngine ) {
+                       case 'guzzle':
+                               return new GuzzleHttpRequest( $url, $options, $caller, Profiler::instance() );
                        case 'curl':
                                return new CurlHttpRequest( $url, $options, $caller, Profiler::instance() );
                        case 'php':
index 435c34d..b087019 100644 (file)
@@ -88,6 +88,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface {
         * @param array $options (optional) extra params to pass (see Http::request())
         * @param string $caller The method making this request, for profiling
         * @param Profiler|null $profiler An instance of the profiler for profiling, or null
+        * @throws Exception
         */
        public function __construct(
                $url, array $options = [], $caller = __METHOD__, $profiler = null
@@ -408,18 +409,20 @@ abstract class MWHttpRequest implements LoggerAwareInterface {
        /**
         * Sets HTTPRequest status member to a fatal value with the error
         * message if the returned integer value of the status code was
-        * not successful (< 300) or a redirect (>=300 and < 400).  (see
-        * RFC2616, section 10,
-        * http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html for a
-        * list of status codes.)
+        * not successful (1-299) or a redirect (300-399).
+        * See RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
+        * for a list of status codes.
         */
        protected function setStatus() {
                if ( !$this->respHeaders ) {
                        $this->parseHeader();
                }
 
-               if ( (int)$this->respStatus > 399 ) {
+               if ( ( (int)$this->respStatus > 0 && (int)$this->respStatus < 400 ) ) {
+                       $this->status->setResult( true, (int)$this->respStatus );
+               } else {
                        list( $code, $message ) = explode( " ", $this->respStatus, 2 );
+                       $this->status->setResult( false, (int)$this->respStatus );
                        $this->status->fatal( "http-bad-status", $code, $message );
                }
        }
index cd24be4..ac7ef80 100644 (file)
@@ -1,5 +1,9 @@
 <?php
 
+use GuzzleHttp\Handler\MockHandler;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Psr7\Response;
+
 /**
  * @group Http
  * @group small
@@ -503,6 +507,18 @@ class HttpTest extends MediaWikiTestCase {
 
                $this->assertTrue( defined( $value ), $value . ' not defined' );
        }
+
+       /**
+        * Run a GuzzleHttpRequest against a mocked Guzzle handler that replies
+        * with a single 200 response. No actual request is made herein.
+        */
+       public function testGuzzleHttpRequest() {
+               $cannedResponses = [ new Response( 200 ) ];
+               $mockStack = HandlerStack::create( new MockHandler( $cannedResponses ) );
+               $requestOptions = [ 'handler' => $mockStack ];
+
+               $request = new GuzzleHttpRequest( 'http://www.example.text', $requestOptions );
+               $request->execute();
+
+               $this->assertEquals( 200, $request->getStatus() );
+
+               // @TODO: add failure tests (404s and failure to connect)
+       }
 }
 
 /**
@@ -513,13 +529,15 @@ class MWHttpRequestTester extends MWHttpRequest {
        // returns appropriate tester class here
        public static function factory( $url, array $options = null, $caller = __METHOD__ ) {
                if ( !Http::$httpEngine ) {
-                       Http::$httpEngine = function_exists( 'curl_init' ) ? 'curl' : 'php';
+                       Http::$httpEngine = 'guzzle';
                } elseif ( Http::$httpEngine == 'curl' && !function_exists( 'curl_init' ) ) {
                        throw new DomainException( __METHOD__ . ': curl (https://secure.php.net/curl) is not ' .
                                'installed, but Http::$httpEngine is set to "curl"' );
                }
 
                switch ( Http::$httpEngine ) {
+                       case 'guzzle':
+                               return new GuzzleHttpRequestTester( $url, $options, $caller );
                        case 'curl':
                                return new CurlHttpRequestTester( $url, $options, $caller );
                        case 'php':
@@ -535,6 +553,12 @@ class MWHttpRequestTester extends MWHttpRequest {
        }
 }
 
+/**
+ * GuzzleHttpRequest subclass for tests that lets response headers be set directly.
+ */
+class GuzzleHttpRequestTester extends GuzzleHttpRequest {
+       /**
+        * Store a value in the response header map under the given name.
+        *
+        * @param string $name Header name used as the array key
+        * @param mixed $value Value stored for that header
+        */
+       public function setRespHeaders( $name, $value ) {
+               $this->respHeaders[$name] = $value;
+       }
+}
+
 class CurlHttpRequestTester extends CurlHttpRequest {
        function setRespHeaders( $name, $value ) {
                $this->respHeaders[$name] = $value;