From 0584339f5edf42b7f90d9cfbeb45820f483fe1f7 Mon Sep 17 00:00:00 2001 From: Bill Pirkle Date: Thu, 12 Jul 2018 17:07:54 -0500 Subject: [PATCH] Added non-parallel fallback to MultiHttpClient when curl is unavailable If the curl extension is not available, fall back to the existing HttpRequestFactory and associated classes. Also added related phpunit tests. Bug: T139169 Change-Id: I2f9d4acbb491bce28d7105e124c5cee7e16e86d7 --- includes/http/CurlHttpRequest.php | 1 + includes/http/MWHttpRequest.php | 6 + includes/libs/MultiHttpClient.php | 261 +++++++++++++----- languages/i18n/en.json | 1 + languages/i18n/qqq.json | 1 + .../phpunit/includes/MultiHttpClientTest.php | 168 +++++++++++ 6 files changed, 376 insertions(+), 62 deletions(-) create mode 100644 tests/phpunit/includes/MultiHttpClientTest.php diff --git a/includes/http/CurlHttpRequest.php b/includes/http/CurlHttpRequest.php index f457b2192c..bab85216c0 100644 --- a/includes/http/CurlHttpRequest.php +++ b/includes/http/CurlHttpRequest.php @@ -98,6 +98,7 @@ class CurlHttpRequest extends MWHttpRequest { $curlHandle = curl_init( $this->url ); if ( !curl_setopt_array( $curlHandle, $this->curlOptions ) ) { + $this->status->fatal( 'http-internal-error' ); throw new InvalidArgumentException( "Error setting curl options." ); } diff --git a/includes/http/MWHttpRequest.php b/includes/http/MWHttpRequest.php index 71e08a815f..257955c13d 100644 --- a/includes/http/MWHttpRequest.php +++ b/includes/http/MWHttpRequest.php @@ -332,6 +332,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface { if ( is_null( $callback ) ) { $callback = [ $this, 'read' ]; } elseif ( !is_callable( $callback ) ) { + $this->status->fatal( 'http-internal-error' ); throw new InvalidArgumentException( __METHOD__ . ': invalid callback' ); } $this->callback = $callback; @@ -387,6 +388,11 @@ abstract class MWHttpRequest implements LoggerAwareInterface { protected function parseHeader() { $lastname = ""; + // Failure without (valid) headers gets a response status of zero + if ( !$this->status->isOK() ) { + $this->respStatus = '0'; + } + foreach ( $this->headerList as $header ) { if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) { $this->respVersion = $match[1]; diff --git a/includes/libs/MultiHttpClient.php b/includes/libs/MultiHttpClient.php index 20ddf723f9..36f49e0e6b 100644 --- a/includes/libs/MultiHttpClient.php +++ b/includes/libs/MultiHttpClient.php @@ -23,9 +23,13 @@ use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; +use MediaWiki\MediaWikiServices; /** - * Class to handle concurrent HTTP requests + * Class to handle multiple HTTP requests + * + * If curl is available, requests will be made concurrently. + * Otherwise, they will be made serially. * * HTTP request maps are arrays that use the following format: * - method : GET/HEAD/PUT/POST/DELETE @@ -78,6 +82,8 @@ class MultiHttpClient implements LoggerAwareInterface { * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) * - maxConnsPerHost : maximum number of concurrent connections (per host) * - userAgent : The User-Agent header value to send + * - logger : a \Psr\Log\LoggerInterface instance for debug logging + * - caBundlePath : path to specific Certificate Authority bundle (if any) * @throws Exception */ public function __construct( array $options ) { @@ -105,11 +111,11 @@ class MultiHttpClient implements LoggerAwareInterface { * Execute an HTTP(S) request * * This method returns a response map of: - * - code : HTTP response code or 0 if there was a serious cURL error - * - reason : HTTP response reason (empty if there was a serious cURL error) + * - code : HTTP response code or 0 if there was a serious error + * - reason : HTTP response reason (empty if there was a serious error) * - headers :
* - body : HTTP response body or resource (if "stream" was set) - * - error : Any cURL error string + * - error : Any error string * The map also stores integer-indexed copies of these values. This lets callers do: * @code * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req ); @@ -125,14 +131,17 @@ class MultiHttpClient implements LoggerAwareInterface { } /** - * Execute a set of HTTP(S) requests concurrently + * Execute a set of HTTP(S) requests. + * + * If curl is available, requests will be made concurrently. + * Otherwise, they will be made serially. * * The maps are returned by this method with the 'response' field set to a map of: - * - code : HTTP response code or 0 if there was a serious cURL error - * - reason : HTTP response reason (empty if there was a serious cURL error) + * - code : HTTP response code or 0 if there was a serious error + * - reason : HTTP response reason (empty if there was a serious error) * - headers :
* - body : HTTP response body or resource (if "stream" was set) - * - error : Any cURL error string + * - error : Any error string * The map also stores integer-indexed copies of these values. This lets callers do: * @code * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response']; @@ -151,47 +160,45 @@ class MultiHttpClient implements LoggerAwareInterface { * @throws Exception */ public function runMulti( array $reqs, array $opts = [] ) { + $this->normalizeRequests( $reqs ); + if ( $this->isCurlEnabled() ) { + return $this->runMultiCurl( $reqs, $opts ); + } else { + return $this->runMultiHttp( $reqs, $opts ); + } + } + + /** + * Determines if the curl extension is available + * + * @return bool true if curl is available, false otherwise. + */ + protected function isCurlEnabled() { + return extension_loaded( 'curl' ); + } + + /** + * Execute a set of HTTP(S) requests concurrently + * + * @see MultiHttpClient::runMulti() + * + * @param array $reqs Map of HTTP request arrays + * @param array $opts + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) + * - usePipelining : whether to use HTTP pipelining if possible + * - maxConnsPerHost : maximum number of concurrent connections (per host) + * @return array $reqs With response array populated for each + * @throws Exception + */ + private function runMultiCurl( array $reqs, array $opts = [] ) { $chm = $this->getCurlMulti(); $selectTimeout = $this->getSelectTimeout( $opts ); - // Normalize $reqs and add all of the required cURL handles... + // Add all of the required cURL handles... $handles = []; foreach ( $reqs as $index => &$req ) { - $req['response'] = [ - 'code' => 0, - 'reason' => '', - 'headers' => [], - 'body' => '', - 'error' => '' - ]; - if ( isset( $req[0] ) ) { - $req['method'] = $req[0]; // short-form - unset( $req[0] ); - } - if ( isset( $req[1] ) ) { - $req['url'] = $req[1]; // short-form - unset( $req[1] ); - } - if ( !isset( $req['method'] ) ) { - throw new Exception( "Request has no 'method' field set." ); - } elseif ( !isset( $req['url'] ) ) { - throw new Exception( "Request has no 'url' field set." ); - } - $this->logger->debug( "{$req['method']}: {$req['url']}" ); - $req['query'] = $req['query'] ?? []; - $headers = []; // normalized headers - if ( isset( $req['headers'] ) ) { - foreach ( $req['headers'] as $name => $value ) { - $headers[strtolower( $name )] = $value; - } - } - $req['headers'] = $headers; - if ( !isset( $req['body'] ) ) { - $req['body'] = ''; - $req['headers']['content-length'] = 0; - } - $req['flags'] = $req['flags'] ?? []; $handles[$index] = $this->getCurlHandle( $req, $opts ); if ( count( $reqs ) > 1 ) { // https://github.com/guzzle/guzzle/issues/349 @@ -391,7 +398,13 @@ class MultiHttpClient implements LoggerAwareInterface { return $length; } list( $name, $value ) = explode( ":", $header, 2 ); - $req['response']['headers'][strtolower( $name )] = trim( $value ); + $name = strtolower( $name ); + $value = trim( $value ); + if ( isset( $req['response']['headers'][$name] ) ) { + $req['response']['headers'][$name] .= ', ' . $value; + } else { + $req['response']['headers'][$name] = $value; + } return $length; } ); @@ -417,6 +430,148 @@ class MultiHttpClient implements LoggerAwareInterface { return $ch; } + /** + * @return resource + * @throws Exception + */ + protected function getCurlMulti() { + if ( !$this->multiHandle ) { + if ( !function_exists( 'curl_multi_init' ) ) { + throw new Exception( "PHP cURL function curl_multi_init missing. " . + "Check https://www.mediawiki.org/wiki/Manual:CURL" ); + } + $cmh = curl_multi_init(); + curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining ); + curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost ); + $this->multiHandle = $cmh; + } + return $this->multiHandle; + } + + /** + * Execute a set of HTTP(S) requests sequentially. + * + * @see MultiHttpClient::runMulti() + * @todo Remove dependency on MediaWikiServices: use a separate HTTP client + * library or copy code from PhpHttpRequest + * @param array $reqs Map of HTTP request arrays + * @param array $opts + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) + * @return array $reqs With response array populated for each + * @throws Exception + */ + private function runMultiHttp( array $reqs, array $opts = [] ) { + $httpOptions = [ + 'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout, + 'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout, + 'logger' => $this->logger, + 'caInfo' => $this->caBundlePath, + ]; + foreach ( $reqs as &$req ) { + $reqOptions = $httpOptions + [ + 'method' => $req['method'], + 'proxy' => $req['proxy'] ?? $this->proxy, + 'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent, + 'postData' => $req['body'], + ]; + + $url = $req['url']; + $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 ); + if ( $query != '' ) { + $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query"; + } + + $httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create( + $url, $reqOptions ); + $sv = $httpRequest->execute()->getStatusValue(); + + $respHeaders = array_map( + function ( $v ) { + return implode( ', ', $v ); + }, + $httpRequest->getResponseHeaders() ); + + $req['response'] = [ + 'code' => $httpRequest->getStatus(), + 'reason' => '', + 'headers' => $respHeaders, + 'body' => $httpRequest->getContent(), + 'error' => '', + ]; + + if ( !$sv->isOk() ) { + $svErrors = $sv->getErrors(); + if ( isset( $svErrors[0] ) ) { + $req['response']['error'] = $svErrors[0]['message']; + + // param values vary per failure type (ex. unknown host vs unknown page) + if ( isset( $svErrors[0]['params'][0] ) ) { + if ( is_numeric( $svErrors[0]['params'][0] ) ) { + if ( isset( $svErrors[0]['params'][1] ) ) { + $req['response']['reason'] = $svErrors[0]['params'][1]; + } + } else { + $req['response']['reason'] = $svErrors[0]['params'][0]; + } + } + } + } + + $req['response'][0] = $req['response']['code']; + $req['response'][1] = $req['response']['reason']; + $req['response'][2] = $req['response']['headers']; + $req['response'][3] = $req['response']['body']; + $req['response'][4] = $req['response']['error']; + } + + return $reqs; + } + + /** + * Normalize request information + * + * @param array $reqs the requests to normalize + */ + private function normalizeRequests( array &$reqs ) { + foreach ( $reqs as &$req ) { + $req['response'] = [ + 'code' => 0, + 'reason' => '', + 'headers' => [], + 'body' => '', + 'error' => '' + ]; + if ( isset( $req[0] ) ) { + $req['method'] = $req[0]; // short-form + unset( $req[0] ); + } + if ( isset( $req[1] ) ) { + $req['url'] = $req[1]; // short-form + unset( $req[1] ); + } + if ( !isset( $req['method'] ) ) { + throw new Exception( "Request has no 'method' field set." ); + } elseif ( !isset( $req['url'] ) ) { + throw new Exception( "Request has no 'url' field set." ); + } + $this->logger->debug( "{$req['method']}: {$req['url']}" ); + $req['query'] = $req['query'] ?? []; + $headers = []; // normalized headers + if ( isset( $req['headers'] ) ) { + foreach ( $req['headers'] as $name => $value ) { + $headers[strtolower( $name )] = $value; + } + } + $req['headers'] = $headers; + if ( !isset( $req['body'] ) ) { + $req['body'] = ''; + $req['headers']['content-length'] = 0; + } + $req['flags'] = $req['flags'] ?? []; + } + } + /** * Get a suitable select timeout for the given options. * @@ -439,24 +594,6 @@ class MultiHttpClient implements LoggerAwareInterface { return $selectTimeout; } - /** - * @return resource - * @throws Exception - */ - protected function getCurlMulti() { - if ( !$this->multiHandle ) { - if ( !function_exists( 'curl_multi_init' ) ) { - throw new Exception( "PHP cURL extension missing. " . - "Check https://www.mediawiki.org/wiki/Manual:CURL" ); - } - $cmh = curl_multi_init(); - curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining ); - curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost ); - $this->multiHandle = $cmh; - } - return $this->multiHandle; - } - /** * Register a logger * diff --git a/languages/i18n/en.json b/languages/i18n/en.json index bc35c9e02c..d350e3fdd0 100644 --- a/languages/i18n/en.json +++ b/languages/i18n/en.json @@ -1799,6 +1799,7 @@ "http-timed-out": "HTTP request timed out.", "http-curl-error": "Error fetching URL: $1", "http-bad-status": "There was a problem during the HTTP request: $1 $2", + "http-internal-error": "HTTP internal error.", "upload-curl-error6": "Could not reach URL", "upload-curl-error6-text": "The URL provided could not be reached.\nPlease double-check that the URL is correct and the site is up.", "upload-curl-error28": "Upload timeout", diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 573c0290fd..e7e9e40fe1 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -1999,6 +1999,7 @@ "http-timed-out": "Used as error message when executing HTTP request.\n\nSee also:\n* {{msg-mw|Http-request-error}}\n* {{msg-mw|Http-read-error}}\n* {{msg-mw|Http-host-unreachable|6}}", "http-curl-error": "Used as curl error message when the error is other than known messages.\n* $1 - error code; not URL\nKnown messages are:\n* {{msg-mw|http-host-unreachable}}\n* {{msg-mw|http-timed-out}}", "http-bad-status": "Parameters:\n* $1 - an HTTP error code (e.g. 404)\n* $2 - the HTTP error message (e.g. File Not Found)", + "http-internal-error": "Used as generic error message when executing HTTP request and no more specific message is available.", "upload-curl-error6": "See also:\n* {{msg-mw|Upload-curl-error6|title}}\n* {{msg-mw|Upload-curl-error6-text|body}}", "upload-curl-error6-text": "See also:\n* {{msg-mw|Upload-curl-error6|title}}\n* {{msg-mw|Upload-curl-error6-text|body}}", "upload-curl-error28": "See also:\n* {{msg-mw|Upload-curl-error28|title}}\n* {{msg-mw|Upload-curl-error28-text|body}}", diff --git a/tests/phpunit/includes/MultiHttpClientTest.php b/tests/phpunit/includes/MultiHttpClientTest.php new file mode 100644 index 0000000000..7073b71023 --- /dev/null +++ b/tests/phpunit/includes/MultiHttpClientTest.php @@ -0,0 +1,168 @@ +getMockBuilder( MultiHttpClient::class ) + ->setConstructorArgs( [ [] ] ) + ->setMethods( [ 'isCurlEnabled' ] )->getMock(); + $client->method( 'isCurlEnabled' )->willReturn( false ); + $this->client = $client; + } + + private function getHttpRequest( $statusValue, $statusCode, $headers = [] ) { + $httpRequest = $this->getMockBuilder( PhpHttpRequest::class ) + ->setConstructorArgs( [ '', [] ] ) + ->getMock(); + $httpRequest->expects( $this->any() ) + ->method( 'execute' ) + ->willReturn( Status::wrap( $statusValue ) ); + $httpRequest->expects( $this->any() ) + ->method( 'getResponseHeaders' ) + ->willReturn( $headers ); + $httpRequest->expects( $this->any() ) + ->method( 'getStatus' ) + ->willReturn( $statusCode ); + return $httpRequest; + } + + private function mockHttpRequestFactory( $httpRequest ) { + $factory = $this->getMockBuilder( MediaWiki\Http\HttpRequestFactory::class ) + ->getMock(); + $factory->expects( $this->any() ) + ->method( 'create' ) + ->willReturn( $httpRequest ); + return $factory; + } + + /** + * Test call of a single url that should succeed + */ + public function testMultiHttpClientSingleSuccess() { + // Mock success + $httpRequest = $this->getHttpRequest( StatusValue::newGood( 200 ), 200 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $this->client->run( [ + 'method' => 'GET', + 'url' => "http://example.test", + ] ); + + $this->assertEquals( 200, $rcode ); + } + + /** + * Test call of a single url that should not exist, and therefore fail + */ + public function testMultiHttpClientSingleFailure() { + // Mock an invalid tld + $httpRequest = $this->getHttpRequest( + StatusValue::newFatal( 'http-invalid-url', 'http://www.example.test' ), 0 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $this->client->run( [ + 'method' => 'GET', + 'url' => "http://www.example.test", + ] ); + + $failure = $rcode < 200 || $rcode >= 400; + $this->assertTrue( $failure ); + } + + /** + * Test call of multiple urls that should all succeed + */ + public function testMultiHttpClientMultipleSuccess() { + // Mock success + $httpRequest = $this->getHttpRequest( StatusValue::newGood( 200 ), 200 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + $reqs = [ + [ + 'method' => 'GET', + 'url' => 'http://example.test', + ], + [ + 'method' => 'GET', + 'url' => 'https://get.test', + ], + ]; + $responses = $this->client->runMulti( $reqs ); + foreach ( $responses as $response ) { + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; + $this->assertEquals( 200, $rcode ); + } + } + + /** + * Test call of multiple urls that should all fail + */ + public function testMultiHttpClientMultipleFailure() { + // Mock page not found + $httpRequest = $this->getHttpRequest( + StatusValue::newFatal( "http-bad-status", 404, 'Not Found' ), 404 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + $reqs = [ + [ + 'method' => 'GET', + 'url' => 'http://example.test/12345', + ], + [ + 'method' => 'GET', + 'url' => 'http://example.test/67890' , + ] + ]; + $responses = $this->client->runMulti( $reqs ); + foreach ( $responses as $response ) { + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; + $failure = $rcode < 200 || $rcode >= 400; + $this->assertTrue( $failure ); + } + } + + /** + * Test of response header handling + */ + public function testMultiHttpClientHeaders() { + // Represenative headers for typical requests, per MWHttpRequest::getResponseHeaders() + $headers = [ + 'content-type' => [ + 'text/html; charset=utf-8', + ], + 'date' => [ + 'Wed, 18 Jul 2018 14:52:41 GMT', + ], + 'set-cookie' => [ + 'COUNTRY=NAe6; expires=Wed, 25-Jul-2018 14:52:41 GMT; path=/; domain=.example.test', + 'LAST_NEWS=1531925562; expires=Thu, 18-Jul-2019 14:52:41 GMT; path=/; domain=.example.test', + ] + ]; + + // Mock success with specific headers + $httpRequest = $this->getHttpRequest( StatusValue::newGood( 200 ), 200, $headers ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $this->client->run( [ + 'method' => 'GET', + 'url' => 'http://example.test', + ] ); + + $this->assertEquals( 200, $rcode ); + $this->assertEquals( count( $headers ), count( $rhdrs ) ); + foreach ( $headers as $name => $values ) { + $value = implode( ', ', $values ); + $this->assertArrayHasKey( $name, $rhdrs ); + $this->assertEquals( $value, $rhdrs[$name] ); + } + } +} -- 2.20.1