From e8cb9f5f83534dac16bedb8a3af6cdf0f0fd617f Mon Sep 17 00:00:00 2001 From: Bill Pirkle Date: Wed, 2 Jan 2019 16:16:44 -0600 Subject: [PATCH] http: Support callback functions in GuzzleHttpRequest Provide backward compatibility for callback functions in GuzzleHttpRequest, which was missing in T202110, and restore GuzzleHttpRequest as the default provided by HttpRequestFactory. Bug: T212175 Depends-On: I4b45e79d35252d13f714f3271b87301ca515121a Change-Id: I60d1a034b44874f6d24a04058db264eeb565f5e1 --- autoload.php | 1 + includes/http/GuzzleHttpRequest.php | 59 ++++++- includes/http/Http.php | 8 +- includes/http/MWCallbackStream.php | 46 ++++++ includes/http/MWHttpRequest.php | 21 ++- .../includes/http/GuzzleHttpRequestTest.php | 151 ++++++++++++++++++ tests/phpunit/includes/http/HttpTest.php | 16 -- .../site/MediaWikiPageNameNormalizerTest.php | 3 +- 8 files changed, 272 insertions(+), 33 deletions(-) create mode 100644 includes/http/MWCallbackStream.php create mode 100644 tests/phpunit/includes/http/GuzzleHttpRequestTest.php diff --git a/autoload.php b/autoload.php index 61b8177891..df1c0b2505 100644 --- a/autoload.php +++ b/autoload.php @@ -819,6 +819,7 @@ $wgAutoloadLocalClasses = [ 'MIMEsearchPage' => __DIR__ . '/includes/specials/SpecialMIMEsearch.php', 'MSCompoundFileReader' => __DIR__ . '/includes/libs/mime/MSCompoundFileReader.php', 'MWCallableUpdate' => __DIR__ . '/includes/deferred/MWCallableUpdate.php', + 'MWCallbackStream' => __DIR__ . '/includes/http/MWCallbackStream.php', 'MWContentSerializationException' => __DIR__ . '/includes/exception/MWContentSerializationException.php', 'MWCryptHKDF' => __DIR__ . '/includes/utils/MWCryptHKDF.php', 'MWCryptHash' => __DIR__ . '/includes/libs/MWCryptHash.php', diff --git a/includes/http/GuzzleHttpRequest.php b/includes/http/GuzzleHttpRequest.php index db8a09b970..ef7046724f 100644 --- a/includes/http/GuzzleHttpRequest.php +++ b/includes/http/GuzzleHttpRequest.php @@ -25,8 +25,10 @@ use GuzzleHttp\Psr7\Request; * MWHttpRequest implemented using the Guzzle library * * Differences from the CurlHttpRequest implementation: - * 1) the MWHttpRequest 'callback" option is unsupported. Instead, use the 'sink' option to - * send a filename/stream (see http://docs.guzzlephp.org/en/stable/request-options.html#sink) + * 1) a new 'sink' option is available as an alternative to callbacks. See: + * http://docs.guzzlephp.org/en/stable/request-options.html#sink) + * The 'callback' option remains available as well. If both 'sink' and 'callback' are + * specified, 'sink' is used. * 2) callers may set a custom handler via the 'handler' option. * If this is not set, Guzzle will use curl (if available) or PHP streams (otherwise) * 3) setting either sslVerifyHost or sslVerifyCert will enable both. Guzzle does not allow @@ -49,7 +51,7 @@ class GuzzleHttpRequest extends MWHttpRequest { * @throws Exception */ public function __construct( - $url, array $options = [], $caller = __METHOD__, $profiler = null + $url, array $options = [], $caller = __METHOD__, Profiler $profiler = null ) { parent::__construct( $url, $options, $caller, $profiler ); @@ -61,6 +63,48 @@ class GuzzleHttpRequest extends MWHttpRequest { } } + /** + * Set a read callback to accept data read from the HTTP request. + * By default, data is appended to an internal buffer which can be + * retrieved through $req->getContent(). + * + * To handle data as it comes in -- especially for large files that + * would not fit in memory -- you can instead set your own callback, + * in the form function($resource, $buffer) where the first parameter + * is the low-level resource being read (implementation specific), + * and the second parameter is the data buffer. + * + * You MUST return the number of bytes handled in the buffer; if fewer + * bytes are reported handled than were passed to you, the HTTP fetch + * will be aborted. + * + * This function overrides any 'sink' or 'callback' constructor option. + * + * @param callable|null $callback + * @throws InvalidArgumentException + */ + public function setCallback( $callback ) { + $this->sink = null; + $this->doSetCallback( $callback ); + } + + /** + * Worker function for setting callbacks. Calls can originate both internally and externally + * via setCallback). Defaults to the internal read callback if $callback is null. + * + * If a sink is already specified, this does nothing. This causes the 'sink' constructor + * option to override the 'callback' constructor option. + * + * @param $callback|null $callback + * @throws InvalidArgumentException + */ + protected function doSetCallback( $callback ) { + if ( !$this->sink ) { + parent::doSetCallback( $callback ); + $this->sink = new MWCallbackStream( $this->callback ); + } + } + /** * @see MWHttpRequest::execute * @@ -124,11 +168,9 @@ class GuzzleHttpRequest extends MWHttpRequest { $request = new Request( $this->method, $this->url ); $response = $client->send( $request ); $this->headerList = $response->getHeaders(); - $this->content = $response->getBody()->getContents(); $this->respVersion = $response->getProtocolVersion(); $this->respStatus = $response->getStatusCode() . ' ' . $response->getReasonPhrase(); - } catch ( GuzzleHttp\Exception\ConnectException $e ) { // ConnectException is thrown for several reasons besides generic "timeout": // Connection refused @@ -179,6 +221,11 @@ class GuzzleHttpRequest extends MWHttpRequest { return Status::wrap( $this->status ); // TODO B/C; move this to callers } + protected function prepare() { + $this->doSetCallback( $this->callback ); + parent::prepare(); + } + /** * @return bool */ @@ -189,7 +236,7 @@ class GuzzleHttpRequest extends MWHttpRequest { /** * Guzzle provides headers as an array. Reprocess to match our expectations. Guzzle will - * have already parsed and removed the status line (in EasyHandle::createResponse)z. + * have already parsed and removed the status line (in EasyHandle::createResponse). */ protected function parseHeader() { // Failure without (valid) headers gets a response status of zero diff --git a/includes/http/Http.php b/includes/http/Http.php index c0005c5ae5..eaf276303e 100644 --- a/includes/http/Http.php +++ b/includes/http/Http.php @@ -58,7 +58,7 @@ class Http { * @param string $caller The method making this request, for profiling * @return string|bool (bool)false on failure or a string on success */ - public static function request( $method, $url, $options = [], $caller = __METHOD__ ) { + public static function request( $method, $url, array $options = [], $caller = __METHOD__ ) { $logger = LoggerFactory::getInstance( 'http' ); $logger->debug( "$method: $url" ); @@ -95,7 +95,7 @@ class Http { * @param string $caller The method making this request, for profiling * @return string|bool false on error */ - public static function get( $url, $options = [], $caller = __METHOD__ ) { + public static function get( $url, array $options = [], $caller = __METHOD__ ) { $args = func_get_args(); if ( isset( $args[1] ) && ( is_string( $args[1] ) || is_numeric( $args[1] ) ) ) { // Second was used to be the timeout @@ -118,7 +118,7 @@ class Http { * @param string $caller The method making this request, for profiling * @return string|bool false on error */ - public static function post( $url, $options = [], $caller = __METHOD__ ) { + public static function post( $url, array $options = [], $caller = __METHOD__ ) { return self::request( 'POST', $url, $options, $caller ); } @@ -170,7 +170,7 @@ class Http { * @param array $options * @return MultiHttpClient */ - public static function createMultiClient( $options = [] ) { + public static function createMultiClient( array $options = [] ) { global $wgHTTPConnectTimeout, $wgHTTPTimeout, $wgHTTPProxy; return new MultiHttpClient( $options + [ diff --git a/includes/http/MWCallbackStream.php b/includes/http/MWCallbackStream.php new file mode 100644 index 0000000000..a4120a3578 --- /dev/null +++ b/includes/http/MWCallbackStream.php @@ -0,0 +1,46 @@ +stream = GuzzleHttp\Psr7\stream_for(); + $this->callback = $cb; + } + + public function write( $string ) { + return call_user_func( $this->callback, $this, $string ); + } +} diff --git a/includes/http/MWHttpRequest.php b/includes/http/MWHttpRequest.php index 19912394da..3b56f21614 100644 --- a/includes/http/MWHttpRequest.php +++ b/includes/http/MWHttpRequest.php @@ -91,7 +91,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface { * @throws Exception */ public function __construct( - $url, array $options = [], $caller = __METHOD__, $profiler = null + $url, array $options = [], $caller = __METHOD__, Profiler $profiler = null ) { global $wgHTTPTimeout, $wgHTTPConnectTimeout; @@ -202,7 +202,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface { * @param array $args * @todo overload the args param */ - public function setData( $args ) { + public function setData( array $args ) { $this->postData = $args; } @@ -326,6 +326,17 @@ abstract class MWHttpRequest implements LoggerAwareInterface { * @throws InvalidArgumentException */ public function setCallback( $callback ) { + return $this->doSetCallback( $callback ); + } + + /** + * Worker function for setting callbacks. Calls can originate both internally and externally + * via setCallback). Defaults to the internal read callback if $callback is null. + * + * @param $callback|null $callback + * @throws InvalidArgumentException + */ + protected function doSetCallback( $callback ) { if ( is_null( $callback ) ) { $callback = [ $this, 'read' ]; } elseif ( !is_callable( $callback ) ) { @@ -369,7 +380,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface { $this->proxySetup(); // set up any proxy as needed if ( !$this->callback ) { - $this->setCallback( null ); + $this->doSetCallback( null ); } if ( !isset( $this->reqHeaders['User-Agent'] ) ) { @@ -504,7 +515,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface { * * @param CookieJar $jar */ - public function setCookieJar( $jar ) { + public function setCookieJar( CookieJar $jar ) { $this->cookieJar = $jar; } @@ -530,7 +541,7 @@ abstract class MWHttpRequest implements LoggerAwareInterface { * @param string $value * @param array $attr */ - public function setCookie( $name, $value, $attr = [] ) { + public function setCookie( $name, $value, array $attr = [] ) { if ( !$this->cookieJar ) { $this->cookieJar = new CookieJar; } diff --git a/tests/phpunit/includes/http/GuzzleHttpRequestTest.php b/tests/phpunit/includes/http/GuzzleHttpRequestTest.php new file mode 100644 index 0000000000..c9356b6b10 --- /dev/null +++ b/tests/phpunit/includes/http/GuzzleHttpRequestTest.php @@ -0,0 +1,151 @@ +bodyTextReceived .= $buffer; + return strlen( $buffer ); + } + + public function testSuccess() { + $handler = HandlerStack::create( new MockHandler( [ new Response( 200, [ + 'status' => 200, + ], $this->exampleBodyText ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ 'handler' => $handler ] ); + $r->execute(); + + $this->assertEquals( 200, $r->getStatus() ); + $this->assertEquals( $this->exampleBodyText, $r->getContent() ); + } + + public function testSuccessConstructorCallback() { + $this->bodyTextReceived = ''; + $handler = HandlerStack::create( new MockHandler( [ new Response( 200, [ + 'status' => 200, + ], $this->exampleBodyText ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ + 'callback' => [ $this, 'processHttpDataChunk' ], + 'handler' => $handler, + ] ); + $r->execute(); + + $this->assertEquals( 200, $r->getStatus() ); + $this->assertEquals( $this->exampleBodyText, $this->bodyTextReceived ); + } + + public function testSuccessSetCallback() { + $this->bodyTextReceived = ''; + $handler = HandlerStack::create( new MockHandler( [ new Response( 200, [ + 'status' => 200, + ], $this->exampleBodyText ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ + 'handler' => $handler, + ] ); + $r->setCallback( [ $this, 'processHttpDataChunk' ] ); + $r->execute(); + + $this->assertEquals( 200, $r->getStatus() ); + $this->assertEquals( $this->exampleBodyText, $this->bodyTextReceived ); + } + + /** + * use a callback stream to pipe the mocked response data to our callback function + */ + public function testSuccessSink() { + $this->bodyTextReceived = ''; + $handler = HandlerStack::create( new MockHandler( [ new Response( 200, [ + 'status' => 200, + ], $this->exampleBodyText ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ + 'handler' => $handler, + 'sink' => new MWCallbackStream( [ $this, 'processHttpDataChunk' ] ), + ] ); + $r->execute(); + + $this->assertEquals( 200, $r->getStatus() ); + $this->assertEquals( $this->exampleBodyText, $this->bodyTextReceived ); + } + + public function testBadUrl() { + $r = new GuzzleHttpRequest( '' ); + $s = $r->execute(); + $errorMsg = $s->getErrorsByType( 'error' )[0]['message']; + + $this->assertEquals( 0, $r->getStatus() ); + $this->assertEquals( 'http-invalid-url', $errorMsg ); + } + + public function testConnectException() { + $handler = HandlerStack::create( new MockHandler( [ new GuzzleHttp\Exception\ConnectException( + 'Mock Connection Exception', new Request( 'GET', $this->exampleUrl ) + ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ 'handler' => $handler ] ); + $s = $r->execute(); + $errorMsg = $s->getErrorsByType( 'error' )[0]['message']; + + $this->assertEquals( 0, $r->getStatus() ); + $this->assertEquals( 'http-request-error', $errorMsg ); + } + + public function testTimeout() { + $handler = HandlerStack::create( new MockHandler( [ new GuzzleHttp\Exception\RequestException( + 'Connection timed out', new Request( 'GET', $this->exampleUrl ) + ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ 'handler' => $handler ] ); + $s = $r->execute(); + $errorMsg = $s->getErrorsByType( 'error' )[0]['message']; + + $this->assertEquals( 0, $r->getStatus() ); + $this->assertEquals( 'http-timed-out', $errorMsg ); + } + + public function testNotFound() { + $handler = HandlerStack::create( new MockHandler( [ new Response( 404, [ + 'status' => '404', + ] ) ] ) ); + $r = new GuzzleHttpRequest( $this->exampleUrl, [ 'handler' => $handler ] ); + $s = $r->execute(); + $errorMsg = $s->getErrorsByType( 'error' )[0]['message']; + + $this->assertEquals( 404, $r->getStatus() ); + $this->assertEquals( 'http-bad-status', $errorMsg ); + } +} diff --git a/tests/phpunit/includes/http/HttpTest.php b/tests/phpunit/includes/http/HttpTest.php index ac7ef8027c..fedfac6aa1 100644 --- a/tests/phpunit/includes/http/HttpTest.php +++ b/tests/phpunit/includes/http/HttpTest.php @@ -1,9 +1,5 @@ assertTrue( defined( $value ), $value . ' not defined' ); } - - /** - * No actual request is made herein - */ - public function testGuzzleHttpRequest() { - $handler = HandlerStack::create( new MockHandler( [ new Response( 200 ) ] ) ); - $r = new GuzzleHttpRequest( 'http://www.example.text', [ 'handler' => $handler ] ); - $r->execute(); - $this->assertEquals( 200, $r->getStatus() ); - - // @TODO: add failure tests (404s and failure to connect) - } } /** diff --git a/tests/phpunit/includes/site/MediaWikiPageNameNormalizerTest.php b/tests/phpunit/includes/site/MediaWikiPageNameNormalizerTest.php index 2ac2714650..15894a3d9a 100644 --- a/tests/phpunit/includes/site/MediaWikiPageNameNormalizerTest.php +++ b/tests/phpunit/includes/site/MediaWikiPageNameNormalizerTest.php @@ -107,9 +107,8 @@ class MediaWikiPageNameNormalizerTestMockHttp extends Http { */ public static $response; - public static function get( $url, $options = [], $caller = __METHOD__ ) { + public static function get( $url, array $options = [], $caller = __METHOD__ ) { PHPUnit_Framework_Assert::assertInternalType( 'string', $url ); - PHPUnit_Framework_Assert::assertInternalType( 'array', $options ); PHPUnit_Framework_Assert::assertInternalType( 'string', $caller ); return self::$response; -- 2.20.1