Merge "Split HttpFunctions.php into separate files"
[lhc/web/wiklou.git] / includes / http / MWHttpRequest.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
/**
 * This wrapper class will call out to curl (if available) or fallback
 * to regular PHP if necessary for handling internal HTTP requests.
 *
 * Instances are obtained through MWHttpRequest::factory(), which picks the
 * concrete backend (CurlHttpRequest or PhpHttpRequest); the constructor is
 * protected. This base class holds the request configuration, the response
 * header parsing and the cookie jar handling shared by the backends.
 *
 * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
 * PHP's HTTP extension.
 */
class MWHttpRequest {
	const SUPPORTS_FILE_POSTS = false;

	/** @var string Response body; reset by execute() and appended to by read() */
	protected $content;
	/** @var int|string Request timeout; 'default' until the constructor resolves it */
	protected $timeout = 'default';
	/**
	 * Declared explicitly because the constructor always assigns it; leaving it
	 * undeclared would create a dynamic property.
	 * @var int|string Connection timeout; 'default' until the constructor resolves it
	 */
	protected $connectTimeout = 'default';
	/** @var bool|null Set to true by execute() when the method is HEAD */
	protected $headersOnly = null;
	/** @var array|null Request parameters, see setData() */
	protected $postData = null;
	/** @var string|null Proxy to use; '' means no proxy, see proxySetup() */
	protected $proxy = null;
	/** @var bool When true, no proxy is used at all */
	protected $noProxy = false;
	protected $sslVerifyHost = true;
	protected $sslVerifyCert = true;
	protected $caInfo = null;
	/** @var string HTTP method, always stored uppercased by the constructor */
	protected $method = "GET";
	/** @var array Request headers, name => value */
	protected $reqHeaders = [];
	/** @var string Fully expanded request URL */
	protected $url;
	/** @var array|bool Result of wfParseUrl() on the request URL */
	protected $parsedUrl;
	/** @var callable|null Read callback, see setCallback() */
	protected $callback;
	protected $maxRedirects = 5;
	protected $followRedirects = false;

	/**
	 * @var CookieJar
	 */
	protected $cookieJar;

	/** @var string[] Raw response header lines, consumed by parseHeader() */
	protected $headerList = [];
	protected $respVersion = "0.9";
	protected $respStatus = "200 Ok";
	/** @var array Parsed response headers, lowercased name => list of values */
	protected $respHeaders = [];

	/** @var Status Outcome of the request */
	public $status;

	/**
	 * @var Profiler
	 */
	protected $profiler;

	/**
	 * @var string
	 */
	protected $profileName;

	/**
	 * @param string $url Url to use. If protocol-relative, will be expanded to an http:// URL
	 * @param array $options (optional) extra params to pass (see Http::request())
	 * @param string $caller The method making this request, for profiling
	 * @param Profiler $profiler An instance of the profiler for profiling, or null
	 */
	protected function __construct(
		$url, $options = [], $caller = __METHOD__, $profiler = null
	) {
		global $wgHTTPTimeout, $wgHTTPConnectTimeout;

		$this->url = wfExpandUrl( $url, PROTO_HTTP );
		$this->parsedUrl = wfParseUrl( $this->url );

		if ( !$this->parsedUrl || !Http::isValidURI( $this->url ) ) {
			$this->status = Status::newFatal( 'http-invalid-url', $url );
		} else {
			$this->status = Status::newGood( 100 ); // continue
		}

		// Timeouts fall back to the site-wide settings when not given,
		// or when explicitly given as 'default'.
		if ( isset( $options['timeout'] ) && $options['timeout'] != 'default' ) {
			$this->timeout = $options['timeout'];
		} else {
			$this->timeout = $wgHTTPTimeout;
		}
		if ( isset( $options['connectTimeout'] ) && $options['connectTimeout'] != 'default' ) {
			$this->connectTimeout = $options['connectTimeout'];
		} else {
			$this->connectTimeout = $wgHTTPConnectTimeout;
		}
		if ( isset( $options['userAgent'] ) ) {
			$this->setUserAgent( $options['userAgent'] );
		}

		// Options that map one-to-one onto member variables of the same name
		$members = [ "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
			"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" ];

		foreach ( $members as $o ) {
			if ( isset( $options[$o] ) ) {
				// ensure that MWHttpRequest::method is always
				// uppercased. Bug 36137
				if ( $o == 'method' ) {
					$options[$o] = strtoupper( $options[$o] );
				}
				$this->$o = $options[$o];
			}
		}

		if ( $this->noProxy ) {
			$this->proxy = ''; // noProxy takes precedence
		}

		// Profile based on what's calling us
		$this->profiler = $profiler;
		$this->profileName = $caller;
	}

	/**
	 * Simple function to test if we can make any sort of requests at all, using
	 * cURL or fopen()
	 * @return bool
	 */
	public static function canMakeRequests() {
		return function_exists( 'curl_init' ) || wfIniGetBool( 'allow_url_fopen' );
	}

	/**
	 * Generate a new request object
	 *
	 * Uses Http::$httpEngine to choose the backend; when it is unset, curl is
	 * selected if available and pure PHP otherwise, and the choice is stored
	 * back into Http::$httpEngine.
	 *
	 * @param string $url Url to use
	 * @param array|null $options (optional) extra params to pass (see Http::request())
	 * @param string $caller The method making this request, for profiling
	 * @throws MWException If the configured engine is unknown or unusable
	 * @return CurlHttpRequest|PhpHttpRequest
	 * @see MWHttpRequest::__construct
	 */
	public static function factory( $url, $options = null, $caller = __METHOD__ ) {
		if ( !Http::$httpEngine ) {
			Http::$httpEngine = function_exists( 'curl_init' ) ? 'curl' : 'php';
		} elseif ( Http::$httpEngine == 'curl' && !function_exists( 'curl_init' ) ) {
			throw new MWException( __METHOD__ . ': curl (http://php.net/curl) is not installed, but' .
				' Http::$httpEngine is set to "curl"' );
		}

		switch ( Http::$httpEngine ) {
			case 'curl':
				return new CurlHttpRequest( $url, $options, $caller, Profiler::instance() );
			case 'php':
				if ( !wfIniGetBool( 'allow_url_fopen' ) ) {
					throw new MWException( __METHOD__ . ': allow_url_fopen ' .
						'needs to be enabled for pure PHP http requests to ' .
						'work. If possible, curl should be used instead. See ' .
						'http://php.net/curl.'
					);
				}
				return new PhpHttpRequest( $url, $options, $caller, Profiler::instance() );
			default:
				throw new MWException( __METHOD__ . ': The setting of Http::$httpEngine is not valid.' );
		}
	}

	/**
	 * Get the body, or content, of the response to the request
	 *
	 * @return string
	 */
	public function getContent() {
		return $this->content;
	}

	/**
	 * Set the parameters of the request
	 *
	 * @param array $args
	 * @todo overload the args param
	 */
	public function setData( $args ) {
		$this->postData = $args;
	}

	/**
	 * Take care of setting up the proxy (do nothing if "noProxy" is set)
	 *
	 * @return void
	 */
	public function proxySetup() {
		// If there is an explicit proxy set and proxies are not disabled, then use it
		if ( $this->proxy && !$this->noProxy ) {
			return;
		}

		// Otherwise, fallback to $wgHTTPProxy if this is not a machine
		// local URL and proxies are not disabled
		if ( self::isLocalURL( $this->url ) || $this->noProxy ) {
			$this->proxy = '';
		} else {
			$this->proxy = Http::getProxy();
		}
	}

	/**
	 * Check if the URL can be served by localhost
	 *
	 * The host of the URL, and each of its superdomains, is looked up in
	 * $wgLocalVirtualHosts. Always false in command line mode.
	 *
	 * @param string $url Full url to check
	 * @return bool
	 */
	private static function isLocalURL( $url ) {
		global $wgCommandLineMode, $wgLocalVirtualHosts;

		if ( $wgCommandLineMode ) {
			return false;
		}

		// Extract host part
		$matches = [];
		if ( preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
			$host = $matches[1];
			// Split up dotwise, most significant label (the TLD) first
			$domainParts = array_reverse( explode( '.', $host ) );

			// Check if this domain or any superdomain is listed as a local
			// virtual host, rebuilding the name one label at a time.
			$domain = '';
			foreach ( $domainParts as $i => $domainPart ) {
				$domain = ( $i == 0 ) ? $domainPart : $domainPart . '.' . $domain;

				// Strict comparison: host names are strings, and loose
				// comparison would equate numeric-looking labels.
				if ( in_array( $domain, $wgLocalVirtualHosts, true ) ) {
					return true;
				}
			}
		}

		return false;
	}

	/**
	 * Set the user agent
	 * @param string $UA
	 */
	public function setUserAgent( $UA ) {
		$this->setHeader( 'User-Agent', $UA );
	}

	/**
	 * Set an arbitrary header
	 *
	 * The name is used as given; its case is not normalized, so setting the
	 * same header with different capitalization yields two entries.
	 *
	 * @param string $name
	 * @param string $value
	 */
	public function setHeader( $name, $value ) {
		// I feel like I should normalize the case here...
		$this->reqHeaders[$name] = $value;
	}

	/**
	 * Get an array of the headers
	 *
	 * When a cookie jar is present, the Cookie request header is (re)built
	 * from it first.
	 *
	 * @return array List of "Name: value" strings
	 */
	public function getHeaderList() {
		$list = [];

		if ( $this->cookieJar ) {
			// A URL such as "http://example.org" has no path component;
			// treat that as the root path so that path matching of cookies
			// is not done against a missing value.
			$path = isset( $this->parsedUrl['path'] ) ? $this->parsedUrl['path'] : '/';

			$this->reqHeaders['Cookie'] =
				$this->cookieJar->serializeToHttpRequest(
					$path,
					$this->parsedUrl['host']
				);
		}

		foreach ( $this->reqHeaders as $name => $value ) {
			$list[] = "$name: $value";
		}

		return $list;
	}

	/**
	 * Set a read callback to accept data read from the HTTP request.
	 * By default, data is appended to an internal buffer which can be
	 * retrieved through $req->getContent().
	 *
	 * To handle data as it comes in -- especially for large files that
	 * would not fit in memory -- you can instead set your own callback,
	 * in the form function($resource, $buffer) where the first parameter
	 * is the low-level resource being read (implementation specific),
	 * and the second parameter is the data buffer.
	 *
	 * You MUST return the number of bytes handled in the buffer; if fewer
	 * bytes are reported handled than were passed to you, the HTTP fetch
	 * will be aborted.
	 *
	 * @param callable $callback
	 * @throws MWException
	 */
	public function setCallback( $callback ) {
		if ( !is_callable( $callback ) ) {
			throw new MWException( 'Invalid MwHttpRequest callback' );
		}
		$this->callback = $callback;
	}

	/**
	 * A generic callback to read the body of the response from a remote
	 * server.
	 *
	 * @param resource $fh
	 * @param string $content
	 * @return int Number of bytes handled, i.e. the length of $content
	 */
	public function read( $fh, $content ) {
		$this->content .= $content;
		return strlen( $content );
	}

	/**
	 * Take care of whatever is necessary to perform the URI request.
	 *
	 * This base implementation only prepares the request (resets the content
	 * buffer, sets up the proxy, the default read callback and the default
	 * User-Agent); it does not send anything itself.
	 *
	 * @return Status The current status of the request
	 */
	public function execute() {
		$this->content = "";

		if ( strtoupper( $this->method ) == "HEAD" ) {
			$this->headersOnly = true;
		}

		$this->proxySetup(); // set up any proxy as needed

		if ( !$this->callback ) {
			$this->setCallback( [ $this, 'read' ] );
		}

		if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
			$this->setUserAgent( Http::userAgent() );
		}

		// Honour the documented return type
		return $this->status;
	}

	/**
	 * Parses the headers, including the HTTP status code and any
	 * Set-Cookie headers. This function expects the headers to be
	 * found in an array in the member variable headerList.
	 */
	protected function parseHeader() {
		$lastname = "";

		foreach ( $this->headerList as $header ) {
			if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
				$this->respVersion = $match[1];
				$this->respStatus = $match[2];
			} elseif ( preg_match( "#^[ \t]#", $header ) ) {
				// Folded line: append it to the last value of the preceding
				// header. A folded line with no preceding header has nothing
				// to attach to and is ignored.
				if ( isset( $this->respHeaders[$lastname] ) ) {
					$last = count( $this->respHeaders[$lastname] ) - 1;
					$this->respHeaders[$lastname][$last] .= "\r\n$header";
				}
			} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
				// Header names are stored lowercased; repeated headers
				// accumulate as multiple values under the same name.
				$lastname = strtolower( $match[1] );
				$this->respHeaders[$lastname][] = $match[2];
			}
		}

		$this->parseCookies();
	}

	/**
	 * Sets HTTPRequest status member to a fatal value with the error
	 * message if the returned integer value of the status code was
	 * not successful (< 300) or a redirect (>=300 and < 400). (see
	 * RFC2616, section 10,
	 * http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html for a
	 * list of status codes.)
	 */
	protected function setStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		if ( (int)$this->respStatus > 399 ) {
			// The reason phrase may be absent (e.g. "HTTP/1.1 404"), in
			// which case an empty message is reported.
			list( $code, $message ) = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
			$this->status->fatal( "http-bad-status", $code, $message );
		}
	}

	/**
	 * Get the integer value of the HTTP status code (e.g. 200 for "200 Ok")
	 * (see RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
	 * for a list of status codes.)
	 *
	 * @return int
	 */
	public function getStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return (int)$this->respStatus;
	}

	/**
	 * Returns true if the last status code was a redirect.
	 *
	 * Only the codes 300 to 303 (inclusive) are regarded as redirects here.
	 *
	 * @return bool
	 */
	public function isRedirect() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$status = (int)$this->respStatus;

		return $status >= 300 && $status <= 303;
	}

	/**
	 * Returns an associative array of response headers after the
	 * request has been executed. Because some headers
	 * (e.g. Set-Cookie) can appear more than once, each value of
	 * the associative array is an array of the values given.
	 *
	 * @return array
	 */
	public function getResponseHeaders() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->respHeaders;
	}

	/**
	 * Returns the value of the given response header.
	 *
	 * The lookup is case-insensitive. When the header occurred several
	 * times, the last value is returned.
	 *
	 * @param string $header
	 * @return string|null Null if the header is not present
	 */
	public function getResponseHeader( $header ) {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$key = strtolower( $header );
		if ( isset( $this->respHeaders[$key] ) ) {
			$v = $this->respHeaders[$key];
			return $v[count( $v ) - 1];
		}

		return null;
	}

	/**
	 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
	 *
	 * @param CookieJar $jar
	 */
	public function setCookieJar( $jar ) {
		$this->cookieJar = $jar;
	}

	/**
	 * Returns the cookie jar in use.
	 *
	 * @return CookieJar
	 */
	public function getCookieJar() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->cookieJar;
	}

	/**
	 * Sets a cookie. Used before a request to set up any individual
	 * cookies. Used internally after a request to parse the
	 * Set-Cookie headers.
	 * @see Cookie::set
	 * @param string $name
	 * @param mixed $value
	 * @param array $attr
	 */
	public function setCookie( $name, $value = null, $attr = null ) {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		$this->cookieJar->setCookie( $name, $value, $attr );
	}

	/**
	 * Parse the cookies in the response headers and store them in the cookie jar.
	 *
	 * A cookie jar is created if none is in use yet. Cookies are attributed
	 * to the host of the final URL (see getFinalUrl()).
	 */
	protected function parseCookies() {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		if ( isset( $this->respHeaders['set-cookie'] ) ) {
			$url = parse_url( $this->getFinalUrl() );
			foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
				$this->cookieJar->parseCookieResponseHeader( $cookie, $url['host'] );
			}
		}
	}

	/**
	 * Returns the final URL after all redirections.
	 *
	 * Relative values of the "Location" header are incorrect as
	 * stated in RFC, however they do happen and modern browsers
	 * support them. This function loops backwards through all
	 * locations in order to build the proper absolute URI - Marooned
	 * at wikia-inc.com
	 *
	 * Note that the multiple Location: headers are an artifact of
	 * CURL -- they shouldn't actually get returned this way. Rewrite
	 * this when bug 29232 is taken care of (high-level redirect
	 * handling rewrite).
	 *
	 * @return string
	 */
	public function getFinalUrl() {
		$headers = $this->getResponseHeaders();

		// return full url (fix for incorrect but handled relative location)
		if ( isset( $headers['location'] ) ) {
			$locations = $headers['location'];
			$domain = '';
			$foundRelativeURI = false;
			$countLocations = count( $locations );

			// Walk from the most recent Location back to the first one,
			// stopping at the first that carries a host.
			for ( $i = $countLocations - 1; $i >= 0; $i-- ) {
				$url = parse_url( $locations[$i] );

				if ( isset( $url['host'] ) ) {
					$domain = $url['scheme'] . '://' . $url['host'];
					break; // found correct URI (with host)
				} else {
					$foundRelativeURI = true;
				}
			}

			if ( $foundRelativeURI ) {
				if ( $domain ) {
					return $domain . $locations[$countLocations - 1];
				} else {
					// No Location had a host: resolve against the
					// originally requested URL instead.
					$url = parse_url( $this->url );
					if ( isset( $url['host'] ) ) {
						return $url['scheme'] . '://' . $url['host'] .
							$locations[$countLocations - 1];
					}
				}
			} else {
				return $locations[$countLocations - 1];
			}
		}

		return $this->url;
	}

	/**
	 * Returns true if the backend can follow redirects. Overridden by the
	 * child classes.
	 * @return bool
	 */
	public function canFollowRedirects() {
		return true;
	}
}