72def7927512c6fec37e43bf9c8f37c84bd5c4bf
[lhc/web/wiklou.git] / includes / HttpFunctions.php
1 <?php
2 /**
3 * @defgroup HTTP HTTP
4 */
5
/**
 * Various HTTP related functions
 * @ingroup HTTP
 */
class Http {
	/**
	 * Name of the request backend ('curl' or 'php'). While this is false,
	 * MWHttpRequest::factory() picks a backend itself on first use.
	 */
	static $httpEngine = false;

	/**
	 * Perform an HTTP request
	 *
	 * @param $method String: HTTP method. Usually GET/POST
	 * @param $url String: full URL to act on
	 * @param $options Array: options to pass to MWHttpRequest object.
	 *	Possible keys for the array:
	 *    - timeout             Timeout length in seconds
	 *    - postData            An array of key-value pairs or a url-encoded form data
	 *    - proxy               The proxy to use.
	 *                          Will use $wgHTTPProxy (if set) otherwise.
	 *    - noProxy             Override $wgHTTPProxy (if set) and don't use any proxy at all.
	 *    - sslVerifyHost       (curl only) Verify hostname against certificate
	 *    - sslVerifyCert       (curl only) Verify SSL certificate
	 *    - caInfo              (curl only) Provide CA information
	 *    - maxRedirects        Maximum number of redirects to follow (defaults to 5)
	 *    - followRedirects     Whether to follow redirects (defaults to false).
	 *                          Note: this should only be used when the target URL is trusted,
	 *                          to avoid attacks on intranet services accessible by HTTP.
	 * @return Mixed: (bool)false on failure or a string on success
	 */
	public static function request( $method, $url, $options = array() ) {
		$url = wfExpandUrl( $url );
		wfDebug( "HTTP: $method: $url\n" );
		$options['method'] = strtoupper( $method );

		// 'default' makes the request object fall back to the configured timeout
		if ( !isset( $options['timeout'] ) ) {
			$options['timeout'] = 'default';
		}

		$req = MWHttpRequest::factory( $url, $options );
		$status = $req->execute();

		return $status->isOK() ? $req->getContent() : false;
	}

	/**
	 * Simple wrapper for Http::request( 'GET' )
	 *
	 * Note that $timeout always replaces any 'timeout' key passed in $options.
	 *
	 * @param $url String: full URL to fetch
	 * @param $timeout Mixed: timeout in seconds, or 'default'
	 * @param $options Array: see Http::request()
	 * @return Mixed: (bool)false on failure or a string on success
	 * @see Http::request()
	 */
	public static function get( $url, $timeout = 'default', $options = array() ) {
		$options['timeout'] = $timeout;
		return Http::request( 'GET', $url, $options );
	}

	/**
	 * Simple wrapper for Http::request( 'POST' )
	 *
	 * @param $url String: full URL to post to
	 * @param $options Array: see Http::request()
	 * @return Mixed: (bool)false on failure or a string on success
	 * @see Http::request()
	 */
	public static function post( $url, $options = array() ) {
		return Http::request( 'POST', $url, $options );
	}

	/**
	 * Check if the URL can be served by localhost
	 *
	 * Only plain http:// URLs whose host is followed by '/' or ':' are
	 * examined; everything else is reported as non-local. Always returns
	 * false in command line mode.
	 *
	 * @param $url String: full url to check
	 * @return Boolean
	 */
	public static function isLocalURL( $url ) {
		global $wgCommandLineMode, $wgConf;

		if ( $wgCommandLineMode ) {
			return false;
		}

		// Extract host part
		$matches = array();
		if ( !preg_match( '!^http://([\w.-]+)[/:].*$!', $url, $matches ) ) {
			return false;
		}

		// Walk from the top-level label towards the full host name, so that the
		// host itself and each of its superdomains is checked against $wgConf
		$domain = '';
		foreach ( array_reverse( explode( '.', $matches[1] ) ) as $index => $label ) {
			$domain = ( $index == 0 ) ? $label : $label . '.' . $domain;

			if ( $wgConf->isLocalVHost( $domain ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * A standard user-agent we can use for external requests.
	 * @return String
	 */
	public static function userAgent() {
		return "MediaWiki/" . MW_VERSION;
	}

	/**
	 * Checks that the given URI is a valid one. Hardcoding the
	 * protocols, because we only want protocols that both cURL
	 * and php support.
	 *
	 * Accepts http, https, ftp and ftps URIs that contain no whitespace
	 * and have at least one non-slash character after "://".
	 *
	 * @param $uri Mixed: URI to check for validity
	 * @return Boolean
	 */
	public static function isValidURI( $uri ) {
		// preg_match() yields 1/0 (or false on error); callers are promised a boolean
		return (bool)preg_match(
			'/^(f|ht)tps?:\/\/[^\/\s]\S*$/D',
			$uri
		);
	}
}
133
/**
 * This wrapper class will call out to curl (if available) or fallback
 * to regular PHP if necessary for handling internal HTTP requests.
 *
 * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
 * PHP's HTTP extension.
 */
class MWHttpRequest {
	const SUPPORTS_FILE_POSTS = false;

	protected $content;
	protected $timeout = 'default';
	protected $headersOnly = null;
	protected $postData = null;
	protected $proxy = null;
	protected $noProxy = false;
	protected $sslVerifyHost = true;
	protected $sslVerifyCert = true;
	protected $caInfo = null;
	protected $method = "GET";
	protected $reqHeaders = array();
	protected $url;
	protected $parsedUrl;
	protected $callback;
	protected $maxRedirects = 5;
	protected $followRedirects = false;

	/**
	 * @var CookieJar
	 */
	protected $cookieJar;

	// Raw response header lines, filled in by the backend before parseHeader()
	protected $headerList = array();
	protected $respVersion = "0.9";
	protected $respStatus = "200 Ok";
	// Parsed response headers: lower-cased name => list of values
	protected $respHeaders = array();

	public $status;

	/**
	 * @param $url String: url to use
	 * @param $options Array: (optional) extra params to pass (see Http::request())
	 */
	function __construct( $url, $options = array() ) {
		global $wgHTTPTimeout;

		$this->url = $url;
		$this->parsedUrl = parse_url( $url );

		if ( !Http::isValidURI( $this->url ) ) {
			$this->status = Status::newFatal( 'http-invalid-url' );
		} else {
			$this->status = Status::newGood( 100 ); // continue
		}

		if ( isset( $options['timeout'] ) && $options['timeout'] != 'default' ) {
			$this->timeout = $options['timeout'];
		} else {
			$this->timeout = $wgHTTPTimeout;
		}

		// Options that map one-to-one onto a member variable of the same name
		$members = array( "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
			"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" );

		foreach ( $members as $o ) {
			if ( isset( $options[$o] ) ) {
				$this->$o = $options[$o];
			}
		}
	}

	/**
	 * Generate a new request object
	 *
	 * Uses the backend named by Http::$httpEngine; when that is unset, curl
	 * is chosen if curl_init() exists and the pure PHP backend otherwise.
	 *
	 * @param $url String: url to use
	 * @param $options Array: (optional) extra params to pass (see Http::request())
	 * @return MWHttpRequest: a CurlHttpRequest or PhpHttpRequest instance
	 * @throws MWException if the selected backend is unknown or unusable
	 * @see MWHttpRequest::__construct
	 */
	public static function factory( $url, $options = null ) {
		if ( $options === null ) {
			// The constructor is documented to take an array
			$options = array();
		}

		if ( !Http::$httpEngine ) {
			Http::$httpEngine = function_exists( 'curl_init' ) ? 'curl' : 'php';
		} elseif ( Http::$httpEngine == 'curl' && !function_exists( 'curl_init' ) ) {
			throw new MWException( __METHOD__ . ': curl (http://php.net/curl) is not installed, but' .
				' Http::$httpEngine is set to "curl"' );
		}

		switch( Http::$httpEngine ) {
			case 'curl':
				return new CurlHttpRequest( $url, $options );
			case 'php':
				if ( !wfIniGetBool( 'allow_url_fopen' ) ) {
					throw new MWException( __METHOD__ . ': allow_url_fopen needs to be enabled for pure PHP' .
						' http requests to work. If possible, curl should be used instead. See http://php.net/curl.' );
				}
				return new PhpHttpRequest( $url, $options );
			default:
				throw new MWException( __METHOD__ . ': The setting of Http::$httpEngine is not valid.' );
		}
	}

	/**
	 * Get the body, or content, of the response to the request
	 *
	 * @return String
	 */
	public function getContent() {
		return $this->content;
	}

	/**
	 * Set the parameters of the request
	 *
	 * @param $args Array
	 * @todo overload the args param
	 */
	public function setData( $args ) {
		$this->postData = $args;
	}

	/**
	 * Take care of setting up the proxy
	 * (override in subclass)
	 *
	 * An explicitly configured proxy is left alone. Otherwise the first match
	 * wins: localhost for local URLs, then $wgHTTPProxy, then the http_proxy
	 * environment variable.
	 *
	 * @return nothing
	 */
	public function proxySetup() {
		global $wgHTTPProxy;

		if ( $this->proxy ) {
			return;
		}

		if ( Http::isLocalURL( $this->url ) ) {
			$this->proxy = 'http://localhost:80/';
		} elseif ( $wgHTTPProxy ) {
			$this->proxy = $wgHTTPProxy ;
		} elseif ( getenv( "http_proxy" ) ) {
			$this->proxy = getenv( "http_proxy" );
		}
	}

	/**
	 * Set the referrer header
	 *
	 * @param $url String
	 */
	public function setReferer( $url ) {
		$this->setHeader( 'Referer', $url );
	}

	/**
	 * Set the user agent
	 *
	 * @param $UA String
	 */
	public function setUserAgent( $UA ) {
		$this->setHeader( 'User-Agent', $UA );
	}

	/**
	 * Set an arbitrary header
	 *
	 * Header names are stored as given (no case normalisation), so the same
	 * header set with different capitalisation ends up twice in the request.
	 *
	 * @param $name String
	 * @param $value String
	 */
	public function setHeader( $name, $value ) {
		// I feel like I should normalize the case here...
		$this->reqHeaders[$name] = $value;
	}

	/**
	 * Get an array of the headers
	 *
	 * When a cookie jar is present, the Cookie request header is (re)built
	 * from it first.
	 *
	 * @return Array: list of "Name: value" strings
	 */
	public function getHeaderList() {
		$list = array();

		if ( $this->cookieJar ) {
			// parse_url() leaves out components that are absent from the URL
			// (e.g. no path for "http://example.com") and returns false for
			// seriously malformed input, so do not index into it blindly.
			$parsed = is_array( $this->parsedUrl ) ? $this->parsedUrl : array();
			// An absent path means the root document is being requested
			$path = isset( $parsed['path'] ) ? $parsed['path'] : '/';
			$host = isset( $parsed['host'] ) ? $parsed['host'] : null;

			$this->reqHeaders['Cookie'] =
				$this->cookieJar->serializeToHttpRequest( $path, $host );
		}

		foreach ( $this->reqHeaders as $name => $value ) {
			$list[] = "$name: $value";
		}

		return $list;
	}

	/**
	 * Set the callback
	 *
	 * @param $callback Callback
	 */
	public function setCallback( $callback ) {
		$this->callback = $callback;
	}

	/**
	 * A generic callback to read the body of the response from a remote
	 * server.
	 *
	 * @param $fh handle
	 * @param $content String
	 * @return Integer: number of bytes consumed
	 */
	public function read( $fh, $content ) {
		$this->content .= $content;
		return strlen( $content );
	}

	/**
	 * Take care of whatever is necessary to perform the URI request.
	 *
	 * This base implementation only prepares the request (referer, proxy,
	 * default callback, user agent) and returns nothing itself; the backend
	 * subclasses call it first and then return the Status.
	 *
	 * @return nothing
	 */
	public function execute() {
		global $wgTitle;

		$this->content = "";

		if ( strtoupper( $this->method ) == "HEAD" ) {
			$this->headersOnly = true;
		}

		if ( is_object( $wgTitle ) && !isset( $this->reqHeaders['Referer'] ) ) {
			$this->setReferer( $wgTitle->getFullURL() );
		}

		if ( !$this->noProxy ) {
			$this->proxySetup();
		}

		if ( !$this->callback ) {
			$this->setCallback( array( $this, 'read' ) );
		}

		if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
			$this->setUserAgent( Http::userAgent() );
		}
	}

	/**
	 * Parses the headers, including the HTTP status code and any
	 * Set-Cookie headers. This function expects the headers to be
	 * found in an array in the member variable headerList.
	 *
	 * @return nothing
	 */
	protected function parseHeader() {
		$lastname = "";

		foreach ( $this->headerList as $header ) {
			if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
				$this->respVersion = $match[1];
				$this->respStatus = $match[2];
			} elseif ( preg_match( "#^[ \t]#", $header ) ) {
				// Folded line: belongs to the value of the previous header.
				// A fold with no header before it has nothing to attach to
				// and is dropped.
				if ( isset( $this->respHeaders[$lastname] ) ) {
					$last = count( $this->respHeaders[$lastname] ) - 1;
					$this->respHeaders[$lastname][$last] .= "\r\n$header";
				}
			} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
				$lastname = strtolower( $match[1] );
				$this->respHeaders[$lastname][] = $match[2];
			}
		}

		$this->parseCookies();
	}

	/**
	 * Sets HTTPRequest status member to a fatal value with the error
	 * message if the returned integer value of the status code was
	 * not successful (< 300) or a redirect (>=300 and < 400).  (see
	 * RFC2616, section 10,
	 * http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html for a
	 * list of status codes.)
	 *
	 * @return nothing
	 */
	protected function setStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		if ( (int)$this->respStatus > 399 ) {
			// The reason phrase may be missing ("404" instead of "404 Not Found")
			list( $code, $message ) = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
			$this->status->fatal( "http-bad-status", $code, $message );
		}
	}

	/**
	 * Get the integer value of the HTTP status code (e.g. 200 for "200 Ok")
	 * (see RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
	 * for a list of status codes.)
	 *
	 * @return Integer
	 */
	public function getStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return (int)$this->respStatus;
	}

	/**
	 * Returns true if the last status code was a redirect.
	 * Only the codes 300 through 303 are treated as redirects.
	 *
	 * @return Boolean
	 */
	public function isRedirect() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$status = (int)$this->respStatus;

		return $status >= 300 && $status <= 303;
	}

	/**
	 * Returns an associative array of response headers after the
	 * request has been executed.  Because some headers
	 * (e.g. Set-Cookie) can appear more than once, each value of
	 * the associative array is an array of the values given.
	 * The keys are lower-cased header names.
	 *
	 * @return Array
	 */
	public function getResponseHeaders() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->respHeaders;
	}

	/**
	 * Returns the value of the given response header.
	 * If the header occurred several times, the last value is returned.
	 *
	 * @param $header String: header name, case-insensitive
	 * @return String, or null if the header was not received
	 */
	public function getResponseHeader( $header ) {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$key = strtolower( $header );
		if ( isset( $this->respHeaders[$key] ) ) {
			$v = $this->respHeaders[$key];
			return $v[count( $v ) - 1];
		}

		return null;
	}

	/**
	 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
	 *
	 * @param $jar CookieJar
	 */
	public function setCookieJar( $jar ) {
		$this->cookieJar = $jar;
	}

	/**
	 * Returns the cookie jar in use.
	 *
	 * @return CookieJar
	 */
	public function getCookieJar() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->cookieJar;
	}

	/**
	 * Sets a cookie.  Used before a request to set up any individual
	 * cookies.  Used internally after a request to parse the
	 * Set-Cookie headers.
	 * @see Cookie::set
	 */
	public function setCookie( $name, $value = null, $attr = null ) {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		$this->cookieJar->setCookie( $name, $value, $attr );
	}

	/**
	 * Parse the cookies in the response headers and store them in the cookie jar.
	 */
	protected function parseCookies() {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		if ( isset( $this->respHeaders['set-cookie'] ) ) {
			$url = parse_url( $this->getFinalUrl() );

			if ( is_array( $url ) && isset( $url['host'] ) ) {
				$host = $url['host'];
			} elseif ( is_array( $this->parsedUrl ) && isset( $this->parsedUrl['host'] ) ) {
				// A relative Location carries no host: the response still
				// came from the host the request was sent to.
				$host = $this->parsedUrl['host'];
			} else {
				$host = null;
			}

			foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
				$this->cookieJar->parseCookieResponseHeader( $cookie, $host );
			}
		}
	}

	/**
	 * Returns the final URL after all redirections.
	 *
	 * This is the last Location response header if one was received,
	 * and the requested URL otherwise.
	 *
	 * @return String
	 */
	public function getFinalUrl() {
		$location = $this->getResponseHeader( "Location" );

		if ( $location ) {
			return $location;
		}

		return $this->url;
	}

	/**
	 * Returns true if the backend can follow redirects. Overridden by the
	 * child classes.
	 *
	 * @return Boolean
	 */
	public function canFollowRedirects() {
		return true;
	}
}
561
/**
 * MWHttpRequest implemented using internal curl compiled into PHP
 */
class CurlHttpRequest extends MWHttpRequest {
	const SUPPORTS_FILE_POSTS = true;

	/**
	 * Maps curl error numbers (as returned by curl_errno()) to message keys
	 */
	static $curlMessageMap = array(
		6 => 'http-host-unreachable',
		28 => 'http-timed-out'
	);

	protected $curlOptions = array();
	protected $headerText = "";

	/**
	 * Header callback for curl: accumulates the raw response header text.
	 *
	 * @param $fh handle
	 * @param $content String
	 * @return Integer: number of bytes consumed
	 */
	protected function readHeader( $fh, $content ) {
		$this->headerText .= $content;
		return strlen( $content );
	}

	/**
	 * Perform the request through curl.
	 *
	 * @return Status
	 * @throws MWException if the curl options cannot be applied
	 */
	public function execute() {
		parent::execute();

		if ( !$this->status->isOK() ) {
			return $this->status;
		}

		$this->curlOptions[CURLOPT_PROXY] = $this->proxy;
		$this->curlOptions[CURLOPT_TIMEOUT] = $this->timeout;
		$this->curlOptions[CURLOPT_HTTP_VERSION] = CURL_HTTP_VERSION_1_0;
		$this->curlOptions[CURLOPT_WRITEFUNCTION] = $this->callback;
		$this->curlOptions[CURLOPT_HEADERFUNCTION] = array( $this, "readHeader" );
		$this->curlOptions[CURLOPT_MAXREDIRS] = $this->maxRedirects;
		$this->curlOptions[CURLOPT_ENCODING] = ""; # Enable compression

		/* not sure these two are actually necessary */
		if ( isset( $this->reqHeaders['Referer'] ) ) {
			$this->curlOptions[CURLOPT_REFERER] = $this->reqHeaders['Referer'];
		}
		$this->curlOptions[CURLOPT_USERAGENT] = $this->reqHeaders['User-Agent'];

		if ( isset( $this->sslVerifyHost ) ) {
			// curl wants 2 to check the certificate's host name; a boolean true
			// would be passed as 1, which newer libcurl versions no longer accept.
			$this->curlOptions[CURLOPT_SSL_VERIFYHOST] = $this->sslVerifyHost ? 2 : 0;
		}

		if ( isset( $this->sslVerifyCert ) ) {
			$this->curlOptions[CURLOPT_SSL_VERIFYPEER] = $this->sslVerifyCert;
		}

		if ( $this->caInfo ) {
			$this->curlOptions[CURLOPT_CAINFO] = $this->caInfo;
		}

		if ( $this->headersOnly ) {
			$this->curlOptions[CURLOPT_NOBODY] = true;
			$this->curlOptions[CURLOPT_HEADER] = true;
		} elseif ( $this->method == 'POST' ) {
			$this->curlOptions[CURLOPT_POST] = true;
			$this->curlOptions[CURLOPT_POSTFIELDS] = $this->postData;
			// Suppress 'Expect: 100-continue' header, as some servers
			// will reject it with a 417 and Curl won't auto retry
			// with HTTP 1.0 fallback
			$this->reqHeaders['Expect'] = '';
		} else {
			$this->curlOptions[CURLOPT_CUSTOMREQUEST] = $this->method;
		}

		// Must come after the branch above, which may still add a request header
		$this->curlOptions[CURLOPT_HTTPHEADER] = $this->getHeaderList();

		$curlHandle = curl_init( $this->url );

		if ( !curl_setopt_array( $curlHandle, $this->curlOptions ) ) {
			throw new MWException( "Error setting curl options." );
		}

		if ( $this->followRedirects && $this->canFollowRedirects() ) {
			wfSuppressWarnings();
			if ( ! curl_setopt( $curlHandle, CURLOPT_FOLLOWLOCATION, true ) ) {
				wfDebug( __METHOD__ . ": Couldn't set CURLOPT_FOLLOWLOCATION. " .
					"Probably safe_mode or open_basedir is set.\n" );
				// Continue the processing. If it were in curl_setopt_array,
				// processing would have halted on its entry
			}
			wfRestoreWarnings();
		}

		if ( false === curl_exec( $curlHandle ) ) {
			// The message map is keyed by error number, so look it up with
			// curl_errno(); curl_error() only gives the human-readable text.
			$code = curl_errno( $curlHandle );

			if ( isset( self::$curlMessageMap[$code] ) ) {
				$this->status->fatal( self::$curlMessageMap[$code] );
			} else {
				$this->status->fatal( 'http-curl-error', curl_error( $curlHandle ) );
			}
		} else {
			$this->headerList = explode( "\r\n", $this->headerText );
		}

		curl_close( $curlHandle );

		$this->parseHeader();
		$this->setStatus();

		return $this->status;
	}

	/**
	 * Whether curl may be told to follow redirects in this environment.
	 *
	 * @return Boolean: false when open_basedir or safe_mode is active, or
	 *   when the CURLOPT_REDIR_PROTOCOLS constant is not defined
	 */
	public function canFollowRedirects() {
		if ( strval( ini_get( 'open_basedir' ) ) !== '' || wfIniGetBool( 'safe_mode' ) ) {
			wfDebug( "Cannot follow redirects in safe mode\n" );
			return false;
		}

		if ( !defined( 'CURLOPT_REDIR_PROTOCOLS' ) ) {
			wfDebug( "Cannot follow redirects with libcurl < 7.19.4 due to CVE-2009-0037\n" );
			return false;
		}

		return true;
	}
}
681
/**
 * MWHttpRequest implemented using PHP's http stream wrapper (fopen()).
 * Only plain http:// URLs are supported by this backend.
 */
class PhpHttpRequest extends MWHttpRequest {
	/**
	 * Convert a proxy URL into the tcp://host:port form used for the
	 * 'proxy' option of the http stream context.
	 *
	 * @param $url String: proxy URL
	 * @return String
	 */
	protected function urlToTcp( $url ) {
		$parsedUrl = parse_url( $url );

		$host = isset( $parsedUrl['host'] ) ? $parsedUrl['host'] : '';
		// parse_url() omits the port unless the URL spells it out;
		// fall back to the standard HTTP port in that case.
		$port = isset( $parsedUrl['port'] ) ? $parsedUrl['port'] : 80;

		return 'tcp://' . $host . ':' . $port;
	}

	/**
	 * Perform the request through fopen() and the http stream wrapper.
	 *
	 * @return Status
	 */
	public function execute() {
		parent::execute();

		// Same guard as CurlHttpRequest: never touch a URL the constructor
		// has already rejected.
		if ( !$this->status->isOK() ) {
			return $this->status;
		}

		if ( is_array( $this->postData ) ) {
			$this->postData = wfArrayToCGI( $this->postData );
		}

		$scheme = isset( $this->parsedUrl['scheme'] ) ? $this->parsedUrl['scheme'] : '';
		if ( $scheme != 'http' ) {
			$this->status->fatal( 'http-invalid-scheme', $scheme );
			// Stop here: the request is reported as failed anyway, and
			// fopen() must not be pointed at other stream wrappers.
			return $this->status;
		}

		$this->reqHeaders['Accept'] = "*/*";
		if ( $this->method == 'POST' ) {
			// Required for HTTP 1.0 POSTs
			$this->reqHeaders['Content-Length'] = strlen( (string)$this->postData );
			$this->reqHeaders['Content-type'] = "application/x-www-form-urlencoded";
		}

		$options = array();
		if ( $this->proxy && !$this->noProxy ) {
			$options['proxy'] = $this->urlToTcp( $this->proxy );
			$options['request_fulluri'] = true;
		}

		if ( !$this->followRedirects ) {
			$options['max_redirects'] = 0;
		} else {
			$options['max_redirects'] = $this->maxRedirects;
		}

		$options['method'] = $this->method;
		$options['header'] = implode( "\r\n", $this->getHeaderList() );
		// Note that at some future point we may want to support
		// HTTP/1.1, but we'd have to write support for chunking
		// in version of PHP < 5.3.1
		$options['protocol_version'] = "1.0";

		// This is how we tell PHP we want to deal with 404s (for example) ourselves.
		// Only works on 5.2.10+
		$options['ignore_errors'] = true;

		if ( $this->postData ) {
			$options['content'] = $this->postData;
		}

		$options['timeout'] = $this->timeout;

		$context = stream_context_create( array( 'http' => $options ) );

		$this->headerList = array();
		$reqCount = 0;
		$url = $this->url;

		$result = array();

		do {
			$reqCount++;
			wfSuppressWarnings();
			$fh = fopen( $url, "r", false, $context );
			wfRestoreWarnings();

			if ( !$fh ) {
				break;
			}

			$result = stream_get_meta_data( $fh );
			$this->headerList = $result['wrapper_data'];
			$this->parseHeader();

			if ( !$this->followRedirects ) {
				break;
			}

			# Handle manual redirection
			if ( !$this->isRedirect() || $reqCount > $this->maxRedirects ) {
				break;
			}
			# Check security of URL
			$url = $this->getResponseHeader( "Location" );

			if ( substr( $url, 0, 7 ) !== 'http://' ) {
				wfDebug( __METHOD__ . ": insecure redirection\n" );
				break;
			}

			// The redirect will be followed with a fresh stream; release the
			// handle of this intermediate response instead of leaking it.
			fclose( $fh );
		} while ( true );

		$this->setStatus();

		if ( $fh === false ) {
			$this->status->fatal( 'http-request-error' );
			return $this->status;
		}

		if ( $result['timed_out'] ) {
			$this->status->fatal( 'http-timed-out', $this->url );
			return $this->status;
		}

		// If everything went OK, or we received some error code
		// get the response body content.
		if ( $this->status->isOK()
				|| (int)$this->respStatus >= 300) {
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 8192 );

				if ( $buf === false ) {
					$this->status->fatal( 'http-read-error' );
					break;
				}

				if ( strlen( $buf ) ) {
					call_user_func( $this->callback, $fh, $buf );
				}
			}
		}
		fclose( $fh );

		return $this->status;
	}
}