3 * Various HTTP related functions.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
29 * Various HTTP related functions
// Name of the request engine used by MWHttpRequest::factory(): false until one
// is selected, after which factory() expects the value 'curl' or 'php'.
33 static $httpEngine = false;
36 * Perform an HTTP request
38 * @param string $method HTTP method. Usually GET/POST
39 * @param string $url full URL to act on. If protocol-relative, will be expanded to an http:// URL
40 * @param array $options options to pass to MWHttpRequest object.
41 * Possible keys for the array:
42 * - timeout Timeout length in seconds
43 * - postData An array of key-value pairs or a url-encoded form data
44 * - proxy The proxy to use.
45 * Otherwise it will use $wgHTTPProxy (if set)
46 * Otherwise it will use the environment variable "http_proxy" (if set)
47 * - noProxy Don't use any proxy at all. Takes precedence over proxy value(s).
48 * - sslVerifyHost (curl only) Verify hostname against certificate
49 * - sslVerifyCert (curl only) Verify SSL certificate
50 * - caInfo (curl only) Provide CA information
51 * - maxRedirects Maximum number of redirects to follow (defaults to 5)
52 * - followRedirects Whether to follow redirects (defaults to false).
53 * Note: this should only be used when the target URL is trusted,
54 * to avoid attacks on intranet services accessible by HTTP.
55 * - userAgent A user agent, if you want to override the default
56 * MediaWiki/$wgVersion
57 * @return Mixed: (bool)false on failure or a string on success
/**
 * Perform an HTTP request and hand back the response body.
 *
 * @param string $method HTTP method (upper-cased before being passed on)
 * @param string $url URL to request
 * @param array $options Options forwarded to MWHttpRequest::factory(); a missing
 *   'timeout' key is filled in with 'default'
 * @return string|bool Response body, or false when the request status is not OK
 */
public static function request( $method, $url, $options = array() ) {
	wfDebug( "HTTP: $method: $url\n" );

	$options['method'] = strtoupper( $method );
	if ( !isset( $options['timeout'] ) ) {
		$options['timeout'] = 'default';
	}

	$httpRequest = MWHttpRequest::factory( $url, $options );
	$outcome = $httpRequest->execute();

	// Guard clause: anything but an OK status is reported as plain false.
	if ( !$outcome->isOK() ) {
		return false;
	}

	return $httpRequest->getContent();
}
78 * Simple wrapper for Http::request( 'GET' )
79 * @see Http::request()
82 * @param $timeout string
83 * @param $options array
/**
 * Convenience wrapper around Http::request() for GET requests.
 *
 * @param string $url URL to fetch
 * @param string $timeout Timeout; always overrides any 'timeout' key in $options
 * @param array $options Further options, see Http::request()
 * @return string|bool Whatever Http::request() returns
 */
public static function get( $url, $timeout = 'default', $options = array() ) {
	// The explicit argument wins over a 'timeout' entry the caller put in $options.
	$options['timeout'] = $timeout;

	$body = Http::request( 'GET', $url, $options );
	return $body;
}
92 * Simple wrapper for Http::request( 'POST' )
93 * @see Http::request()
96 * @param $options array
/**
 * Convenience wrapper around Http::request() for POST requests.
 *
 * @param string $url URL to post to
 * @param array $options Options, see Http::request()
 * @return string|bool Whatever Http::request() returns
 */
public static function post( $url, $options = array() ) {
	$body = Http::request( 'POST', $url, $options );
	return $body;
}
104 * Check if the URL can be served by localhost
106 * @param string $url full url to check
/**
 * Check whether the URL points at a virtual host that $wgConf lists as local.
 *
 * The host is extracted from http:// and https:// URLs (previously only
 * http:// was recognised, so local HTTPS URLs were never detected). The host
 * itself and each of its superdomains are tested, shortest first
 * ("org", "example.org", "www.example.org").
 *
 * @param string $url Full URL to check
 * @return bool True if the host or one of its superdomains is a local vhost
 */
public static function isLocalURL( $url ) {
	global $wgCommandLineMode, $wgConf;

	// In command-line mode nothing is treated as local.
	if ( $wgCommandLineMode ) {
		return false;
	}

	// Extract the host part; bail out for anything that is not http(s)://host[/:]...
	$matches = array();
	if ( !preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
		return false;
	}

	// Walk from the top-level domain downwards, growing the candidate by one
	// label per iteration.
	$domain = '';
	foreach ( array_reverse( explode( '.', $matches[1] ) ) as $i => $domainPart ) {
		$domain = ( $i === 0 ) ? $domainPart : $domainPart . '.' . $domain;

		if ( $wgConf->isLocalVHost( $domain ) ) {
			return true;
		}
	}

	return false;
}
144 * A standard user-agent we can use for external requests.
/**
 * Build the standard user-agent string used for external requests.
 *
 * @return string "MediaWiki/" followed by $wgVersion
 */
public static function userAgent() {
	global $wgVersion;

	$agent = "MediaWiki/$wgVersion";
	return $agent;
}
153 * Checks that the given URI is a valid one. Hardcoding the
154 * protocols, because we only want protocols that both cURL
157 * file:// should not be allowed here for security purpose (r67684)
159 * @todo FIXME this is wildly inaccurate and fails to actually check most stuff
161 * @param $uri Mixed: URI to check for validity
/**
 * Rough validity check for a URI: it must start with http:// or https://,
 * be followed by a character that is neither "/" nor whitespace, and contain
 * no whitespace up to the very end of the string.
 *
 * @param mixed $uri URI to check
 * @return int|bool Result of preg_match(): 1 on match, 0 otherwise
 */
public static function isValidURI( $uri ) {
	// The D modifier makes "$" match only at the real end of the subject, so a
	// trailing newline is rejected as well.
	$httpUriPattern = '/^https?:\/\/[^\/\s]\S*$/D';

	return preg_match( $httpUriPattern, $uri );
}
173 * This wrapper class will call out to curl (if available) or fallback
174 * to regular PHP if necessary for handling internal HTTP requests.
176 * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
177 * PHP's HTTP extension.
// Base class for MediaWiki's internal HTTP requests. Instances are obtained
// through factory(), which returns a CurlHttpRequest or a PhpHttpRequest.
179 class MWHttpRequest
{
// False here; CurlHttpRequest redefines this constant as true.
180 const SUPPORTS_FILE_POSTS
= false;
// Timeout in seconds; the constructor replaces 'default' with $wgHTTPTimeout.
183 protected $timeout = 'default';
// Set to true by execute() when the request method is HEAD.
184 protected $headersOnly = null;
// POST payload: an array of key-value pairs or url-encoded form data.
185 protected $postData = null;
// Proxy to use; the constructor forces it to '' when $noProxy is set.
186 protected $proxy = null;
187 protected $noProxy = false;
// The three SSL settings below are only consumed by the curl engine.
188 protected $sslVerifyHost = true;
189 protected $sslVerifyCert = true;
190 protected $caInfo = null;
// HTTP method; the constructor upper-cases the value taken from the options.
191 protected $method = "GET";
// Request headers as a name => value map (see setHeader()).
192 protected $reqHeaders = array();
// Result of wfParseUrl() on the expanded request URL.
194 protected $parsedUrl;
196 protected $maxRedirects = 5;
197 protected $followRedirects = false;
// CookieJar instance; created on demand by setCookie() and parseCookies().
202 protected $cookieJar;
// Raw response header lines, filled in by the engine's execute().
204 protected $headerList = array();
// Protocol version and status ("code reason") taken from the response status
// line by parseHeader(); the values below are the defaults until then.
205 protected $respVersion = "0.9";
206 protected $respStatus = "200 Ok";
// Parsed response headers: lower-cased name => list of values.
207 protected $respHeaders = array();
212 * @param string $url url to use. If protocol-relative, will be expanded to an http:// URL
213 * @param array $options (optional) extra params to pass (see Http::request())
/**
 * Set up the request from a URL and an option array.
 *
 * The URL is expanded with wfExpandUrl( $url, PROTO_HTTP ) and validated; an
 * unusable URL leaves a fatal 'http-invalid-url' status on the object instead
 * of throwing.
 *
 * @param string $url URL to request
 * @param array $options Optional extra parameters (see Http::request())
 */
protected function __construct( $url, $options = array() ) {
	global $wgHTTPTimeout;

	$this->url = wfExpandUrl( $url, PROTO_HTTP );
	$this->parsedUrl = wfParseUrl( $this->url );

	$urlIsUsable = $this->parsedUrl && Http::isValidURI( $this->url );
	if ( $urlIsUsable ) {
		$this->status = Status::newGood( 100 ); // continue
	} else {
		$this->status = Status::newFatal( 'http-invalid-url' );
	}

	// An explicit timeout is honoured; a missing one or 'default' means $wgHTTPTimeout.
	$hasOwnTimeout = isset( $options['timeout'] ) && $options['timeout'] != 'default';
	$this->timeout = $hasOwnTimeout ? $options['timeout'] : $wgHTTPTimeout;

	if ( isset( $options['userAgent'] ) ) {
		$this->setUserAgent( $options['userAgent'] );
	}

	// Options that are copied verbatim onto the member of the same name.
	$copiedOptions = array( "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
		"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" );

	foreach ( $copiedOptions as $key ) {
		if ( !isset( $options[$key] ) ) {
			continue;
		}

		// MWHttpRequest::method must always be upper case (bug 36137).
		$this->$key = ( $key == 'method' )
			? strtoupper( $options[$key] )
			: $options[$key];
	}

	// noProxy takes precedence over any configured proxy.
	if ( $this->noProxy ) {
		$this->proxy = '';
	}
}
256 * Simple function to test if we can make any sort of requests at all, using
/**
 * Tell whether any request engine is usable: either the curl extension is
 * loaded or the allow_url_fopen ini setting is on.
 *
 * @return bool
 */
public static function canMakeRequests() {
	if ( function_exists( 'curl_init' ) ) {
		return true;
	}

	return (bool)wfIniGetBool( 'allow_url_fopen' );
}
265 * Generate a new request object
266 * @param string $url url to use
267 * @param array $options (optional) extra params to pass (see Http::request())
268 * @throws MWException
269 * @return CurlHttpRequest|PhpHttpRequest
270 * @see MWHttpRequest::__construct
/**
 * Create a request object for the engine named by Http::$httpEngine.
 *
 * When no engine has been chosen yet, curl is selected if the extension is
 * available and the pure PHP engine otherwise; the choice is stored back in
 * Http::$httpEngine.
 *
 * @param string $url URL to request
 * @param array $options Optional extra parameters (see Http::request())
 * @throws MWException If the configured engine is unusable or unknown
 * @return CurlHttpRequest|PhpHttpRequest
 */
public static function factory( $url, $options = null ) {
	$curlAvailable = function_exists( 'curl_init' );

	if ( !Http::$httpEngine ) {
		Http::$httpEngine = $curlAvailable ? 'curl' : 'php';
	} elseif ( Http::$httpEngine == 'curl' && !$curlAvailable ) {
		throw new MWException( __METHOD__ . ': curl (http://php.net/curl) is not installed, but' .
			' Http::$httpEngine is set to "curl"' );
	}

	$engine = Http::$httpEngine;

	if ( $engine == 'curl' ) {
		return new CurlHttpRequest( $url, $options );
	}

	if ( $engine == 'php' ) {
		// The PHP engine goes through fopen(), which needs allow_url_fopen.
		if ( !wfIniGetBool( 'allow_url_fopen' ) ) {
			throw new MWException( __METHOD__ . ': allow_url_fopen needs to be enabled for pure PHP' .
				' http requests to work. If possible, curl should be used instead. See http://php.net/curl.' );
		}
		return new PhpHttpRequest( $url, $options );
	}

	throw new MWException( __METHOD__ . ': The setting of Http::$httpEngine is not valid.' );
}
295 * Get the body, or content, of the response to the request
/**
 * Return the response body accumulated so far.
 *
 * @return string
 */
public function getContent() {
	$body = $this->content;
	return $body;
}
304 * Set the parameters of the request
307 * @todo overload the args param
/**
 * Replace the POST data of the request.
 *
 * @param array|string $args Stored as-is in the postData member
 */
public function setData( $args ) {
	$payload = $args;
	$this->postData = $payload;
}
314 * Take care of setting up the proxy (do nothing if "noProxy" is set)
/**
 * Decide which proxy, if any, the request goes through.
 *
 * Order of precedence: an explicitly set proxy (unless proxies are disabled),
 * no proxy at all for local URLs or when noProxy is set, then $wgHTTPProxy,
 * then the "http_proxy" environment variable.
 */
public function proxySetup() {
	global $wgHTTPProxy;

	// An explicit proxy is kept as long as proxies are not disabled.
	$keepExplicitProxy = $this->proxy && !$this->noProxy;
	if ( $keepExplicitProxy ) {
		return;
	}

	// Local URLs and noProxy requests never use a proxy.
	if ( Http::isLocalURL( $this->url ) || $this->noProxy ) {
		$this->proxy = '';
		return;
	}

	if ( $wgHTTPProxy ) {
		$this->proxy = $wgHTTPProxy;
		return;
	}

	$envProxy = getenv( "http_proxy" );
	if ( $envProxy ) {
		$this->proxy = $envProxy;
	}
}
338 * Set the referer header
/**
 * Set the Referer request header.
 *
 * @param string $url Value for the header
 */
public function setReferer( $url ) {
	$headerName = 'Referer';
	$this->setHeader( $headerName, $url );
}
/**
 * Set the User-Agent request header.
 *
 * @param string $UA Value for the header
 */
public function setUserAgent( $UA ) {
	$headerName = 'User-Agent';
	$this->setHeader( $headerName, $UA );
}
353 * Set an arbitrary header
/**
 * Set an arbitrary request header, replacing an earlier value stored under
 * exactly the same name. The name is not case-normalised.
 *
 * @param string $name Header name
 * @param string $value Header value
 */
public function setHeader( $name, $value ) {
	$headers = $this->reqHeaders;
	$headers[$name] = $value;
	$this->reqHeaders = $headers;
}
363 * Get an array of the headers
/**
 * Build the list of request header lines.
 *
 * When a cookie jar is present, the Cookie header is first refreshed from it
 * for the path and host of the request URL.
 *
 * @return array List of "Name: value" strings
 */
public function getHeaderList() {
	if ( $this->cookieJar ) {
		$cookieHeader = $this->cookieJar->serializeToHttpRequest(
			$this->parsedUrl['path'],
			$this->parsedUrl['host']
		);
		$this->reqHeaders['Cookie'] = $cookieHeader;
	}

	$lines = array();
	foreach ( $this->reqHeaders as $name => $value ) {
		$lines[] = "$name: $value";
	}

	return $lines;
}
385 * Set a read callback to accept data read from the HTTP request.
386 * By default, data is appended to an internal buffer which can be
387 * retrieved through $req->getContent().
389 * To handle data as it comes in -- especially for large files that
390 * would not fit in memory -- you can instead set your own callback,
391 * in the form function($resource, $buffer) where the first parameter
392 * is the low-level resource being read (implementation specific),
393 * and the second parameter is the data buffer.
395 * You MUST return the number of bytes handled in the buffer; if fewer
396 * bytes are reported handled than were passed to you, the HTTP fetch
399 * @param $callback Callback
400 * @throws MWException
/**
 * Register the callback that receives response data as it is read.
 *
 * @param callable $callback Called with the low-level resource and the data
 *   buffer; must return the number of bytes it handled
 * @throws MWException If $callback is not callable
 */
public function setCallback( $callback ) {
	if ( is_callable( $callback ) ) {
		$this->callback = $callback;
		return;
	}

	throw new MWException( 'Invalid MwHttpRequest callback' );
}
410 * A generic callback to read the body of the response from a remote
414 * @param $content String
/**
 * Default read callback: append the received data to the internal buffer.
 *
 * @param resource $fh Low-level resource being read (unused here)
 * @param string $content Data that was just read
 * @return int Number of bytes handled, i.e. the length of $content
 */
public function read( $fh, $content ) {
	$handled = strlen( $content );
	$this->content .= $content;

	return $handled;
}
423 * Take care of whatever is necessary to perform the URI request.
/**
 * Prepare the request for sending. The engine subclasses call this first and
 * then perform the actual transfer.
 *
 * Resets the content buffer, flags HEAD requests as headers-only, supplies a
 * default Referer (from $wgTitle, when it is an object) and User-Agent, sets
 * up the proxy and installs read() as the callback if none was given.
 */
public function execute() {
	global $wgTitle;

	$this->content = "";

	$isHeadRequest = strtoupper( $this->method ) == "HEAD";
	if ( $isHeadRequest ) {
		$this->headersOnly = true;
	}

	$needsReferer = is_object( $wgTitle ) && !isset( $this->reqHeaders['Referer'] );
	if ( $needsReferer ) {
		$refererUrl = wfExpandUrl( $wgTitle->getFullURL(), PROTO_CURRENT );
		$this->setReferer( $refererUrl );
	}

	// Set up any proxy as needed.
	$this->proxySetup();

	if ( !$this->callback ) {
		$this->setCallback( array( $this, 'read' ) );
	}

	if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
		$this->setUserAgent( Http::userAgent() );
	}
}
452 * Parses the headers, including the HTTP status code and any
453 * Set-Cookie headers. This function expects the headers to be
454 * found in an array in the member variable headerList.
/**
 * Parse the raw header lines in $this->headerList.
 *
 * A status line sets respVersion and respStatus, a "Name: value" line is
 * appended to respHeaders under the lower-cased name, and a line starting
 * with a space or tab is treated as a continuation of the previous header
 * value. Cookies are parsed afterwards through parseCookies().
 *
 * A continuation line that shows up before any header field is ignored;
 * previously it called count() on an undefined entry and appended to a bogus
 * index.
 */
protected function parseHeader() {
	// Name of the most recently parsed header field, null while there is none.
	$lastname = null;

	foreach ( $this->headerList as $header ) {
		if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
			$this->respVersion = $match[1];
			$this->respStatus = $match[2];
		} elseif ( preg_match( "#^[ \t]#", $header ) ) {
			if ( $lastname === null || empty( $this->respHeaders[$lastname] ) ) {
				// Nothing to continue: malformed response, skip the line.
				continue;
			}
			$last = count( $this->respHeaders[$lastname] ) - 1;
			$this->respHeaders[$lastname][$last] .= "\r\n$header";
		} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
			$lastname = strtolower( $match[1] );
			$this->respHeaders[$lastname][] = $match[2];
		}
	}

	$this->parseCookies();
}
476 * Sets HTTPRequest status member to a fatal value with the error
477 * message if the returned integer value of the status code was
478 * not successful (< 300) or a redirect (>=300 and < 400). (see
479 * RFC2616, section 10,
480 * http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html for a
481 * list of status codes.)
/**
 * Mark the request status as fatal ('http-bad-status') when the HTTP status
 * code of the response is 400 or above.
 *
 * Response headers are parsed first if that has not happened yet. A status
 * without a reason phrase (e.g. just "404") is reported with an empty message
 * instead of triggering an undefined-offset notice.
 */
protected function setStatus() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	if ( (int)$this->respStatus > 399 ) {
		// Pad to two elements so list() always has a code and a message.
		$parts = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
		list( $code, $message ) = $parts;
		$this->status->fatal( "http-bad-status", $code, $message );
	}
}
495 * Get the integer value of the HTTP status code (e.g. 200 for "200 Ok")
496 * (see RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
497 * for a list of status codes.)
/**
 * Return the HTTP status code as an integer (e.g. 200 for "200 Ok").
 * Response headers are parsed first if that has not happened yet.
 *
 * @return int
 */
public function getStatus() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$code = (int)$this->respStatus;
	return $code;
}
510 * Returns true if the last status code was a redirect.
/**
 * Tell whether the last status code was a redirect, which here means a code
 * from 300 to 303 inclusive. Response headers are parsed first if needed.
 *
 * @return bool
 */
public function isRedirect() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$code = (int)$this->respStatus;

	return $code >= 300 && $code <= 303;
}
529 * Returns an associative array of response headers after the
530 * request has been executed. Because some headers
531 * (e.g. Set-Cookie) can appear more than once, each value of
532 * the associative array is an array of the values given.
/**
 * Return all response headers, parsing them first if necessary.
 *
 * @return array Lower-cased header name => list of values
 */
public function getResponseHeaders() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$parsed = $this->respHeaders;
	return $parsed;
}
545 * Returns the value of the given response header.
547 * @param $header String
/**
 * Return the value of one response header. When the header occurred several
 * times, the last value is returned.
 *
 * @param string $header Header name, matched case-insensitively
 * @return string|null Null when the header is absent
 */
public function getResponseHeader( $header ) {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$key = strtolower( $header );
	if ( !isset( $this->respHeaders[$key] ) ) {
		return null;
	}

	$values = $this->respHeaders[$key];
	return $values[count( $values ) - 1];
}
564 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
566 * @param $jar CookieJar
/**
 * Make the request use a pre-loaded cookie jar.
 *
 * @param CookieJar $jar
 */
public function setCookieJar( $jar ) {
	$preloadedJar = $jar;
	$this->cookieJar = $preloadedJar;
}
573 * Returns the cookie jar in use.
/**
 * Return the cookie jar in use. Response headers are parsed first if that has
 * not happened yet.
 *
 * @return CookieJar
 */
public function getCookieJar() {
	if ( !$this->respHeaders ) {
		$this->parseHeader();
	}

	$jar = $this->cookieJar;
	return $jar;
}
586 * Sets a cookie. Used before a request to set up any individual
587 * cookies. Used internally after a request to parse the
588 * Set-Cookie headers.
/**
 * Store a cookie in the cookie jar, creating the jar on first use.
 *
 * @param string $name Cookie name
 * @param mixed $value Cookie value
 * @param mixed $attr Passed through to CookieJar::setCookie()
 */
public function setCookie( $name, $value = null, $attr = null ) {
	$jarMissing = !$this->cookieJar;
	if ( $jarMissing ) {
		$this->cookieJar = new CookieJar;
	}

	$this->cookieJar->setCookie( $name, $value, $attr );
}
603 * Parse the cookies in the response headers and store them in the cookie jar.
/**
 * Feed every Set-Cookie response header into the cookie jar, creating the jar
 * if there is none yet. The host of the final URL is passed along with each
 * cookie.
 */
protected function parseCookies() {
	if ( !$this->cookieJar ) {
		$this->cookieJar = new CookieJar;
	}

	if ( !isset( $this->respHeaders['set-cookie'] ) ) {
		return;
	}

	$finalUrlParts = parse_url( $this->getFinalUrl() );
	foreach ( $this->respHeaders['set-cookie'] as $cookieHeader ) {
		$this->cookieJar->parseCookieResponseHeader( $cookieHeader, $finalUrlParts['host'] );
	}
}
619 * Returns the final URL after all redirections.
621 * Relative values of the "Location" header are incorrect as stated in RFC, however they do happen and modern browsers support them.
622 * This function loops backwards through all locations in order to build the proper absolute URI - Marooned at wikia-inc.com
624 * Note that the multiple Location: headers are an artifact of CURL -- they
625 * shouldn't actually get returned this way. Rewrite this when bug 29232 is
626 * taken care of (high-level redirect handling rewrite).
/**
 * Return the final URL after all redirections.
 *
 * The Location headers are scanned from last to first. If the last one has a
 * host it is returned unchanged. Otherwise it is relative, and it is prefixed
 * with the scheme and host of the nearest earlier Location that has a host,
 * or failing that of the original request URL. Without any Location header
 * the request URL is returned.
 *
 * @return string
 */
public function getFinalUrl() {
	$headers = $this->getResponseHeaders();

	if ( !isset( $headers['location'] ) ) {
		return $this->url;
	}

	$locations = $headers['location'];
	$countLocations = count( $locations );
	$lastIndex = $countLocations - 1;

	$domain = '';
	$foundRelativeURI = false;

	// Walk backwards until a Location with a host is found.
	$i = $lastIndex;
	while ( $i >= 0 ) {
		$parts = parse_url( $locations[$i] );

		if ( isset( $parts['host'] ) ) {
			$domain = $parts['scheme'] . '://' . $parts['host'];
			break;
		}

		$foundRelativeURI = true;
		$i--;
	}

	// The last Location was already absolute.
	if ( !$foundRelativeURI ) {
		return $locations[$lastIndex];
	}

	// Relative Location: resolve against the nearest absolute redirect...
	if ( $domain ) {
		return $domain . $locations[$lastIndex];
	}

	// ...or against the original request URL.
	$own = parse_url( $this->url );
	if ( isset( $own['host'] ) ) {
		return $own['scheme'] . '://' . $own['host'] . $locations[$lastIndex];
	}

	return $this->url;
}
669 * Returns true if the backend can follow redirects. Overridden by the
/**
 * Tell whether this backend can follow redirects. The base implementation
 * always says yes; subclasses may override it.
 *
 * @return bool
 */
public function canFollowRedirects() {
	$supported = true;
	return $supported;
}
679 * MWHttpRequest implemented using internal curl compiled into PHP
// MWHttpRequest engine built on PHP's curl extension.
681 class CurlHttpRequest
extends MWHttpRequest
{
// Overrides the base class value (false).
682 const SUPPORTS_FILE_POSTS
= true;
// Maps a curl error code to the message key that execute() reports as fatal.
// NOTE(review): 6 and 28 look like libcurl's CURLE_COULDNT_RESOLVE_HOST and
// CURLE_OPERATION_TIMEDOUT -- confirm against the libcurl error list.
684 static $curlMessageMap = array(
685 6 => 'http-host-unreachable',
686 28 => 'http-timed-out'
// Options handed to curl_setopt_array() by execute().
689 protected $curlOptions = array();
// Raw response header text collected by readHeader().
690 protected $headerText = "";
/**
 * Header callback for curl: append the received header data to headerText.
 *
 * @param resource $fh curl handle (unused here)
 * @param string $content Header data that was just read
 * @return int Number of bytes handled, i.e. the length of $content
 */
protected function readHeader( $fh, $content ) {
	$handled = strlen( $content );
	$this->headerText .= $content;

	return $handled;
}
/**
 * Perform the request through the curl extension.
 *
 * Fixes relative to the previous version:
 * - The curl failure is looked up in $curlMessageMap by its numeric code from
 *   curl_errno(). The old code used curl_error(), which returns the message
 *   text, so the integer-keyed map never matched.
 * - The curl handle is closed before throwing when the options cannot be set.
 *
 * @throws MWException If the curl options cannot be applied
 * @return Status
 */
public function execute() {
	parent::execute();

	// The constructor may already have flagged the URL as invalid.
	if ( !$this->status->isOK() ) {
		return $this->status;
	}

	$this->curlOptions[CURLOPT_PROXY] = $this->proxy;
	$this->curlOptions[CURLOPT_TIMEOUT] = $this->timeout;
	$this->curlOptions[CURLOPT_HTTP_VERSION] = CURL_HTTP_VERSION_1_0;
	$this->curlOptions[CURLOPT_WRITEFUNCTION] = $this->callback;
	$this->curlOptions[CURLOPT_HEADERFUNCTION] = array( $this, "readHeader" );
	$this->curlOptions[CURLOPT_MAXREDIRS] = $this->maxRedirects;
	$this->curlOptions[CURLOPT_ENCODING] = ""; // Enable compression

	/* not sure these two are actually necessary */
	if ( isset( $this->reqHeaders['Referer'] ) ) {
		$this->curlOptions[CURLOPT_REFERER] = $this->reqHeaders['Referer'];
	}
	$this->curlOptions[CURLOPT_USERAGENT] = $this->reqHeaders['User-Agent'];

	$this->curlOptions[CURLOPT_SSL_VERIFYHOST] = $this->sslVerifyHost ? 2 : 0;
	$this->curlOptions[CURLOPT_SSL_VERIFYPEER] = $this->sslVerifyCert;

	if ( $this->caInfo ) {
		$this->curlOptions[CURLOPT_CAINFO] = $this->caInfo;
	}

	if ( $this->headersOnly ) {
		$this->curlOptions[CURLOPT_NOBODY] = true;
		$this->curlOptions[CURLOPT_HEADER] = true;
	} elseif ( $this->method == 'POST' ) {
		$this->curlOptions[CURLOPT_POST] = true;
		$this->curlOptions[CURLOPT_POSTFIELDS] = $this->postData;
		// Suppress 'Expect: 100-continue' header, as some servers
		// will reject it with a 417 and Curl won't auto retry
		// with HTTP 1.0 fallback
		$this->reqHeaders['Expect'] = '';
	} else {
		$this->curlOptions[CURLOPT_CUSTOMREQUEST] = $this->method;
	}

	// Must come after the branch above, which may still add a request header.
	$this->curlOptions[CURLOPT_HTTPHEADER] = $this->getHeaderList();

	$curlHandle = curl_init( $this->url );

	if ( !curl_setopt_array( $curlHandle, $this->curlOptions ) ) {
		// Do not leak the handle when bailing out.
		curl_close( $curlHandle );
		throw new MWException( "Error setting curl options." );
	}

	if ( $this->followRedirects && $this->canFollowRedirects() ) {
		wfSuppressWarnings();
		if ( !curl_setopt( $curlHandle, CURLOPT_FOLLOWLOCATION, true ) ) {
			wfDebug( __METHOD__ . ": Couldn't set CURLOPT_FOLLOWLOCATION. " .
				"Probably safe_mode or open_basedir is set.\n" );
			// Continue the processing. If it were in curl_setopt_array,
			// processing would have halted on its entry
		}
		wfRestoreWarnings();
	}

	if ( false === curl_exec( $curlHandle ) ) {
		// Numeric curl error code; this is what $curlMessageMap is keyed by.
		$code = curl_errno( $curlHandle );

		if ( isset( self::$curlMessageMap[$code] ) ) {
			$this->status->fatal( self::$curlMessageMap[$code] );
		} else {
			$this->status->fatal( 'http-curl-error', curl_error( $curlHandle ) );
		}
	} else {
		$this->headerList = explode( "\r\n", $this->headerText );
	}

	curl_close( $curlHandle );

	$this->parseHeader();
	$this->setStatus();

	return $this->status;
}
/**
 * Tell whether curl may be asked to follow redirects. It may not when
 * open_basedir or safe_mode is in effect, nor when the curl extension lacks
 * CURLOPT_REDIR_PROTOCOLS.
 *
 * @return bool
 */
public function canFollowRedirects() {
	$restricted = strval( ini_get( 'open_basedir' ) ) !== '' || wfIniGetBool( 'safe_mode' );
	if ( $restricted ) {
		wfDebug( "Cannot follow redirects in safe mode\n" );
		return false;
	}

	$hasRedirProtocols = defined( 'CURLOPT_REDIR_PROTOCOLS' );
	if ( !$hasRedirProtocols ) {
		wfDebug( "Cannot follow redirects with libcurl < 7.19.4 due to CVE-2009-0037\n" );
		return false;
	}

	return true;
}
// MWHttpRequest engine that sends the request with fopen() and an 'http'
// stream context instead of curl.
801 class PhpHttpRequest
extends MWHttpRequest
{
/**
 * Turn a URL into a "tcp://host:port" string. execute() uses it for the
 * 'proxy' option of the stream context.
 *
 * @param string $url URL providing the host and the port
 * @return string
 */
protected function urlToTcp( $url ) {
	$parts = parse_url( $url );
	$host = $parts['host'];
	$port = $parts['port'];

	return 'tcp://' . $host . ':' . $port;
}
/**
 * Perform the request with fopen() and an 'http' stream context.
 *
 * The stream wrapper itself only follows redirects when followRedirects is
 * set (max_redirects is 0 otherwise). On top of that a manual loop re-opens
 * the Location target of a redirect response, after checking it with
 * Http::isValidURI().
 *
 * @return Status
 */
public function execute() {
	parent::execute();

	if ( is_array( $this->postData ) ) {
		$this->postData = wfArrayToCgi( $this->postData );
	}

	$schemeIsHttp = $this->parsedUrl['scheme'] == 'http' ||
		$this->parsedUrl['scheme'] == 'https';
	if ( !$schemeIsHttp ) {
		$this->status->fatal( 'http-invalid-scheme', $this->parsedUrl['scheme'] );
	}

	$this->reqHeaders['Accept'] = "*/*";
	if ( $this->method == 'POST' ) {
		// Required for HTTP 1.0 POSTs
		$this->reqHeaders['Content-Length'] = strlen( $this->postData );
		if ( !isset( $this->reqHeaders['Content-Type'] ) ) {
			$this->reqHeaders['Content-Type'] = "application/x-www-form-urlencoded";
		}
	}

	// Assemble the options of the 'http' stream context.
	$options = array();
	if ( $this->proxy ) {
		$options['proxy'] = $this->urlToTCP( $this->proxy );
		$options['request_fulluri'] = true;
	}

	$options['max_redirects'] = $this->followRedirects ? $this->maxRedirects : 0;
	$options['method'] = $this->method;
	$options['header'] = implode( "\r\n", $this->getHeaderList() );
	// HTTP/1.1 would require chunked transfer support on PHP < 5.3.1,
	// so stay on 1.0 for now.
	$options['protocol_version'] = "1.0";
	// Lets us handle error responses such as 404 ourselves (PHP 5.2.10+ only).
	$options['ignore_errors'] = true;

	if ( $this->postData ) {
		$options['content'] = $this->postData;
	}

	$options['timeout'] = $this->timeout;

	$context = stream_context_create( array( 'http' => $options ) );

	$this->headerList = array();
	$reqCount = 0;
	$url = $this->url;
	$result = array();

	while ( true ) {
		$reqCount++;
		wfSuppressWarnings();
		$fh = fopen( $url, "r", false, $context );
		wfRestoreWarnings();

		if ( !$fh ) {
			break;
		}

		$result = stream_get_meta_data( $fh );
		$this->headerList = $result['wrapper_data'];
		$this->parseHeader();

		if ( !$this->followRedirects ) {
			break;
		}

		// Handle manual redirection
		if ( !$this->isRedirect() || $reqCount > $this->maxRedirects ) {
			break;
		}

		// Check security of URL
		$url = $this->getResponseHeader( "Location" );
		if ( !Http::isValidURI( $url ) ) {
			wfDebug( __METHOD__ . ": insecure redirection\n" );
			break;
		}
	}

	$this->setStatus();

	if ( $fh === false ) {
		$this->status->fatal( 'http-request-error' );
		return $this->status;
	}

	if ( $result['timed_out'] ) {
		$this->status->fatal( 'http-timed-out', $this->url );
		return $this->status;
	}

	// Read the body when everything went OK, and also for status codes of
	// 300 and above so that the body of an error response stays available.
	$wantBody = $this->status->isOK() || (int)$this->respStatus >= 300;
	if ( $wantBody ) {
		while ( !feof( $fh ) ) {
			$chunk = fread( $fh, 8192 );

			if ( $chunk === false ) {
				$this->status->fatal( 'http-read-error' );
				break;
			}

			if ( strlen( $chunk ) ) {
				call_user_func( $this->callback, $fh, $chunk );
			}
		}
	}
	fclose( $fh );

	return $this->status;
}