* (bug 19761) Removed autogenerated <meta keywords> tag with link data.
[lhc/web/wiklou.git] / includes / HttpFunctions.php
1 <?php
2 /**
3 * HTTP handling class
4 * @defgroup HTTP HTTP
5 * @file
6 * @ingroup HTTP
7 */
8
/**
 * Static helpers for issuing HTTP requests and for copying remote files to
 * local storage, either inside the current request or through a background
 * command-line process whose progress is tracked in the session.
 */
class Http {
    /** Download mode: transfer the whole file inside the current request. */
    const SYNC_DOWNLOAD = 1;
    /** Download mode: spawn a background process and monitor its progress where possible. */
    const ASYNC_DOWNLOAD = 2;

    /** Not read or written by any method of this class; kept for backward compatibility. */
    public $body = '';

    /**
     * Perform an HTTP request and hand back the value of the resulting Status.
     *
     * @param $method String: 'GET' or 'POST' (case-insensitive); any other value
     *   is passed on as null so that HttpRequest falls back to its default
     * @param $url String: full URL to act on
     * @param $opts Array: options forwarded to the HttpRequest constructor
     * @return Mixed: $status->value when the request succeeded, false otherwise
     */
    public static function request( $method, $url, $opts = array() ) {
        $method = strtoupper( $method );
        $opts['method'] = ( $method == 'GET' || $method == 'POST' ) ? $method : null;

        $req = new HttpRequest( $url, $opts );
        $status = $req->doRequest();
        if ( !$status->isOK() ) {
            wfDebug( 'http error: ' . $status->getWikiText() );
            return false;
        }
        return $status->value;
    }

    /**
     * Simple wrapper for Http::request( 'GET' )
     *
     * @param $url String: full URL to fetch
     * @param $timeout Mixed: timeout handed to the request, or a false value
     *   to leave the default in place
     * @return Mixed: see Http::request()
     */
    public static function get( $url, $timeout = false ) {
        $opts = array();
        if ( $timeout ) {
            $opts['timeout'] = $timeout;
        }
        return self::request( 'GET', $url, $opts );
    }

    /**
     * Simple wrapper for Http::request( 'POST' )
     *
     * @param $url String: full URL to post to
     * @param $opts Array: options forwarded to Http::request()
     * @return Mixed: see Http::request()
     */
    public static function post( $url, $opts = array() ) {
        return self::request( 'POST', $url, $opts );
    }

    /**
     * Copy a remote file to a local path.
     *
     * The response headers are inspected first so that redirects, error
     * responses and files larger than $wgMaxUploadSize are rejected before
     * any body data is transferred.
     *
     * @param $url String: URL to download
     * @param $target_file_path String: local path the body is written to
     * @param $dl_mode Integer: self::SYNC_DOWNLOAD or self::ASYNC_DOWNLOAD
     * @param $redirectCount Integer: number of redirects followed so far
     * @return Status: for a synchronous download the Status returned by
     *   HttpRequest::doRequest(); for an asynchronous download a Status whose
     *   value is the upload session key
     */
    public static function doDownload( $url, $target_file_path, $dl_mode = self::SYNC_DOWNLOAD, $redirectCount = 0 ) {
        global $wgPhpCli, $wgMaxUploadSize, $wgMaxRedirects;

        // Look at the headers first to make sure the file size is not > $wgMaxUploadSize
        $head = @get_headers( $url, 1 );
        if ( !is_array( $head ) || !isset( $head[0] ) ) {
            // get_headers() returns false on failure; there is no status line to inspect
            return Status::newFatal( 'upload-http-error', 'no response headers received' );
        }

        // check for redirects:
        if ( isset( $head['Location'] ) && strrpos( $head[0], '302' ) !== false ) {
            if ( $redirectCount >= $wgMaxRedirects ) {
                return Status::newFatal( 'upload-too-many-redirects' );
            }
            // Repeated headers are collected into an array; the first entry
            // is the one that goes with the status line in $head[0].
            $location = is_array( $head['Location'] ) ? reset( $head['Location'] ) : $head['Location'];
            if ( !UploadFromUrl::isValidURI( $location ) ) {
                return Status::newFatal( 'upload-proto-error' );
            }
            // Pass the incremented count; a post-increment here would hand the
            // old value to the recursive call and the limit would never be hit.
            return self::doDownload( $location, $target_file_path, $dl_mode, $redirectCount + 1 );
        }

        // we did not get a 200 ok response:
        if ( strrpos( $head[0], '200 OK' ) === false ) {
            return Status::newFatal( 'upload-http-error', htmlspecialchars( $head[0] ) );
        }

        $content_length = isset( $head['Content-Length'] ) ? $head['Content-Length'] : null;
        if ( $content_length && $content_length > $wgMaxUploadSize ) {
            return Status::newFatal( 'requested file length ' . $content_length . ' is greater than $wgMaxUploadSize: ' . $wgMaxUploadSize );
        }

        if ( $dl_mode == self::ASYNC_DOWNLOAD ) {
            // a background download needs a php command line binary and shell access
            if ( $wgPhpCli && wfShellExecEnabled() ) {
                wfDebug( __METHOD__ . "\nASYNC_DOWNLOAD\n" );
                // setup session and shell call:
                return self::initBackgroundDownload( $url, $target_file_path, $content_length );
            }
            return Status::newFatal( 'asynchronous download requested but $wgPhpCli is not set or shell execution is disabled' );
        }

        if ( $dl_mode == self::SYNC_DOWNLOAD ) {
            wfDebug( __METHOD__ . "\nSYNC_DOWNLOAD\n" );
            // SYNC_DOWNLOAD download as much as we can in the time we have to execute
            $req = new HttpRequest( $url, array(
                'method' => 'GET',
                'target_file_path' => $target_file_path
            ) );
            return $req->doRequest();
        }

        // Callers expect a Status object, never null
        return Status::newFatal( 'unknown download mode: ' . $dl_mode );
    }

    /**
     * Start a non-blocking download (generally an exit point in the
     * application): the download details are stored in the session and a
     * background shell command is launched to perform the transfer.
     *
     * @param $url String: URL to download
     * @param $target_file_path String: local path the body is written to
     * @param $content_length Mixed: announced size of the file, if known
     * @return Status: good with the upload session key as value, or fatal
     *   when the background command could not be started
     */
    private static function initBackgroundDownload( $url, $target_file_path, $content_length = null ) {
        global $IP, $wgPhpCli;

        // generate a session key under which all the details of the download are stored
        $upload_session_key = self::getUploadSessionKey();
        $session_id = session_id();

        // store the url and target path:
        $_SESSION['wsDownload'][$upload_session_key]['url'] = $url;
        $_SESSION['wsDownload'][$upload_session_key]['target_file_path'] = $target_file_path;

        if ( $content_length ) {
            $_SESSION['wsDownload'][$upload_session_key]['content_length'] = $content_length;
        }

        // set initial loaded bytes:
        $_SESSION['wsDownload'][$upload_session_key]['loaded'] = 0;

        // run the background download request; every argument is escaped so
        // that spaces or shell metacharacters cannot alter the command
        $cmd = $wgPhpCli . ' ' .
            escapeshellarg( $IP . '/maintenance/http_session_download.php' ) .
            ' --sid ' . escapeshellarg( (string)$session_id ) .
            ' --usk ' . escapeshellarg( (string)$upload_session_key );
        $retval = null;
        $pid = wfShellBackgroundExec( $cmd, $retval );
        // the pid is not of much use since we won't be visiting this same apache any-time soon.
        if ( !$pid ) {
            return Status::newFatal( 'could not run background shell exec' );
        }

        // hand the key back so the user can check on the status of the upload
        $status = Status::newGood();
        $status->value = $upload_session_key;
        return $status;
    }

    /**
     * Reserve a random key in $_SESSION['wsUploadData'] and return it.
     *
     * @return Integer: a key that was not yet present in $_SESSION['wsUploadData']
     */
    public static function getUploadSessionKey() {
        // draw again in the unlikely case the key is already taken
        do {
            $key = mt_rand( 0, 0x7fffffff );
        } while ( isset( $_SESSION['wsUploadData'][$key] ) );
        $_SESSION['wsUploadData'][$key] = array();
        return $key;
    }

    /**
     * used to run a session based download. Is initiated via the shell.
     *
     * The outcome (the JSON output of the upload API call, or a JSON encoded
     * error) is stored in the session entry under 'apiUploadResult'.
     *
     * @param $session_id String: the session id to grab download details from
     * @param $upload_session_key String: the key of the given upload session
     *   (a given client could have started a few http uploads at once)
     */
    public static function doSessionIdDownload( $session_id, $upload_session_key ) {
        global $wgUser, $wgEnableWriteAPI, $wgAsyncHTTPTimeout;
        wfDebug( __METHOD__ . "\n\ndoSessionIdDownload\n\n" );

        // set session to the provided key and start it:
        session_id( $session_id );
        if ( session_start() === false ) {
            wfDebug( __METHOD__ . ' could not start session' );
        }
        // without the stored download details there is nothing to do
        if ( !isset( $_SESSION['wsDownload'][$upload_session_key] ) ) {
            wfDebug( __METHOD__ . ' Error:could not find upload session' );
            exit();
        }
        // setup the global user from the session key we just inherited
        $wgUser = User::newFromSession();

        // grab the session data to setup the request:
        $sd =& $_SESSION['wsDownload'][$upload_session_key];
        // close down the session so that other http queries can get session updates:
        session_write_close();

        $req = new HttpRequest( $sd['url'], array(
            'target_file_path' => $sd['target_file_path'],
            'upload_session_key' => $upload_session_key,
            'timeout' => $wgAsyncHTTPTimeout
        ) );
        // run the actual request .. (this can take some time)
        wfDebug( __METHOD__ . "do Request: " . $sd['url'] . ' tf: ' . $sd['target_file_path'] );
        $status = $req->doRequest();

        // start up the session again:
        if ( session_start() === false ) {
            wfDebug( __METHOD__ . ' ERROR:: Could not start session' );
        }
        // grab the updated session data pointer
        $sd =& $_SESSION['wsDownload'][$upload_session_key];

        if ( !$status->isOK() ) {
            // the download failed: report the error
            $sd['apiUploadResult'] = ApiFormatJson::getJsonEncode(
                array( 'error' => $status->getWikiText() )
            );
        } else {
            // the download worked: process the upload with a FauxRequest to the api
            $fauxReqData = isset( $sd['mParams'] ) ? $sd['mParams'] : array();
            $fauxReqData['action'] = 'upload';
            $fauxReqData['format'] = 'json';
            $fauxReqData['internalhttpsession'] = $upload_session_key;

            // evil but no other clean way about it:
            $fauxReq = new FauxRequest( $fauxReqData, true );
            $processor = new ApiMain( $fauxReq, $wgEnableWriteAPI );

            // run the api module and capture what its json printer outputs
            $processor->execute();
            $processor->getResult()->cleanUpUTF8();
            $printer = $processor->createPrinterByName( 'json' );
            $printer->initPrinter( false );
            ob_start();
            $printer->execute();
            $apiUploadResult = ob_get_clean();

            wfDebug( __METHOD__ . "\n\n got api result:: $apiUploadResult \n" );
            // the status updates runner will grab the result from the session:
            $sd['apiUploadResult'] = $apiUploadResult;
        }
        // close the session:
        session_write_close();
    }

    /**
     * Check if the URL can be served by localhost
     *
     * Only plain http:// URLs are considered. The host and each of its
     * superdomains are checked against $wgConf->isLocalVHost().
     *
     * @param $url string Full url to check
     * @return bool
     */
    public static function isLocalURL( $url ) {
        global $wgCommandLineMode, $wgConf;
        if ( $wgCommandLineMode ) {
            return false;
        }

        // Extract host part; the path/port part is optional so that a bare
        // "http://host" is recognised as well
        $matches = array();
        if ( !preg_match( '!^http://([\w.-]+)(?:[/:].*)?$!', $url, $matches ) ) {
            return false;
        }

        // Walk from the top level domain down, growing the candidate one
        // label at a time: "org", "example.org", "www.example.org"
        $domain = '';
        foreach ( array_reverse( explode( '.', $matches[1] ) ) as $i => $domainPart ) {
            $domain = ( $i == 0 ) ? $domainPart : $domainPart . '.' . $domain;
            if ( $wgConf->isLocalVHost( $domain ) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Return a standard user-agent we can use for external requests.
     *
     * @return String: "MediaWiki/" followed by $wgVersion
     */
    public static function userAgent() {
        global $wgVersion;
        return "MediaWiki/$wgVersion";
    }
}
/**
 * A single HTTP request. The request is performed through cURL when the
 * extension is available and through PHP's URL stream wrapper otherwise.
 * The response body is either returned in the Status value or, when a
 * target file path was given, streamed to that file.
 */
class HttpRequest {
    /** URL to act on */
    public $url;
    /** Timeout handed to cURL / the stream context */
    public $timeout;
    /** Upper-cased HTTP method */
    public $method;
    /** Path the response body is written to, or false to return the body */
    public $target_file_path;
    /** Session key used for progress updates, or false when there is none */
    public $upload_session_key;

    /**
     * @param $url String: full URL to act on (validated in doRequest())
     * @param $opt Array: optional settings:
     *   'timeout'            => overrides $wgSyncHTTPTimeout
     *   'method'             => HTTP method, defaults to 'GET'
     *   'target_file_path'   => write the response body to this file
     *   'upload_session_key' => session key for progress updates
     */
    public function __construct( $url, $opt ) {
        global $wgSyncHTTPTimeout;
        $this->url = $url;

        // set the timeout to default sync timeout (unless the timeout option is provided)
        $this->timeout = isset( $opt['timeout'] ) ? $opt['timeout'] : $wgSyncHTTPTimeout;
        // Upper-case the method so that both back ends treat e.g. 'post' alike
        $this->method = isset( $opt['method'] ) ? strtoupper( $opt['method'] ) : 'GET';
        $this->target_file_path = isset( $opt['target_file_path'] ) ? $opt['target_file_path'] : false;
        $this->upload_session_key = isset( $opt['upload_session_key'] ) ? $opt['upload_session_key'] : false;
    }

    /**
     * Run the request configured in the constructor.
     *
     * @return Status: fatal for an invalid URL or a failed request; otherwise
     *   the Status produced by the cURL or PHP stream implementation
     */
    public function doRequest() {
        # make sure we have a valid url
        if ( !UploadFromUrl::isValidURI( $this->url ) ) {
            return Status::newFatal( 'bad-url' );
        }

        # Use curl if available; it does not depend on allow_url_fopen
        if ( function_exists( 'curl_init' ) ) {
            return $this->doCurlReq();
        }

        # The stream wrapper fallback needs allow_url_fopen. ini_get() may
        # report an enabled boolean as "1" or as the literal ini string.
        $urlFopen = strtolower( trim( (string)ini_get( 'allow_url_fopen' ) ) );
        if ( !in_array( $urlFopen, array( '1', 'on', 'true', 'yes' ), true ) ) {
            return Status::newFatal( 'allow_url_fopen needs to be enabled for http copy to work' );
        }
        return $this->doPhpReq();
    }

    /**
     * Perform the request with cURL.
     *
     * @return Status: good with the response body as value (or without a
     *   value when the body went to the target file); fatal on a transfer
     *   error, a file writer error or an HTTP status other than 200
     */
    private function doCurlReq() {
        global $wgHTTPProxy, $wgTitle;

        $status = Status::newGood();

        // Open the target file before the curl handle is created so that a
        // failure here leaves nothing behind that needs closing
        $cwrite = null;
        if ( $this->target_file_path ) {
            $cwrite = new simpleFileWriter( $this->target_file_path, $this->upload_session_key );
            if ( !$cwrite->status->isOK() ) {
                wfDebug( __METHOD__ . "ERROR in setting up simpleFileWriter\n" );
                return $cwrite->status;
            }
        }

        $c = curl_init( $this->url );

        // proxy setup:
        if ( Http::isLocalURL( $this->url ) ) {
            curl_setopt( $c, CURLOPT_PROXY, 'localhost:80' );
        } else if ( $wgHTTPProxy ) {
            curl_setopt( $c, CURLOPT_PROXY, $wgHTTPProxy );
        }

        curl_setopt( $c, CURLOPT_TIMEOUT, $this->timeout );
        curl_setopt( $c, CURLOPT_USERAGENT, Http::userAgent() );

        if ( $this->method == 'POST' ) {
            curl_setopt( $c, CURLOPT_POST, true );
            curl_setopt( $c, CURLOPT_POSTFIELDS, '' );
        } else {
            curl_setopt( $c, CURLOPT_CUSTOMREQUEST, $this->method );
        }

        # Set the referer to $wgTitle, even in command-line mode
        # This is useful for interwiki transclusion, where the foreign
        # server wants to know what the referring page is.
        # $_SERVER['REQUEST_URI'] gives a less reliable indication of the
        # referring page.
        if ( is_object( $wgTitle ) ) {
            curl_setopt( $c, CURLOPT_REFERER, $wgTitle->getFullURL() );
        }

        if ( $cwrite !== null ) {
            // stream the body into the file through the writer callback
            curl_setopt( $c, CURLOPT_WRITEFUNCTION, array( $cwrite, 'callbackWriteBody' ) );
        } else {
            // have curl_exec() return the body instead of printing it, so no
            // output buffer is left open when the transfer fails
            curl_setopt( $c, CURLOPT_RETURNTRANSFER, true );
        }

        // run the actual curl_exec:
        $result = false;
        try {
            $result = curl_exec( $c );
            if ( $result === false ) {
                $error_txt = 'Error sending request: #' . curl_errno( $c ) . ' ' . curl_error( $c );
                wfDebug( __METHOD__ . $error_txt . "\n" );
                $status = Status::newFatal( $error_txt );
            }
        } catch ( Exception $e ) {
            // an exception raised while the transfer was running (e.g. from
            // the write callback) is reported instead of being dropped
            $error_txt = 'Error sending request: ' . $e->getMessage();
            wfDebug( __METHOD__ . $error_txt . "\n" );
            $status = Status::newFatal( $error_txt );
        }

        // if direct request output the results to the status value:
        if ( $cwrite === null && $status->isOK() ) {
            $status->value = $result;
        }

        // if we wrote to a target file close up or return error
        if ( $cwrite !== null ) {
            $cwrite->close();
            if ( !$cwrite->status->isOK() ) {
                curl_close( $c );
                return $cwrite->status;
            }
        }

        # Don't return the text of error messages, return false on error
        $retcode = curl_getinfo( $c, CURLINFO_HTTP_CODE );
        if ( $retcode != 200 ) {
            wfDebug( __METHOD__ . ": HTTP return code $retcode\n" );
            $status = Status::newFatal( "HTTP return code $retcode\n" );
        }
        # Don't return truncated output
        $errno = curl_errno( $c );
        if ( $errno != CURLE_OK ) {
            $errstr = curl_error( $c );
            wfDebug( __METHOD__ . ": CURL error code $errno: $errstr\n" );
            $status = Status::newFatal( " CURL error code $errno: $errstr\n" );
        }
        curl_close( $c );

        // return the result obj
        return $status;
    }

    /**
     * Perform the request with PHP's URL stream wrapper.
     *
     * NOTE(review): unlike doCurlReq() this path does not apply
     * $wgHTTPProxy or the localhost proxy for local URLs.
     *
     * @return Status: good with the response body as value (or without a
     *   value when the body went to the target file); fatal when the URL
     *   could not be opened or the file writer reported an error
     */
    public function doPhpReq() {
        global $wgTitle;
        // start with good status:
        $status = Status::newGood();

        // setup the headers
        $headers = array( "User-Agent: " . Http::userAgent() );
        if ( is_object( $wgTitle ) ) {
            $headers[] = "Referer: " . $wgTitle->getFullURL();
        }
        if ( strcasecmp( $this->method, 'post' ) == 0 ) {
            // Required for HTTP 1.0 POSTs
            $headers[] = "Content-Length: 0";
        }

        $fcontext = stream_context_create( array(
            'http' => array(
                'method' => $this->method,
                'header' => implode( "\r\n", $headers ),
                'timeout' => $this->timeout
            )
        ) );

        $fh = fopen( $this->url, "r", false, $fcontext );
        if ( $fh === false ) {
            // nothing can be read from (or closed on) a failed open
            wfDebug( __METHOD__ . ": could not open {$this->url}\n" );
            return Status::newFatal( 'file_get_contents-failed' );
        }

        if ( $this->target_file_path ) {
            // stream the body into the target file
            $cwrite = new simpleFileWriter( $this->target_file_path, $this->upload_session_key );
            if ( !$cwrite->status->isOK() ) {
                wfDebug( __METHOD__ . "ERROR in setting up simpleFileWriter\n" );
                fclose( $fh );
                return $cwrite->status;
            }

            // read $fh into the simpleFileWriter (grab in 64K chunks since its likely a media file)
            $readFailed = false;
            while ( !feof( $fh ) ) {
                $contents = fread( $fh, 65536 );
                if ( $contents === false ) {
                    $readFailed = true;
                    break;
                }
                $cwrite->callbackWriteBody( $fh, $contents );
                // the writer flags write errors, size limit violations and
                // cancelled sessions in its status: stop reading then
                if ( !$cwrite->status->isOK() ) {
                    break;
                }
            }

            $cwrite->close();
            // check for simpleFileWriter error:
            if ( !$cwrite->status->isOK() ) {
                fclose( $fh );
                return $cwrite->status;
            }
            if ( $readFailed ) {
                fclose( $fh );
                return Status::newFatal( 'file_get_contents-failed' );
            }
        } else {
            // read $fh into status->value
            $status->value = @stream_get_contents( $fh );
        }
        // close the url file wrapper
        fclose( $fh );

        // check for "false"
        if ( $status->value === false ) {
            $status->error( 'file_get_contents-failed' );
        }
        return $status;
    }

}
443
/**
 * Writes downloaded data to a file, enforcing $wgMaxUploadSize, and - when an
 * upload session key is given - publishes the number of bytes written so far
 * in $_SESSION['wsDownload'][<key>]['loaded'] and honours a 'user_cancel'
 * flag stored in the same session entry.
 */
class simpleFileWriter {
    /** Path of the file being written */
    public $target_file_path;
    /** Key of the session entry to update, or a false value for no session updates */
    public $upload_session_key;
    /** Status of the writer; callers check it after construction, writes and close() */
    public $status = null;
    /** Not used by this class; kept for backward compatibility */
    public $session_id = null;
    /** Seconds that have to pass between two session updates while downloading */
    public $session_update_interval = 0;
    /** File handle, or false when the file could not be opened or was closed */
    public $fp = false;
    /** Time of the last session update */
    public $prevTime = 0;
    /** Number of bytes written to the file so far */
    public $current_fsize = 0;

    /**
     * Open the target file for writing. Failure is reported through
     * $this->status rather than by throwing.
     *
     * @param $target_file_path String: file to write to (truncated if it exists)
     * @param $upload_session_key Mixed: session key for progress updates, or false
     */
    public function __construct( $target_file_path, $upload_session_key ) {
        $this->target_file_path = $target_file_path;
        $this->upload_session_key = $upload_session_key;
        $this->status = Status::newGood();
        // open the file:
        $this->fp = fopen( $this->target_file_path, 'wb' );
        if ( $this->fp === false ) {
            $this->status = Status::newFatal( 'HTTP::could-not-open-file-for-writing' );
        }
        // true start time
        $this->prevTime = time();
    }

    /**
     * Write one chunk of data. The signature matches what cURL expects from
     * a CURLOPT_WRITEFUNCTION callback.
     *
     * @param $ch Mixed: handle the data came from (not used)
     * @param $data_packet String: the data to append to the file
     * @return Integer: number of bytes handled; 0 signals that the transfer
     *   has to be aborted, in which case $this->status holds the reason
     */
    public function callbackWriteBody( $ch, $data_packet ) {
        global $wgMaxUploadSize;

        // write out the content
        $packetLength = strlen( $data_packet );
        $written = is_resource( $this->fp ) ? fwrite( $this->fp, $data_packet ) : false;
        if ( $written === false || $written < $packetLength ) {
            wfDebug( __METHOD__ . " ::could-not-write-to-file\n" );
            $this->status = Status::newFatal( 'HTTP::could-not-write-to-file' );
            return 0;
        }

        // The file starts out empty, so the running total of written bytes
        // is the file size; no need to stat the file for every packet
        $this->current_fsize += $written;

        if ( $this->current_fsize > $wgMaxUploadSize ) {
            wfDebug( __METHOD__ . " ::http download too large\n" );
            $this->status = Status::newFatal( 'HTTP::file-has-grown-beyond-upload-limit-killing: downloaded more than ' .
                Language::formatSize( $wgMaxUploadSize ) . ' ' );
            return 0;
        }

        // if more than session_update_interval second have passed update_session_progress
        if ( $this->upload_session_key && ( ( time() - $this->prevTime ) > $this->session_update_interval ) ) {
            $this->prevTime = time();
            $session_status = $this->update_session_progress();
            if ( !$session_status->isOK() ) {
                $this->status = $session_status;
                wfDebug( __METHOD__ . ' update session failed or was canceled' );
                return 0;
            }
        }
        return $packetLength;
    }

    /**
     * Store the number of bytes written so far in the session and check
     * whether the user cancelled the download.
     *
     * @return Status: good when there is no session to update or the update
     *   went through; fatal when the session could not be started or the
     *   session entry carries a true 'user_cancel' value
     */
    public function update_session_progress() {
        // Without a session key there is no session entry to update, and the
        // session of the current request must be left alone
        if ( !$this->upload_session_key ) {
            return Status::newGood();
        }

        // start the session
        if ( session_start() === false ) {
            wfDebug( __METHOD__ . ' could not start session' );
            return Status::newFatal( 'could not start session' );
        }
        $sd =& $_SESSION['wsDownload'][$this->upload_session_key];

        // check if the user canceled the request:
        if ( !empty( $sd['user_cancel'] ) ) {
            // release the session before killing the download
            session_write_close();
            return Status::newFatal( 'user-canceled-request' );
        }

        // update the progress bytes download so far:
        $sd['loaded'] = $this->current_fsize;
        wfDebug( __METHOD__ . ': set session loaded amount to: ' . $sd['loaded'] . "\n" );
        // close down the session so that other http queries can get session updates:
        session_write_close();
        return Status::newGood();
    }

    /**
     * Publish the final progress and close the file. A failure to close the
     * file is reported through $this->status.
     */
    public function close() {
        // do a final session update:
        $this->update_session_progress();
        // close up the file handle (if the file was opened at all):
        if ( is_resource( $this->fp ) ) {
            if ( false === fclose( $this->fp ) ) {
                $this->status = Status::newFatal( 'HTTP::could-not-close-file' );
            }
            $this->fp = false;
        }
    }

}
529
530 }