From 24aa72de84af6be14e556ede018548dcc8400e09 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Sun, 18 Sep 2016 15:49:34 -0700 Subject: [PATCH] Add HTTPFileStreamer class * Move most StreamFile code to this new class * Remove StreamFile depedency from FileBackendStore Change-Id: I4a272ef49497b589114fc2b37ba800bc26d9161f --- autoload.php | 1 + includes/StreamFile.php | 157 +--------- includes/filebackend/FileBackendGroup.php | 10 +- includes/filebackend/FileBackendStore.php | 17 +- includes/libs/filebackend/FileBackend.php | 44 +-- .../libs/filebackend/HTTPFileStreamer.php | 268 ++++++++++++++++++ includes/libs/time/ConvertibleTimestamp.php | 2 +- 7 files changed, 328 insertions(+), 171 deletions(-) create mode 100644 includes/libs/filebackend/HTTPFileStreamer.php diff --git a/autoload.php b/autoload.php index bbde55ed0c..117e660cba 100644 --- a/autoload.php +++ b/autoload.php @@ -557,6 +557,7 @@ $wgAutoloadLocalClasses = [ 'HTMLTextFieldWithButton' => __DIR__ . '/includes/htmlform/fields/HTMLTextFieldWithButton.php', 'HTMLTitleTextField' => __DIR__ . '/includes/htmlform/fields/HTMLTitleTextField.php', 'HTMLUserTextField' => __DIR__ . '/includes/htmlform/fields/HTMLUserTextField.php', + 'HTTPFileStreamer' => __DIR__ . '/includes/libs/filebackend/HTTPFileStreamer.php', 'HWLDFWordAccumulator' => __DIR__ . '/includes/diff/DairikiDiff.php', 'HashBagOStuff' => __DIR__ . '/includes/libs/objectcache/HashBagOStuff.php', 'HashConfig' => __DIR__ . '/includes/config/HashConfig.php', diff --git a/includes/StreamFile.php b/includes/StreamFile.php index 0fc79802f3..cce3fc464b 100644 --- a/includes/StreamFile.php +++ b/includes/StreamFile.php @@ -25,9 +25,9 @@ */ class StreamFile { // Do not send any HTTP headers unless requested by caller (e.g. 
body only) - const STREAM_HEADLESS = 1; + const STREAM_HEADLESS = HTTPFileStreamer::STREAM_HEADLESS; // Do not try to tear down any PHP output buffers - const STREAM_ALLOW_OB = 2; + const STREAM_ALLOW_OB = HTTPFileStreamer::STREAM_ALLOW_OB; /** * Stream a file to the browser, adding all the headings and fun stuff. @@ -45,115 +45,19 @@ class StreamFile { public static function stream( $fname, $headers = [], $sendErrors = true, $optHeaders = [], $flags = 0 ) { - $section = new ProfileSection( __METHOD__ ); - if ( FileBackend::isStoragePath( $fname ) ) { // sanity - throw new MWException( __FUNCTION__ . " given storage path '$fname'." ); - } - - // Don't stream it out as text/html if there was a PHP error - if ( ( ( $flags & self::STREAM_HEADLESS ) == 0 || $headers ) && headers_sent() ) { - echo "Headers already sent, terminating.\n"; - return false; - } - - $headerFunc = ( $flags & self::STREAM_HEADLESS ) - ? function ( $header ) { - // no-op - } - : function ( $header ) { - is_int( $header ) ? HttpStatus::header( $header ) : header( $header ); - }; - - MediaWiki\suppressWarnings(); - $info = stat( $fname ); - MediaWiki\restoreWarnings(); - - if ( !is_array( $info ) ) { - if ( $sendErrors ) { - self::send404Message( $fname, $flags ); - } - return false; - } - - // Send Last-Modified HTTP header for client-side caching - $headerFunc( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $info['mtime'] ) ); - - if ( ( $flags & self::STREAM_ALLOW_OB ) == 0 ) { - // Cancel output buffering and gzipping if set - wfResetOutputBuffers(); - } - - $type = self::contentTypeFromPath( $fname ); - if ( $type && $type != 'unknown/unknown' ) { - $headerFunc( "Content-type: $type" ); - } else { - // Send a content type which is not known to Internet Explorer, to - // avoid triggering IE's content type detection. Sending a standard - // unknown content type here essentially gives IE license to apply - // whatever content type it likes. 
- $headerFunc( 'Content-type: application/x-wiki' ); + throw new InvalidArgumentException( __FUNCTION__ . " given storage path '$fname'." ); } - // Don't send if client has up to date cache - if ( isset( $optHeaders['if-modified-since'] ) ) { - $modsince = preg_replace( '/;.*$/', '', $optHeaders['if-modified-since'] ); - if ( wfTimestamp( TS_UNIX, $info['mtime'] ) <= strtotime( $modsince ) ) { - ini_set( 'zlib.output_compression', 0 ); - $headerFunc( 304 ); - return true; // ok - } - } - - // Send additional headers - foreach ( $headers as $header ) { - header( $header ); // always use header(); specifically requested - } - - if ( isset( $optHeaders['range'] ) ) { - $range = self::parseRange( $optHeaders['range'], $info['size'] ); - if ( is_array( $range ) ) { - $headerFunc( 206 ); - $headerFunc( 'Content-Length: ' . $range[2] ); - $headerFunc( "Content-Range: bytes {$range[0]}-{$range[1]}/{$info['size']}" ); - } elseif ( $range === 'invalid' ) { - if ( $sendErrors ) { - $headerFunc( 416 ); - $headerFunc( 'Cache-Control: no-cache' ); - $headerFunc( 'Content-Type: text/html; charset=utf-8' ); - $headerFunc( 'Content-Range: bytes */' . $info['size'] ); - } - return false; - } else { // unsupported Range request (e.g. multiple ranges) - $range = null; - $headerFunc( 'Content-Length: ' . $info['size'] ); - } - } else { - $range = null; - $headerFunc( 'Content-Length: ' . 
$info['size'] ); - } + $streamer = new HTTPFileStreamer( + $fname, + [ + 'obResetFunc' => 'wfResetOutputBuffers', + 'streamMimeFunc' => [ __CLASS__, 'contentTypeFromPath' ] + ] + ); - if ( is_array( $range ) ) { - $handle = fopen( $fname, 'rb' ); - if ( $handle ) { - $ok = true; - fseek( $handle, $range[0] ); - $remaining = $range[2]; - while ( $remaining > 0 && $ok ) { - $bytes = min( $remaining, 8 * 1024 ); - $data = fread( $handle, $bytes ); - $remaining -= $bytes; - $ok = ( $data !== false ); - print $data; - } - } else { - return false; - } - } else { - return readfile( $fname ) !== false; // faster - } - - return true; + return $streamer->stream( $headers, $sendErrors, $optHeaders, $flags ); } /** @@ -164,19 +68,7 @@ class StreamFile { * @since 1.24 */ public static function send404Message( $fname, $flags = 0 ) { - if ( ( $flags & self::STREAM_HEADLESS ) == 0 ) { - HttpStatus::header( 404 ); - header( 'Cache-Control: no-cache' ); - header( 'Content-Type: text/html; charset=utf-8' ); - } - $encFile = htmlspecialchars( $fname ); - $encScript = htmlspecialchars( $_SERVER['SCRIPT_NAME'] ); - echo " -

File not found

-

Although this PHP script ($encScript) exists, the file requested for output - ($encFile) does not.

- - "; + HTTPFileStreamer::send404Message( $fname, $flags ); } /** @@ -188,30 +80,7 @@ class StreamFile { * @since 1.24 */ public static function parseRange( $range, $size ) { - $m = []; - if ( preg_match( '#^bytes=(\d*)-(\d*)$#', $range, $m ) ) { - list( , $start, $end ) = $m; - if ( $start === '' && $end === '' ) { - $absRange = [ 0, $size - 1 ]; - } elseif ( $start === '' ) { - $absRange = [ $size - $end, $size - 1 ]; - } elseif ( $end === '' ) { - $absRange = [ $start, $size - 1 ]; - } else { - $absRange = [ $start, $end ]; - } - if ( $absRange[0] >= 0 && $absRange[1] >= $absRange[0] ) { - if ( $absRange[0] < $size ) { - $absRange[1] = min( $absRange[1], $size - 1 ); // stop at EOF - $absRange[2] = $absRange[1] - $absRange[0] + 1; - return $absRange; - } elseif ( $absRange[0] == 0 && $size == 0 ) { - return 'unrecognized'; // the whole file should just be sent - } - } - return 'invalid'; - } - return 'unrecognized'; + return HTTPFileStreamer::parseRange( $range, $size ); } /** diff --git a/includes/filebackend/FileBackendGroup.php b/includes/filebackend/FileBackendGroup.php index d9e5a540e2..18a7cca59d 100644 --- a/includes/filebackend/FileBackendGroup.php +++ b/includes/filebackend/FileBackendGroup.php @@ -158,16 +158,18 @@ class FileBackendGroup { if ( !isset( $this->backends[$name]['instance'] ) ) { $class = $this->backends[$name]['class']; $config = $this->backends[$name]['config']; - $config['wikiId'] = isset( $config['wikiId'] ) - ? $config['wikiId'] - : wfWikiID(); // e.g. "my_wiki-en_" + $config += [ + 'wikiId' => wfWikiID(), // e.g. "my_wiki-en_" + 'mimeCallback' => [ $this, 'guessMimeInternal' ], + 'obResetFunc' => 'wfResetOutputBuffers', + 'streamMimeFunc' => [ 'StreamFile', 'contentTypeFromPath' ] + ]; $config['lockManager'] = LockManagerGroup::singleton( $config['wikiId'] )->get( $config['lockManager'] ); $config['fileJournal'] = isset( $config['fileJournal'] ) ? 
FileJournal::factory( $config['fileJournal'], $name ) : FileJournal::factory( [ 'class' => 'NullFileJournal' ], $name ); $config['wanCache'] = ObjectCache::getMainWANInstance(); - $config['mimeCallback'] = [ $this, 'guessMimeInternal' ]; $config['statusWrapper'] = [ 'Status', 'wrap' ]; $config['tmpDirectory'] = wfTempDir(); $config['logger'] = LoggerFactory::getInstance( 'FileOperation' ); diff --git a/includes/filebackend/FileBackendStore.php b/includes/filebackend/FileBackendStore.php index 31c94ce56f..3fd1ce6f5b 100644 --- a/includes/filebackend/FileBackendStore.php +++ b/includes/filebackend/FileBackendStore.php @@ -869,17 +869,22 @@ abstract class FileBackendStore extends FileBackend { $status = $this->newStatus(); $flags = 0; - $flags |= !empty( $params['headless'] ) ? StreamFile::STREAM_HEADLESS : 0; - $flags |= !empty( $params['allowOB'] ) ? StreamFile::STREAM_ALLOW_OB : 0; + $flags |= !empty( $params['headless'] ) ? HTTPFileStreamer::STREAM_HEADLESS : 0; + $flags |= !empty( $params['allowOB'] ) ? 
HTTPFileStreamer::STREAM_ALLOW_OB : 0; $fsFile = $this->getLocalReference( $params ); - if ( $fsFile ) { - $res = StreamFile::stream( $fsFile->getPath(), - $params['headers'], true, $params['options'], $flags ); + $streamer = new HTTPFileStreamer( + $fsFile->getPath(), + [ + 'obResetFunc' => $this->obResetFunc, + 'streamMimeFunc' => $this->streamMimeFunc + ] + ); + $res = $streamer->stream( $params['headers'], true, $params['options'], $flags ); } else { $res = false; - StreamFile::send404Message( $params['src'], $flags ); + HTTPFileStreamer::send404Message( $params['src'], $flags ); } if ( !$res ) { diff --git a/includes/libs/filebackend/FileBackend.php b/includes/libs/filebackend/FileBackend.php index b53c545be5..aa25f4343a 100644 --- a/includes/libs/filebackend/FileBackend.php +++ b/includes/libs/filebackend/FileBackend.php @@ -117,6 +117,10 @@ abstract class FileBackend implements LoggerAwareInterface { /** @var object|string Class name or object With profileIn/profileOut methods */ protected $profiler; + /** @var callable */ + protected $obResetFunc; + /** @var callable */ + protected $streamMimeFunc; /** @var callable */ protected $statusWrapper; @@ -130,27 +134,29 @@ abstract class FileBackend implements LoggerAwareInterface { * This should only be called from within FileBackendGroup. * * @param array $config Parameters include: - * - name : The unique name of this backend. - * This should consist of alphanumberic, '-', and '_' characters. - * This name should not be changed after use (e.g. with journaling). - * Note that the name is *not* used in actual container names. - * - domainId : Prefix to container names that is unique to this backend. - * It should only consist of alphanumberic, '-', and '_' characters. - * This ID is what avoids collisions if multiple logical backends - * use the same storage system, so this should be set carefully. + * - name : The unique name of this backend. + * This should consist of alphanumberic, '-', and '_' characters. 
+ * This name should not be changed after use (e.g. with journaling). + * Note that the name is *not* used in actual container names. + * - domainId : Prefix to container names that is unique to this backend. + * It should only consist of alphanumberic, '-', and '_' characters. + * This ID is what avoids collisions if multiple logical backends + * use the same storage system, so this should be set carefully. * - lockManager : LockManager object to use for any file locking. - * If not provided, then no file locking will be enforced. + * If not provided, then no file locking will be enforced. * - fileJournal : FileJournal object to use for logging changes to files. - * If not provided, then change journaling will be disabled. - * - readOnly : Write operations are disallowed if this is a non-empty string. - * It should be an explanation for the backend being read-only. + * If not provided, then change journaling will be disabled. + * - readOnly : Write operations are disallowed if this is a non-empty string. + * It should be an explanation for the backend being read-only. * - parallelize : When to do file operations in parallel (when possible). - * Allowed values are "implicit", "explicit" and "off". + * Allowed values are "implicit", "explicit" and "off". * - concurrency : How many file operations can be done in parallel. * - tmpDirectory : Directory to use for temporary files. If this is not set or null, - * then the backend will try to discover a usable temporary directory. - * - logger : Optional PSR logger object. - * - profiler : Optional class name or object With profileIn/profileOut methods. + * then the backend will try to discover a usable temporary directory. + * - obResetFunc : alternative callback to clear the output buffer + * - streamMimeFunc : alternative method to determine the content type from the path + * - logger : Optional PSR logger object. + * - profiler : Optional class name or object With profileIn/profileOut methods. 
* @throws InvalidArgumentException */ public function __construct( array $config ) { @@ -179,6 +185,12 @@ abstract class FileBackend implements LoggerAwareInterface { $this->concurrency = isset( $config['concurrency'] ) ? (int)$config['concurrency'] : 50; + $this->obResetFunc = isset( $config['obResetFunc'] ) ? $config['obResetFunc'] : null; + $this->streamMimeFunc = isset( $config['streamMimeFunc'] ) + ? $config['streamMimeFunc'] + : null; + $this->statusWrapper = isset( $config['statusWrapper'] ) ? $config['statusWrapper'] : null; + $this->profiler = isset( $config['profiler'] ) ? $config['profiler'] : null; $this->logger = isset( $config['logger'] ) ? $config['logger'] : new \Psr\Log\NullLogger(); $this->statusWrapper = isset( $config['statusWrapper'] ) ? $config['statusWrapper'] : null; diff --git a/includes/libs/filebackend/HTTPFileStreamer.php b/includes/libs/filebackend/HTTPFileStreamer.php new file mode 100644 index 0000000000..800fdfad3e --- /dev/null +++ b/includes/libs/filebackend/HTTPFileStreamer.php @@ -0,0 +1,268 @@ +path = $path; + $this->obResetFunc = isset( $params['obResetFunc'] ) + ? $params['obResetFunc'] + : [ __CLASS__, 'resetOutputBuffers' ]; + $this->streamMimeFunc = isset( $params['streamMimeFunc'] ) + ? $params['streamMimeFunc'] + : [ __CLASS__, 'contentTypeFromPath' ]; + } + + /** + * Stream a file to the browser, adding all the headings and fun stuff. + * Headers sent include: Content-type, Content-Length, Last-Modified, + * and Content-Disposition. + * + * @param array $headers Any additional headers to send if the file exists + * @param bool $sendErrors Send error messages if errors occur (like 404) + * @param array $optHeaders HTTP request header map (e.g. 
"range") (use lowercase keys) + * @param integer $flags Bitfield of STREAM_* constants + * @throws MWException + * @return bool Success + */ + public function stream( + $headers = [], $sendErrors = true, $optHeaders = [], $flags = 0 + ) { + // Don't stream it out as text/html if there was a PHP error + if ( ( ( $flags & self::STREAM_HEADLESS ) == 0 || $headers ) && headers_sent() ) { + echo "Headers already sent, terminating.\n"; + return false; + } + + $headerFunc = ( $flags & self::STREAM_HEADLESS ) + ? function ( $header ) { + // no-op + } + : function ( $header ) { + is_int( $header ) ? HttpStatus::header( $header ) : header( $header ); + }; + + MediaWiki\suppressWarnings(); + $info = stat( $this->path ); + MediaWiki\restoreWarnings(); + + if ( !is_array( $info ) ) { + if ( $sendErrors ) { + self::send404Message( $this->path, $flags ); + } + return false; + } + + // Send Last-Modified HTTP header for client-side caching + $mtimeCT = new ConvertibleTimestamp( $info['mtime'] ); + $headerFunc( 'Last-Modified: ' . $mtimeCT->getTimestamp( TS_RFC2822 ) ); + + if ( ( $flags & self::STREAM_ALLOW_OB ) == 0 ) { + call_user_func( $this->obResetFunc ); + } + + $type = call_user_func( $this->streamMimeFunc, $this->path ); + if ( $type && $type != 'unknown/unknown' ) { + $headerFunc( "Content-type: $type" ); + } else { + // Send a content type which is not known to Internet Explorer, to + // avoid triggering IE's content type detection. Sending a standard + // unknown content type here essentially gives IE license to apply + // whatever content type it likes. 
+ $headerFunc( 'Content-type: application/x-wiki' ); + } + + // Don't send if client has up to date cache + if ( isset( $optHeaders['if-modified-since'] ) ) { + $modsince = preg_replace( '/;.*$/', '', $optHeaders['if-modified-since'] ); + if ( $mtimeCT->getTimestamp( TS_UNIX ) <= strtotime( $modsince ) ) { + ini_set( 'zlib.output_compression', 0 ); + $headerFunc( 304 ); + return true; // ok + } + } + + // Send additional headers + foreach ( $headers as $header ) { + header( $header ); // always use header(); specifically requested + } + + if ( isset( $optHeaders['range'] ) ) { + $range = self::parseRange( $optHeaders['range'], $info['size'] ); + if ( is_array( $range ) ) { + $headerFunc( 206 ); + $headerFunc( 'Content-Length: ' . $range[2] ); + $headerFunc( "Content-Range: bytes {$range[0]}-{$range[1]}/{$info['size']}" ); + } elseif ( $range === 'invalid' ) { + if ( $sendErrors ) { + $headerFunc( 416 ); + $headerFunc( 'Cache-Control: no-cache' ); + $headerFunc( 'Content-Type: text/html; charset=utf-8' ); + $headerFunc( 'Content-Range: bytes */' . $info['size'] ); + } + return false; + } else { // unsupported Range request (e.g. multiple ranges) + $range = null; + $headerFunc( 'Content-Length: ' . $info['size'] ); + } + } else { + $range = null; + $headerFunc( 'Content-Length: ' . 
$info['size'] ); + } + + if ( is_array( $range ) ) { + $handle = fopen( $this->path, 'rb' ); + if ( $handle ) { + $ok = true; + fseek( $handle, $range[0] ); + $remaining = $range[2]; + while ( $remaining > 0 && $ok ) { + $bytes = min( $remaining, 8 * 1024 ); + $data = fread( $handle, $bytes ); + $remaining -= $bytes; + $ok = ( $data !== false ); + print $data; + } + } else { + return false; + } + } else { + return readfile( $this->path ) !== false; // faster + } + + return true; + } + + /** + * Send out a standard 404 message for a file + * + * @param string $fname Full name and path of the file to stream + * @param integer $flags Bitfield of STREAM_* constants + * @since 1.24 + */ + public static function send404Message( $fname, $flags = 0 ) { + if ( ( $flags & self::STREAM_HEADLESS ) == 0 ) { + HttpStatus::header( 404 ); + header( 'Cache-Control: no-cache' ); + header( 'Content-Type: text/html; charset=utf-8' ); + } + $encFile = htmlspecialchars( $fname ); + $encScript = htmlspecialchars( $_SERVER['SCRIPT_NAME'] ); + echo " +

File not found

+

Although this PHP script ($encScript) exists, the file requested for output + ($encFile) does not.

+ + "; + } + + /** + * Convert a Range header value to an absolute (start, end) range tuple + * + * @param string $range Range header value + * @param integer $size File size + * @return array|string Returns error string on failure (start, end, length) + * @since 1.24 + */ + public static function parseRange( $range, $size ) { + $m = []; + if ( preg_match( '#^bytes=(\d*)-(\d*)$#', $range, $m ) ) { + list( , $start, $end ) = $m; + if ( $start === '' && $end === '' ) { + $absRange = [ 0, $size - 1 ]; + } elseif ( $start === '' ) { + $absRange = [ $size - $end, $size - 1 ]; + } elseif ( $end === '' ) { + $absRange = [ $start, $size - 1 ]; + } else { + $absRange = [ $start, $end ]; + } + if ( $absRange[0] >= 0 && $absRange[1] >= $absRange[0] ) { + if ( $absRange[0] < $size ) { + $absRange[1] = min( $absRange[1], $size - 1 ); // stop at EOF + $absRange[2] = $absRange[1] - $absRange[0] + 1; + return $absRange; + } elseif ( $absRange[0] == 0 && $size == 0 ) { + return 'unrecognized'; // the whole file should just be sent + } + } + return 'invalid'; + } + return 'unrecognized'; + } + + protected static function resetOutputBuffers() { + while ( ob_get_status() ) { + if ( !ob_end_clean() ) { + // Could not remove output buffer handler; abort now + // to avoid getting in some kind of infinite loop. + break; + } + } + } + + /** + * Determine the file type of a file based on the path + * + * @param string $filename Storage path or file system path + * @return null|string + */ + protected static function contentTypeFromPath( $filename ) { + $ext = strrchr( $filename, '.' ); + $ext = $ext === false ? 
'' : strtolower( substr( $ext, 1 ) ); + + switch ( $ext ) { + case 'gif': + return 'image/gif'; + case 'png': + return 'image/png'; + case 'jpg': + return 'image/jpeg'; + case 'jpeg': + return 'image/jpeg'; + } + + return 'unknown/unknown'; + } +} diff --git a/includes/libs/time/ConvertibleTimestamp.php b/includes/libs/time/ConvertibleTimestamp.php index 7cada849be..b02985a15b 100644 --- a/includes/libs/time/ConvertibleTimestamp.php +++ b/includes/libs/time/ConvertibleTimestamp.php @@ -224,7 +224,7 @@ class ConvertibleTimestamp { } /** - * Calculate the difference between two ConvertableTimestamp objects. + * Calculate the difference between two ConvertibleTimestamp objects. * * @param ConvertibleTimestamp $relativeTo Base time to calculate difference from * @return DateInterval|bool The DateInterval object representing the -- 2.20.1