From dcbe8e7dd3c4d54391d30add214e3c31ce381e34 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Thu, 8 Dec 2011 03:43:07 +0000 Subject: [PATCH] FU r101117: removed cURL thumb handler code and made thumb_handler.php a thin wrapper around thumb.php * Moved original URL fetching code and parameter extraction code to thumb.php * Made use of local repo URL and hash settings to avoid extra config code * This makes it easy to add hooks for extensions/config to alter behavior (ExtractThumbParameters hook added) * Added FileRepo::getHashLevels() --- docs/hooks.txt | 4 + includes/filerepo/FileRepo.php | 9 ++ thumb.config.sample | 49 ------- thumb.php | 122 +++++++++++++++-- thumb_handler.php | 237 ++------------------------------- 5 files changed, 129 insertions(+), 292 deletions(-) delete mode 100644 thumb.config.sample diff --git a/docs/hooks.txt b/docs/hooks.txt index 8223ef17c7..dd9a9c1db5 100644 --- a/docs/hooks.txt +++ b/docs/hooks.txt @@ -867,6 +867,10 @@ $ip: The ip address of the user 'ExtensionTypes': called when generating the extensions credits, use this to change the tables headers &$extTypes: associative array of extensions types +'ExtractThumbParameters': called when extracting thumbnail parameters from a thumbnail file name +$thumbname: the base name of the thumbnail file +&$params: the currently extracted params (has source name, temp or archived zone) + 'FetchChangesList': When fetching the ChangesList derivative for a particular user $user: User the list is being fetched for diff --git a/includes/filerepo/FileRepo.php b/includes/filerepo/FileRepo.php index dbda0d1bfc..58b64e4f51 100644 --- a/includes/filerepo/FileRepo.php +++ b/includes/filerepo/FileRepo.php @@ -276,6 +276,15 @@ abstract class FileRepo { } } + /** + * Get the number of hash directory levels + * + * @return integer + */ + function getHashLevels() { + return $this->hashLevels; + } + /** * Get a relative path including trailing slash, e.g. 
f/fa/ * If the repo is not hashed, returns an empty string diff --git a/thumb.config.sample b/thumb.config.sample deleted file mode 100644 index 84445c1681..0000000000 --- a/thumb.config.sample +++ /dev/null @@ -1,49 +0,0 @@ - - * RewriteEngine on - * RewriteCond %{REQUEST_FILENAME} !-f - * RewriteCond %{REQUEST_FILENAME} !-d - * RewriteRule ^([^/]+/)?[0-9a-f]/[0-9a-f][0-9a-f]/[^/]+/[^/]+$ /path/to/thumb_handler.php [L] - * - */ - -$thgThumbUrlMatch = array( - # URL name of the server (e.g. "upload.wikipedia.org"). - 'server' => 'http://localhost', - # URL fragment to the thumb/ directory - 'dirFragment' => 'MW_trunk/images/thumb', - # URL regex fragment correspond to the directory hashing of thumbnails. - # This must correspond to $wgLocalFileRepo['hashLevels']. - 'hashFragment' => '[0-9a-f]/[0-9a-f][0-9a-f]/' // 2-level directory hashing -); - -$thgThumbCurlConfig = array( - # Optionally cURL to thumb.php instead of using it directly - 'enabled' => false, - # The URL to thumb.php, accessible from the web server. - 'url' => 'http://localhost/MW_trunk/thumb.php', - # Optional proxy server to use to access thumb.php - 'proxy' => null, - # Timeout to use for cURL request to thumb.php. - # Leave it long enough to generate a ulimit timeout in ordinary - # cases, but short enough to avoid a local PHP timeout. - 'timeout' => 53 -); - -# Custom functions for overriding aspects of thumb handling -$thgThumbCallbacks = array(); diff --git a/thumb.php b/thumb.php index 0bb0b60c34..e16d95b4ce 100644 --- a/thumb.php +++ b/thumb.php @@ -13,27 +13,69 @@ if ( isset( $_SERVER['MW_COMPILED'] ) ) { require ( dirname( __FILE__ ) . '/includes/WebStart.php' ); } -$wgTrivialMimeDetection = true; //don't use fancy mime detection, just check the file extension for jpg/gif/png. 
+// Don't use fancy mime detection, just check the file extension for jpg/gif/png +$wgTrivialMimeDetection = true; -wfThumbMain(); +if ( defined( 'THUMB_HANDLER' ) ) { + // Called from thumb_handler.php via 404; extract params from the URI... + wfThumbHandle404(); +} else { + // Called directly, use $_REQUEST params + wfThumbHandleRequest(); +} wfLogProfilingData(); //-------------------------------------------------------------------------- -function wfThumbMain() { - wfProfileIn( __METHOD__ ); +/** + * Handle a thumbnail request via query parameters + * + * @return void + */ +function wfThumbHandleRequest() { + $params = get_magic_quotes_gpc() + ? array_map( 'stripslashes', $_REQUEST ) + : $_REQUEST; - $headers = array(); + wfStreamThumb( $params ); // stream the thumbnail +} - // Get input parameters - if ( defined( 'THUMB_HANDLER' ) ) { - $params = $_REQUEST; // called from thumb_handler.php +/** + * Handle a thumbnail request via thumbnail file URL + * + * @return void + */ +function wfThumbHandle404() { + # lighttpd puts the original request in REQUEST_URI, while + # sjs sets that to the 404 handler, and puts the original + # request in REDIRECT_URL. + if ( isset( $_SERVER['REDIRECT_URL'] ) ) { + # The URL is un-encoded, so put it back how it was. + $uri = str_replace( "%2F", "/", urlencode( $_SERVER['REDIRECT_URL'] ) ); } else { - $params = get_magic_quotes_gpc() - ? array_map( 'stripslashes', $_REQUEST ) - : $_REQUEST; + $uri = $_SERVER['REQUEST_URI']; } + $params = wfExtractThumbParams( $uri ); // basic wiki URL param extracting + if ( $params == null ) { + wfThumbError( 404, 'The source file for the specified thumbnail does not exist.' 
); + return; + } + + wfStreamThumb( $params ); // stream the thumbnail +} + +/** + * Stream a thumbnail specified by parameters + * + * @param $params Array + * @return void + */ +function wfStreamThumb( array $params ) { + wfProfileIn( __METHOD__ ); + + $headers = array(); // HTTP headers to send + $fileName = isset( $params['f'] ) ? $params['f'] : ''; unset( $params['f'] ); @@ -64,7 +106,7 @@ function wfThumbMain() { return; } $title = Title::makeTitleSafe( NS_FILE, $bits[1] ); - if ( is_null( $title ) ) { + if ( !$title ) { wfThumbError( 404, wfMsg( 'badtitletext' ) ); wfProfileOut( __METHOD__ ); return; @@ -169,11 +211,63 @@ function wfThumbMain() { } /** - * @param $status - * @param $msg + * Extract the required params for thumb.php from the thumbnail request URI. + * At least 'width' and 'f' should be set if the result is an array. + * + * @param $uri String Thumbnail request URI + * @return Array|null associative params array or null + */ +function wfExtractThumbParams( $uri ) { + $repo = RepoGroup::singleton()->getLocalRepo(); + + $hashDirRegex = $subdirRegex = ''; + for ( $i = 0; $i < $repo->getHashLevels(); $i++ ) { + $subdirRegex .= '[0-9a-f]'; + $hashDirRegex .= "$subdirRegex/"; + } + $zoneUrlRegex = preg_quote( $repo->getZoneUrl( 'thumb' ) ); + + $thumbUrlRegex = "!^$zoneUrlRegex(/archive|/temp|)/$hashDirRegex([^/]*)/([^/]*)$!"; + + // Check if this is a valid looking thumbnail request... + if ( preg_match( $thumbUrlRegex, $uri, $matches ) ) { + list( /* all */, $archOrTemp, $filename, $thumbname ) = $matches; + + $params = array( 'f' => $filename ); + if ( $archOrTemp == '/archive' ) { + $params['archived'] = 1; + } elseif ( $archOrTemp == '/temp' ) { + $params['temp'] = 1; + } + + // Check if the parameters can be extracted from the thumbnail name... + // @TODO: remove 'page' stuff and make ProofreadPage handle it via hook. 
+ if ( preg_match( '!^(page(\d*)-)*(\d*)px-[^/]*$!', $thumbname, $matches ) ) { + list( /* all */, $pagefull, $pagenum, $size ) = $matches; + $params['width'] = $size; + if ( $pagenum ) { + $params['page'] = $pagenum; + } + return $params; // valid thumbnail URL + // Hooks return false if they manage to *resolve* the parameters + } elseif ( !wfRunHooks( 'ExtractThumbParameters', array( $thumbname, &$params ) ) ) { + return $params; // valid thumbnail URL (via extension or config) + } + } + + return null; // not a valid thumbnail URL +} + +/** + * Output a thumbnail generation error message + * + * @param $status integer + * @param $msg string + * @return void */ function wfThumbError( $status, $msg ) { global $wgShowHostnames; + header( 'Cache-Control: no-cache' ); header( 'Content-Type: text/html; charset=utf-8' ); if ( $status == 404 ) { diff --git a/thumb_handler.php b/thumb_handler.php index 073a87929e..39a3e7c057 100644 --- a/thumb_handler.php +++ b/thumb_handler.php @@ -3,239 +3,18 @@ # Valid web server entry point define( 'THUMB_HANDLER', true ); -# Load thumb-handler configuration. Avoids WebStart.php for performance. -if ( !file_exists( dirname( __FILE__ ) . "/thumb.config.php" ) ) { - die( "thumb_handler.php is not enabled for this wiki.\n" ); -} -require( dirname( __FILE__ ) . "/thumb.config.php" ); - -# Execute thumb.php if not handled via cURL -if ( wfHandleThumb404Main() === 'wfThumbMain' ) { +if ( $_SERVER['REQUEST_URI'] === $_SERVER['SCRIPT_NAME'] ) { + # Directly requesting this script is not a use case. + # Instead of giving a thumbnail error, give a generic 404. + wfDisplay404Error(); // go away, nothing to see here +} else { + # Execute thumb.php, having set THUMB_HANDLER so that + # it knows to extract params from a thumbnail file URL. require( dirname( __FILE__ ) . 
'/thumb.php' ); } -function wfHandleThumb404Main() { - global $thgThumbCallbacks, $thgThumbCurlConfig; - - # lighttpd puts the original request in REQUEST_URI, while - # sjs sets that to the 404 handler, and puts the original - # request in REDIRECT_URL. - if ( isset( $_SERVER['REDIRECT_URL'] ) ) { - # The URL is un-encoded, so put it back how it was. - $uri = str_replace( "%2F", "/", urlencode( $_SERVER['REDIRECT_URL'] ) ); - } else { - $uri = $_SERVER['REQUEST_URI']; - } - - # Extract thumb.php params from the URI... - if ( isset( $thgThumbCallbacks['extractParams'] ) - && is_callable( $thgThumbCallbacks['extractParams'] ) ) // overridden by configuration? - { - $params = call_user_func_array( $thgThumbCallbacks['extractParams'], array( $uri ) ); - } else { - $params = wfExtractThumbParams( $uri ); // basic wiki URL param extracting - } - - # Show 404 error if this is not a valid thumb request... - if ( !is_array( $params ) ) { - header( 'X-Debug: no regex match' ); // useful for debugging - if ( isset( $thgThumbCallbacks['error404'] ) - && is_callable( $thgThumbCallbacks['error404'] ) ) // overridden by configuration? - { - call_user_func( $thgThumbCallbacks['error404'] ); - } else { - wfDisplay404Error(); // standard 404 message - } - return; - } - - # Obtain and stream the thumbnail or setup for wfThumbMain() call... - if ( $thgThumbCurlConfig['enabled'] ) { - wfStreamThumbViaCurl( $params, $uri ); - return true; // done - } else { - $_REQUEST = $params; // pass params to thumb.php - return 'wfThumbMain'; - } -} - -/** - * Extract the required params for thumb.php from the thumbnail request URI. - * At least 'width' and 'f' should be set if the result is an array. - * - * @param $uri String Thumbnail request URI - * @return Array|null associative params array or null - */ -function wfExtractThumbParams( $uri ) { - global $thgThumbUrlMatch; - - $thumbRegex = '!^(?:' . preg_quote( $thgThumbUrlMatch['server'] ) . ')?/' . 
- preg_quote( $thgThumbUrlMatch['dirFragment'] ) . '(/archive|/temp|)/' . - $thgThumbUrlMatch['hashFragment'] . '([^/]*)/(page(\d*)-)*(\d*)px-[^/]*$!'; - - if ( preg_match( $thumbRegex, $uri, $matches ) ) { - list( $all, $archOrTemp, $filename, $pagefull, $pagenum, $size ) = $matches; - $params = array( 'f' => $filename, 'width' => $size ); - if ( $pagenum ) { - $params['page'] = $pagenum; - } - if ( $archOrTemp == '/archive' ) { - $params['archived'] = 1; - } elseif ( $archOrTemp == '/temp' ) { - $params['temp'] = 1; - } - } else { - $params = null; // not a valid thumbnail URL - } - - return $params; -} - -/** - * cURL to thumb.php and stream back the resulting file or give an error message. - * - * @param $params Array Parameters to thumb.php - * @param $uri String Thumbnail request URI - * @return void - */ -function wfStreamThumbViaCurl( array $params, $uri ) { - global $thgThumbCallbacks, $thgThumbCurlConfig; - - # Check any backend caches for the thumbnail... - if ( isset( $thgThumbCallbacks['checkCache'] ) - && is_callable( $thgThumbCallbacks['checkCache'] ) ) - { - if ( call_user_func_array( $thgThumbCallbacks['checkCache'], array( $uri, $params ) ) ) { - return; // file streamed from backend thumb cache - } - } - - if ( !extension_loaded( 'curl' ) ) { - die( "cURL is not enabled for PHP on this wiki.\n" ); // sanity - } - - # Build up the request URL to use with CURL... - $reqURL = $thgThumbCurlConfig['url'] . '?'; - $first = true; - foreach ( $params as $name => $value ) { - if ( $first ) { - $first = false; - } else { - $reqURL .= '&'; - } - $reqURL .= "$name=$value"; // Note: value is already urlencoded - } - - # Set relevant HTTP headers... - $headers = array(); - $headers[] = "X-Original-URI: " . 
str_replace( "\n", '', $uri ); - if ( isset( $thgThumbCallbacks['curlHeaders'] ) - && is_callable( $thgThumbCallbacks['curlHeaders'] ) ) - { - # Add on any custom headers (like XFF) - call_user_func_array( $thgThumbCallbacks['curlHeaders'], array( &$headers ) ); - } - - # Pass through some other headers... - $passThrough = array( 'If-Modified-Since', 'Referer', 'User-Agent' ); - foreach ( $passThrough as $headerName ) { - $serverVarName = 'HTTP_' . str_replace( '-', '_', strtoupper( $headerName ) ); - if ( !empty( $_SERVER[$serverVarName] ) ) { - $headers[] = $headerName . ': ' . - str_replace( "\n", '', $_SERVER[$serverVarName] ); - } - } - - $ch = curl_init( $reqURL ); - if ( $thgThumbCurlConfig['proxy'] ) { - curl_setopt( $ch, CURLOPT_PROXY, $thgThumbCurlConfig['proxy'] ); - } - - curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers ); - curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - curl_setopt( $ch, CURLOPT_TIMEOUT, $thgThumbCurlConfig['timeout'] ); - - # Actually make the request - $text = curl_exec( $ch ); - - # Send it on to the client... 
- $errno = curl_errno( $ch ); - $contentType = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE ); - $httpCode = curl_getinfo( $ch, CURLINFO_HTTP_CODE ); - if ( $errno ) { - header( 'HTTP/1.1 500 Internal server error' ); - header( 'Cache-Control: no-cache' ); - $contentType = 'text/html'; - $text = wfCurlErrorText( $ch ); - } elseif ( $httpCode == 304 ) { // OK - header( 'HTTP/1.1 304 Not modified' ); - $contentType = ''; - $text = ''; - } elseif ( strval( $text ) == '' ) { - header( 'HTTP/1.1 500 Internal server error' ); - header( 'Cache-Control: no-cache' ); - $contentType = 'text/html'; - $text = wfCurlEmptyText( $ch ); - } elseif ( $httpCode == 404 ) { - header( 'HTTP/1.1 404 Not found' ); - header( 'Cache-Control: s-maxage=300, must-revalidate, max-age=0' ); - } elseif ( $httpCode != 200 || substr( $contentType, 0, 9 ) == 'text/html' ) { - # Error message, suppress cache - header( 'HTTP/1.1 500 Internal server error' ); - header( 'Cache-Control: no-cache' ); - } else { - # OK thumbnail; save to any backend caches... - if ( isset( $thgThumbCallbacks['fillCache'] ) - && is_callable( $thgThumbCallbacks['fillCache'] ) ) - { - call_user_func_array( $thgThumbCallbacks['fillCache'], array( $uri, $text ) ); - } - } - - if ( !$contentType ) { - header( 'Content-Type:' ); - } else { - header( "Content-Type: $contentType" ); - } - - print $text; // thumb data or error text - - curl_close( $ch ); -} - -/** - * Get error message HTML for when the cURL response is an error. - * - * @param $ch cURL handle - * @return string - */ -function wfCurlErrorText( $ch ) { - $error = htmlspecialchars( curl_error( $ch ) ); - return << -Thumbnail error -Error retrieving thumbnail from scaling server: $error - -EOT; -} - -/** - * Get error message HTML for when the cURL response is empty. 
- * - * @param $ch cURL handle - * @return string - */ -function wfCurlEmptyText( $ch ) { - return << -Thumbnail error -Error retrieving thumbnail from scaling server: empty response - -EOT; -} - /** - * Print out a generic 404 error message. + * Print out a generic 404 error message * * @return void */ -- 2.20.1