From: Neil Kandalgaonkar Date: Tue, 30 Nov 2010 02:45:10 +0000 (+0000) Subject: Dual strategy thumbnailing -- locally for development and simpler wikis, or in the... X-Git-Tag: 1.31.0-rc.0~33689 X-Git-Url: http://git.cyclocoop.org/fichier?a=commitdiff_plain;h=e4e03d1ba46518773c892287db9f82577c6ea7ec;p=lhc%2Fweb%2Fwiklou.git Dual strategy thumbnailing -- locally for development and simpler wikis, or in the cluster for setups like the WMF's When we did our test deploy, we found that we could not scale thumbnails locally in the cluster (and this was undesirable anyway). So, we moved UploadStash thumbnailing to occur on URL invocation, which is more like the usual MediaWiki model anyway. So the customized transform() moved from UploadStash to just being a special case of how SpecialUploadStash does things. Note that the Special:UploadStash url masks how the thumbnail is obtained. Unlike the regular Commons wiki, the user never sees the cluster's URL for the thumbnail. A web request, or an imageMagick call, is performed right there and then, and the results are catted out to the client. For consistency, we did not use wfStreamfile since we were in some cases streaming from contents obtained over the MWhttpRequest, not just local files. --- diff --git a/includes/specials/SpecialUploadStash.php b/includes/specials/SpecialUploadStash.php index 89866097b2..a64e0f8e3d 100644 --- a/includes/specials/SpecialUploadStash.php +++ b/includes/specials/SpecialUploadStash.php @@ -66,24 +66,24 @@ class SpecialUploadStash extends UnlistedSpecialPage { . 'use the URL of this page, with a slash and the key of the stashed file appended.'; } else { try { - $file = $this->getStashFile( $subPage ); - $size = $file->getSize(); - if ( $size === 0 ) { - $code = 500; - $message = 'File is zero length'; - } else if ( $size > self::MAX_SERVE_BYTES ) { - $code = 500; - $message = 'Cannot serve a file larger than ' . self::MAX_SERVE_BYTES . 
' bytes'; + if ( preg_match( '/^(\d+)px-(.*)$/', $subPage, $matches ) ) { + list( /* full match */, $width, $key ) = $matches; + return $this->outputThumbFromStash( $key, $width ); } else { - $this->outputFile( $file ); - return true; + return $this->outputFileFromStash( $subPage ); } } catch( UploadStashFileNotFoundException $e ) { $code = 404; $message = $e->getMessage(); + } catch( UploadStashZeroLengthFileException $e ) { + $code = 500; + $message = $e->getMessage(); } catch( UploadStashBadPathException $e ) { $code = 500; $message = $e->getMessage(); + } catch( SpecialUploadStashTooLargeException $e ) { + $code = 500; + $message = 'Cannot serve a file larger than ' . self::MAX_SERVE_BYTES . ' bytes. ' . $e->getMessage(); } catch( Exception $e ) { $code = 500; $message = $e->getMessage(); @@ -93,66 +93,164 @@ class SpecialUploadStash extends UnlistedSpecialPage { wfHttpError( $code, OutputPage::getStatusMessage( $code ), $message ); return false; } + + /** + * Get a file from stash and stream it out. Rely on parent to catch exceptions and transform them into HTTP + * @param String: $key - key of this file in the stash, which probably looks like a filename with extension. + * @throws ....? + * @return boolean + */ + private function outputFileFromStash( $key ) { + $file = $this->stash->getFile( $key ); + $this->outputLocalFile( $file ); + return true; + } /** - * Convert the incoming url portion (subpage of Special page) into a stashed file, - * if available. - * - * @param $subPage String - * @return File object - * @throws MWException, UploadStashFileNotFoundException, UploadStashBadPathException + * Get a thumbnail for file, either generated locally or remotely, and stream it out + * @param String $key: key for the file in the stash + * @param int $width: width of desired thumbnail + * @return ?? 
+ */ + private function outputThumbFromStash( $key, $width ) { + + // this global, if it exists, points to a "scaler", as you might find in the Wikimedia Foundation cluster. See outputRemoteScaledThumb() + global $wgUploadStashScalerBaseUrl; + + // let exceptions propagate to caller. + $file = $this->stash->getFile( $key ); + + // OK, we're here and no exception was thrown, + // so the original file must exist. + + // let's get ready to transform the original -- these are standard + $params = array( 'width' => $width ); + $flags = 0; + + return $wgUploadStashScalerBaseUrl ? $this->outputRemoteScaledThumb( $file, $params, $flags ) + : $this->outputLocallyScaledThumb( $file, $params, $flags ); + + } + + + /** + * Scale a file (probably with a locally installed imagemagick, or similar) and output it to STDOUT. + * @param $file: File object + * @param $params: scaling parameters ( e.g. array( width => '50' ) ); + * @param $flags: scaling flags ( see File:: constants ) + * @throws MWException + * @return boolean success */ - private function getStashFile( $subPage ) { - // due to an implementation quirk (and trying to be compatible with older method) - // the stash key doesn't have an extension - $key = $subPage; - $n = strrpos( $subPage, '.' ); - if ( $n !== false ) { - $key = $n ? substr( $subPage, 0, $n ) : $subPage; - } + private function outputLocallyScaledThumb( $file, $params, $flags ) { + wfDebug( "UploadStash: SCALING locally!\n" ); - try { - $file = $this->stash->getFile( $key ); - } catch ( UploadStashFileNotFoundException $e ) { - // if we couldn't find it, and it looks like a thumbnail, - // and it looks like we have the original, go ahead and generate it - $matches = array(); - if ( ! preg_match( '/^(\d+)px-(.*)$/', $key, $matches ) ) { - // that doesn't look like a thumbnail. re-raise exception - throw $e; - } + // n.b. this is stupid, we insist on re-transforming the file every time we are invoked. We rely + // on HTTP caching to ensure this doesn't happen. 
+ + $flags |= File::RENDER_NOW; - list( , $width, $origKey ) = $matches; + $thumbnailImage = $file->transform( $params, $flags ); + if ( !$thumbnailImage ) { + throw new MWException( 'Could not obtain thumbnail' ); + } - // do not trap exceptions, if key is in bad format, or file not found, - // let exceptions propagate to caller. - $origFile = $this->stash->getFile( $origKey ); + // we should have just generated it locally + if ( ! $thumbnailImage->getPath() ) { + throw new UploadStashFileNotFoundException( "no local path for scaled item" ); + } - // ok we're here so the original must exist. Generate the thumbnail. - // because the file is a UploadStashFile, this thumbnail will also be stashed, - // and a thumbnailFile will be created in the thumbnailImage composite object - $thumbnailImage = $origFile->transform( array( 'width' => $width ) ); - if ( !$thumbnailImage ) { - throw new MWException( 'Could not obtain thumbnail' ); - } - $file = $thumbnailImage->thumbnailFile; + // now we should construct a File, so we can get mime and other such info in a standard way + // n.b. mimetype may be different from original (ogx original -> jpeg thumb) + $thumbFile = new UnregisteredLocalFile( false, $this->stash->repo, $thumbnailImage->getPath(), false ); + if ( ! $thumbFile ) { + throw new UploadStashFileNotFoundException( "couldn't create local file object for thumbnail" ); } - return $file; + return $this->outputLocalFile( $thumbFile ); + + } + + /** + * Scale a file with a remote "scaler", as exists on the Wikimedia Foundation cluster, and output it to STDOUT. + * Note: unlike the usual thumbnail process, the web client never sees the cluster URL; we do the whole HTTP transaction to the scaler ourselves + * and cat the results out. + * Note: We rely on NFS to have propagated the file contents to the scaler. However, we do not rely on the thumbnail being created in NFS and then + * propagated back to our filesystem. 
Instead we take the results of the HTTP request. + * Note: no caching is being done here, although we are instructing the client to cache it forever. + * @param $file: File object + * @param $params: scaling parameters ( e.g. array( width => '50' ) ); + * @param $flags: scaling flags ( see File:: constants ) + * @throws MWException + * @return boolean success + */ + private function outputRemoteScaledThumb( $file, $params, $flags ) { + + // this global probably looks something like 'http://upload.wikimedia.org/wikipedia/test/thumb/temp' + // do not use trailing slash + global $wgUploadStashScalerBaseUrl; + + $scalerThumbName = $file->getParamThumbName( $file->name, $params ); + $scalerThumbUrl = $wgUploadStashScalerBaseUrl . '/' . $file->getRel() . '/' . $scalerThumbName; + // make a CURL call to the scaler to create a thumbnail + wfDebug( "UploadStash: calling " . $scalerThumbUrl . " with curl \n" ); + $req = MWHttpRequest::factory( $scalerThumbUrl ); + $status = $req->execute(); + if ( ! $status->isOK() ) { + throw new MWException( "Fetching thumbnail failed" ); + } + $contentType = $req->getResponseHeader( "content-type" ); + if ( ! $contentType ) { + throw new MWException( "Missing content-type header" ); + } + return $this->outputContents( $req->getContent(), $contentType ); } /** * Output HTTP response for file - * Side effects, obviously, of echoing lots of stuff to stdout. + * Side effect: writes HTTP response to STDOUT. + * XXX could use wfStreamfile (in includes/Streamfile.php), but for consistency with outputContents() doing it this way. + * XXX is mimeType really enough, or do we need encoding for full Content-Type header? * - * @param $file File object + * @param $file File object with a local path (e.g. UnregisteredLocalFile, LocalFile. Oddly these don't share an ancestor!) */ - private function outputFile( $file ) { - header( 'Content-Type: ' . 
$file->getMimeType(), true ); + private function outputLocalFile( $file ) { + if ( $file->getSize() > self::MAX_SERVE_BYTES ) { + throw new SpecialUploadStashTooLargeException(); + } + self::outputHeaders( $file->getMimeType(), $file->getSize() ); + readfile( $file->getPath() ); + } + + /** + * Output HTTP response of raw content + * Side effect: writes HTTP response to STDOUT. + * @param String $content: content + * @param String $contentType: string suitable for content-type header + */ + private function outputContents( $content, $contentType ) { + $size = strlen( $content ); + if ( $size > self::MAX_SERVE_BYTES ) { + throw new SpecialUploadStashTooLargeException(); + } + self::outputHeaders( $contentType, $size ); + print $content; + } + + /** + * Output headers for streaming + * XXX unsure about encoding as binary; if we received from HTTP perhaps we should use that encoding, concatted with semicolon to mimeType as it usually is. + * Side effect: preps PHP to write headers to STDOUT. + * @param String $contentType : string suitable for content-type header + * @param String $size: length in bytes + */ + private static function outputHeaders( $contentType, $size ) { + header( "Content-Type: $contentType", true ); header( 'Content-Transfer-Encoding: binary', true ); header( 'Expires: Sun, 17-Jan-2038 19:14:07 GMT', true ); - header( 'Content-Length: ' . $file->getSize(), true ); - readfile( $file->getPath() ); + header( "Content-Length: $size", true ); } + } + +class SpecialUploadStashTooLargeException extends MWException {}; diff --git a/includes/upload/UploadStash.php b/includes/upload/UploadStash.php index 94496d3598..2ee1ad281d 100644 --- a/includes/upload/UploadStash.php +++ b/includes/upload/UploadStash.php @@ -12,14 +12,13 @@ * */ class UploadStash { - // Format of the key for files -- has to be suitable as a filename itself in some cases. 
- // This should encompass a sha1 content hash in hex (new style), or an integer (old style), - // and also thumbnails with prepended strings like "120px-". - // The file extension should not be part of the key. - const KEY_FORMAT_REGEX = '/^[\w-]+$/'; + + // Format of the key for files -- has to be suitable as a filename itself (e.g. ab12cd34ef.jpg) + const KEY_FORMAT_REGEX = '/^[\w-]+\.\w+$/'; // repository that this uses to store temp files - protected $repo; + // public because we sometimes need to get a LocalFile within the same repo. + public $repo; // array of initialized objects obtained from session (lazily initialized upon getFile()) private $files = array(); @@ -82,7 +81,9 @@ class UploadStash { unset( $data['mTempPath'] ); $file = new UploadStashFile( $this, $this->repo, $path, $key, $data ); - + if ( $file->getSize() === 0 ) { + throw new UploadStashZeroLengthFileException( "File is zero length" ); + } $this->files[$key] = $file; } @@ -108,18 +109,31 @@ class UploadStash { } $fileProps = File::getPropsFromPath( $path ); + // we will be initializing from some tmpnam files that don't have extensions. + // most of MediaWiki assumes all uploaded files have good extensions. So, we fix this. + $extension = self::getExtensionForPath( $path ); + if ( ! preg_match( "/\\.\\Q$extension\\E$/", $path ) ) { + $pathWithGoodExtension = "$path.$extension"; + if ( ! rename( $path, $pathWithGoodExtension ) ) { + throw new UploadStashFileException( "couldn't rename $path to have a better extension at $pathWithGoodExtension" ); + } + $path = $pathWithGoodExtension; + } + // If no key was supplied, use content hash. Also has the nice property of collapsing multiple identical files // uploaded this session, which could happen if uploads had failed. if ( is_null( $key ) ) { - $key = $fileProps['sha1']; + $key = $fileProps['sha1'] . "." . $extension; } if ( ! 
preg_match( self::KEY_FORMAT_REGEX, $key ) ) { throw new UploadStashBadPathException( "key '$key' is not in a proper format" ); } - // if not already in a temporary area, put it there + + // if not already in a temporary area, put it there $status = $this->repo->storeTemp( basename( $path ), $path ); + if( ! $status->isOK() ) { // It is a convention in MediaWiki to only return one error per API exception, even if multiple errors // are available. We use reset() to pick the "first" thing that was wrong, preferring errors to warnings. @@ -136,7 +150,7 @@ class UploadStash { throw new UploadStashFileException( "error storing file in '$path': " . implode( '; ', $error ) ); } $stashPath = $status->value; - + // required info we always store. Must trump any other application info in $data // 'mTempPath', 'mFileSize', and 'mFileProps' are arbitrary names // chosen for compatibility with UploadBase's way of doing this. @@ -149,11 +163,42 @@ class UploadStash { // now, merge required info and extra data into the session. (The extra data changes from application to application. // UploadWizard wants different things than say FirefoggChunkedUpload.) + wfDebug( __METHOD__ . " storing under $key\n" ); $_SESSION[UploadBase::SESSION_KEYNAME][$key] = array_merge( $data, $requiredData ); return $this->getFile( $key ); } + /** + * Find or guess extension -- ensuring that our extension matches our mime type. + * Since these files are constructed from php tempnames they may not start off + * with an extension. + * XXX this is somewhat redundant with the checks that ApiUpload.php does with incoming + * uploads versus the desired filename. Maybe we can get that passed to us... + */ + public static function getExtensionForPath( $path ) { + // Does this have an extension? Only look at the file name, so a dot in a directory name is not mistaken for one. + $n = strrpos( basename( $path ), '.' ); + $extension = null; + if ( $n !== false ) { + $extension = $n ? substr( basename( $path ), $n + 1 ) : ''; + } else { + // If not, assume that it should be related to the mime type of the original file. 
+ $magic = MimeMagic::singleton(); + $mimeType = $magic->guessMimeType( $path ); + $extensions = explode( ' ', MimeMagic::singleton()->getExtensionsForType( $mimeType ) ); + if ( count( $extensions ) ) { + $extension = $extensions[0]; + } + } + + if ( is_null( $extension ) ) { + throw new UploadStashFileException( "extension is null" ); + } + + return File::normalizeExtension( $extension ); + } + } class UploadStashFile extends UnregisteredLocalFile { @@ -198,13 +243,11 @@ class UploadStashFile extends UnregisteredLocalFile { throw new UploadStashFileNotFoundException( 'cannot find path, or not a plain file' ); } + + parent::__construct( false, $repo, $path, false ); - // we will be initializing from some tmpnam files that don't have extensions. - // most of MediaWiki assumes all uploaded files have good extensions. So, we fix this. $this->name = basename( $this->path ); - $this->setExtension(); - } /** @@ -219,40 +262,6 @@ class UploadStashFile extends UnregisteredLocalFile { return $this->getUrl(); } - /** - * Find or guess extension -- ensuring that our extension matches our mime type. - * Since these files are constructed from php tempnames they may not start off - * with an extension. - * This does not override getExtension() because things like getMimeType() already call getExtension(), - * and that results in infinite recursion. So, we preemptively *set* the extension so getExtension() can find it. - * For obvious reasons this should be called as early as possible, as part of initialization - */ - public function setExtension() { - // Does this have an extension? - $n = strrpos( $this->path, '.' ); - $extension = null; - if ( $n !== false ) { - $extension = $n ? substr( $this->path, $n + 1 ) : ''; - } else { - // If not, assume that it should be related to the mime type of the original file. - // - // This entire thing is backwards -- we *should* just create an extension based on - // the mime type of the transformed file, *after* transformation. 
But File.php demands - // to know the name of the transformed file before creating it. - $mimeType = $this->getMimeType(); - $extensions = explode( ' ', MimeMagic::singleton()->getExtensionsForType( $mimeType ) ); - if ( count( $extensions ) ) { - $extension = $extensions[0]; - } - } - - if ( is_null( $extension ) ) { - throw new UploadStashFileException( "extension is null" ); - } - - $this->extension = parent::normalizeExtension( $extension ); - } - /** * Get the path for the thumbnail (actually any transformation of this file) * The actual argument is the result of thumbName although we seem to have @@ -276,12 +285,27 @@ class UploadStashFile extends UnregisteredLocalFile { * @return String: base name for URL, like '120px-12345.jpg', or null if there is no handler */ function thumbName( $params ) { + return $this->getParamThumbName( $this->getUrlName(), $params ); + } + + + /** + * Given the name of the original, i.e. Foo.jpg, and scaling parameters, returns filename with appropriate extension + * This is abstracted from getThumbName because we also use it to calculate the thumbname the file should have on + * remote image scalers + * + * @param String $urlName: A filename, like MyMovie.ogx + * @param Array $parameters: scaling parameters, like array( 'width' => '120' ); + * @return String|null parameterized thumb name, like 120px-MyMovie.ogx.jpg, or null if no handler found + */ + function getParamThumbName( $urlName, $params ) { + wfDebug( __METHOD__ . " getting for $urlName, " . print_r( $params, 1 ) . " \n" ); if ( !$this->getHandler() ) { return null; } $extension = $this->getExtension(); list( $thumbExt, $thumbMime ) = $this->handler->getThumbType( $extension, $this->getMimeType(), $params ); - $thumbName = $this->getHandler()->makeParamString( $params ) . '-' . $this->getUrlName(); + $thumbName = $this->getHandler()->makeParamString( $params ) . '-' . 
$urlName; if ( $thumbExt != $extension ) { $thumbName .= ".$thumbExt"; } @@ -308,6 +332,7 @@ class UploadStashFile extends UnregisteredLocalFile { * @return String: URL to access thumbnail, or URL with partial path */ public function getThumbUrl( $thumbName = false ) { + wfDebug( __METHOD__ . " getting for $thumbName \n" ); return $this->getSpecialUrl( $thumbName ); } @@ -319,7 +344,7 @@ class UploadStashFile extends UnregisteredLocalFile { */ public function getUrlName() { if ( ! $this->urlName ) { - $this->urlName = $this->sessionKey . '.' . $this->getExtension(); + $this->urlName = $this->sessionKey; } return $this->urlName; } @@ -357,43 +382,6 @@ class UploadStashFile extends UnregisteredLocalFile { return $this->sessionKey; } - /** - * Typically, transform() returns a ThumbnailImage, which you can think of as being the exact - * equivalent of an HTML thumbnail on Wikipedia. So its URL is the full-size file, not the thumbnail's URL. - * - * Here we override transform() to stash the thumbnail file, and then - * provide a way to get at the stashed thumbnail file to extract properties such as its URL - * - * @param $params Array: parameters suitable for File::transform() - * @param $flags Integer: bitmask, flags suitable for File::transform() - * @return ThumbnailImage: with additional File thumbnailFile property - */ - public function transform( $params, $flags = 0 ) { - - // force it to get a thumbnail right away - $flags |= self::RENDER_NOW; - - // returns a ThumbnailImage object containing the url and path. Note. NOT A FILE OBJECT. - $thumb = parent::transform( $params, $flags ); - wfDebug( "UploadStash: generating thumbnail\n" ); - wfDebug( print_r( $thumb, 1 ) ); - $key = $this->thumbName($params); - - // remove extension, so it's stored in the session under '120px-123456' - // this makes it uniform with the other session key for the original, '123456' - $n = strrpos( $key, '.' 
); - if ( $n !== false ) { - $key = substr( $key, 0, $n ); - } - - // stash the thumbnail File, and provide our caller with a way to get at its properties - $stashedThumbFile = $this->sessionStash->stashFile( $thumb->getPath(), array(), $key ); - $thumb->thumbnailFile = $stashedThumbFile; - - return $thumb; - - } - /** * Remove the associated temporary file * @return Status: success @@ -409,4 +397,5 @@ class UploadStashFileNotFoundException extends MWException {}; class UploadStashBadPathException extends MWException {}; class UploadStashBadVersionException extends MWException {}; class UploadStashFileException extends MWException {}; +class UploadStashZeroLengthFileException extends MWException {};