From 9b35539f7e838985c60cc45237ed30db35bdfa1c Mon Sep 17 00:00:00 2001 From: Bryan Tong Minh Date: Sun, 6 Feb 2011 22:47:10 +0000 Subject: [PATCH] Cleanup MimeMagic: Add public identifier to functions; kill error operator; Cleanup function documentation; Manual stylize --- includes/MimeMagic.php | 396 +++++++++++++++++++++++------------------ 1 file changed, 225 insertions(+), 171 deletions(-) diff --git a/includes/MimeMagic.php b/includes/MimeMagic.php index 62c11c8788..159a658d2b 100644 --- a/includes/MimeMagic.php +++ b/includes/MimeMagic.php @@ -117,11 +117,11 @@ unknown/unknown application/octet-stream application/x-empty [UNKNOWN] END_STRING ); -#note: because this file is possibly included by a function, -#we need to access the global scope explicitely! +// Note: because this file is possibly included by a function, +// we need to access the global scope explicitely! global $wgLoadFileinfoExtension; -if ($wgLoadFileinfoExtension) { +if ( $wgLoadFileinfoExtension ) { wfDl( 'fileinfo' ); } @@ -138,19 +138,19 @@ class MimeMagic { * Mapping of media types to arrays of mime types. * This is used by findMediaType and getMediaType, respectively */ - var $mMediaTypes= null; + var $mMediaTypes = null; /** Map of mime type aliases */ - var $mMimeTypeAliases= null; + var $mMimeTypeAliases = null; /** map of mime types to file extensions (as a space seprarated list) */ - var $mMimeToExt= null; + var $mMimeToExt = null; /** map of file extensions types to mime types (as a space seprarated list) */ - var $mExtToMime= null; + var $mExtToMime = null; /** IEContentAnalyzer instance */ @@ -311,46 +311,68 @@ class MimeMagic { * Get an instance of this class * @return MimeMagic */ - static function &singleton() { + public static function &singleton() { if ( !isset( self::$instance ) ) { self::$instance = new MimeMagic; } return self::$instance; } - /** returns a list of file extensions for a given mime type - * as a space separated string. - */ - function getExtensionsForType( $mime ) { + /** + * Returns a list of file extensions for a given mime type as a space + * separated string or null if the mime type was unrecognized. Resolves + * mime type aliases. + * + * @param $mime string + * @return string|null + */ + public function getExtensionsForType( $mime ) { $mime = strtolower( $mime ); - $r = @$this->mMimeToExt[$mime]; + // Check the mime-to-ext map + if ( isset( $this->mMimeToExt[$mime] ) ) { + return $this->mMimeToExt[$mime]; + } - if ( @!$r && isset( $this->mMimeTypeAliases[$mime] ) ) { + // Resolve the mime type to the canonical type + if ( isset( $this->mMimeTypeAliases[$mime] ) ) { $mime = $this->mMimeTypeAliases[$mime]; - $r = @$this->mMimeToExt[$mime]; + if ( isset( $this->mMimeToExt[$mime] ) ) { + return $this->mMimeToExt[$mime]; + } } - return $r; + return null; } - /** returns a list of mime types for a given file extension - * as a space separated string. - */ - function getTypesForExtension( $ext ) { + /** + * Returns a list of mime types for a given file extension as a space + * separated string or null if the extension was unrecognized. + * + * @param $ext string + * @return string|null + */ + public function getTypesForExtension( $ext ) { $ext = strtolower( $ext ); $r = isset( $this->mExtToMime[$ext] ) ? $this->mExtToMime[$ext] : null; return $r; } - /** returns a single mime type for a given file extension. - * This is always the first type from the list returned by getTypesForExtension($ext). - */ - function guessTypesForExtension( $ext ) { + /** + * Returns a single mime type for a given file extension or null if unknown. + * This is always the first type from the list returned by getTypesForExtension($ext). + * + * @param $ext string + * @return string|null + */ + public function guessTypesForExtension( $ext ) { $m = $this->getTypesForExtension( $ext ); - if ( is_null( $m ) ) return null; + if ( is_null( $m ) ) { + return null; + } + // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient $m = trim( $m ); $m = preg_replace( '/\s.*$/', '', $m ); @@ -358,32 +380,34 @@ class MimeMagic { } - /** tests if the extension matches the given mime type. - * returns true if a match was found, NULL if the mime type is unknown, - * and false if the mime type is known but no matches where found. - */ - function isMatchingExtension( $extension, $mime ) { + /** + * Tests if the extension matches the given mime type. Returns true if a + * match was found, null if the mime type is unknown, and false if the + * mime type is known but no matches where found. + * + * @param $extension string + * @param $mime string + * @return bool|null + */ + public function isMatchingExtension( $extension, $mime ) { $ext = $this->getExtensionsForType( $mime ); if ( !$ext ) { - return null; //unknown + return null; // Unknown mime type } $ext = explode( ' ', $ext ); $extension = strtolower( $extension ); - if ( in_array( $extension, $ext ) ) { - return true; - } - - return false; + return in_array( $extension, $ext ); } - /** returns true if the mime type is known to represent - * an image format supported by the PHP GD library. - */ - function isPHPImageType( $mime ) { - #as defined by imagegetsize and image_type_to_mime + /** + * Returns true if the mime type is known to represent an image format + * supported by the PHP GD library. + */ + public function isPHPImageType( $mime ) { + // As defined by imagegetsize and image_type_to_mime static $types = array( 'image/gif', 'image/jpeg', 'image/png', 'image/x-bmp', 'image/xbm', 'image/tiff', @@ -427,44 +451,45 @@ class MimeMagic { return in_array( strtolower( $extension ), $types ); } - /** improves a mime type using the file extension. Some file formats are very generic, - * so their mime type is not very meaningful. A more useful mime type can be derived - * by looking at the file extension. Typically, this method would be called on the - * result of guessMimeType(). - * - * Currently, this method does the following: - * - * If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false, - * return the result of guessTypesForExtension($ext). - * - * If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime ) - * gives true, return the result of guessTypesForExtension($ext). - * - * @param $mime String: the mime type, typically guessed from a file's content. - * @param $ext String: the file extension, as taken from the file name - * - * @return string the mime type - */ - function improveTypeFromExtension( $mime, $ext ) { - if ( $mime === "unknown/unknown" ) { - if( $this->isRecognizableExtension( $ext ) ) { - wfDebug( __METHOD__. ": refusing to guess mime type for .$ext file, " . - "we should have recognized it\n" ); + /** + * Improves a mime type using the file extension. Some file formats are very generic, + * so their mime type is not very meaningful. A more useful mime type can be derived + * by looking at the file extension. Typically, this method would be called on the + * result of guessMimeType(). + * + * Currently, this method does the following: + * + * If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false, + * return the result of guessTypesForExtension($ext). + * + * If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime ) + * gives true, return the result of guessTypesForExtension($ext). + * + * @param $mime String: the mime type, typically guessed from a file's content. + * @param $ext String: the file extension, as taken from the file name + * + * @return string the mime type + */ + public function improveTypeFromExtension( $mime, $ext ) { + if ( $mime === 'unknown/unknown' ) { + if ( $this->isRecognizableExtension( $ext ) ) { + wfDebug( __METHOD__. ': refusing to guess mime type for .' . + "$ext file, we should have recognized it\n" ); } else { - /* Not something we can detect, so simply - * trust the file extension */ + // Not something we can detect, so simply + // trust the file extension $mime = $this->guessTypesForExtension( $ext ); } } - else if ( $mime === "application/x-opc+zip" ) { + elseif ( $mime === 'application/x-opc+zip' ) { if ( $this->isMatchingExtension( $ext, $mime ) ) { - /* A known file extension for an OPC file, - * find the proper mime type for that file extension */ + // A known file extension for an OPC file, + // find the proper mime type for that file extension $mime = $this->guessTypesForExtension( $ext ); } else { wfDebug( __METHOD__. ": refusing to guess better type for $mime file, " . ".$ext is not a known OPC extension.\n" ); - $mime = "application/zip"; + $mime = 'application/zip'; } } @@ -476,20 +501,22 @@ class MimeMagic { return $mime; } - /** mime type detection. This uses detectMimeType to detect the mime type of the file, - * but applies additional checks to determine some well known file formats that may be missed - * or misinterpreter by the default mime detection (namely XML based formats like XHTML or SVG, - * as well as ZIP based formats like OPC/ODF files). - * - * @param $file String: the file to check - * @param $ext Mixed: the file extension, or true (default) to extract it from the filename. - * Set it to false to ignore the extension. DEPRECATED! Set to false, use - * improveTypeFromExtension($mime, $ext) later to improve mime type. - * - * @return string the mime type of $file - */ - function guessMimeType( $file, $ext = true ) { - if( $ext ) { # TODO: make $ext default to false. Or better, remove it. + /** + * Mime type detection. This uses detectMimeType to detect the mime type + * of the file, but applies additional checks to determine some well known + * file formats that may be missed or misinterpreter by the default mime + * detection (namely XML based formats like XHTML or SVG, as well as ZIP + * based formats like OPC/ODF files). + * + * @param $file String: the file to check + * @param $ext Mixed: the file extension, or true (default) to extract it from the filename. + * Set it to false to ignore the extension. DEPRECATED! Set to false, use + * improveTypeFromExtension($mime, $ext) later to improve mime type. + * + * @return string the mime type of $file + */ + public function guessMimeType( $file, $ext = true ) { + if ( $ext ) { // TODO: make $ext default to false. Or better, remove it. wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. " . "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); } @@ -509,12 +536,21 @@ class MimeMagic { return $mime; } - private function doGuessMimeType( $file, $ext ) { # TODO: remove $ext param + /** + * Guess the mime type from the file contents. + * + * @param string $file + * @param mixed $ext + */ + private function doGuessMimeType( $file, $ext ) { // TODO: remove $ext param // Read a chunk of the file wfSuppressWarnings(); - $f = fopen( $file, "rt" ); + $f = fopen( $file, 'rt' ); // FIXME: Shouldn't this be rb? wfRestoreWarnings(); - if( !$f ) return "unknown/unknown"; + + if( !$f ) { + return 'unknown/unknown'; + } $head = fread( $f, 1024 ); fseek( $f, -65558, SEEK_END ); $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR @@ -541,23 +577,23 @@ class MimeMagic { "\x7fELF" => 'application/octet-stream', // ELF binary ); - foreach( $headers as $magic => $candidate ) { - if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) { + foreach ( $headers as $magic => $candidate ) { + if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) { wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" ); return $candidate; } } /* Look for WebM and Matroska files */ - if( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) { + if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) { $doctype = strpos( $head, "\x42\x82" ); - if( $doctype ) { + if ( $doctype ) { // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers) $data = substr($head, $doctype+3, 8); - if( strncmp( $data, "matroska", 8 ) == 0 ) { + if ( strncmp( $data, "matroska", 8 ) == 0 ) { wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" ); return "video/x-matroska"; - } else if ( strncmp( $data, "webm", 4 ) == 0 ) { + } elseif ( strncmp( $data, "webm", 4 ) == 0 ) { wfDebug( __METHOD__ . ": recognized file as video/webm\n" ); return "video/webm"; } @@ -567,7 +603,7 @@ class MimeMagic { } /* Look for WebP */ - if( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 8), "WEBPVP8 ", 8 ) == 0 ) { + if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 8), "WEBPVP8 ", 8 ) == 0 ) { wfDebug( __METHOD__ . ": recognized file as image/webp\n" ); return "image/webp"; } @@ -584,7 +620,7 @@ class MimeMagic { * 16583). The heuristic has been cut down to exclude three-character * strings like "wellFormed ) { + if ( $xml->wellFormed ) { global $wgXMLMimeTypes; - if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) { + if ( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) { return $wgXMLMimeTypes[$xml->getRootElement()]; } else { return 'application/xml'; @@ -719,18 +755,18 @@ class MimeMagic { $openxmlRegex = "/^\[Content_Types\].xml/"; - if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) { + if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) { $mime = $matches[1]; wfDebug( __METHOD__.": detected $mime from ZIP archive\n" ); - } elseif( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) { + } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) { $mime = "application/x-opc+zip"; # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere - if( $ext !== true && $ext !== false ) { + if ( $ext !== true && $ext !== false ) { /** This is the mode used by getPropsFromPath * These mime's are stored in the database, where we don't really want * x-opc+zip, because we use it only for internal purposes */ - if( $this->isMatchingExtension( $ext, $mime) ) { + if ( $this->isMatchingExtension( $ext, $mime) ) { /* A known file extension for an OPC file, * find the proper mime type for that file extension */ $mime = $this->guessTypesForExtension( $ext ); @@ -739,10 +775,10 @@ class MimeMagic { } } wfDebug( __METHOD__.": detected an Open Packaging Conventions archive: $mime\n" ); - } else if( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" && + } elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" && ($headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false && preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) { - if( substr( $header, 512, 4) == "\xEC\xA5\xC1\x00" ) { + if ( substr( $header, 512, 4) == "\xEC\xA5\xC1\x00" ) { $mime = "application/msword"; } switch( substr( $header, 512, 6) ) { @@ -774,31 +810,34 @@ class MimeMagic { return $mime; } - /** Internal mime type detection, please use guessMimeType() for application code instead. - * Detection is done using an external program, if $wgMimeDetectorCommand is set. - * Otherwise, the fileinfo extension and mime_content_type are tried (in this order), if they are available. - * If the dections fails and $ext is not false, the mime type is guessed from the file extension, using - * guessTypesForExtension. - * If the mime type is still unknown, getimagesize is used to detect the mime type if the file is an image. - * If no mime type can be determined, this function returns "unknown/unknown". - * - * @param $file String: the file to check - * @param $ext Mixed: the file extension, or true (default) to extract it from the filename. - * Set it to false to ignore the extension. DEPRECATED! Set to false, use - * improveTypeFromExtension($mime, $ext) later to improve mime type. - * - * @return string the mime type of $file - * @access private - */ + /** + * Internal mime type detection. Detection is done using an external + * program, if $wgMimeDetectorCommand is set. Otherwise, the fileinfo + * extension and mime_content_type are tried (in this order), if they + * are available. If the dections fails and $ext is not false, the mime + * type is guessed from the file extension, using guessTypesForExtension. + * + * If the mime type is still unknown, getimagesize is used to detect the + * mime type if the file is an image. If no mime type can be determined, + * this function returns 'unknown/unknown'. + * + * @param $file String: the file to check + * @param $ext Mixed: the file extension, or true (default) to extract it from the filename. + * Set it to false to ignore the extension. DEPRECATED! Set to false, use + * improveTypeFromExtension($mime, $ext) later to improve mime type. + * + * @return string the mime type of $file + */ private function detectMimeType( $file, $ext = true ) { global $wgMimeDetectorCommand; - if( $ext ) { # TODO: make $ext default to false. Or better, remove it. + if ( $ext ) { # TODO: make $ext default to false. Or better, remove it. wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); } $m = null; if ( $wgMimeDetectorCommand ) { + // FIXME: Use wfShellExec $fn = wfEscapeShellArg( $file ); $m = `$wgMimeDetectorCommand $fn`; } elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) { @@ -813,9 +852,9 @@ class MimeMagic { # If you may need to load the fileinfo extension at runtime, set # $wgLoadFileinfoExtension in LocalSettings.php - $mime_magic_resource = finfo_open(FILEINFO_MIME); /* return mime type ala mimetype extension */ + $mime_magic_resource = finfo_open( FILEINFO_MIME ); /* return mime type ala mimetype extension */ - if ($mime_magic_resource) { + if ( $mime_magic_resource ) { $m = finfo_file( $mime_magic_resource, $file ); finfo_close( $mime_magic_resource ); } else { @@ -851,7 +890,7 @@ class MimeMagic { } } - # if desired, look at extension as a fallback. + // If desired, look at extension as a fallback. if ( $ext === true ) { $i = strrpos( $file, '.' ); $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' ); @@ -868,36 +907,40 @@ class MimeMagic { } } - #unknown type - wfDebug( __METHOD__.": failed to guess mime type for $file!\n" ); - return "unknown/unknown"; + // Unknown type + wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" ); + return 'unknown/unknown'; } /** - * Determine the media type code for a file, using its mime type, name and possibly - * its contents. - * - * This function relies on the findMediaType(), mapping extensions and mime - * types to media types. - * - * @todo analyse file if need be - * @todo look at multiple extension, separately and together. - * - * @param $path String: full path to the image file, in case we have to look at the contents - * (if null, only the mime type is used to determine the media type code). - * @param $mime String: mime type. If null it will be guessed using guessMimeType. - * - * @return (int?string?) a value to be used with the MEDIATYPE_xxx constants. - */ + * Determine the media type code for a file, using its mime type, name and + * possibly its contents. + * + * This function relies on the findMediaType(), mapping extensions and mime + * types to media types. + * + * @todo analyse file if need be + * @todo look at multiple extension, separately and together. + * + * @param $path String: full path to the image file, in case we have to look at the contents + * (if null, only the mime type is used to determine the media type code). + * @param $mime String: mime type. If null it will be guessed using guessMimeType. + * + * @return (int?string?) a value to be used with the MEDIATYPE_xxx constants. + */ function getMediaType( $path = null, $mime = null ) { - if( !$mime && !$path ) return MEDIATYPE_UNKNOWN; + if( !$mime && !$path ) { + return MEDIATYPE_UNKNOWN; + } - # If mime type is unknown, guess it - if( !$mime ) $mime = $this->guessMimeType( $path, false ); + // If mime type is unknown, guess it + if( !$mime ) { + $mime = $this->guessMimeType( $path, false ); + } - # Special code for ogg - detect if it's video (theora), - # else label it as sound. - if( $mime == "application/ogg" && file_exists( $path ) ) { + // Special code for ogg - detect if it's video (theora), + // else label it as sound. + if ( $mime == 'application/ogg' && file_exists( $path ) ) { // Read a chunk of the file $f = fopen( $path, "rt" ); @@ -907,7 +950,7 @@ class MimeMagic { $head = strtolower( $head ); - # This is an UGLY HACK, file should be parsed correctly + // This is an UGLY HACK, file should be parsed correctly if ( strpos( $head, 'theora' ) !== false ) return MEDIATYPE_VIDEO; elseif ( strpos( $head, 'vorbis' ) !== false ) return MEDIATYPE_AUDIO; elseif ( strpos( $head, 'flac' ) !== false ) return MEDIATYPE_AUDIO; @@ -915,58 +958,69 @@ class MimeMagic { else return MEDIATYPE_MULTIMEDIA; } - # check for entry for full mime type + // Check for entry for full mime type if( $mime ) { $type = $this->findMediaType( $mime ); - if( $type !== MEDIATYPE_UNKNOWN ) return $type; + if ( $type !== MEDIATYPE_UNKNOWN ) { + return $type; + } } - # Check for entry for file extension + // Check for entry for file extension if ( $path ) { $i = strrpos( $path, '.' ); $e = strtolower( $i ? substr( $path, $i + 1 ) : '' ); - # TODO: look at multi-extension if this fails, parse from full path - + // TODO: look at multi-extension if this fails, parse from full path $type = $this->findMediaType( '.' . $e ); - if ( $type !== MEDIATYPE_UNKNOWN ) return $type; + if ( $type !== MEDIATYPE_UNKNOWN ) { + return $type; + } } - # Check major mime type - if( $mime ) { + // Check major mime type + if ( $mime ) { $i = strpos( $mime, '/' ); - if( $i !== false ) { + if ( $i !== false ) { $major = substr( $mime, 0, $i ); $type = $this->findMediaType( $major ); - if( $type !== MEDIATYPE_UNKNOWN ) return $type; + if ( $type !== MEDIATYPE_UNKNOWN ) { + return $type; + } } } - if( !$type ) $type = MEDIATYPE_UNKNOWN; + if( !$type ) { + $type = MEDIATYPE_UNKNOWN; + } return $type; } - /** returns a media code matching the given mime type or file extension. - * File extensions are represented by a string starting with a dot (.) to - * distinguish them from mime types. - * - * This funktion relies on the mapping defined by $this->mMediaTypes - * @access private - */ + /** + * Returns a media code matching the given mime type or file extension. + * File extensions are represented by a string starting with a dot (.) to + * distinguish them from mime types. + * + * This funktion relies on the mapping defined by $this->mMediaTypes + * @access private + */ function findMediaType( $extMime ) { - if ( strpos( $extMime, '.' ) === 0 ) { #if it's an extension, look up the mime types + if ( strpos( $extMime, '.' ) === 0 ) { + // If it's an extension, look up the mime types $m = $this->getTypesForExtension( substr( $extMime, 1 ) ); - if ( !$m ) return MEDIATYPE_UNKNOWN; + if ( !$m ) { + return MEDIATYPE_UNKNOWN; + } $m = explode( ' ', $m ); } else { - # Normalize mime type + // Normalize mime type if ( isset( $this->mMimeTypeAliases[$extMime] ) ) { $extMime = $this->mMimeTypeAliases[$extMime]; } - $m = array($extMime); + $m = array( $extMime ); } foreach ( $m as $mime ) { -- 2.20.1