* The latter resides in /libs with related files.
* Expose MimeAnalyzer as a service.
* Keep MimeMagic::singleton() as a b/c alias.
* MimeMagic::applyDefaultParameters() will bootstrap the service
with all of the old config, extension hook handler, and
detector command shell-out behavior.
Change-Id: Ie2695a52e7a3bcfda9f7fa83659a9ff31b372bc3
'IContextSource' => __DIR__ . '/includes/context/IContextSource.php',
'IDBAccessObject' => __DIR__ . '/includes/dao/IDBAccessObject.php',
'IDatabase' => __DIR__ . '/includes/libs/rdbms/database/IDatabase.php',
- 'IEContentAnalyzer' => __DIR__ . '/includes/libs/IEContentAnalyzer.php',
+ 'IEContentAnalyzer' => __DIR__ . '/includes/libs/mime/IEContentAnalyzer.php',
'IEUrlExtension' => __DIR__ . '/includes/libs/IEUrlExtension.php',
'IExpiringStore' => __DIR__ . '/includes/libs/objectcache/IExpiringStore.php',
'IJobSpecification' => __DIR__ . '/includes/jobqueue/JobSpecification.php',
'MessageSpecifier' => __DIR__ . '/includes/libs/MessageSpecifier.php',
'MigrateFileRepoLayout' => __DIR__ . '/maintenance/migrateFileRepoLayout.php',
'MigrateUserGroup' => __DIR__ . '/maintenance/migrateUserGroup.php',
+ 'MimeAnalyzer' => __DIR__ . '/includes/libs/mime/MimeAnalyzer.php',
'MimeMagic' => __DIR__ . '/includes/MimeMagic.php',
'MinifyScript' => __DIR__ . '/maintenance/minify.php',
'MostcategoriesPage' => __DIR__ . '/includes/specials/SpecialMostcategories.php',
'XmlDumpWriter' => __DIR__ . '/includes/export/XmlDumpWriter.php',
'XmlJsCode' => __DIR__ . '/includes/Xml.php',
'XmlSelect' => __DIR__ . '/includes/XmlSelect.php',
- 'XmlTypeCheck' => __DIR__ . '/includes/libs/XmlTypeCheck.php',
+ 'XmlTypeCheck' => __DIR__ . '/includes/libs/mime/XmlTypeCheck.php',
'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php',
'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
define( 'CACHE_ACCEL', 3 ); // APC, XCache or WinCache
/**@}*/
-/**@{
- * Media types.
- * This defines constants for the value returned by File::getMediaType()
- */
-// unknown format
-define( 'MEDIATYPE_UNKNOWN', 'UNKNOWN' );
-// some bitmap image or image source (like psd, etc). Can't scale up.
-define( 'MEDIATYPE_BITMAP', 'BITMAP' );
-// some vector drawing (SVG, WMF, PS, ...) or image source (oo-draw, etc). Can scale up.
-define( 'MEDIATYPE_DRAWING', 'DRAWING' );
-// simple audio file (ogg, mp3, wav, midi, whatever)
-define( 'MEDIATYPE_AUDIO', 'AUDIO' );
-// simple video file (ogg, mpg, etc;
-// no not include formats here that may contain executable sections or scripts!)
-define( 'MEDIATYPE_VIDEO', 'VIDEO' );
-// Scriptable Multimedia (flash, advanced video container formats, etc)
-define( 'MEDIATYPE_MULTIMEDIA', 'MULTIMEDIA' );
-// Office Documents, Spreadsheets (office formats possibly containing apples, scripts, etc)
-define( 'MEDIATYPE_OFFICE', 'OFFICE' );
-// Plain text (possibly containing program code or scripts)
-define( 'MEDIATYPE_TEXT', 'TEXT' );
-// binary executable
-define( 'MEDIATYPE_EXECUTABLE', 'EXECUTABLE' );
-// archive file (zip, tar, etc)
-define( 'MEDIATYPE_ARCHIVE', 'ARCHIVE' );
-/**@}*/
+require_once __DIR__ . '/libs/mime/defines.php';
/**@{
* Antivirus result codes, for use in $wgAntivirusSetup.
use MediaWiki\Services\ServiceContainer;
use MediaWiki\Services\NoSuchServiceException;
use MWException;
+use MimeAnalyzer;
use ObjectCache;
use ProxyLookup;
use SearchEngine;
return $this->getService( 'MediaHandlerFactory' );
}
+ /**
+ * Returns the service registered under the name 'MimeAnalyzer'.
+ *
+ * @since 1.28
+ * @return MimeAnalyzer
+ */
+ public function getMimeAnalyzer() {
+ return $this->getService( 'MimeAnalyzer' );
+ }
+
/**
* @since 1.28
* @return ProxyLookup
<?php
/**
- * Module defining helper functions for detecting and dealing with MIME types.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
*
* @file
*/
+use MediaWiki\MediaWikiServices;
+use MediaWiki\Logger\LoggerFactory;
-/**
- * Defines a set of well known MIME types
- * This is used as a fallback to mime.types files.
- * An extensive list of well known MIME types is provided by
- * the file mime.types in the includes directory.
- *
- * This list concatenated with mime.types is used to create a MIME <-> ext
- * map. Each line contains a MIME type followed by a space separated list of
- * extensions. If multiple extensions for a single MIME type exist or if
- * multiple MIME types exist for a single extension then in most cases
- * MediaWiki assumes that the first extension following the MIME type is the
- * canonical extension, and the first time a MIME type appears for a certain
- * extension is considered the canonical MIME type.
- *
- * (Note that appending $wgMimeTypeFile to the end of MM_WELL_KNOWN_MIME_TYPES
- * sucks because you can't redefine canonical types. This could be fixed by
- * appending MM_WELL_KNOWN_MIME_TYPES behind $wgMimeTypeFile, but who knows
- * what will break? In practice this probably isn't a problem anyway -- Bryan)
- */
-define( 'MM_WELL_KNOWN_MIME_TYPES', <<<END_STRING
-application/ogg ogx ogg ogm ogv oga spx
-application/pdf pdf
-application/vnd.oasis.opendocument.chart odc
-application/vnd.oasis.opendocument.chart-template otc
-application/vnd.oasis.opendocument.database odb
-application/vnd.oasis.opendocument.formula odf
-application/vnd.oasis.opendocument.formula-template otf
-application/vnd.oasis.opendocument.graphics odg
-application/vnd.oasis.opendocument.graphics-template otg
-application/vnd.oasis.opendocument.image odi
-application/vnd.oasis.opendocument.image-template oti
-application/vnd.oasis.opendocument.presentation odp
-application/vnd.oasis.opendocument.presentation-template otp
-application/vnd.oasis.opendocument.spreadsheet ods
-application/vnd.oasis.opendocument.spreadsheet-template ots
-application/vnd.oasis.opendocument.text odt
-application/vnd.oasis.opendocument.text-master otm
-application/vnd.oasis.opendocument.text-template ott
-application/vnd.oasis.opendocument.text-web oth
-application/javascript js
-application/x-shockwave-flash swf
-audio/midi mid midi kar
-audio/mpeg mpga mpa mp2 mp3
-audio/x-aiff aif aiff aifc
-audio/x-wav wav
-audio/ogg oga spx ogg
-image/x-bmp bmp
-image/gif gif
-image/jpeg jpeg jpg jpe
-image/png png
-image/svg+xml svg
-image/svg svg
-image/tiff tiff tif
-image/vnd.djvu djvu
-image/x.djvu djvu
-image/x-djvu djvu
-image/x-portable-pixmap ppm
-image/x-xcf xcf
-text/plain txt
-text/html html htm
-video/ogg ogv ogm ogg
-video/mpeg mpg mpeg
-END_STRING
-);
-
-/**
- * Defines a set of well known MIME info entries
- * This is used as a fallback to mime.info files.
- * An extensive list of well known MIME types is provided by
- * the file mime.info in the includes directory.
- */
-define( 'MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
-application/pdf [OFFICE]
-application/vnd.oasis.opendocument.chart [OFFICE]
-application/vnd.oasis.opendocument.chart-template [OFFICE]
-application/vnd.oasis.opendocument.database [OFFICE]
-application/vnd.oasis.opendocument.formula [OFFICE]
-application/vnd.oasis.opendocument.formula-template [OFFICE]
-application/vnd.oasis.opendocument.graphics [OFFICE]
-application/vnd.oasis.opendocument.graphics-template [OFFICE]
-application/vnd.oasis.opendocument.image [OFFICE]
-application/vnd.oasis.opendocument.image-template [OFFICE]
-application/vnd.oasis.opendocument.presentation [OFFICE]
-application/vnd.oasis.opendocument.presentation-template [OFFICE]
-application/vnd.oasis.opendocument.spreadsheet [OFFICE]
-application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
-application/vnd.oasis.opendocument.text [OFFICE]
-application/vnd.oasis.opendocument.text-template [OFFICE]
-application/vnd.oasis.opendocument.text-master [OFFICE]
-application/vnd.oasis.opendocument.text-web [OFFICE]
-application/javascript text/javascript application/x-javascript [EXECUTABLE]
-application/x-shockwave-flash [MULTIMEDIA]
-audio/midi [AUDIO]
-audio/x-aiff [AUDIO]
-audio/x-wav [AUDIO]
-audio/mp3 audio/mpeg [AUDIO]
-application/ogg audio/ogg video/ogg [MULTIMEDIA]
-image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
-image/gif [BITMAP]
-image/jpeg [BITMAP]
-image/png [BITMAP]
-image/svg+xml [DRAWING]
-image/tiff [BITMAP]
-image/vnd.djvu [BITMAP]
-image/x-xcf [BITMAP]
-image/x-portable-pixmap [BITMAP]
-text/plain [TEXT]
-text/html [TEXT]
-video/ogg [VIDEO]
-video/mpeg [VIDEO]
-unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
-END_STRING
-);
-
-/**
- * Implements functions related to MIME types such as detection and mapping to
- * file extension.
- *
- * Instances of this class are stateless, there only needs to be one global instance
- * of MimeMagic. Please use MimeMagic::singleton() to get that instance.
- */
-class MimeMagic {
- /**
- * @var array Mapping of media types to arrays of MIME types.
- * This is used by findMediaType and getMediaType, respectively
- */
- protected $mMediaTypes = null;
-
- /** @var array Map of MIME type aliases
- */
- protected $mMimeTypeAliases = null;
-
- /** @var array Map of MIME types to file extensions (as a space separated list)
- */
- protected $mMimeToExt = null;
-
- /** @var array Map of file extensions types to MIME types (as a space separated list)
- */
- public $mExtToMime = null;
-
- /** @var IEContentAnalyzer
- */
- protected $mIEAnalyzer;
-
- /** @var string Extra MIME types, set for example by media handling extensions
- */
- private $mExtraTypes = '';
-
- /** @var string Extra MIME info, set for example by media handling extensions
- */
- private $mExtraInfo = '';
-
- /** @var Config */
- private $mConfig;
-
- /** @var MimeMagic The singleton instance
- */
- private static $instance = null;
-
- /** Initializes the MimeMagic object. This is called by MimeMagic::singleton().
- *
- * This constructor parses the mime.types and mime.info files and build internal mappings.
- *
- * @todo Make this constructor private once everything uses the singleton instance
- * @param Config $config
- */
- function __construct( Config $config = null ) {
- if ( !$config ) {
- wfDebug( __METHOD__ . ' called with no Config instance passed to it' );
- $config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
- }
- $this->mConfig = $config;
-
- /**
- * --- load mime.types ---
- */
-
- global $IP;
-
- # Allow media handling extensions adding MIME-types and MIME-info
- Hooks::run( 'MimeMagicInit', [ $this ] );
-
- $types = MM_WELL_KNOWN_MIME_TYPES;
-
- $mimeTypeFile = $this->mConfig->get( 'MimeTypeFile' );
- if ( $mimeTypeFile == 'includes/mime.types' ) {
- $mimeTypeFile = "$IP/$mimeTypeFile";
- }
-
- if ( $mimeTypeFile ) {
- if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
- wfDebug( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
- $types .= "\n";
- $types .= file_get_contents( $mimeTypeFile );
- } else {
- wfDebug( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
- }
- } else {
- wfDebug( __METHOD__ . ": no mime types file defined, using built-ins only.\n" );
- }
-
- $types .= "\n" . $this->mExtraTypes;
-
- $types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
- $types = str_replace( "\t", " ", $types );
-
- $this->mMimeToExt = [];
- $this->mExtToMime = [];
-
- $lines = explode( "\n", $types );
- foreach ( $lines as $s ) {
- $s = trim( $s );
- if ( empty( $s ) ) {
- continue;
- }
- if ( strpos( $s, '#' ) === 0 ) {
- continue;
- }
-
- $s = strtolower( $s );
- $i = strpos( $s, ' ' );
-
- if ( $i === false ) {
- continue;
- }
-
- $mime = substr( $s, 0, $i );
- $ext = trim( substr( $s, $i + 1 ) );
-
- if ( empty( $ext ) ) {
- continue;
- }
-
- if ( !empty( $this->mMimeToExt[$mime] ) ) {
- $this->mMimeToExt[$mime] .= ' ' . $ext;
- } else {
- $this->mMimeToExt[$mime] = $ext;
- }
-
- $extensions = explode( ' ', $ext );
-
- foreach ( $extensions as $e ) {
- $e = trim( $e );
- if ( empty( $e ) ) {
- continue;
- }
-
- if ( !empty( $this->mExtToMime[$e] ) ) {
- $this->mExtToMime[$e] .= ' ' . $mime;
- } else {
- $this->mExtToMime[$e] = $mime;
- }
- }
- }
-
- /**
- * --- load mime.info ---
- */
-
- $mimeInfoFile = $this->mConfig->get( 'MimeInfoFile' );
- if ( $mimeInfoFile == 'includes/mime.info' ) {
- $mimeInfoFile = "$IP/$mimeInfoFile";
- }
-
- $info = MM_WELL_KNOWN_MIME_INFO;
-
- if ( $mimeInfoFile ) {
- if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
- wfDebug( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
- $info .= "\n";
- $info .= file_get_contents( $mimeInfoFile );
- } else {
- wfDebug( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
- }
- } else {
- wfDebug( __METHOD__ . ": no mime info file defined, using built-ins only.\n" );
- }
-
- $info .= "\n" . $this->mExtraInfo;
-
- $info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
- $info = str_replace( "\t", " ", $info );
-
- $this->mMimeTypeAliases = [];
- $this->mMediaTypes = [];
-
- $lines = explode( "\n", $info );
- foreach ( $lines as $s ) {
- $s = trim( $s );
- if ( empty( $s ) ) {
- continue;
- }
- if ( strpos( $s, '#' ) === 0 ) {
- continue;
- }
-
- $s = strtolower( $s );
- $i = strpos( $s, ' ' );
-
- if ( $i === false ) {
- continue;
- }
-
- # print "processing MIME INFO line $s<br>";
-
- $match = [];
- if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
- $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
- $mtype = trim( strtoupper( $match[1] ) );
- } else {
- $mtype = MEDIATYPE_UNKNOWN;
- }
-
- $m = explode( ' ', $s );
-
- if ( !isset( $this->mMediaTypes[$mtype] ) ) {
- $this->mMediaTypes[$mtype] = [];
- }
-
- foreach ( $m as $mime ) {
- $mime = trim( $mime );
- if ( empty( $mime ) ) {
- continue;
- }
-
- $this->mMediaTypes[$mtype][] = $mime;
- }
-
- if ( count( $m ) > 1 ) {
- $main = $m[0];
- $mCount = count( $m );
- for ( $i = 1; $i < $mCount; $i += 1 ) {
- $mime = $m[$i];
- $this->mMimeTypeAliases[$mime] = $main;
- }
- }
- }
- }
-
+class MimeMagic extends MimeAnalyzer {
/**
* Get an instance of this class
* @return MimeMagic
+ * @deprecated since 1.28
*/
public static function singleton() {
- if ( self::$instance === null ) {
- self::$instance = new MimeMagic(
- ConfigFactory::getDefaultInstance()->makeConfig( 'main' )
- );
- }
- return self::$instance;
- }
-
- /**
- * Adds to the list mapping MIME to file extensions.
- * As an extension author, you are encouraged to submit patches to
- * MediaWiki's core to add new MIME types to mime.types.
- * @param string $types
- */
- public function addExtraTypes( $types ) {
- $this->mExtraTypes .= "\n" . $types;
- }
-
- /**
- * Adds to the list mapping MIME to media type.
- * As an extension author, you are encouraged to submit patches to
- * MediaWiki's core to add new MIME info to mime.info.
- * @param string $info
- */
- public function addExtraInfo( $info ) {
- $this->mExtraInfo .= "\n" . $info;
- }
-
- /**
- * Returns a list of file extensions for a given MIME type as a space
- * separated string or null if the MIME type was unrecognized. Resolves
- * MIME type aliases.
- *
- * @param string $mime
- * @return string|null
- */
- public function getExtensionsForType( $mime ) {
- $mime = strtolower( $mime );
-
- // Check the mime-to-ext map
- if ( isset( $this->mMimeToExt[$mime] ) ) {
- return $this->mMimeToExt[$mime];
- }
-
- // Resolve the MIME type to the canonical type
- if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
- $mime = $this->mMimeTypeAliases[$mime];
- if ( isset( $this->mMimeToExt[$mime] ) ) {
- return $this->mMimeToExt[$mime];
- }
- }
-
- return null;
+ return MediaWikiServices::getInstance()->getMIMEAnalyzer();
}
/**
- * Returns a list of MIME types for a given file extension as a space
- * separated string or null if the extension was unrecognized.
- *
- * @param string $ext
- * @return string|null
- */
- public function getTypesForExtension( $ext ) {
- $ext = strtolower( $ext );
-
- $r = isset( $this->mExtToMime[$ext] ) ? $this->mExtToMime[$ext] : null;
- return $r;
- }
-
- /**
- * Returns a single MIME type for a given file extension or null if unknown.
- * This is always the first type from the list returned by getTypesForExtension($ext).
- *
- * @param string $ext
- * @return string|null
- */
- public function guessTypesForExtension( $ext ) {
- $m = $this->getTypesForExtension( $ext );
- if ( is_null( $m ) ) {
- return null;
- }
-
- // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
- $m = trim( $m );
- $m = preg_replace( '/\s.*$/', '', $m );
-
- return $m;
- }
-
- /**
- * Tests if the extension matches the given MIME type. Returns true if a
- * match was found, null if the MIME type is unknown, and false if the
- * MIME type is known but no matches where found.
- *
- * @param string $extension
- * @param string $mime
- * @return bool|null
- */
- public function isMatchingExtension( $extension, $mime ) {
- $ext = $this->getExtensionsForType( $mime );
-
- if ( !$ext ) {
- return null; // Unknown MIME type
- }
-
- $ext = explode( ' ', $ext );
-
- $extension = strtolower( $extension );
- return in_array( $extension, $ext );
- }
-
- /**
- * Returns true if the MIME type is known to represent an image format
- * supported by the PHP GD library.
- *
- * @param string $mime
- *
- * @return bool
- */
- public function isPHPImageType( $mime ) {
- // As defined by imagegetsize and image_type_to_mime
- static $types = [
- 'image/gif', 'image/jpeg', 'image/png',
- 'image/x-bmp', 'image/xbm', 'image/tiff',
- 'image/jp2', 'image/jpeg2000', 'image/iff',
- 'image/xbm', 'image/x-xbitmap',
- 'image/vnd.wap.wbmp', 'image/vnd.xiff',
- 'image/x-photoshop',
- 'application/x-shockwave-flash',
- ];
-
- return in_array( $mime, $types );
- }
-
- /**
- * Returns true if the extension represents a type which can
- * be reliably detected from its content. Use this to determine
- * whether strict content checks should be applied to reject
- * invalid uploads; if we can't identify the type we won't
- * be able to say if it's invalid.
- *
- * @todo Be more accurate when using fancy MIME detector plugins;
- * right now this is the bare minimum getimagesize() list.
- * @param string $extension
- * @return bool
- */
- function isRecognizableExtension( $extension ) {
- static $types = [
- // Types recognized by getimagesize()
- 'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
- 'bmp', 'tiff', 'tif', 'jpc', 'jp2',
- 'jpx', 'jb2', 'swc', 'iff', 'wbmp',
- 'xbm',
-
- // Formats we recognize magic numbers for
- 'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
- 'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
- 'webp',
-
- // XML formats we sure hope we recognize reliably
- 'svg',
- ];
- return in_array( strtolower( $extension ), $types );
- }
-
- /**
- * Improves a MIME type using the file extension. Some file formats are very generic,
- * so their MIME type is not very meaningful. A more useful MIME type can be derived
- * by looking at the file extension. Typically, this method would be called on the
- * result of guessMimeType().
- *
- * @param string $mime The MIME type, typically guessed from a file's content.
- * @param string $ext The file extension, as taken from the file name
- *
- * @return string The MIME type
- */
- public function improveTypeFromExtension( $mime, $ext ) {
- if ( $mime === 'unknown/unknown' ) {
- if ( $this->isRecognizableExtension( $ext ) ) {
- wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
- "$ext file, we should have recognized it\n" );
- } else {
- // Not something we can detect, so simply
- // trust the file extension
- $mime = $this->guessTypesForExtension( $ext );
- }
- } elseif ( $mime === 'application/x-opc+zip' ) {
- if ( $this->isMatchingExtension( $ext, $mime ) ) {
- // A known file extension for an OPC file,
- // find the proper MIME type for that file extension
- $mime = $this->guessTypesForExtension( $ext );
- } else {
- wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
- ".$ext is not a known OPC extension.\n" );
- $mime = 'application/zip';
- }
- } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
- // Textual types are sometimes not recognized properly.
- // If detected as text/plain, and has an extension which is textual
- // improve to the extension's type. For example, csv and json are often
- // misdetected as text/plain.
- $mime = $this->guessTypesForExtension( $ext );
- }
-
- # Media handling extensions can improve the MIME detected
- Hooks::run( 'MimeMagicImproveFromExtension', [ $this, $ext, &$mime ] );
-
- if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
- $mime = $this->mMimeTypeAliases[$mime];
- }
-
- wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
- return $mime;
- }
-
- /**
- * MIME type detection. This uses detectMimeType to detect the MIME type
- * of the file, but applies additional checks to determine some well known
- * file formats that may be missed or misinterpreted by the default MIME
- * detection (namely XML based formats like XHTML or SVG, as well as ZIP
- * based formats like OPC/ODF files).
- *
- * @param string $file The file to check
- * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
- * Set it to false to ignore the extension. DEPRECATED! Set to false, use
- * improveTypeFromExtension($mime, $ext) later to improve MIME type.
- *
- * @return string The MIME type of $file
- */
- public function guessMimeType( $file, $ext = true ) {
- if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
- wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
- "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
- }
-
- $mime = $this->doGuessMimeType( $file, $ext );
-
- if ( !$mime ) {
- wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
- $mime = $this->detectMimeType( $file, $ext );
- }
-
- if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
- $mime = $this->mMimeTypeAliases[$mime];
- }
-
- wfDebug( __METHOD__ . ": guessed mime type of $file: $mime\n" );
- return $mime;
- }
-
- /**
- * Guess the MIME type from the file contents.
- *
- * @todo Remove $ext param
- *
- * @param string $file
- * @param mixed $ext
- * @return bool|string
- * @throws MWException
+ * @param array $params
+ * @param Config $mainConfig
+ * @return array
*/
- private function doGuessMimeType( $file, $ext ) {
- // Read a chunk of the file
- MediaWiki\suppressWarnings();
- $f = fopen( $file, 'rb' );
- MediaWiki\restoreWarnings();
-
- if ( !$f ) {
- return 'unknown/unknown';
- }
-
- $fsize = filesize( $file );
- if ( $fsize === false ) {
- return 'unknown/unknown';
- }
-
- $head = fread( $f, 1024 );
- $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
- if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
- throw new MWException(
- "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
- }
- $tail = $tailLength ? fread( $f, $tailLength ) : '';
- fclose( $f );
-
- wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );
-
- // Hardcode a few magic number checks...
- $headers = [
- // Multimedia...
- 'MThd' => 'audio/midi',
- 'OggS' => 'application/ogg',
-
- // Image formats...
- // Note that WMF may have a bare header, no magic number.
- "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
- "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
- '%PDF' => 'application/pdf',
- 'gimp xcf' => 'image/x-xcf',
-
- // Some forbidden fruit...
- 'MZ' => 'application/octet-stream', // DOS/Windows executable
- "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
- "\x7fELF" => 'application/octet-stream', // ELF binary
- ];
-
- foreach ( $headers as $magic => $candidate ) {
- if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
- wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
- return $candidate;
- }
- }
-
- /* Look for WebM and Matroska files */
- if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
- $doctype = strpos( $head, "\x42\x82" );
- if ( $doctype ) {
- // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
- $data = substr( $head, $doctype + 3, 8 );
- if ( strncmp( $data, "matroska", 8 ) == 0 ) {
- wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
- return "video/x-matroska";
- } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
- wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
- return "video/webm";
- }
- }
- wfDebug( __METHOD__ . ": unknown EBML file\n" );
- return "unknown/unknown";
- }
-
- /* Look for WebP */
- if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0 ) {
- wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
- return "image/webp";
- }
-
- /**
- * Look for PHP. Check for this before HTML/XML... Warning: this is a
- * heuristic, and won't match a file with a lot of non-PHP before. It
- * will also match text files which could be PHP. :)
- *
- * @todo FIXME: For this reason, the check is probably useless -- an attacker
- * could almost certainly just pad the file with a lot of nonsense to
- * circumvent the check in any case where it would be a security
- * problem. On the other hand, it causes harmful false positives (bug
- * 16583). The heuristic has been cut down to exclude three-character
- * strings like "<? ", but should it be axed completely?
- */
- if ( ( strpos( $head, '<?php' ) !== false ) ||
- ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
- ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
- ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
- ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
- ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {
-
- wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
- return 'application/x-php';
- }
-
- /**
- * look for XML formats (XHTML and SVG)
- */
- $xml = new XmlTypeCheck( $file );
- if ( $xml->wellFormed ) {
- $xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' );
- if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) {
- return $xmlMimeTypes[$xml->getRootElement()];
- } else {
- return 'application/xml';
- }
- }
-
- /**
- * look for shell scripts
- */
- $script_type = null;
-
- # detect by shebang
- if ( substr( $head, 0, 2 ) == "#!" ) {
- $script_type = "ASCII";
- } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
- $script_type = "UTF-8";
- } elseif ( substr( $head, 0, 7 ) == "\xfe\xff\x00#\x00!" ) {
- $script_type = "UTF-16BE";
- } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) {
- $script_type = "UTF-16LE";
- }
-
- if ( $script_type ) {
- if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
- // Quick and dirty fold down to ASCII!
- $pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
- $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
- $head = '';
- foreach ( $chars as $codepoint ) {
- if ( $codepoint < 128 ) {
- $head .= chr( $codepoint );
- } else {
- $head .= '?';
+ public static function applyDefaultParameters( array $params, Config $mainConfig ) {
+ $logger = LoggerFactory::getInstance( 'Mime' );
+ $params += [
+ 'typeFile' => $mainConfig->get( 'MimeTypeFile' ),
+ 'infoFile' => $mainConfig->get( 'MimeInfoFile' ),
+ 'xmlTypes' => $mainConfig->get( 'XMLMimeTypes' ),
+ 'guessCallback' =>
+ function ( $mimeAnalyzer, &$head, &$tail, $file, &$mime ) use ( $logger ) {
+ // Also test DjVu
+ $deja = new DjVuImage( $file );
+ if ( $deja->isValid() ) {
+ $logger->info( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
+ $mime = 'image/vnd.djvu';
+
+ return;
}
- }
- }
-
- $match = [];
-
- if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
- $mime = "application/x-{$match[2]}";
- wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
- return $mime;
- }
- }
-
- // Check for ZIP variants (before getimagesize)
- if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
- wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
- return $this->detectZipType( $head, $tail, $ext );
- }
-
- MediaWiki\suppressWarnings();
- $gis = getimagesize( $file );
- MediaWiki\restoreWarnings();
-
- if ( $gis && isset( $gis['mime'] ) ) {
- $mime = $gis['mime'];
- wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
- return $mime;
- }
-
- // Also test DjVu
- $deja = new DjVuImage( $file );
- if ( $deja->isValid() ) {
- wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
- return 'image/vnd.djvu';
- }
-
- # Media handling extensions can guess the MIME by content
- # It's intentionally here so that if core is wrong about a type (false positive),
- # people will hopefully nag and submit patches :)
- $mime = false;
- # Some strings by reference for performance - assuming well-behaved hooks
- Hooks::run(
- 'MimeMagicGuessFromContent',
- [ $this, &$head, &$tail, $file, &$mime ]
- );
-
- return $mime;
- }
-
- /**
- * Detect application-specific file type of a given ZIP file from its
- * header data. Currently works for OpenDocument and OpenXML types...
- * If can't tell, returns 'application/zip'.
- *
- * @param string $header Some reasonably-sized chunk of file header
- * @param string|null $tail The tail of the file
- * @param string|bool $ext The file extension, or true to extract it from the filename.
- * Set it to false (default) to ignore the extension. DEPRECATED! Set to false,
- * use improveTypeFromExtension($mime, $ext) later to improve MIME type.
- *
- * @return string
- */
- function detectZipType( $header, $tail = null, $ext = false ) {
- if ( $ext ) { # TODO: remove $ext param
- wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
- "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
- }
-
- $mime = 'application/zip';
- $opendocTypes = [
- 'chart-template',
- 'chart',
- 'formula-template',
- 'formula',
- 'graphics-template',
- 'graphics',
- 'image-template',
- 'image',
- 'presentation-template',
- 'presentation',
- 'spreadsheet-template',
- 'spreadsheet',
- 'text-template',
- 'text-master',
- 'text-web',
- 'text' ];
-
- // http://lists.oasis-open.org/archives/office/200505/msg00006.html
- $types = '(?:' . implode( '|', $opendocTypes ) . ')';
- $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
-
- $openxmlRegex = "/^\[Content_Types\].xml/";
-
- if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
- $mime = $matches[1];
- wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" );
- } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
- $mime = "application/x-opc+zip";
- # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
- if ( $ext !== true && $ext !== false ) {
- // These MIME's are stored in the database, where we don't really want
- // x-opc+zip, because we use it only for internal purposes
- if ( $this->isMatchingExtension( $ext, $mime ) ) {
- /* A known file extension for an OPC file,
- * find the proper mime type for that file extension
- */
- $mime = $this->guessTypesForExtension( $ext );
- } else {
- $mime = "application/zip";
- }
- }
- wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $mime\n" );
- } elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
- ( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
- preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
- if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
- $mime = "application/msword";
- }
- switch ( substr( $header, 512, 6 ) ) {
- case "\xEC\xA5\xC1\x00\x0E\x00":
- case "\xEC\xA5\xC1\x00\x1C\x00":
- case "\xEC\xA5\xC1\x00\x43\x00":
- $mime = "application/vnd.ms-powerpoint";
- break;
- case "\xFD\xFF\xFF\xFF\x10\x00":
- case "\xFD\xFF\xFF\xFF\x1F\x00":
- case "\xFD\xFF\xFF\xFF\x22\x00":
- case "\xFD\xFF\xFF\xFF\x23\x00":
- case "\xFD\xFF\xFF\xFF\x28\x00":
- case "\xFD\xFF\xFF\xFF\x29\x00":
- case "\xFD\xFF\xFF\xFF\x10\x02":
- case "\xFD\xFF\xFF\xFF\x1F\x02":
- case "\xFD\xFF\xFF\xFF\x22\x02":
- case "\xFD\xFF\xFF\xFF\x23\x02":
- case "\xFD\xFF\xFF\xFF\x28\x02":
- case "\xFD\xFF\xFF\xFF\x29\x02":
- $mime = "application/vnd.msexcel";
- break;
- }
-
- wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
- } else {
- wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
- }
- return $mime;
- }
-
- /**
- * Internal MIME type detection. Detection is done using an external
- * program, if $wgMimeDetectorCommand is set. Otherwise, the fileinfo
- * extension is tried if it is available. If detection fails and $ext
- * is not false, the MIME type is guessed from the file extension,
- * using guessTypesForExtension.
- *
- * If the MIME type is still unknown, getimagesize is used to detect the
- * MIME type if the file is an image. If no MIME type can be determined,
- * this function returns 'unknown/unknown'.
- *
- * @param string $file The file to check
- * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
- * Set it to false to ignore the extension. DEPRECATED! Set to false, use
- * improveTypeFromExtension($mime, $ext) later to improve MIME type.
- *
- * @return string The MIME type of $file
- */
- private function detectMimeType( $file, $ext = true ) {
- /** @todo Make $ext default to false. Or better, remove it. */
- if ( $ext ) {
- wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. "
- . "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
- }
-
- $mimeDetectorCommand = $this->mConfig->get( 'MimeDetectorCommand' );
- $m = null;
- if ( $mimeDetectorCommand ) {
- $args = wfEscapeShellArg( $file );
- $m = wfShellExec( "$mimeDetectorCommand $args" );
- } elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
- $mime_magic_resource = finfo_open( FILEINFO_MIME );
-
- if ( $mime_magic_resource ) {
- $m = finfo_file( $mime_magic_resource, $file );
- finfo_close( $mime_magic_resource );
- } else {
- wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
- }
- } else {
- wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
- }
-
- if ( $m ) {
- # normalize
- $m = preg_replace( '![;, ].*$!', '', $m ); # strip charset, etc
- $m = trim( $m );
- $m = strtolower( $m );
-
- if ( strpos( $m, 'unknown' ) !== false ) {
- $m = null;
- } else {
- wfDebug( __METHOD__ . ": magic mime type of $file: $m\n" );
- return $m;
- }
- }
-
- // If desired, look at extension as a fallback.
- if ( $ext === true ) {
- $i = strrpos( $file, '.' );
- $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
- }
- if ( $ext ) {
- if ( $this->isRecognizableExtension( $ext ) ) {
- wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, "
- . "we should have recognized it\n" );
- } else {
- $m = $this->guessTypesForExtension( $ext );
- if ( $m ) {
- wfDebug( __METHOD__ . ": extension mime type of $file: $m\n" );
- return $m;
- }
- }
- }
-
- // Unknown type
- wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
- return 'unknown/unknown';
- }
-
- /**
- * Determine the media type code for a file, using its MIME type, name and
- * possibly its contents.
- *
- * This function relies on the findMediaType(), mapping extensions and MIME
- * types to media types.
- *
- * @todo analyse file if need be
- * @todo look at multiple extension, separately and together.
- *
- * @param string $path Full path to the image file, in case we have to look at the contents
- * (if null, only the MIME type is used to determine the media type code).
- * @param string $mime MIME type. If null it will be guessed using guessMimeType.
- *
- * @return string A value to be used with the MEDIATYPE_xxx constants.
- */
- function getMediaType( $path = null, $mime = null ) {
- if ( !$mime && !$path ) {
- return MEDIATYPE_UNKNOWN;
- }
-
- // If MIME type is unknown, guess it
- if ( !$mime ) {
- $mime = $this->guessMimeType( $path, false );
- }
-
- // Special code for ogg - detect if it's video (theora),
- // else label it as sound.
- if ( $mime == 'application/ogg' && file_exists( $path ) ) {
-
- // Read a chunk of the file
- $f = fopen( $path, "rt" );
- if ( !$f ) {
- return MEDIATYPE_UNKNOWN;
- }
- $head = fread( $f, 256 );
- fclose( $f );
-
- $head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );
-
- // This is an UGLY HACK, file should be parsed correctly
- if ( strpos( $head, 'theora' ) !== false ) {
- return MEDIATYPE_VIDEO;
- } elseif ( strpos( $head, 'vorbis' ) !== false ) {
- return MEDIATYPE_AUDIO;
- } elseif ( strpos( $head, 'flac' ) !== false ) {
- return MEDIATYPE_AUDIO;
- } elseif ( strpos( $head, 'speex' ) !== false ) {
- return MEDIATYPE_AUDIO;
- } else {
- return MEDIATYPE_MULTIMEDIA;
- }
- }
-
- $type = null;
- // Check for entry for full MIME type
- if ( $mime ) {
- $type = $this->findMediaType( $mime );
- if ( $type !== MEDIATYPE_UNKNOWN ) {
- return $type;
- }
- }
-
- // Check for entry for file extension
- if ( $path ) {
- $i = strrpos( $path, '.' );
- $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
-
- // TODO: look at multi-extension if this fails, parse from full path
- $type = $this->findMediaType( '.' . $e );
- if ( $type !== MEDIATYPE_UNKNOWN ) {
- return $type;
- }
- }
-
- // Check major MIME type
- if ( $mime ) {
- $i = strpos( $mime, '/' );
- if ( $i !== false ) {
- $major = substr( $mime, 0, $i );
- $type = $this->findMediaType( $major );
- if ( $type !== MEDIATYPE_UNKNOWN ) {
- return $type;
- }
- }
- }
+ // Some strings by reference for performance - assuming well-behaved hooks
+ Hooks::run(
+ 'MimeMagicGuessFromContent',
+ [ $mimeAnalyzer, &$head, &$tail, $file, &$mime ]
+ );
+ },
+ 'extCallback' => function ( $mimeAnalyzer, $ext, &$mime ) {
+ // Media handling extensions can refine the detected MIME type
+ Hooks::run( 'MimeMagicImproveFromExtension', [ $mimeAnalyzer, $ext, &$mime ] );
+ },
+ 'initCallback' => function ( $mimeAnalyzer ) {
+ // Allow media handling extensions to add MIME types and MIME info
+ Hooks::run( 'MimeMagicInit', [ $mimeAnalyzer ] );
+ },
+ 'logger' => $logger
+ ];
- if ( !$type ) {
- $type = MEDIATYPE_UNKNOWN;
+ if ( $params['infoFile'] === 'includes/mime.info' ) {
+ $params['infoFile'] = __DIR__ . "/libs/mime/mime.info";
}
- return $type;
- }
-
- /**
- * Returns a media code matching the given MIME type or file extension.
- * File extensions are represented by a string starting with a dot (.) to
- * distinguish them from MIME types.
- *
- * This function relies on the mapping defined by $this->mMediaTypes
- * @access private
- * @param string $extMime
- * @return int|string
- */
- function findMediaType( $extMime ) {
- if ( strpos( $extMime, '.' ) === 0 ) {
- // If it's an extension, look up the MIME types
- $m = $this->getTypesForExtension( substr( $extMime, 1 ) );
- if ( !$m ) {
- return MEDIATYPE_UNKNOWN;
- }
-
- $m = explode( ' ', $m );
- } else {
- // Normalize MIME type
- if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
- $extMime = $this->mMimeTypeAliases[$extMime];
- }
-
- $m = [ $extMime ];
+ if ( $params['typeFile'] === 'includes/mime.types' ) {
+ $params['typeFile'] = __DIR__ . "/libs/mime/mime.types";
}
- foreach ( $m as $mime ) {
- foreach ( $this->mMediaTypes as $type => $codes ) {
- if ( in_array( $mime, $codes, true ) ) {
- return $type;
- }
- }
+ $detectorCmd = $mainConfig->get( 'MimeDetectorCommand' );
+ if ( $detectorCmd ) {
+ $params['detectCallback'] = function ( $file ) use ( $detectorCmd ) {
+ return wfShellExec( "$detectorCmd " . wfEscapeShellArg( $file ) );
+ };
}
- return MEDIATYPE_UNKNOWN;
- }
-
- /**
- * Get the MIME types that various versions of Internet Explorer would
- * detect from a chunk of the content.
- *
- * @param string $fileName The file name (unused at present)
- * @param string $chunk The first 256 bytes of the file
- * @param string $proposed The MIME type proposed by the server
- * @return array
- */
- public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
- $ca = $this->getIEContentAnalyzer();
- return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
- }
-
- /**
- * Get a cached instance of IEContentAnalyzer
- *
- * @return IEContentAnalyzer
- */
- protected function getIEContentAnalyzer() {
- if ( is_null( $this->mIEAnalyzer ) ) {
- $this->mIEAnalyzer = new IEContentAnalyzer;
- }
- return $this->mIEAnalyzer;
+ return $params;
}
}
);
},
+ 'MimeAnalyzer' => function( MediaWikiServices $services ) {
+ return new MimeMagic(
+ MimeMagic::applyDefaultParameters(
+ [],
+ $services->getMainConfig()
+ )
+ );
+ },
+
'ProxyLookup' => function( MediaWikiServices $services ) {
$mainConfig = $services->getMainConfig();
return new ProxyLookup(
+++ /dev/null
-<?php
-/**
- * Simulation of Microsoft Internet Explorer's MIME type detection algorithm.
- *
- * @file
- * @todo Define the exact license of this file.
- */
-
-/**
- * This class simulates Microsoft Internet Explorer's terribly broken and
- * insecure MIME type detection algorithm. It can be used to check web uploads
- * with an apparently safe type, to see if IE will reinterpret them to produce
- * something dangerous.
- *
- * It is full of bugs and strange design choices should not under any
- * circumstances be used to determine a MIME type to present to a user or
- * client. (Apple Safari developers, this means you too.)
- *
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
- * attempted to ensure that this code works in exactly the same way as Internet
- * Explorer, it does not share any source code, or creative choices such as
- * variable names, thus I (Tim Starling) claim copyright on it.
- *
- * It may be redistributed without restriction. To aid reuse, this class does
- * not depend on any MediaWiki module.
- */
-class IEContentAnalyzer {
- /**
- * Relevant data taken from the type table in IE 5
- */
- protected $baseTypeTable = [
- 'ambiguous' /*1*/ => [
- 'text/plain',
- 'application/octet-stream',
- 'application/x-netcdf', // [sic]
- ],
- 'text' /*3*/ => [
- 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
- 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
- ],
- 'binary' /*4*/ => [
- 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
- 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
- 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
- 'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
- 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
- 'application/x-msdownload'
- ],
- 'html' /*5*/ => [ 'text/html' ],
- ];
-
- /**
- * Changes to the type table in later versions of IE
- */
- protected $addedTypes = [
- 'ie07' => [
- 'text' => [ 'text/xml', 'application/xml' ]
- ],
- ];
-
- /**
- * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
- * typical Windows installation.
- *
- * Used for extension to MIME type mapping if detection fails.
- */
- protected $registry = [
- '.323' => 'text/h323',
- '.3g2' => 'video/3gpp2',
- '.3gp' => 'video/3gpp',
- '.3gp2' => 'video/3gpp2',
- '.3gpp' => 'video/3gpp',
- '.aac' => 'audio/aac',
- '.ac3' => 'audio/ac3',
- '.accda' => 'application/msaccess',
- '.accdb' => 'application/msaccess',
- '.accdc' => 'application/msaccess',
- '.accde' => 'application/msaccess',
- '.accdr' => 'application/msaccess',
- '.accdt' => 'application/msaccess',
- '.ade' => 'application/msaccess',
- '.adp' => 'application/msaccess',
- '.adts' => 'audio/aac',
- '.ai' => 'application/postscript',
- '.aif' => 'audio/aiff',
- '.aifc' => 'audio/aiff',
- '.aiff' => 'audio/aiff',
- '.amc' => 'application/x-mpeg',
- '.application' => 'application/x-ms-application',
- '.asf' => 'video/x-ms-asf',
- '.asx' => 'video/x-ms-asf',
- '.au' => 'audio/basic',
- '.avi' => 'video/avi',
- '.bmp' => 'image/bmp',
- '.caf' => 'audio/x-caf',
- '.cat' => 'application/vnd.ms-pki.seccat',
- '.cbo' => 'application/sha',
- '.cdda' => 'audio/aiff',
- '.cer' => 'application/x-x509-ca-cert',
- '.conf' => 'text/plain',
- '.crl' => 'application/pkix-crl',
- '.crt' => 'application/x-x509-ca-cert',
- '.css' => 'text/css',
- '.csv' => 'application/vnd.ms-excel',
- '.der' => 'application/x-x509-ca-cert',
- '.dib' => 'image/bmp',
- '.dif' => 'video/x-dv',
- '.dll' => 'application/x-msdownload',
- '.doc' => 'application/msword',
- '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
- '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
- '.dot' => 'application/msword',
- '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
- '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
- '.dv' => 'video/x-dv',
- '.dwfx' => 'model/vnd.dwfx+xps',
- '.edn' => 'application/vnd.adobe.edn',
- '.eml' => 'message/rfc822',
- '.eps' => 'application/postscript',
- '.etd' => 'application/x-ebx',
- '.exe' => 'application/x-msdownload',
- '.fdf' => 'application/vnd.fdf',
- '.fif' => 'application/fractals',
- '.gif' => 'image/gif',
- '.gsm' => 'audio/x-gsm',
- '.hqx' => 'application/mac-binhex40',
- '.hta' => 'application/hta',
- '.htc' => 'text/x-component',
- '.htm' => 'text/html',
- '.html' => 'text/html',
- '.htt' => 'text/webviewhtml',
- '.hxa' => 'application/xml',
- '.hxc' => 'application/xml',
- '.hxd' => 'application/octet-stream',
- '.hxe' => 'application/xml',
- '.hxf' => 'application/xml',
- '.hxh' => 'application/octet-stream',
- '.hxi' => 'application/octet-stream',
- '.hxk' => 'application/xml',
- '.hxq' => 'application/octet-stream',
- '.hxr' => 'application/octet-stream',
- '.hxs' => 'application/octet-stream',
- '.hxt' => 'application/xml',
- '.hxv' => 'application/xml',
- '.hxw' => 'application/octet-stream',
- '.ico' => 'image/x-icon',
- '.iii' => 'application/x-iphone',
- '.ins' => 'application/x-internet-signup',
- '.iqy' => 'text/x-ms-iqy',
- '.isp' => 'application/x-internet-signup',
- '.jfif' => 'image/jpeg',
- '.jnlp' => 'application/x-java-jnlp-file',
- '.jpe' => 'image/jpeg',
- '.jpeg' => 'image/jpeg',
- '.jpg' => 'image/jpeg',
- '.jtx' => 'application/x-jtx+xps',
- '.latex' => 'application/x-latex',
- '.log' => 'text/plain',
- '.m1v' => 'video/mpeg',
- '.m2v' => 'video/mpeg',
- '.m3u' => 'audio/x-mpegurl',
- '.mac' => 'image/x-macpaint',
- '.man' => 'application/x-troff-man',
- '.mda' => 'application/msaccess',
- '.mdb' => 'application/msaccess',
- '.mde' => 'application/msaccess',
- '.mfp' => 'application/x-shockwave-flash',
- '.mht' => 'message/rfc822',
- '.mhtml' => 'message/rfc822',
- '.mid' => 'audio/mid',
- '.midi' => 'audio/mid',
- '.mod' => 'video/mpeg',
- '.mov' => 'video/quicktime',
- '.mp2' => 'video/mpeg',
- '.mp2v' => 'video/mpeg',
- '.mp3' => 'audio/mpeg',
- '.mp4' => 'video/mp4',
- '.mpa' => 'video/mpeg',
- '.mpe' => 'video/mpeg',
- '.mpeg' => 'video/mpeg',
- '.mpf' => 'application/vnd.ms-mediapackage',
- '.mpg' => 'video/mpeg',
- '.mpv2' => 'video/mpeg',
- '.mqv' => 'video/quicktime',
- '.NMW' => 'application/nmwb',
- '.nws' => 'message/rfc822',
- '.odc' => 'text/x-ms-odc',
- '.ols' => 'application/vnd.ms-publisher',
- '.p10' => 'application/pkcs10',
- '.p12' => 'application/x-pkcs12',
- '.p7b' => 'application/x-pkcs7-certificates',
- '.p7c' => 'application/pkcs7-mime',
- '.p7m' => 'application/pkcs7-mime',
- '.p7r' => 'application/x-pkcs7-certreqresp',
- '.p7s' => 'application/pkcs7-signature',
- '.pct' => 'image/pict',
- '.pdf' => 'application/pdf',
- '.pdx' => 'application/vnd.adobe.pdx',
- '.pfx' => 'application/x-pkcs12',
- '.pic' => 'image/pict',
- '.pict' => 'image/pict',
- '.pinstall' => 'application/x-picasa-detect',
- '.pko' => 'application/vnd.ms-pki.pko',
- '.png' => 'image/png',
- '.pnt' => 'image/x-macpaint',
- '.pntg' => 'image/x-macpaint',
- '.pot' => 'application/vnd.ms-powerpoint',
- '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
- '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
- '.ppa' => 'application/vnd.ms-powerpoint',
- '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
- '.pps' => 'application/vnd.ms-powerpoint',
- '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
- '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
- '.ppt' => 'application/vnd.ms-powerpoint',
- '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
- '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
- '.prf' => 'application/pics-rules',
- '.ps' => 'application/postscript',
- '.pub' => 'application/vnd.ms-publisher',
- '.pwz' => 'application/vnd.ms-powerpoint',
- '.py' => 'text/plain',
- '.pyw' => 'text/plain',
- '.qht' => 'text/x-html-insertion',
- '.qhtm' => 'text/x-html-insertion',
- '.qt' => 'video/quicktime',
- '.qti' => 'image/x-quicktime',
- '.qtif' => 'image/x-quicktime',
- '.qtl' => 'application/x-quicktimeplayer',
- '.rat' => 'application/rat-file',
- '.rmf' => 'application/vnd.adobe.rmf',
- '.rmi' => 'audio/mid',
- '.rqy' => 'text/x-ms-rqy',
- '.rtf' => 'application/msword',
- '.sct' => 'text/scriptlet',
- '.sd2' => 'audio/x-sd2',
- '.sdp' => 'application/sdp',
- '.shtml' => 'text/html',
- '.sit' => 'application/x-stuffit',
- '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
- '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
- '.slk' => 'application/vnd.ms-excel',
- '.snd' => 'audio/basic',
- '.so' => 'application/x-apachemodule',
- '.sol' => 'text/plain',
- '.sor' => 'text/plain',
- '.spc' => 'application/x-pkcs7-certificates',
- '.spl' => 'application/futuresplash',
- '.sst' => 'application/vnd.ms-pki.certstore',
- '.stl' => 'application/vnd.ms-pki.stl',
- '.swf' => 'application/x-shockwave-flash',
- '.thmx' => 'application/vnd.ms-officetheme',
- '.tif' => 'image/tiff',
- '.tiff' => 'image/tiff',
- '.txt' => 'text/plain',
- '.uls' => 'text/iuls',
- '.vcf' => 'text/x-vcard',
- '.vdx' => 'application/vnd.ms-visio.viewer',
- '.vsd' => 'application/vnd.ms-visio.viewer',
- '.vss' => 'application/vnd.ms-visio.viewer',
- '.vst' => 'application/vnd.ms-visio.viewer',
- '.vsx' => 'application/vnd.ms-visio.viewer',
- '.vtx' => 'application/vnd.ms-visio.viewer',
- '.wav' => 'audio/wav',
- '.wax' => 'audio/x-ms-wax',
- '.wbk' => 'application/msword',
- '.wdp' => 'image/vnd.ms-photo',
- '.wiz' => 'application/msword',
- '.wm' => 'video/x-ms-wm',
- '.wma' => 'audio/x-ms-wma',
- '.wmd' => 'application/x-ms-wmd',
- '.wmv' => 'video/x-ms-wmv',
- '.wmx' => 'video/x-ms-wmx',
- '.wmz' => 'application/x-ms-wmz',
- '.wpl' => 'application/vnd.ms-wpl',
- '.wsc' => 'text/scriptlet',
- '.wvx' => 'video/x-ms-wvx',
- '.xaml' => 'application/xaml+xml',
- '.xbap' => 'application/x-ms-xbap',
- '.xdp' => 'application/vnd.adobe.xdp+xml',
- '.xfdf' => 'application/vnd.adobe.xfdf',
- '.xht' => 'application/xhtml+xml',
- '.xhtml' => 'application/xhtml+xml',
- '.xla' => 'application/vnd.ms-excel',
- '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
- '.xlk' => 'application/vnd.ms-excel',
- '.xll' => 'application/vnd.ms-excel',
- '.xlm' => 'application/vnd.ms-excel',
- '.xls' => 'application/vnd.ms-excel',
- '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
- '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
- '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
- '.xlt' => 'application/vnd.ms-excel',
- '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
- '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
- '.xlw' => 'application/vnd.ms-excel',
- '.xml' => 'text/xml',
- '.xps' => 'application/vnd.ms-xpsdocument',
- '.xsl' => 'text/xml',
- ];
-
- /**
- * IE versions which have been analysed to bring you this class, and for
- * which some substantive difference exists. These will appear as keys
- * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
- */
- protected $versions = [ 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ];
-
- /**
- * Type table with versions expanded
- */
- protected $typeTable = [];
-
- /** constructor */
- function __construct() {
- // Construct versioned type arrays from the base type array plus additions
- $types = $this->baseTypeTable;
- foreach ( $this->versions as $version ) {
- if ( isset( $this->addedTypes[$version] ) ) {
- foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
- $types[$format] = array_merge( $types[$format], $addedTypes );
- }
- }
- $this->typeTable[$version] = $types;
- }
- }
-
- /**
- * Get the MIME types from getMimesFromData(), but convert the result from IE's
- * idiosyncratic private types into something other apps will understand.
- *
- * @param string $fileName the file name (unused at present)
- * @param string $chunk the first 256 bytes of the file
- * @param string $proposed the MIME type proposed by the server
- *
- * @return Array: map of IE version to detected MIME type
- */
- public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
- $types = $this->getMimesFromData( $fileName, $chunk, $proposed );
- $types = array_map( [ $this, 'translateMimeType' ], $types );
- return $types;
- }
-
- /**
- * Translate a MIME type from IE's idiosyncratic private types into
- * more commonly understood type strings
- * @param $type
- * @return string
- */
- public function translateMimeType( $type ) {
- static $table = [
- 'image/pjpeg' => 'image/jpeg',
- 'image/x-png' => 'image/png',
- 'image/x-wmf' => 'application/x-msmetafile',
- 'image/bmp' => 'image/x-bmp',
- 'application/x-zip-compressed' => 'application/zip',
- 'application/x-compressed' => 'application/x-compress',
- 'application/x-gzip-compressed' => 'application/x-gzip',
- 'audio/mid' => 'audio/midi',
- ];
- if ( isset( $table[$type] ) ) {
- $type = $table[$type];
- }
- return $type;
- }
-
- /**
- * Get the untranslated MIME types for all known versions
- *
- * @param string $fileName the file name (unused at present)
- * @param string $chunk the first 256 bytes of the file
- * @param string $proposed the MIME type proposed by the server
- *
- * @return Array: map of IE version to detected MIME type
- */
- public function getMimesFromData( $fileName, $chunk, $proposed ) {
- $types = [];
- foreach ( $this->versions as $version ) {
- $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
- }
- return $types;
- }
-
- /**
- * Get the MIME type for a given named version
- * @param $version
- * @param $fileName
- * @param $chunk
- * @param $proposed
- * @return bool|string
- */
- protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
- // Strip text after a semicolon
- $semiPos = strpos( $proposed, ';' );
- if ( $semiPos !== false ) {
- $proposed = substr( $proposed, 0, $semiPos );
- }
-
- $proposedFormat = $this->getDataFormat( $version, $proposed );
- if ( $proposedFormat == 'unknown'
- && $proposed != 'multipart/mixed'
- && $proposed != 'multipart/x-mixed-replace' )
- {
- return $proposed;
- }
- if ( strval( $chunk ) === '' ) {
- return $proposed;
- }
-
- // Truncate chunk at 255 bytes
- $chunk = substr( $chunk, 0, 255 );
-
- // IE does the Check*Headers() calls last, and instead does the following image
- // type checks by directly looking for the magic numbers. What I do here should
- // have the same effect since the magic number checks are identical in both cases.
- $result = $this->sampleData( $version, $chunk );
- $sampleFound = $result['found'];
- $counters = $result['counters'];
- $binaryType = $this->checkBinaryHeaders( $version, $chunk );
- $textType = $this->checkTextHeaders( $version, $chunk );
-
- if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
- return 'text/html';
- }
- if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
- return 'image/gif';
- }
- if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
- && $binaryType == 'image/pjpeg' )
- {
- return $proposed;
- }
- // PNG check added in IE 7
- if ( $version >= 'ie07'
- && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
- && $binaryType == 'image/x-png' )
- {
- return $proposed;
- }
-
- // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
- if ( isset( $sampleFound['cdf'] ) ) {
- return 'application/x-cdf';
- }
-
- // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
- // previous versions
- if ( isset( $sampleFound['rss'] ) ) {
- return 'application/rss+xml';
- }
- if ( isset( $sampleFound['rdf-tag'] )
- && isset( $sampleFound['rdf-url'] )
- && isset( $sampleFound['rdf-purl'] ) )
- {
- return 'application/rss+xml';
- }
- if ( isset( $sampleFound['atom'] ) ) {
- return 'application/atom+xml';
- }
-
- if ( isset( $sampleFound['xml'] ) ) {
- // TODO: I'm not sure under what circumstances this flag is enabled
- if ( strpos( $version, 'strict' ) !== false ) {
- if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
- return 'text/xml';
- }
- } else {
- return 'text/xml';
- }
- }
- if ( isset( $sampleFound['html'] ) ) {
- // TODO: I'm not sure under what circumstances this flag is enabled
- if ( strpos( $version, 'nohtml' ) !== false ) {
- if ( $proposed == 'text/plain' ) {
- return 'text/html';
- }
- } else {
- return 'text/html';
- }
- }
- if ( isset( $sampleFound['xbm'] ) ) {
- return 'image/x-bitmap';
- }
- if ( isset( $sampleFound['binhex'] ) ) {
- return 'application/macbinhex40';
- }
- if ( isset( $sampleFound['scriptlet'] ) ) {
- if ( strpos( $version, 'strict' ) !== false ) {
- if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
- return 'text/scriptlet';
- }
- } else {
- return 'text/scriptlet';
- }
- }
-
- // Freaky heuristics to determine if the data is text or binary
- // The heuristic is of course broken for non-ASCII text
- if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
- < ( $counters['ctrl'] + $counters['high'] ) * 16 )
- {
- $kindOfBinary = true;
- $type = $binaryType ? $binaryType : $textType;
- if ( $type === false ) {
- $type = 'application/octet-stream';
- }
- } else {
- $kindOfBinary = false;
- $type = $textType ? $textType : $binaryType;
- if ( $type === false ) {
- $type = 'text/plain';
- }
- }
-
- // Check if the output format is ambiguous
- // This generally means that detection failed, real types aren't ambiguous
- $detectedFormat = $this->getDataFormat( $version, $type );
- if ( $detectedFormat != 'ambiguous' ) {
- return $type;
- }
-
- if ( $proposedFormat != 'ambiguous' ) {
- // FormatAgreesWithData()
- if ( $proposedFormat == 'text' && !$kindOfBinary ) {
- return $proposed;
- }
- if ( $proposedFormat == 'binary' && $kindOfBinary ) {
- return $proposed;
- }
- if ( $proposedFormat == 'html' ) {
- return $proposed;
- }
- }
-
- // Find a MIME type by searching the registry for the file extension.
- $dotPos = strrpos( $fileName, '.' );
- if ( $dotPos === false ) {
- return $type;
- }
- $ext = substr( $fileName, $dotPos );
- if ( isset( $this->registry[$ext] ) ) {
- return $this->registry[$ext];
- }
-
- // TODO: If the extension has an application registered to it, IE will return
- // application/octet-stream. We'll skip that, so we could erroneously
- // return text/plain or application/x-netcdf where application/octet-stream
- // would be correct.
-
- return $type;
- }
-
- /**
- * Check for text headers at the start of the chunk
- * Confirmed same in 5 and 7.
- * @param $version
- * @param $chunk
- * @return bool|string
- */
- private function checkTextHeaders( $version, $chunk ) {
- $chunk2 = substr( $chunk, 0, 2 );
- $chunk4 = substr( $chunk, 0, 4 );
- $chunk5 = substr( $chunk, 0, 5 );
- if ( $chunk4 == '%PDF' ) {
- return 'application/pdf';
- }
- if ( $chunk2 == '%!' ) {
- return 'application/postscript';
- }
- if ( $chunk5 == '{\\rtf' ) {
- return 'text/richtext';
- }
- if ( $chunk5 == 'begin' ) {
- return 'application/base64';
- }
- return false;
- }
-
- /**
- * Check for binary headers at the start of the chunk
- * Confirmed same in 5 and 7.
- * @param $version
- * @param $chunk
- * @return bool|string
- */
- private function checkBinaryHeaders( $version, $chunk ) {
- $chunk2 = substr( $chunk, 0, 2 );
- $chunk3 = substr( $chunk, 0, 3 );
- $chunk4 = substr( $chunk, 0, 4 );
- $chunk5 = substr( $chunk, 0, 5 );
- $chunk5uc = strtoupper( $chunk5 );
- $chunk8 = substr( $chunk, 0, 8 );
- if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) {
- return 'image/gif';
- }
- if ( $chunk2 == "\xff\xd8" ) {
- return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
- }
-
- if ( $chunk2 == 'BM'
- && substr( $chunk, 6, 2 ) == "\000\000"
- && substr( $chunk, 8, 2 ) == "\000\000" )
- {
- return 'image/bmp'; // another non-standard MIME
- }
- if ( $chunk4 == 'RIFF'
- && substr( $chunk, 8, 4 ) == 'WAVE' )
- {
- return 'audio/wav';
- }
- // These were integer literals in IE
- // Perhaps the author was not sure what the target endianness was
- if ( $chunk4 == ".sd\000"
- || $chunk4 == ".snd"
- || $chunk4 == "\000ds."
- || $chunk4 == "dns." )
- {
- return 'audio/basic';
- }
- if ( $chunk3 == "MM\000" ) {
- return 'image/tiff';
- }
- if ( $chunk2 == 'MZ' ) {
- return 'application/x-msdownload';
- }
- if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
- return 'image/x-png'; // [sic]
- }
- if ( strlen( $chunk ) >= 5 ) {
- $byte2 = ord( $chunk[2] );
- $byte4 = ord( $chunk[4] );
- if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
- return 'image/x-jg';
- }
- }
- // More endian confusion?
- if ( $chunk4 == 'MROF' ) {
- return 'audio/x-aiff';
- }
- $chunk4_8 = substr( $chunk, 8, 4 );
- if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
- return 'audio/x-aiff';
- }
- if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
- return 'video/avi';
- }
- if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
- return 'video/mpeg';
- }
- if ( $chunk4 == "\001\000\000\000"
- && substr( $chunk, 40, 4 ) == ' EMF' )
- {
- return 'image/x-emf';
- }
- if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
- return 'image/x-wmf';
- }
- if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
- return 'application/java';
- }
- if ( $chunk2 == 'PK' ) {
- return 'application/x-zip-compressed';
- }
- if ( $chunk2 == "\x1f\x9d" ) {
- return 'application/x-compressed';
- }
- if ( $chunk2 == "\x1f\x8b" ) {
- return 'application/x-gzip-compressed';
- }
- // Skip redundant check for ZIP
- if ( $chunk5 == "MThd\000" ) {
- return 'audio/mid';
- }
- if ( $chunk4 == '%PDF' ) {
- return 'application/pdf';
- }
- return false;
- }
-
- /**
- * Do heuristic checks on the bulk of the data sample.
- * Search for HTML tags.
- * @param $version
- * @param $chunk
- * @return array
- */
- protected function sampleData( $version, $chunk ) {
- $found = [];
- $counters = [
- 'ctrl' => 0,
- 'high' => 0,
- 'low' => 0,
- 'lf' => 0,
- 'cr' => 0,
- 'ff' => 0
- ];
- $htmlTags = [
- 'html',
- 'head',
- 'title',
- 'body',
- 'script',
- 'a href',
- 'pre',
- 'img',
- 'plaintext',
- 'table'
- ];
- $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
- $rdfPurl = 'http://purl.org/rss/1.0/';
- $xbmMagic1 = '#define';
- $xbmMagic2 = '_width';
- $xbmMagic3 = '_bits';
- $binhexMagic = 'converted with BinHex';
- $chunkLength = strlen( $chunk );
-
- for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
- $curChar = $chunk[$offset];
- if ( $curChar == "\x0a" ) {
- $counters['lf']++;
- continue;
- } elseif ( $curChar == "\x0d" ) {
- $counters['cr']++;
- continue;
- } elseif ( $curChar == "\x0c" ) {
- $counters['ff']++;
- continue;
- } elseif ( $curChar == "\t" ) {
- $counters['low']++;
- continue;
- } elseif ( ord( $curChar ) < 32 ) {
- $counters['ctrl']++;
- continue;
- } elseif ( ord( $curChar ) >= 128 ) {
- $counters['high']++;
- continue;
- }
-
- $counters['low']++;
- if ( $curChar == '<' ) {
- // XML
- $remainder = substr( $chunk, $offset + 1 );
- if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
- $nextChar = substr( $chunk, $offset + 5, 1 );
- if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
- $found['xml'] = true;
- }
- }
- // Scriptlet (JSP)
- if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
- $found['scriptlet'] = true;
- break;
- }
- // HTML
- foreach ( $htmlTags as $tag ) {
- if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
- $found['html'] = true;
- }
- }
- // Skip broken check for additional tags (HR etc.)
-
- // CHANNEL replaced by RSS, RDF and FEED in IE 7
- if ( $version < 'ie07' ) {
- if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
- $found['cdf'] = true;
- }
- } else {
- // RSS
- if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
- $found['rss'] = true;
- break; // return from SampleData
- }
- if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
- $found['rdf-tag'] = true;
- // no break
- }
- if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
- $found['atom'] = true;
- break;
- }
- }
- continue;
- }
- // Skip broken check for -->
-
- // RSS URL checks
- // For some reason both URLs must appear before it is recognised
- $remainder = substr( $chunk, $offset );
- if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
- $found['rdf-url'] = true;
- if ( isset( $found['rdf-tag'] )
- && isset( $found['rdf-purl'] ) ) // [sic]
- {
- break;
- }
- continue;
- }
-
- if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
- if ( isset( $found['rdf-tag'] )
- && isset( $found['rdf-url'] ) ) // [sic]
- {
- break;
- }
- continue;
- }
-
- // XBM checks
- if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
- $found['xbm1'] = true;
- continue;
- }
- if ( $curChar == '_' ) {
- if ( isset( $found['xbm2'] ) ) {
- if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
- $found['xbm'] = true;
- break;
- }
- } elseif ( isset( $found['xbm1'] ) ) {
- if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
- $found['xbm2'] = true;
- }
- }
- }
-
- // BinHex
- if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
- $found['binhex'] = true;
- }
- }
- return [ 'found' => $found, 'counters' => $counters ];
- }
-
- /**
- * @param $version
- * @param $type
- * @return int|string
- */
- protected function getDataFormat( $version, $type ) {
- $types = $this->typeTable[$version];
- if ( $type == '(null)' || strval( $type ) === '' ) {
- return 'ambiguous';
- }
- foreach ( $types as $format => $list ) {
- if ( in_array( $type, $list ) ) {
- return $format;
- }
- }
- return 'unknown';
- }
-}
+++ /dev/null
-<?php
-/**
- * XML syntax and type checker.
- *
- * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
- * more control over the expansion of XML entities. When passed to the
- * callback, entities will be fully expanded, but may report the XML is
- * invalid if expanding the entities are likely to cause a DoS.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-class XmlTypeCheck {
- /**
- * Will be set to true or false to indicate whether the file is
- * well-formed XML. Note that this doesn't check schema validity.
- */
- public $wellFormed = null;
-
- /**
- * Will be set to true if the optional element filter returned
- * a match at some point.
- */
- public $filterMatch = false;
-
- /**
- * Will contain the type of filter hit if the optional element filter returned
- * a match at some point.
- * @var mixed
- */
- public $filterMatchType = false;
-
- /**
- * Name of the document's root element, including any namespace
- * as an expanded URL.
- */
- public $rootElement = '';
-
- /**
- * A stack of strings containing the data of each xml element as it's processed. Append
- * data to the top string of the stack, then pop off the string and process it when the
- * element is closed.
- */
- protected $elementData = [];
-
- /**
- * A stack of element names and attributes, as we process them.
- */
- protected $elementDataContext = [];
-
- /**
- * Current depth of the data stack.
- */
- protected $stackDepth = 0;
-
- /**
- * Additional parsing options
- */
- private $parserOptions = [
- 'processing_instruction_handler' => '',
- ];
-
- /**
- * @param string $input a filename or string containing the XML element
- * @param callable $filterCallback (optional)
- * Function to call to do additional custom validity checks from the
- * SAX element handler event. This gives you access to the element
- * namespace, name, attributes, and text contents.
- * Filter should return 'true' to toggle on $this->filterMatch
- * @param bool $isFile (optional) indicates if the first parameter is a
- * filename (default, true) or if it is a string (false)
- * @param array $options list of additional parsing options:
- * processing_instruction_handler: Callback for xml_set_processing_instruction_handler
- */
- function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
- $this->filterCallback = $filterCallback;
- $this->parserOptions = array_merge( $this->parserOptions, $options );
- $this->validateFromInput( $input, $isFile );
- }
-
- /**
- * Alternative constructor: from filename
- *
- * @param string $fname the filename of an XML document
- * @param callable $filterCallback (optional)
- * Function to call to do additional custom validity checks from the
- * SAX element handler event. This gives you access to the element
- * namespace, name, and attributes, but not to text contents.
- * Filter should return 'true' to toggle on $this->filterMatch
- * @return XmlTypeCheck
- */
- public static function newFromFilename( $fname, $filterCallback = null ) {
- return new self( $fname, $filterCallback, true );
- }
-
- /**
- * Alternative constructor: from string
- *
- * @param string $string a string containing an XML element
- * @param callable $filterCallback (optional)
- * Function to call to do additional custom validity checks from the
- * SAX element handler event. This gives you access to the element
- * namespace, name, and attributes, but not to text contents.
- * Filter should return 'true' to toggle on $this->filterMatch
- * @return XmlTypeCheck
- */
- public static function newFromString( $string, $filterCallback = null ) {
- return new self( $string, $filterCallback, false );
- }
-
- /**
- * Get the root element. Simple accessor to $rootElement
- *
- * @return string
- */
- public function getRootElement() {
- return $this->rootElement;
- }
-
- /**
- * @param string $fname the filename
- */
- private function validateFromInput( $xml, $isFile ) {
- $reader = new XMLReader();
- if ( $isFile ) {
- $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
- } else {
- $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
- }
- if ( $s !== true ) {
- // Couldn't open the XML
- $this->wellFormed = false;
- } else {
- $oldDisable = libxml_disable_entity_loader( true );
- $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
- try {
- $this->validate( $reader );
- } catch ( Exception $e ) {
- // Calling this malformed, because we didn't parse the whole
- // thing. Maybe just an external entity refernce.
- $this->wellFormed = false;
- $reader->close();
- libxml_disable_entity_loader( $oldDisable );
- throw $e;
- }
- $reader->close();
- libxml_disable_entity_loader( $oldDisable );
- }
- }
-
- private function readNext( XMLReader $reader ) {
- set_error_handler( [ $this, 'XmlErrorHandler' ] );
- $ret = $reader->read();
- restore_error_handler();
- return $ret;
- }
-
- public function XmlErrorHandler( $errno, $errstr ) {
- $this->wellFormed = false;
- }
-
- private function validate( $reader ) {
-
- // First, move through anything that isn't an element, and
- // handle any processing instructions with the callback
- do {
- if ( !$this->readNext( $reader ) ) {
- // Hit the end of the document before any elements
- $this->wellFormed = false;
- return;
- }
- if ( $reader->nodeType === XMLReader::PI ) {
- $this->processingInstructionHandler( $reader->name, $reader->value );
- }
- } while ( $reader->nodeType != XMLReader::ELEMENT );
-
- // Process the rest of the document
- do {
- switch ( $reader->nodeType ) {
- case XMLReader::ELEMENT:
- $name = $this->expandNS(
- $reader->name,
- $reader->namespaceURI
- );
- if ( $this->rootElement === '' ) {
- $this->rootElement = $name;
- }
- $empty = $reader->isEmptyElement;
- $attrs = $this->getAttributesArray( $reader );
- $this->elementOpen( $name, $attrs );
- if ( $empty ) {
- $this->elementClose();
- }
- break;
-
- case XMLReader::END_ELEMENT:
- $this->elementClose();
- break;
-
- case XMLReader::WHITESPACE:
- case XMLReader::SIGNIFICANT_WHITESPACE:
- case XMLReader::CDATA:
- case XMLReader::TEXT:
- $this->elementData( $reader->value );
- break;
-
- case XMLReader::ENTITY_REF:
- // Unexpanded entity (maybe external?),
- // don't send to the filter (xml_parse didn't)
- break;
-
- case XMLReader::COMMENT:
- // Don't send to the filter (xml_parse didn't)
- break;
-
- case XMLReader::PI:
- // Processing instructions can happen after the header too
- $this->processingInstructionHandler(
- $reader->name,
- $reader->value
- );
- break;
- default:
- // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
- // NOTATION, or XML_DECLARATION
- // xml_parse didn't send these to the filter, so we won't.
- }
-
- } while ( $this->readNext( $reader ) );
-
- if ( $this->stackDepth !== 0 ) {
- $this->wellFormed = false;
- } elseif ( $this->wellFormed === null ) {
- $this->wellFormed = true;
- }
-
- }
-
- /**
- * Get all of the attributes for an XMLReader's current node
- * @param $r XMLReader
- * @return array of attributes
- */
- private function getAttributesArray( XMLReader $r ) {
- $attrs = [];
- while ( $r->moveToNextAttribute() ) {
- if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
- // XMLReader treats xmlns attributes as normal
- // attributes, while xml_parse doesn't
- continue;
- }
- $name = $this->expandNS( $r->name, $r->namespaceURI );
- $attrs[$name] = $r->value;
- }
- return $attrs;
- }
-
- /**
- * @param $name element or attribute name, maybe with a full or short prefix
- * @param $namespaceURI the namespaceURI
- * @return string the name prefixed with namespaceURI
- */
- private function expandNS( $name, $namespaceURI ) {
- if ( $namespaceURI ) {
- $parts = explode( ':', $name );
- $localname = array_pop( $parts );
- return "$namespaceURI:$localname";
- }
- return $name;
- }
-
- /**
- * @param $name
- * @param $attribs
- */
- private function elementOpen( $name, $attribs ) {
- $this->elementDataContext[] = [ $name, $attribs ];
- $this->elementData[] = '';
- $this->stackDepth++;
- }
-
- /**
- */
- private function elementClose() {
- list( $name, $attribs ) = array_pop( $this->elementDataContext );
- $data = array_pop( $this->elementData );
- $this->stackDepth--;
- $callbackReturn = false;
-
- if ( is_callable( $this->filterCallback ) ) {
- $callbackReturn = call_user_func(
- $this->filterCallback,
- $name,
- $attribs,
- $data
- );
- }
- if ( $callbackReturn ) {
- // Filter hit!
- $this->filterMatch = true;
- $this->filterMatchType = $callbackReturn;
- }
- }
-
- /**
- * @param $data
- */
- private function elementData( $data ) {
- // Collect any data here, and we'll run the callback in elementClose
- $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
- }
-
- /**
- * @param $target
- * @param $data
- */
- private function processingInstructionHandler( $target, $data ) {
- $callbackReturn = false;
- if ( $this->parserOptions['processing_instruction_handler'] ) {
- $callbackReturn = call_user_func(
- $this->parserOptions['processing_instruction_handler'],
- $target,
- $data
- );
- }
- if ( $callbackReturn ) {
- // Filter hit!
- $this->filterMatch = true;
- $this->filterMatchType = $callbackReturn;
- }
- }
-}
--- /dev/null
+<?php
+/**
+ * Simulation of Microsoft Internet Explorer's MIME type detection algorithm.
+ *
+ * @file
+ * @todo Define the exact license of this file.
+ */
+
+/**
+ * This class simulates Microsoft Internet Explorer's terribly broken and
+ * insecure MIME type detection algorithm. It can be used to check web uploads
+ * with an apparently safe type, to see if IE will reinterpret them to produce
+ * something dangerous.
+ *
+ * It is full of bugs and strange design choices, and should not under any
+ * circumstances be used to determine a MIME type to present to a user or
+ * client. (Apple Safari developers, this means you too.)
+ *
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
+ * attempted to ensure that this code works in exactly the same way as Internet
+ * Explorer, it does not share any source code, or creative choices such as
+ * variable names, thus I (Tim Starling) claim copyright on it.
+ *
+ * It may be redistributed without restriction. To aid reuse, this class does
+ * not depend on any MediaWiki module.
+ */
+class IEContentAnalyzer {
+ /**
+ * Relevant data taken from the type table in IE 5
+ */
+ protected $baseTypeTable = [
+ 'ambiguous' /*1*/ => [
+ 'text/plain',
+ 'application/octet-stream',
+ 'application/x-netcdf', // [sic]
+ ],
+ 'text' /*3*/ => [
+ 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
+ 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
+ ],
+ 'binary' /*4*/ => [
+ 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
+ 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
+ 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
+ 'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
+ 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
+ 'application/x-msdownload'
+ ],
+ 'html' /*5*/ => [ 'text/html' ],
+ ];
+
+ /**
+ * Changes to the type table in later versions of IE
+ */
+ protected $addedTypes = [
+ 'ie07' => [
+ 'text' => [ 'text/xml', 'application/xml' ]
+ ],
+ ];
+
+ /**
+ * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
+ * typical Windows installation.
+ *
+ * Used for extension to MIME type mapping if detection fails.
+ */
+ protected $registry = [
+ '.323' => 'text/h323',
+ '.3g2' => 'video/3gpp2',
+ '.3gp' => 'video/3gpp',
+ '.3gp2' => 'video/3gpp2',
+ '.3gpp' => 'video/3gpp',
+ '.aac' => 'audio/aac',
+ '.ac3' => 'audio/ac3',
+ '.accda' => 'application/msaccess',
+ '.accdb' => 'application/msaccess',
+ '.accdc' => 'application/msaccess',
+ '.accde' => 'application/msaccess',
+ '.accdr' => 'application/msaccess',
+ '.accdt' => 'application/msaccess',
+ '.ade' => 'application/msaccess',
+ '.adp' => 'application/msaccess',
+ '.adts' => 'audio/aac',
+ '.ai' => 'application/postscript',
+ '.aif' => 'audio/aiff',
+ '.aifc' => 'audio/aiff',
+ '.aiff' => 'audio/aiff',
+ '.amc' => 'application/x-mpeg',
+ '.application' => 'application/x-ms-application',
+ '.asf' => 'video/x-ms-asf',
+ '.asx' => 'video/x-ms-asf',
+ '.au' => 'audio/basic',
+ '.avi' => 'video/avi',
+ '.bmp' => 'image/bmp',
+ '.caf' => 'audio/x-caf',
+ '.cat' => 'application/vnd.ms-pki.seccat',
+ '.cbo' => 'application/sha',
+ '.cdda' => 'audio/aiff',
+ '.cer' => 'application/x-x509-ca-cert',
+ '.conf' => 'text/plain',
+ '.crl' => 'application/pkix-crl',
+ '.crt' => 'application/x-x509-ca-cert',
+ '.css' => 'text/css',
+ '.csv' => 'application/vnd.ms-excel',
+ '.der' => 'application/x-x509-ca-cert',
+ '.dib' => 'image/bmp',
+ '.dif' => 'video/x-dv',
+ '.dll' => 'application/x-msdownload',
+ '.doc' => 'application/msword',
+ '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
+ '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+ '.dot' => 'application/msword',
+ '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
+ '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
+ '.dv' => 'video/x-dv',
+ '.dwfx' => 'model/vnd.dwfx+xps',
+ '.edn' => 'application/vnd.adobe.edn',
+ '.eml' => 'message/rfc822',
+ '.eps' => 'application/postscript',
+ '.etd' => 'application/x-ebx',
+ '.exe' => 'application/x-msdownload',
+ '.fdf' => 'application/vnd.fdf',
+ '.fif' => 'application/fractals',
+ '.gif' => 'image/gif',
+ '.gsm' => 'audio/x-gsm',
+ '.hqx' => 'application/mac-binhex40',
+ '.hta' => 'application/hta',
+ '.htc' => 'text/x-component',
+ '.htm' => 'text/html',
+ '.html' => 'text/html',
+ '.htt' => 'text/webviewhtml',
+ '.hxa' => 'application/xml',
+ '.hxc' => 'application/xml',
+ '.hxd' => 'application/octet-stream',
+ '.hxe' => 'application/xml',
+ '.hxf' => 'application/xml',
+ '.hxh' => 'application/octet-stream',
+ '.hxi' => 'application/octet-stream',
+ '.hxk' => 'application/xml',
+ '.hxq' => 'application/octet-stream',
+ '.hxr' => 'application/octet-stream',
+ '.hxs' => 'application/octet-stream',
+ '.hxt' => 'application/xml',
+ '.hxv' => 'application/xml',
+ '.hxw' => 'application/octet-stream',
+ '.ico' => 'image/x-icon',
+ '.iii' => 'application/x-iphone',
+ '.ins' => 'application/x-internet-signup',
+ '.iqy' => 'text/x-ms-iqy',
+ '.isp' => 'application/x-internet-signup',
+ '.jfif' => 'image/jpeg',
+ '.jnlp' => 'application/x-java-jnlp-file',
+ '.jpe' => 'image/jpeg',
+ '.jpeg' => 'image/jpeg',
+ '.jpg' => 'image/jpeg',
+ '.jtx' => 'application/x-jtx+xps',
+ '.latex' => 'application/x-latex',
+ '.log' => 'text/plain',
+ '.m1v' => 'video/mpeg',
+ '.m2v' => 'video/mpeg',
+ '.m3u' => 'audio/x-mpegurl',
+ '.mac' => 'image/x-macpaint',
+ '.man' => 'application/x-troff-man',
+ '.mda' => 'application/msaccess',
+ '.mdb' => 'application/msaccess',
+ '.mde' => 'application/msaccess',
+ '.mfp' => 'application/x-shockwave-flash',
+ '.mht' => 'message/rfc822',
+ '.mhtml' => 'message/rfc822',
+ '.mid' => 'audio/mid',
+ '.midi' => 'audio/mid',
+ '.mod' => 'video/mpeg',
+ '.mov' => 'video/quicktime',
+ '.mp2' => 'video/mpeg',
+ '.mp2v' => 'video/mpeg',
+ '.mp3' => 'audio/mpeg',
+ '.mp4' => 'video/mp4',
+ '.mpa' => 'video/mpeg',
+ '.mpe' => 'video/mpeg',
+ '.mpeg' => 'video/mpeg',
+ '.mpf' => 'application/vnd.ms-mediapackage',
+ '.mpg' => 'video/mpeg',
+ '.mpv2' => 'video/mpeg',
+ '.mqv' => 'video/quicktime',
+ '.NMW' => 'application/nmwb',
+ '.nws' => 'message/rfc822',
+ '.odc' => 'text/x-ms-odc',
+ '.ols' => 'application/vnd.ms-publisher',
+ '.p10' => 'application/pkcs10',
+ '.p12' => 'application/x-pkcs12',
+ '.p7b' => 'application/x-pkcs7-certificates',
+ '.p7c' => 'application/pkcs7-mime',
+ '.p7m' => 'application/pkcs7-mime',
+ '.p7r' => 'application/x-pkcs7-certreqresp',
+ '.p7s' => 'application/pkcs7-signature',
+ '.pct' => 'image/pict',
+ '.pdf' => 'application/pdf',
+ '.pdx' => 'application/vnd.adobe.pdx',
+ '.pfx' => 'application/x-pkcs12',
+ '.pic' => 'image/pict',
+ '.pict' => 'image/pict',
+ '.pinstall' => 'application/x-picasa-detect',
+ '.pko' => 'application/vnd.ms-pki.pko',
+ '.png' => 'image/png',
+ '.pnt' => 'image/x-macpaint',
+ '.pntg' => 'image/x-macpaint',
+ '.pot' => 'application/vnd.ms-powerpoint',
+ '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
+ '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
+ '.ppa' => 'application/vnd.ms-powerpoint',
+ '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
+ '.pps' => 'application/vnd.ms-powerpoint',
+ '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
+ '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
+ '.ppt' => 'application/vnd.ms-powerpoint',
+ '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
+ '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+ '.prf' => 'application/pics-rules',
+ '.ps' => 'application/postscript',
+ '.pub' => 'application/vnd.ms-publisher',
+ '.pwz' => 'application/vnd.ms-powerpoint',
+ '.py' => 'text/plain',
+ '.pyw' => 'text/plain',
+ '.qht' => 'text/x-html-insertion',
+ '.qhtm' => 'text/x-html-insertion',
+ '.qt' => 'video/quicktime',
+ '.qti' => 'image/x-quicktime',
+ '.qtif' => 'image/x-quicktime',
+ '.qtl' => 'application/x-quicktimeplayer',
+ '.rat' => 'application/rat-file',
+ '.rmf' => 'application/vnd.adobe.rmf',
+ '.rmi' => 'audio/mid',
+ '.rqy' => 'text/x-ms-rqy',
+ '.rtf' => 'application/msword',
+ '.sct' => 'text/scriptlet',
+ '.sd2' => 'audio/x-sd2',
+ '.sdp' => 'application/sdp',
+ '.shtml' => 'text/html',
+ '.sit' => 'application/x-stuffit',
+ '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
+ '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
+ '.slk' => 'application/vnd.ms-excel',
+ '.snd' => 'audio/basic',
+ '.so' => 'application/x-apachemodule',
+ '.sol' => 'text/plain',
+ '.sor' => 'text/plain',
+ '.spc' => 'application/x-pkcs7-certificates',
+ '.spl' => 'application/futuresplash',
+ '.sst' => 'application/vnd.ms-pki.certstore',
+ '.stl' => 'application/vnd.ms-pki.stl',
+ '.swf' => 'application/x-shockwave-flash',
+ '.thmx' => 'application/vnd.ms-officetheme',
+ '.tif' => 'image/tiff',
+ '.tiff' => 'image/tiff',
+ '.txt' => 'text/plain',
+ '.uls' => 'text/iuls',
+ '.vcf' => 'text/x-vcard',
+ '.vdx' => 'application/vnd.ms-visio.viewer',
+ '.vsd' => 'application/vnd.ms-visio.viewer',
+ '.vss' => 'application/vnd.ms-visio.viewer',
+ '.vst' => 'application/vnd.ms-visio.viewer',
+ '.vsx' => 'application/vnd.ms-visio.viewer',
+ '.vtx' => 'application/vnd.ms-visio.viewer',
+ '.wav' => 'audio/wav',
+ '.wax' => 'audio/x-ms-wax',
+ '.wbk' => 'application/msword',
+ '.wdp' => 'image/vnd.ms-photo',
+ '.wiz' => 'application/msword',
+ '.wm' => 'video/x-ms-wm',
+ '.wma' => 'audio/x-ms-wma',
+ '.wmd' => 'application/x-ms-wmd',
+ '.wmv' => 'video/x-ms-wmv',
+ '.wmx' => 'video/x-ms-wmx',
+ '.wmz' => 'application/x-ms-wmz',
+ '.wpl' => 'application/vnd.ms-wpl',
+ '.wsc' => 'text/scriptlet',
+ '.wvx' => 'video/x-ms-wvx',
+ '.xaml' => 'application/xaml+xml',
+ '.xbap' => 'application/x-ms-xbap',
+ '.xdp' => 'application/vnd.adobe.xdp+xml',
+ '.xfdf' => 'application/vnd.adobe.xfdf',
+ '.xht' => 'application/xhtml+xml',
+ '.xhtml' => 'application/xhtml+xml',
+ '.xla' => 'application/vnd.ms-excel',
+ '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
+ '.xlk' => 'application/vnd.ms-excel',
+ '.xll' => 'application/vnd.ms-excel',
+ '.xlm' => 'application/vnd.ms-excel',
+ '.xls' => 'application/vnd.ms-excel',
+ '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
+ '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
+ '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+ '.xlt' => 'application/vnd.ms-excel',
+ '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
+ '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
+ '.xlw' => 'application/vnd.ms-excel',
+ '.xml' => 'text/xml',
+ '.xps' => 'application/vnd.ms-xpsdocument',
+ '.xsl' => 'text/xml',
+ ];
+
+ /**
+ * IE versions which have been analysed to bring you this class, and for
+ * which some substantive difference exists. These will appear as keys
+ * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
+ */
+ protected $versions = [ 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ];
+
+ /**
+ * Type table with versions expanded
+ */
+ protected $typeTable = [];
+
+	/**
+	 * Build the per-version type table.
+	 *
+	 * Versions are walked in the order given by $this->versions and additions
+	 * are cumulative: types added for one version remain in the table of every
+	 * version listed after it.
+	 */
+	function __construct() {
+		$cumulative = $this->baseTypeTable;
+		foreach ( $this->versions as $ieVersion ) {
+			$extras = isset( $this->addedTypes[$ieVersion] )
+				? $this->addedTypes[$ieVersion]
+				: [];
+			foreach ( $extras as $format => $mimeList ) {
+				$cumulative[$format] = array_merge( $cumulative[$format], $mimeList );
+			}
+			// Snapshot of the table as it stands for this version
+			$this->typeTable[$ieVersion] = $cumulative;
+		}
+	}
+
+	/**
+	 * Run getMimesFromData() and pass every detected type through
+	 * translateMimeType(), so that IE's private type names come back as
+	 * names other applications will understand.
+	 *
+	 * @param string $fileName the file name; its extension is used as a
+	 *   last-resort lookup when content detection is inconclusive
+	 * @param string $chunk the leading bytes of the file (only the first 255
+	 *   are examined)
+	 * @param string $proposed the MIME type proposed by the server
+	 *
+	 * @return Array: map of IE version to detected MIME type
+	 */
+	public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
+		$translated = [];
+		$detected = $this->getMimesFromData( $fileName, $chunk, $proposed );
+		foreach ( $detected as $ieVersion => $ieType ) {
+			$translated[$ieVersion] = $this->translateMimeType( $ieType );
+		}
+		return $translated;
+	}
+
+	/**
+	 * Map one of IE's private MIME type names onto its commonly used
+	 * equivalent. Types without an entry in the table are returned unchanged.
+	 *
+	 * @param string $type MIME type as reported by the detection code
+	 * @return string
+	 */
+	public function translateMimeType( $type ) {
+		// IE-specific name => common name
+		static $ieToCommon = [
+			'image/pjpeg' => 'image/jpeg',
+			'image/x-png' => 'image/png',
+			'image/x-wmf' => 'application/x-msmetafile',
+			'image/bmp' => 'image/x-bmp',
+			'application/x-zip-compressed' => 'application/zip',
+			'application/x-compressed' => 'application/x-compress',
+			'application/x-gzip-compressed' => 'application/x-gzip',
+			'audio/mid' => 'audio/midi',
+		];
+		return isset( $ieToCommon[$type] ) ? $ieToCommon[$type] : $type;
+	}
+
+	/**
+	 * Detect the MIME type once per entry in $this->versions, without
+	 * translating IE's private type names.
+	 *
+	 * @param string $fileName the file name; its extension is used as a
+	 *   last-resort lookup when content detection is inconclusive
+	 * @param string $chunk the leading bytes of the file (only the first 255
+	 *   are examined)
+	 * @param string $proposed the MIME type proposed by the server
+	 *
+	 * @return Array: map of IE version to detected MIME type
+	 */
+	public function getMimesFromData( $fileName, $chunk, $proposed ) {
+		$detected = array_map(
+			function ( $ieVersion ) use ( $fileName, $chunk, $proposed ) {
+				return $this->getMimeTypeForVersion( $ieVersion, $fileName, $chunk, $proposed );
+			},
+			$this->versions
+		);
+		// Key each result by the version name it was computed for
+		return array_combine( $this->versions, $detected );
+	}
+
+	/**
+	 * Get the MIME type that one named IE version would settle on.
+	 *
+	 * Checks run in a fixed order and the first conclusive one wins:
+	 *  - a proposed type of unknown format (other than multipart/mixed and
+	 *    multipart/x-mixed-replace), or an empty chunk, returns $proposed as is;
+	 *  - proposed HTML, GIF, JPEG and (IE 7 and later) PNG are confirmed
+	 *    against the sampled data and the binary magic numbers;
+	 *  - markers found by sampleData() (CDF, RSS, Atom, XML, HTML, XBM, BinHex,
+	 *    scriptlet) override the proposed type;
+	 *  - otherwise the text and binary header checks are combined with a
+	 *    control-character heuristic, then the proposed type is considered,
+	 *    then the file extension is looked up in $this->registry.
+	 *
+	 * @param string $version One of the names listed in $this->versions
+	 * @param string $fileName File name; only the part from its last "." onward
+	 *   is used, for the extension lookup in $this->registry
+	 * @param string $chunk Leading bytes of the file; only the first 255 are
+	 *   examined
+	 * @param string $proposed MIME type proposed by the server; anything from
+	 *   the first ";" onward is ignored
+	 * @return bool|string Detected type in IE's own naming (not yet passed
+	 *   through translateMimeType())
+	 */
+	protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
+		// Strip text after a semicolon
+		$semiPos = strpos( $proposed, ';' );
+		if ( $semiPos !== false ) {
+			$proposed = substr( $proposed, 0, $semiPos );
+		}
+
+		$proposedFormat = $this->getDataFormat( $version, $proposed );
+		if ( $proposedFormat == 'unknown'
+			&& $proposed != 'multipart/mixed'
+			&& $proposed != 'multipart/x-mixed-replace' )
+		{
+			return $proposed;
+		}
+		if ( strval( $chunk ) === '' ) {
+			return $proposed;
+		}
+
+		// Truncate chunk at 255 bytes
+		$chunk = substr( $chunk, 0, 255 );
+
+		// IE does the Check*Headers() calls last, and instead does the following image
+		// type checks by directly looking for the magic numbers. What I do here should
+		// have the same effect since the magic number checks are identical in both cases.
+		$result = $this->sampleData( $version, $chunk );
+		$sampleFound = $result['found'];
+		$counters = $result['counters'];
+		$binaryType = $this->checkBinaryHeaders( $version, $chunk );
+		$textType = $this->checkTextHeaders( $version, $chunk );
+
+		if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
+			return 'text/html';
+		}
+		if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
+			return 'image/gif';
+		}
+		if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
+			&& $binaryType == 'image/pjpeg' )
+		{
+			return $proposed;
+		}
+		// PNG check added in IE 7
+		if ( $version >= 'ie07'
+			&& ( $proposed == 'image/x-png' || $proposed == 'image/png' )
+			&& $binaryType == 'image/x-png' )
+		{
+			return $proposed;
+		}
+
+		// CDF was removed in IE 7 so it won't be in $sampleFound for later versions
+		if ( isset( $sampleFound['cdf'] ) ) {
+			return 'application/x-cdf';
+		}
+
+		// RSS and Atom were added in IE 7 so they won't be in $sampleFound for
+		// previous versions
+		if ( isset( $sampleFound['rss'] ) ) {
+			return 'application/rss+xml';
+		}
+		if ( isset( $sampleFound['rdf-tag'] )
+			&& isset( $sampleFound['rdf-url'] )
+			&& isset( $sampleFound['rdf-purl'] ) )
+		{
+			return 'application/rss+xml';
+		}
+		if ( isset( $sampleFound['atom'] ) ) {
+			return 'application/atom+xml';
+		}
+
+		if ( isset( $sampleFound['xml'] ) ) {
+			// TODO: I'm not sure under what circumstances this flag is enabled
+			if ( strpos( $version, 'strict' ) !== false ) {
+				if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
+					return 'text/xml';
+				}
+			} else {
+				return 'text/xml';
+			}
+		}
+		if ( isset( $sampleFound['html'] ) ) {
+			// TODO: I'm not sure under what circumstances this flag is enabled
+			if ( strpos( $version, 'nohtml' ) !== false ) {
+				if ( $proposed == 'text/plain' ) {
+					return 'text/html';
+				}
+			} else {
+				return 'text/html';
+			}
+		}
+		if ( isset( $sampleFound['xbm'] ) ) {
+			return 'image/x-bitmap';
+		}
+		if ( isset( $sampleFound['binhex'] ) ) {
+			return 'application/macbinhex40';
+		}
+		if ( isset( $sampleFound['scriptlet'] ) ) {
+			if ( strpos( $version, 'strict' ) !== false ) {
+				if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
+					return 'text/scriptlet';
+				}
+			} else {
+				return 'text/scriptlet';
+			}
+		}
+
+		// Freaky heuristics to determine if the data is text or binary
+		// The heuristic is of course broken for non-ASCII text
+		if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
+			< ( $counters['ctrl'] + $counters['high'] ) * 16 )
+		{
+			// Looks binary: prefer the binary magic number, fall back to the text one
+			$kindOfBinary = true;
+			$type = $binaryType ? $binaryType : $textType;
+			if ( $type === false ) {
+				$type = 'application/octet-stream';
+			}
+		} else {
+			// Looks like text: prefer the text header, fall back to the binary one
+			$kindOfBinary = false;
+			$type = $textType ? $textType : $binaryType;
+			if ( $type === false ) {
+				$type = 'text/plain';
+			}
+		}
+
+		// Check if the output format is ambiguous
+		// This generally means that detection failed, real types aren't ambiguous
+		$detectedFormat = $this->getDataFormat( $version, $type );
+		if ( $detectedFormat != 'ambiguous' ) {
+			return $type;
+		}
+
+		if ( $proposedFormat != 'ambiguous' ) {
+			// FormatAgreesWithData()
+			if ( $proposedFormat == 'text' && !$kindOfBinary ) {
+				return $proposed;
+			}
+			if ( $proposedFormat == 'binary' && $kindOfBinary ) {
+				return $proposed;
+			}
+			if ( $proposedFormat == 'html' ) {
+				return $proposed;
+			}
+		}
+
+		// Find a MIME type by searching the registry for the file extension.
+		$dotPos = strrpos( $fileName, '.' );
+		if ( $dotPos === false ) {
+			return $type;
+		}
+		$ext = substr( $fileName, $dotPos );
+		if ( isset( $this->registry[$ext] ) ) {
+			return $this->registry[$ext];
+		}
+		// The table stands in for HKEY_CLASSES_ROOT, where key names are matched
+		// without regard to case, so ".HTML" has to hit the ".html" entry. Both
+		// sides are folded because the table itself contains a mixed-case key.
+		$foldedRegistry = array_change_key_case( $this->registry, CASE_LOWER );
+		$foldedExt = strtolower( $ext );
+		if ( isset( $foldedRegistry[$foldedExt] ) ) {
+			return $foldedRegistry[$foldedExt];
+		}
+
+		// TODO: If the extension has an application registered to it, IE will return
+		// application/octet-stream. We'll skip that, so we could erroneously
+		// return text/plain or application/x-netcdf where application/octet-stream
+		// would be correct.
+
+		return $type;
+	}
+
+	/**
+	 * Look for one of the known text-format signatures at the very start of
+	 * the chunk. The original author notes this check was confirmed to be the
+	 * same in IE 5 and IE 7.
+	 *
+	 * @param string $version IE version name (not consulted by this check)
+	 * @param string $chunk Leading bytes of the file
+	 * @return bool|string The matching MIME type, or false if no signature matches
+	 */
+	private function checkTextHeaders( $version, $chunk ) {
+		// Leading magic string => MIME type, tested in this order
+		$signatures = [
+			'%PDF' => 'application/pdf',
+			'%!' => 'application/postscript',
+			'{\\rtf' => 'text/richtext',
+			'begin' => 'application/base64',
+		];
+		foreach ( $signatures as $magic => $mimeType ) {
+			if ( strncmp( $chunk, $magic, strlen( $magic ) ) === 0 ) {
+				return $mimeType;
+			}
+		}
+		return false;
+	}
+
+ /**
+ * Check for binary headers at the start of the chunk
+ * Confirmed same in 5 and 7.
+ *
+ * The signatures are tested in a fixed order and the first match wins.
+ * Several of the returned types are non-standard, as noted inline.
+ *
+ * @param $version Not used by this method
+ * @param $chunk Data sample whose leading bytes are compared against the signatures
+ * @return bool|string The MIME type for the first matching signature, or false if none matches
+ */
+ private function checkBinaryHeaders( $version, $chunk ) {
+ // Prefixes of the lengths needed by the comparisons below
+ $chunk2 = substr( $chunk, 0, 2 );
+ $chunk3 = substr( $chunk, 0, 3 );
+ $chunk4 = substr( $chunk, 0, 4 );
+ $chunk5 = substr( $chunk, 0, 5 );
+ // Only the GIF check is case-insensitive
+ $chunk5uc = strtoupper( $chunk5 );
+ $chunk8 = substr( $chunk, 0, 8 );
+ if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) {
+ return 'image/gif';
+ }
+ if ( $chunk2 == "\xff\xd8" ) {
+ return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
+ }
+
+ // 'BM' followed by zero bytes at offsets 6-9
+ if ( $chunk2 == 'BM'
+ && substr( $chunk, 6, 2 ) == "\000\000"
+ && substr( $chunk, 8, 2 ) == "\000\000" )
+ {
+ return 'image/bmp'; // another non-standard MIME
+ }
+ if ( $chunk4 == 'RIFF'
+ && substr( $chunk, 8, 4 ) == 'WAVE' )
+ {
+ return 'audio/wav';
+ }
+ // These were integer literals in IE
+ // Perhaps the author was not sure what the target endianness was
+ if ( $chunk4 == ".sd\000"
+ || $chunk4 == ".snd"
+ || $chunk4 == "\000ds."
+ || $chunk4 == "dns." )
+ {
+ return 'audio/basic';
+ }
+ if ( $chunk3 == "MM\000" ) {
+ return 'image/tiff';
+ }
+ if ( $chunk2 == 'MZ' ) {
+ return 'application/x-msdownload';
+ }
+ if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
+ return 'image/x-png'; // [sic]
+ }
+ // Offsets 2 and 4 are read directly, so guard against shorter chunks
+ if ( strlen( $chunk ) >= 5 ) {
+ $byte2 = ord( $chunk[2] );
+ $byte4 = ord( $chunk[4] );
+ if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
+ return 'image/x-jg';
+ }
+ }
+ // More endian confusion?
+ if ( $chunk4 == 'MROF' ) {
+ return 'audio/x-aiff';
+ }
+ // Four bytes at offset 8, shared by the AIFF and AVI checks
+ $chunk4_8 = substr( $chunk, 8, 4 );
+ if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
+ return 'audio/x-aiff';
+ }
+ if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
+ return 'video/avi';
+ }
+ if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
+ return 'video/mpeg';
+ }
+ if ( $chunk4 == "\001\000\000\000"
+ && substr( $chunk, 40, 4 ) == ' EMF' )
+ {
+ return 'image/x-emf';
+ }
+ if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
+ return 'image/x-wmf';
+ }
+ if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
+ return 'application/java';
+ }
+ if ( $chunk2 == 'PK' ) {
+ return 'application/x-zip-compressed';
+ }
+ if ( $chunk2 == "\x1f\x9d" ) {
+ return 'application/x-compressed';
+ }
+ if ( $chunk2 == "\x1f\x8b" ) {
+ return 'application/x-gzip-compressed';
+ }
+ // Skip redundant check for ZIP
+ if ( $chunk5 == "MThd\000" ) {
+ return 'audio/mid';
+ }
+ if ( $chunk4 == '%PDF' ) {
+ return 'application/pdf';
+ }
+ return false;
+ }
+
+ /**
+ * Do heuristic checks on the bulk of the data sample.
+ * Search for HTML tags.
+ *
+ * Walks the chunk one byte at a time, adding each byte to one of the
+ * counters and looking for markers of XML, scriptlets, HTML, CDF (versions
+ * below 'ie07') or RSS/RDF/Atom (other versions), XBM and BinHex data.
+ * Some markers end the scan early.
+ *
+ * @param $version Version string; compared against 'ie07' to choose the feed checks
+ * @param $chunk Data sample to scan
+ * @return array Map with two keys: 'found' (marker name => true for each marker seen)
+ * and 'counters' (counts keyed by 'ctrl', 'high', 'low', 'lf', 'cr' and 'ff')
+ */
+ protected function sampleData( $version, $chunk ) {
+ $found = [];
+ $counters = [
+ 'ctrl' => 0,
+ 'high' => 0,
+ 'low' => 0,
+ 'lf' => 0,
+ 'cr' => 0,
+ 'ff' => 0
+ ];
+ // Tag names matched case-insensitively right after a '<'
+ $htmlTags = [
+ 'html',
+ 'head',
+ 'title',
+ 'body',
+ 'script',
+ 'a href',
+ 'pre',
+ 'img',
+ 'plaintext',
+ 'table'
+ ];
+ $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $rdfPurl = 'http://purl.org/rss/1.0/';
+ $xbmMagic1 = '#define';
+ $xbmMagic2 = '_width';
+ $xbmMagic3 = '_bits';
+ $binhexMagic = 'converted with BinHex';
+ $chunkLength = strlen( $chunk );
+
+ for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
+ $curChar = $chunk[$offset];
+ // Line breaks, form feeds, tabs, other control bytes and bytes >= 128
+ // are only counted; the marker checks below are skipped for them.
+ if ( $curChar == "\x0a" ) {
+ $counters['lf']++;
+ continue;
+ } elseif ( $curChar == "\x0d" ) {
+ $counters['cr']++;
+ continue;
+ } elseif ( $curChar == "\x0c" ) {
+ $counters['ff']++;
+ continue;
+ } elseif ( $curChar == "\t" ) {
+ // Tab is counted together with the ordinary low characters
+ $counters['low']++;
+ continue;
+ } elseif ( ord( $curChar ) < 32 ) {
+ $counters['ctrl']++;
+ continue;
+ } elseif ( ord( $curChar ) >= 128 ) {
+ $counters['high']++;
+ continue;
+ }
+
+ $counters['low']++;
+ if ( $curChar == '<' ) {
+ // XML
+ $remainder = substr( $chunk, $offset + 1 );
+ if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
+ // The character after "<?xml" must be ':', a space or a tab
+ $nextChar = substr( $chunk, $offset + 5, 1 );
+ if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
+ $found['xml'] = true;
+ }
+ }
+ // Scriptlet (JSP)
+ if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
+ $found['scriptlet'] = true;
+ break;
+ }
+ // HTML
+ foreach ( $htmlTags as $tag ) {
+ if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
+ $found['html'] = true;
+ }
+ }
+ // Skip broken check for additional tags (HR etc.)
+
+ // CHANNEL replaced by RSS, RDF and FEED in IE 7
+ if ( $version < 'ie07' ) {
+ if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
+ $found['cdf'] = true;
+ }
+ } else {
+ // RSS
+ if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
+ $found['rss'] = true;
+ break; // return from SampleData
+ }
+ if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
+ $found['rdf-tag'] = true;
+ // no break
+ }
+ if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
+ $found['atom'] = true;
+ break;
+ }
+ }
+ // A '<' never takes part in the URL, XBM or BinHex checks below
+ continue;
+ }
+ // Skip broken check for -->
+
+ // RSS URL checks
+ // For some reason both URLs must appear before it is recognised
+ $remainder = substr( $chunk, $offset );
+ if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
+ $found['rdf-url'] = true;
+ // NOTE(review): nothing in this method ever sets 'rdf-purl', so this
+ // break cannot trigger; the original marks the condition [sic].
+ if ( isset( $found['rdf-tag'] )
+ && isset( $found['rdf-purl'] ) ) // [sic]
+ {
+ break;
+ }
+ continue;
+ }
+
+ if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
+ // The purl URL is not recorded in $found; it only ends the scan
+ // when the RDF tag and the RDF URL were both seen earlier.
+ if ( isset( $found['rdf-tag'] )
+ && isset( $found['rdf-url'] ) ) // [sic]
+ {
+ break;
+ }
+ continue;
+ }
+
+ // XBM checks
+ // Three stages in order: '#define' (xbm1), then '_width' (xbm2),
+ // then '_bits', which sets 'xbm' and ends the scan.
+ if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
+ $found['xbm1'] = true;
+ continue;
+ }
+ if ( $curChar == '_' ) {
+ if ( isset( $found['xbm2'] ) ) {
+ if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
+ $found['xbm'] = true;
+ break;
+ }
+ } elseif ( isset( $found['xbm1'] ) ) {
+ if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
+ $found['xbm2'] = true;
+ }
+ }
+ }
+
+ // BinHex
+ // Unlike the other markers this comparison is case-sensitive
+ if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
+ $found['binhex'] = true;
+ }
+ }
+ return [ 'found' => $found, 'counters' => $counters ];
+ }
+
+ /**
+  * Classify a MIME type into one of the data formats listed for a version.
+  *
+  * @param $version Key into $this->typeTable
+  * @param $type MIME type to classify
+  * @return int|string 'ambiguous' when $type is '(null)' or an empty string,
+  *  otherwise the typeTable key whose list contains $type, or 'unknown'
+  *  when no list contains it
+  */
+ protected function getDataFormat( $version, $type ) {
+     $formats = $this->typeTable[$version];
+     $isBlank = ( $type == '(null)' || strval( $type ) === '' );
+     if ( !$isBlank ) {
+         foreach ( $formats as $name => $members ) {
+             if ( in_array( $type, $members ) ) {
+                 return $name;
+             }
+         }
+         return 'unknown';
+     }
+     return 'ambiguous';
+ }
+}
--- /dev/null
+<?php
+/**
+ * Module defining helper functions for detecting and dealing with MIME types.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+use Psr\Log\LoggerAwareInterface;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Implements functions related to MIME types such as detection and mapping to file extension
+ *
+ * @since 1.28
+ */
+class MimeAnalyzer implements LoggerAwareInterface {
+ /** @var string */
+ protected $typeFile;
+ /** @var string */
+ protected $infoFile;
+ /** @var array Map of root element names to XML MIME types */
+ protected $xmlTypes;
+ /** @var callable */
+ protected $initCallback;
+ /** @var callable */
+ protected $detectCallback;
+ /** @var callable */
+ protected $guessCallback;
+ /** @var callable */
+ protected $extCallback;
+ /** @var array Mapping of media types to arrays of MIME types */
+ protected $mediaTypes = null;
+ /** @var array Map of MIME type aliases */
+ protected $mimeTypeAliases = null;
+ /** @var array Map of MIME types to file extensions (as a space separated list) */
+ protected $mimetoExt = null;
+
+ /** @var array Map of file extensions to MIME types (as a space separated list) */
+ public $mExtToMime = null; // legacy name; field accessed by hooks
+
+ /** @var IEContentAnalyzer */
+ protected $IEAnalyzer;
+
+ /** @var string Extra MIME types, set for example by media handling extensions */
+ private $extraTypes = '';
+ /** @var string Extra MIME info, set for example by media handling extensions */
+ private $extraInfo = '';
+
+ /** @var LoggerInterface */
+ private $logger;
+
+ /**
+ * Defines a set of well known MIME types
+ * This is used as a fallback to mime.types files.
+ * An extensive list of well known MIME types is provided by
+ * the file mime.types in the includes directory.
+ *
+ * This list concatenated with mime.types is used to create a MIME <-> ext
+ * map. Each line contains a MIME type followed by a space separated list of
+ * extensions. If multiple extensions for a single MIME type exist or if
+ * multiple MIME types exist for a single extension then in most cases
+ * MediaWiki assumes that the first extension following the MIME type is the
+ * canonical extension, and the first time a MIME type appears for a certain
+ * extension is considered the canonical MIME type.
+ *
+ * (Note that appending the type file list to the end of self::$wellKnownTypes
+ * sucks because you can't redefine canonical types. This could be fixed by
+ * appending self::$wellKnownTypes behind type file list, but who knows
+ * what will break? In practice this probably isn't a problem anyway -- Bryan)
+ */
+ protected static $wellKnownTypes = <<<EOT
+application/ogg ogx ogg ogm ogv oga spx
+application/pdf pdf
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.database odb
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-web oth
+application/javascript js
+application/x-shockwave-flash swf
+audio/midi mid midi kar
+audio/mpeg mpga mpa mp2 mp3
+audio/x-aiff aif aiff aifc
+audio/x-wav wav
+audio/ogg oga spx ogg
+image/x-bmp bmp
+image/gif gif
+image/jpeg jpeg jpg jpe
+image/png png
+image/svg+xml svg
+image/svg svg
+image/tiff tiff tif
+image/vnd.djvu djvu
+image/x.djvu djvu
+image/x-djvu djvu
+image/x-portable-pixmap ppm
+image/x-xcf xcf
+text/plain txt
+text/html html htm
+video/ogg ogv ogm ogg
+video/mpeg mpg mpeg
+EOT;
+
+ /**
+ * Defines a set of well known MIME info entries
+ * This is used as a fallback to mime.info files.
+ * An extensive list of well known MIME types is provided by
+ * the file mime.info in the includes directory.
+ */
+ protected static $wellKnownInfo = <<<EOT
+application/pdf [OFFICE]
+application/vnd.oasis.opendocument.chart [OFFICE]
+application/vnd.oasis.opendocument.chart-template [OFFICE]
+application/vnd.oasis.opendocument.database [OFFICE]
+application/vnd.oasis.opendocument.formula [OFFICE]
+application/vnd.oasis.opendocument.formula-template [OFFICE]
+application/vnd.oasis.opendocument.graphics [OFFICE]
+application/vnd.oasis.opendocument.graphics-template [OFFICE]
+application/vnd.oasis.opendocument.image [OFFICE]
+application/vnd.oasis.opendocument.image-template [OFFICE]
+application/vnd.oasis.opendocument.presentation [OFFICE]
+application/vnd.oasis.opendocument.presentation-template [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
+application/vnd.oasis.opendocument.text [OFFICE]
+application/vnd.oasis.opendocument.text-template [OFFICE]
+application/vnd.oasis.opendocument.text-master [OFFICE]
+application/vnd.oasis.opendocument.text-web [OFFICE]
+application/javascript text/javascript application/x-javascript [EXECUTABLE]
+application/x-shockwave-flash [MULTIMEDIA]
+audio/midi [AUDIO]
+audio/x-aiff [AUDIO]
+audio/x-wav [AUDIO]
+audio/mp3 audio/mpeg [AUDIO]
+application/ogg audio/ogg video/ogg [MULTIMEDIA]
+image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
+image/gif [BITMAP]
+image/jpeg [BITMAP]
+image/png [BITMAP]
+image/svg+xml [DRAWING]
+image/tiff [BITMAP]
+image/vnd.djvu [BITMAP]
+image/x-xcf [BITMAP]
+image/x-portable-pixmap [BITMAP]
+text/plain [TEXT]
+text/html [TEXT]
+video/ogg [VIDEO]
+video/mpeg [VIDEO]
+unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
+EOT;
+
+ /**
+  * Store the configuration and build the lookup tables.
+  *
+  * @param array $params Configuration map, includes:
+  *   - typeFile: path to file with the list of known MIME types
+  *   - infoFile: path to file with the MIME type info
+  *   - xmlTypes: map of root element names to XML MIME types
+  *   - initCallback: initialization callback that is passed this object [optional]
+  *   - detectCallback: alternative to finfo that returns the mime type for a file.
+  *      For example, the callback can return the output of "file -bi". [optional]
+  *   - guessCallback: callback to improve the guessed MIME type using the file data.
+  *      This is intended for fixing mistakes in fileinfo or "detectCallback". [optional]
+  *   - extCallback: callback to improve the guessed MIME type using the extension. [optional]
+  *   - logger: PSR-3 logger [optional]
+  * @note Constructing these instances is expensive due to file reads.
+  *  A service or singleton pattern should be used to avoid creating instances again and again.
+  */
+ public function __construct( array $params ) {
+     // Settings that are read unconditionally
+     $this->typeFile = $params['typeFile'];
+     $this->infoFile = $params['infoFile'];
+     $this->xmlTypes = $params['xmlTypes'];
+
+     // Optional callbacks; each one is null when not supplied
+     $optionalCallbacks = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
+     foreach ( $optionalCallbacks as $name ) {
+         $this->$name = isset( $params[$name] ) ? $params[$name] : null;
+     }
+
+     // Without a logger the messages go to a NullLogger
+     if ( isset( $params['logger'] ) ) {
+         $this->logger = $params['logger'];
+     } else {
+         $this->logger = new \Psr\Log\NullLogger();
+     }
+
+     $this->loadFiles();
+ }
+
+ /**
+ * Build the lookup tables from the built-in lists, the configured files and
+ * any extra entries registered through addExtraTypes()/addExtraInfo().
+ *
+ * Runs initCallback first (if set), then fills $mimetoExt and $mExtToMime
+ * from the type list, and $mediaTypes and $mimeTypeAliases from the info
+ * list. A missing or unreadable file is logged and skipped.
+ */
+ protected function loadFiles() {
+ /**
+ * --- load mime.types ---
+ */
+
+ # Allow media handling extensions adding MIME-types and MIME-info
+ if ( $this->initCallback ) {
+ call_user_func( $this->initCallback, $this );
+ }
+
+ // Built-in list first, then the configured file, then the extra types
+ $types = self::$wellKnownTypes;
+
+ $mimeTypeFile = $this->typeFile;
+ if ( $mimeTypeFile ) {
+ if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
+ $this->logger->info( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
+ $types .= "\n";
+ $types .= file_get_contents( $mimeTypeFile );
+ } else {
+ $this->logger->info( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
+ }
+ } else {
+ $this->logger->info( __METHOD__ .
+ ": no mime types file defined, using built-ins only.\n" );
+ }
+
+ $types .= "\n" . $this->extraTypes;
+
+ // Normalise line endings to "\n" and tabs to spaces before splitting
+ $types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
+ $types = str_replace( "\t", " ", $types );
+
+ $this->mimetoExt = [];
+ $this->mExtToMime = [];
+
+ $lines = explode( "\n", $types );
+ foreach ( $lines as $s ) {
+ $s = trim( $s );
+ if ( empty( $s ) ) {
+ continue;
+ }
+ // Lines starting with '#' are comments
+ if ( strpos( $s, '#' ) === 0 ) {
+ continue;
+ }
+
+ // Everything is lower-cased, so the keys of both maps are lower-case
+ $s = strtolower( $s );
+ $i = strpos( $s, ' ' );
+
+ // A line without a space has no extension list
+ if ( $i === false ) {
+ continue;
+ }
+
+ $mime = substr( $s, 0, $i );
+ $ext = trim( substr( $s, $i + 1 ) );
+
+ if ( empty( $ext ) ) {
+ continue;
+ }
+
+ // Repeated MIME types accumulate their extensions in order of appearance
+ if ( !empty( $this->mimetoExt[$mime] ) ) {
+ $this->mimetoExt[$mime] .= ' ' . $ext;
+ } else {
+ $this->mimetoExt[$mime] = $ext;
+ }
+
+ $extensions = explode( ' ', $ext );
+
+ foreach ( $extensions as $e ) {
+ $e = trim( $e );
+ if ( empty( $e ) ) {
+ continue;
+ }
+
+ // Likewise, an extension accumulates every MIME type that lists it
+ if ( !empty( $this->mExtToMime[$e] ) ) {
+ $this->mExtToMime[$e] .= ' ' . $mime;
+ } else {
+ $this->mExtToMime[$e] = $mime;
+ }
+ }
+ }
+
+ /**
+ * --- load mime.info ---
+ */
+
+ $mimeInfoFile = $this->infoFile;
+
+ $info = self::$wellKnownInfo;
+
+ if ( $mimeInfoFile ) {
+ if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
+ $this->logger->info( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
+ $info .= "\n";
+ $info .= file_get_contents( $mimeInfoFile );
+ } else {
+ $this->logger->info( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
+ }
+ } else {
+ $this->logger->info( __METHOD__ .
+ ": no mime info file defined, using built-ins only.\n" );
+ }
+
+ $info .= "\n" . $this->extraInfo;
+
+ $info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
+ $info = str_replace( "\t", " ", $info );
+
+ $this->mimeTypeAliases = [];
+ $this->mediaTypes = [];
+
+ $lines = explode( "\n", $info );
+ foreach ( $lines as $s ) {
+ $s = trim( $s );
+ if ( empty( $s ) ) {
+ continue;
+ }
+ if ( strpos( $s, '#' ) === 0 ) {
+ continue;
+ }
+
+ $s = strtolower( $s );
+ $i = strpos( $s, ' ' );
+
+ if ( $i === false ) {
+ continue;
+ }
+
+ # print "processing MIME INFO line $s<br>";
+
+ // The media type is the bracketed tag (e.g. [OFFICE]); it is cut out of
+ // the line and upper-cased again. Lines without a tag use MEDIATYPE_UNKNOWN.
+ $match = [];
+ if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
+ $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
+ $mtype = trim( strtoupper( $match[1] ) );
+ } else {
+ $mtype = MEDIATYPE_UNKNOWN;
+ }
+
+ $m = explode( ' ', $s );
+
+ if ( !isset( $this->mediaTypes[$mtype] ) ) {
+ $this->mediaTypes[$mtype] = [];
+ }
+
+ foreach ( $m as $mime ) {
+ $mime = trim( $mime );
+ if ( empty( $mime ) ) {
+ continue;
+ }
+
+ $this->mediaTypes[$mtype][] = $mime;
+ }
+
+ // The first token is the canonical type; every later token becomes an
+ // alias of it.
+ // NOTE(review): unlike the loop above, empty tokens are not skipped
+ // here, so the space left where the tag was removed yields an
+ // empty-string alias key -- confirm whether that is intended.
+ if ( count( $m ) > 1 ) {
+ $main = $m[0];
+ $mCount = count( $m );
+ for ( $i = 1; $i < $mCount; $i += 1 ) {
+ $mime = $m[$i];
+ $this->mimeTypeAliases[$mime] = $main;
+ }
+ }
+ }
+ }
+
+ /**
+ * Replace the logger that receives this object's diagnostic messages.
+ *
+ * @param LoggerInterface $logger
+ */
+ public function setLogger( LoggerInterface $logger ) {
+ $this->logger = $logger;
+ }
+
+ /**
+  * Append lines to the extra MIME type list (MIME type followed by its
+  * file extensions). Each call adds the text on a new line.
+  *
+  * As an extension author, you are encouraged to submit patches to
+  * MediaWiki's core to add new MIME types to mime.types.
+  *
+  * @param string $types
+  */
+ public function addExtraTypes( $types ) {
+     // A leading newline keeps the new text apart from earlier additions
+     $this->extraTypes = $this->extraTypes . "\n" . $types;
+ }
+
+ /**
+  * Append lines to the extra MIME info list (the mapping of MIME types to
+  * media types). Each call adds the text on a new line.
+  *
+  * As an extension author, you are encouraged to submit patches to
+  * MediaWiki's core to add new MIME info to mime.info.
+  *
+  * @param string $info
+  */
+ public function addExtraInfo( $info ) {
+     // A leading newline keeps the new text apart from earlier additions
+     $this->extraInfo = $this->extraInfo . "\n" . $info;
+ }
+
+ /**
+  * Look up the file extensions registered for a MIME type.
+  *
+  * The type is lower-cased first. If it has no entry of its own but is a
+  * known alias, the entry of the type it aliases is used instead.
+  *
+  * @param string $mime
+  * @return string|null Space separated list of extensions, or null if the
+  *  MIME type was unrecognized
+  */
+ public function getExtensionsForType( $mime ) {
+     $key = strtolower( $mime );
+
+     // Only fall back to the alias table when there is no direct entry
+     if ( !isset( $this->mimetoExt[$key] ) && isset( $this->mimeTypeAliases[$key] ) ) {
+         $key = $this->mimeTypeAliases[$key];
+     }
+
+     return isset( $this->mimetoExt[$key] ) ? $this->mimetoExt[$key] : null;
+ }
+
+ /**
+  * Look up the MIME types registered for a file extension. The extension
+  * is lower-cased before the lookup.
+  *
+  * @param string $ext
+  * @return string|null Space separated list of MIME types, or null if the
+  *  extension was unrecognized
+  */
+ public function getTypesForExtension( $ext ) {
+     $key = strtolower( $ext );
+
+     if ( isset( $this->mExtToMime[$key] ) ) {
+         return $this->mExtToMime[$key];
+     }
+     return null;
+ }
+
+ /**
+  * Pick one MIME type for a file extension: the first entry of the list
+  * returned by getTypesForExtension( $ext ).
+  *
+  * @param string $ext
+  * @return string|null The MIME type, or null if the extension is unknown
+  */
+ public function guessTypesForExtension( $ext ) {
+     $types = $this->getTypesForExtension( $ext );
+     if ( $types !== null ) {
+         // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
+         // Drop everything from the first whitespace character onwards
+         return preg_replace( '/\s.*$/', '', trim( $types ) );
+     }
+
+     return null;
+ }
+
+ /**
+  * Tests if the extension matches the given MIME type. Returns true if a
+  * match was found, null if the MIME type is unknown, and false if the
+  * MIME type is known but no matches were found.
+  *
+  * The extension is lower-cased and compared as a string against each
+  * extension registered for the MIME type (aliases are resolved by
+  * getExtensionsForType()).
+  *
+  * @param string $extension
+  * @param string $mime
+  * @return bool|null
+  */
+ public function isMatchingExtension( $extension, $mime ) {
+     $ext = $this->getExtensionsForType( $mime );
+
+     if ( !$ext ) {
+         return null; // Unknown MIME type
+     }
+
+     $ext = explode( ' ', $ext );
+
+     $extension = strtolower( $extension );
+     // Strict comparison: with a loose in_array() two numeric-looking
+     // strings such as "1e1" and "10" would be treated as equal.
+     return in_array( $extension, $ext, true );
+ }
+
+ /**
+  * Returns true if the MIME type is known to represent an image format
+  * supported by the PHP GD library.
+  *
+  * The comparison is an exact string match against a fixed list, so
+  * non-string arguments never match.
+  *
+  * @param string $mime
+  *
+  * @return bool
+  */
+ public function isPHPImageType( $mime ) {
+     // As defined by getimagesize and image_type_to_mime
+     static $types = [
+         'image/gif', 'image/jpeg', 'image/png',
+         'image/x-bmp', 'image/xbm', 'image/tiff',
+         'image/jp2', 'image/jpeg2000', 'image/iff',
+         'image/x-xbitmap',
+         'image/vnd.wap.wbmp', 'image/vnd.xiff',
+         'image/x-photoshop',
+         'application/x-shockwave-flash',
+     ];
+
+     // Strict comparison: a loose in_array() would report a match for
+     // values such as boolean true.
+     return in_array( $mime, $types, true );
+ }
+
+ /**
+  * Returns true if the extension represents a type which can
+  * be reliably detected from its content. Use this to determine
+  * whether strict content checks should be applied to reject
+  * invalid uploads; if we can't identify the type we won't
+  * be able to say if it's invalid.
+  *
+  * The extension is lower-cased before it is compared with the list.
+  *
+  * @todo Be more accurate when using fancy MIME detector plugins;
+  *   right now this is the bare minimum getimagesize() list.
+  * @param string $extension
+  * @return bool
+  */
+ public function isRecognizableExtension( $extension ) {
+     static $types = [
+         // Types recognized by getimagesize()
+         'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
+         'bmp', 'tiff', 'tif', 'jpc', 'jp2',
+         'jpx', 'jb2', 'swc', 'iff', 'wbmp',
+         'xbm',
+
+         // Formats we recognize magic numbers for
+         'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
+         'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
+         'webp',
+
+         // XML formats we sure hope we recognize reliably
+         'svg',
+     ];
+     return in_array( strtolower( $extension ), $types, true );
+ }
+
+ /**
+  * Refine a MIME type with the help of the file extension. Some formats are
+  * so generic that the detected type says little; the extension can point
+  * to a more useful one. Typically called on the result of guessMimeType().
+  *
+  * Three detected types are refined: 'unknown/unknown',
+  * 'application/x-opc+zip' and 'text/plain'. Afterwards extCallback (if
+  * set) may change the type, and a known alias is replaced by the type it
+  * aliases.
+  *
+  * @param string $mime The MIME type, typically guessed from a file's content.
+  * @param string $ext The file extension, as taken from the file name
+  *
+  * @return string The MIME type
+  */
+ public function improveTypeFromExtension( $mime, $ext ) {
+     if ( $mime === 'unknown/unknown' ) {
+         if ( !$this->isRecognizableExtension( $ext ) ) {
+             // Not something we can detect, so simply
+             // trust the file extension
+             $mime = $this->guessTypesForExtension( $ext );
+         } else {
+             $this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
+                 "$ext file, we should have recognized it\n" );
+         }
+     } elseif ( $mime === 'application/x-opc+zip' ) {
+         if ( !$this->isMatchingExtension( $ext, $mime ) ) {
+             $this->logger->info( __METHOD__ .
+                 ": refusing to guess better type for $mime file, " .
+                 ".$ext is not a known OPC extension.\n" );
+             $mime = 'application/zip';
+         } else {
+             // A known file extension for an OPC file,
+             // find the proper MIME type for that file extension
+             $mime = $this->guessTypesForExtension( $ext );
+         }
+     } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
+         // Textual types are sometimes not recognized properly.
+         // If detected as text/plain, and has an extension which is textual
+         // improve to the extension's type. For example, csv and json are often
+         // misdetected as text/plain.
+         $mime = $this->guessTypesForExtension( $ext );
+     }
+
+     # Media handling extensions can improve the MIME detected
+     $improver = $this->extCallback;
+     if ( $improver ) {
+         $improver( $this, $ext, $mime /* by reference */ );
+     }
+
+     $mime = isset( $this->mimeTypeAliases[$mime] )
+         ? $this->mimeTypeAliases[$mime]
+         : $mime;
+
+     $this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
+     return $mime;
+ }
+
+ /**
+  * MIME type detection. Tries the internal content checks first and falls
+  * back to detectMimeType() when they yield nothing. The internal checks
+  * cover well known formats that the default detection may miss or
+  * misinterpret (namely XML based formats like XHTML or SVG, as well as ZIP
+  * based formats like OPC/ODF files). A known alias is replaced by the type
+  * it aliases before returning.
+  *
+  * @param string $file The file to check
+  * @param string|bool $ext The file extension, or true (default) to extract
+  *   it from the filename. Set it to false to ignore the extension. DEPRECATED!
+  *   Set to false, use improveTypeFromExtension($mime, $ext) later to improve MIME type.
+  *
+  * @return string The MIME type of $file
+  */
+ public function guessMimeType( $file, $ext = true ) {
+     // TODO: make $ext default to false. Or better, remove it.
+     if ( $ext ) {
+         $this->logger->info( __METHOD__ .
+             ": WARNING: use of the \$ext parameter is deprecated. " .
+             "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+     }
+
+     $mime = $this->doGuessMimeType( $file, $ext );
+     if ( !$mime ) {
+         // The content checks gave no answer; use the generic detector
+         $this->logger->info( __METHOD__ .
+             ": internal type detection failed for $file (.$ext)...\n" );
+         $mime = $this->detectMimeType( $file, $ext );
+     }
+
+     $mime = isset( $this->mimeTypeAliases[$mime] )
+         ? $this->mimeTypeAliases[$mime]
+         : $mime;
+
+     $this->logger->info( __METHOD__ . ": guessed mime type of $file: $mime\n" );
+     return $mime;
+ }
+
+ /**
+  * Guess the MIME type from the file contents.
+  *
+  * Reads the first 1024 bytes and up to the last 65558 bytes of the file,
+  * then runs a fixed sequence of checks; the first one that recognises the
+  * content decides the result: hard-coded magic numbers, EBML
+  * (Matroska/WebM), WebP, PHP markers, well-formed XML, shebang scripts,
+  * ZIP variants and getimagesize(). If none matches, guessCallback (if set)
+  * may supply the type.
+  *
+  * @todo Remove $ext param
+  *
+  * @param string $file Path of the file to inspect
+  * @param mixed $ext Only passed through to detectZipType()
+  * @return bool|string The detected MIME type; 'unknown/unknown' if the file
+  *  cannot be opened or sized, or is an unrecognised EBML file; false if no
+  *  check matched and no callback supplied a type
+  * @throws UnexpectedValueException If seeking to the tail of the file fails
+  */
+ private function doGuessMimeType( $file, $ext ) {
+     // Read a chunk of the file
+     MediaWiki\suppressWarnings();
+     $f = fopen( $file, 'rb' );
+     MediaWiki\restoreWarnings();
+
+     if ( !$f ) {
+         return 'unknown/unknown';
+     }
+
+     $fsize = filesize( $file );
+     if ( $fsize === false ) {
+         // Release the handle before leaving early
+         fclose( $f );
+         return 'unknown/unknown';
+     }
+
+     $head = fread( $f, 1024 );
+     $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
+     if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
+         // Release the handle before throwing
+         fclose( $f );
+         throw new UnexpectedValueException(
+             "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
+     }
+     $tail = $tailLength ? fread( $f, $tailLength ) : '';
+     fclose( $f );
+
+     $this->logger->info( __METHOD__ .
+         ": analyzing head and tail of $file for magic numbers.\n" );
+
+     // Hardcode a few magic number checks...
+     $headers = [
+         // Multimedia...
+         'MThd' => 'audio/midi',
+         'OggS' => 'application/ogg',
+
+         // Image formats...
+         // Note that WMF may have a bare header, no magic number.
+         "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
+         "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
+         '%PDF' => 'application/pdf',
+         'gimp xcf' => 'image/x-xcf',
+
+         // Some forbidden fruit...
+         'MZ' => 'application/octet-stream', // DOS/Windows executable
+         "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
+         "\x7fELF" => 'application/octet-stream', // ELF binary
+     ];
+
+     foreach ( $headers as $magic => $candidate ) {
+         if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
+             $this->logger->info( __METHOD__ .
+                 ": magic header in $file recognized as $candidate\n" );
+             return $candidate;
+         }
+     }
+
+     /* Look for WebM and Matroska files */
+     if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
+         $doctype = strpos( $head, "\x42\x82" );
+         if ( $doctype ) {
+             // Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
+             $data = substr( $head, $doctype + 3, 8 );
+             if ( strncmp( $data, "matroska", 8 ) == 0 ) {
+                 $this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
+                 return "video/x-matroska";
+             } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
+                 $this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
+                 return "video/webm";
+             }
+         }
+         $this->logger->info( __METHOD__ . ": unknown EBML file\n" );
+         return "unknown/unknown";
+     }
+
+     /* Look for WebP */
+     if ( strncmp( $head, "RIFF", 4 ) == 0 &&
+         strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
+     ) {
+         $this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
+         return "image/webp";
+     }
+
+     /**
+      * Look for PHP. Check for this before HTML/XML... Warning: this is a
+      * heuristic, and won't match a file with a lot of non-PHP before. It
+      * will also match text files which could be PHP. :)
+      *
+      * @todo FIXME: For this reason, the check is probably useless -- an attacker
+      * could almost certainly just pad the file with a lot of nonsense to
+      * circumvent the check in any case where it would be a security
+      * problem. On the other hand, it causes harmful false positives (bug
+      * 16583). The heuristic has been cut down to exclude three-character
+      * strings like "<? ", but should it be axed completely?
+      */
+     if ( ( strpos( $head, '<?php' ) !== false ) ||
+         ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
+         ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
+         ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
+         ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
+         ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {
+
+         $this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
+         return 'application/x-php';
+     }
+
+     /**
+      * look for XML formats (XHTML and SVG)
+      */
+     $xml = new XmlTypeCheck( $file );
+     if ( $xml->wellFormed ) {
+         $xmlTypes = $this->xmlTypes;
+         if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
+             return $xmlTypes[$xml->getRootElement()];
+         } else {
+             return 'application/xml';
+         }
+     }
+
+     /**
+      * look for shell scripts
+      */
+     $script_type = null;
+
+     # detect by shebang
+     # The UTF-16 forms are a 2-byte BOM followed by "#!" at two bytes per
+     # character, i.e. 6 bytes in total.
+     if ( substr( $head, 0, 2 ) == "#!" ) {
+         $script_type = "ASCII";
+     } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
+         $script_type = "UTF-8";
+     } elseif ( substr( $head, 0, 6 ) == "\xfe\xff\x00#\x00!" ) {
+         $script_type = "UTF-16BE";
+     } elseif ( substr( $head, 0, 6 ) == "\xff\xfe#\x00!\x00" ) {
+         $script_type = "UTF-16LE";
+     }
+
+     if ( $script_type ) {
+         if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
+             // Quick and dirty fold down to ASCII!
+             $pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
+             // Skip the 2-byte BOM, then decode 16-bit code units
+             $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
+             $head = '';
+             foreach ( $chars as $codepoint ) {
+                 if ( $codepoint < 128 ) {
+                     $head .= chr( $codepoint );
+                 } else {
+                     $head .= '?';
+                 }
+             }
+         }
+
+         $match = [];
+
+         // The word after the last slash of the first path-like token
+         // names the interpreter
+         if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
+             $mime = "application/x-{$match[2]}";
+             $this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
+             return $mime;
+         }
+     }
+
+     // Check for ZIP variants (before getimagesize)
+     if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
+         $this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
+         return $this->detectZipType( $head, $tail, $ext );
+     }
+
+     MediaWiki\suppressWarnings();
+     $gis = getimagesize( $file );
+     MediaWiki\restoreWarnings();
+
+     if ( $gis && isset( $gis['mime'] ) ) {
+         $mime = $gis['mime'];
+         $this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
+         return $mime;
+     }
+
+     # Media handling extensions can guess the MIME by content
+     # It's intentionally here so that if core is wrong about a type (false positive),
+     # people will hopefully nag and submit patches :)
+     $mime = false;
+     # Some strings by reference for performance - assuming well-behaved hooks
+     $callback = $this->guessCallback;
+     if ( $callback ) {
+         $callback( $this, $head, $tail, $file, $mime /* by reference */ );
+     }
+
+     return $mime;
+ }
+
+ /**
+ * Detect the application-specific type of a ZIP-based file from its raw
+ * header (and, where needed, tail) bytes. Recognizes OpenDocument packages,
+ * Open Packaging Conventions (OPC) packages, and legacy MS Office documents
+ * that carry an OPC trailer. If it can't tell, returns 'application/zip'.
+ *
+ * @param string $header Some reasonably-sized chunk of file header
+ * @param string|null $tail The tail of the file. Only consulted for the
+ * legacy-Office-with-OPC-trailer check, which is skipped when it is null.
+ * @param string|bool $ext The file extension, or true to extract it from the filename.
+ * Set it to false (default) to ignore the extension. DEPRECATED! Set to false,
+ * use improveTypeFromExtension($mime, $ext) later to improve MIME type.
+ *
+ * @return string The detected MIME type
+ */
+ public function detectZipType( $header, $tail = null, $ext = false ) {
+ if ( $ext ) { # TODO: remove $ext param
+ $this->logger->info( __METHOD__ .
+ ": WARNING: use of the \$ext parameter is deprecated. " .
+ "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+ }
+
+ $mime = 'application/zip';
+ $opendocTypes = [
+ 'chart-template',
+ 'chart',
+ 'formula-template',
+ 'formula',
+ 'graphics-template',
+ 'graphics',
+ 'image-template',
+ 'image',
+ 'presentation-template',
+ 'presentation',
+ 'spreadsheet-template',
+ 'spreadsheet',
+ 'text-template',
+ 'text-master',
+ 'text-web',
+ 'text' ];
+
+ // http://lists.oasis-open.org/archives/office/200505/msg00006.html
+ $types = '(?:' . implode( '|', $opendocTypes ) . ')';
+ $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
+
+ // The dot is escaped so that only the literal name "[Content_Types].xml" matches
+ $openxmlRegex = "/^\[Content_Types\]\.xml/";
+
+ // Offset 30 is where the name of the first archive entry starts, right
+ // after the fixed-size part of the ZIP local file header.
+ if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
+ $mime = $matches[1];
+ $this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
+ } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
+ $mime = "application/x-opc+zip";
+ # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
+ if ( $ext !== true && $ext !== false ) {
+ /** This is the mode used by getPropsFromPath
+ * These MIME's are stored in the database, where we don't really want
+ * x-opc+zip, because we use it only for internal purposes
+ */
+ if ( $this->isMatchingExtension( $ext, $mime ) ) {
+ /* A known file extension for an OPC file,
+ * find the proper mime type for that file extension
+ */
+ $mime = $this->guessTypesForExtension( $ext );
+ } else {
+ $mime = "application/zip";
+ }
+ }
+ $this->logger->info( __METHOD__ .
+ ": detected an Open Packaging Conventions archive: $mime\n" );
+ } elseif ( $tail !== null &&
+ substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
+ ( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
+ preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
+ // Legacy compound document followed by an OPC package. Without a tail
+ // there is nothing to search, so the check is skipped entirely.
+ if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
+ $mime = "application/msword";
+ }
+ switch ( substr( $header, 512, 6 ) ) {
+ case "\xEC\xA5\xC1\x00\x0E\x00":
+ case "\xEC\xA5\xC1\x00\x1C\x00":
+ case "\xEC\xA5\xC1\x00\x43\x00":
+ $mime = "application/vnd.ms-powerpoint";
+ break;
+ case "\xFD\xFF\xFF\xFF\x10\x00":
+ case "\xFD\xFF\xFF\xFF\x1F\x00":
+ case "\xFD\xFF\xFF\xFF\x22\x00":
+ case "\xFD\xFF\xFF\xFF\x23\x00":
+ case "\xFD\xFF\xFF\xFF\x28\x00":
+ case "\xFD\xFF\xFF\xFF\x29\x00":
+ case "\xFD\xFF\xFF\xFF\x10\x02":
+ case "\xFD\xFF\xFF\xFF\x1F\x02":
+ case "\xFD\xFF\xFF\xFF\x22\x02":
+ case "\xFD\xFF\xFF\xFF\x23\x02":
+ case "\xFD\xFF\xFF\xFF\x28\x02":
+ case "\xFD\xFF\xFF\xFF\x29\x02":
+ // Same spelling as the MIME type tables use for Excel files,
+ // so that extension and media type lookups recognize it.
+ $mime = "application/vnd.ms-excel";
+ break;
+ }
+
+ $this->logger->info( __METHOD__ .
+ ": detected a MS Office document with OPC trailer\n" );
+ } else {
+ $this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
+ }
+ return $mime;
+ }
+
+ /**
+ * Internal MIME type detection.
+ *
+ * If a detect callback is set, it is asked for the type; otherwise the
+ * fileinfo extension is used when it is available. The answer is normalized
+ * (everything from the first ';', ',' or space on is dropped, then it is
+ * trimmed and lower-cased) and returned, unless it contains 'unknown'.
+ *
+ * If that yields nothing and $ext is not false, the MIME type is guessed from
+ * the file extension using guessTypesForExtension, except for extensions that
+ * isRecognizableExtension reports: those should have been detected by content,
+ * so no guess is made for them.
+ *
+ * If no MIME type can be determined, this function returns 'unknown/unknown'.
+ *
+ * @param string $file The file to check
+ * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
+ * Set it to false to ignore the extension. DEPRECATED! Set to false, use
+ * improveTypeFromExtension($mime, $ext) later to improve MIME type.
+ *
+ * @return string The MIME type of $file
+ */
+ private function detectMimeType( $file, $ext = true ) {
+ /** @todo Make $ext default to false. Or better, remove it. */
+ if ( $ext ) {
+ $this->logger->info( __METHOD__ .
+ ": WARNING: use of the \$ext parameter is deprecated. "
+ . "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+ }
+
+ $m = null;
+ $detector = $this->detectCallback;
+ if ( $detector ) {
+ // A configured detector takes precedence over fileinfo
+ $m = $detector( $file );
+ } elseif ( !function_exists( "finfo_open" ) || !function_exists( "finfo_file" ) ) {
+ $this->logger->info( __METHOD__ . ": no magic mime detector found!\n" );
+ } else {
+ $finfo = finfo_open( FILEINFO_MIME );
+
+ if ( !$finfo ) {
+ $this->logger->info( __METHOD__ .
+ ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
+ } else {
+ $m = finfo_file( $finfo, $file );
+ finfo_close( $finfo );
+ }
+ }
+
+ if ( $m ) {
+ # normalize: strip charset, etc, then trim and lower-case
+ $m = strtolower( trim( preg_replace( '![;, ].*$!', '', $m ) ) );
+
+ if ( strpos( $m, 'unknown' ) === false ) {
+ $this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
+ return $m;
+ }
+ // The detector had no real answer; fall through to the extension
+ $m = null;
+ }
+
+ // If desired, look at extension as a fallback.
+ if ( $ext === true ) {
+ $dotPos = strrpos( $file, '.' );
+ $ext = strtolower( $dotPos ? substr( $file, $dotPos + 1 ) : '' );
+ }
+ if ( $ext ) {
+ if ( !$this->isRecognizableExtension( $ext ) ) {
+ $m = $this->guessTypesForExtension( $ext );
+ if ( $m ) {
+ $this->logger->info( __METHOD__ . ": extension mime type of $file: $m\n" );
+ return $m;
+ }
+ } else {
+ $this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
+ . "we should have recognized it\n" );
+ }
+ }
+
+ // Unknown type
+ $this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
+ return 'unknown/unknown';
+ }
+
+ /**
+ * Determine the media type code for a file, using its MIME type, name and
+ * possibly its contents.
+ *
+ * Lookups are tried in this order, each through findMediaType(): the full
+ * MIME type, the file extension taken from $path, and finally the major part
+ * of the MIME type (the text before the '/'). Files of type application/ogg
+ * are special-cased: the start of the file is inspected for codec names.
+ *
+ * @todo analyse file if need be
+ * @todo look at multiple extension, separately and together.
+ *
+ * @param string $path Full path to the image file, in case we have to look at the contents
+ * (if null, only the MIME type is used to determine the media type code).
+ * @param string $mime MIME type. If null it will be guessed using guessMimeType.
+ *
+ * @return string A value to be used with the MEDIATYPE_xxx constants.
+ */
+ public function getMediaType( $path = null, $mime = null ) {
+ if ( !$mime && !$path ) {
+ return MEDIATYPE_UNKNOWN;
+ }
+
+ // If MIME type is unknown, guess it
+ if ( !$mime ) {
+ $mime = $this->guessMimeType( $path, false );
+ }
+
+ // Special code for ogg - detect if it's video (theora),
+ // else label it as sound.
+ // $path may legitimately be null when only a MIME type was given, so it
+ // is checked before being handed to file_exists().
+ if ( $mime == 'application/ogg' && $path !== null && file_exists( $path ) ) {
+
+ // Read a chunk of the file. The file is binary, so it is opened in
+ // binary mode to keep text-mode translation from altering the data.
+ $f = fopen( $path, "rb" );
+ if ( !$f ) {
+ return MEDIATYPE_UNKNOWN;
+ }
+ $head = fread( $f, 256 );
+ fclose( $f );
+
+ // Drop the encoder name first so that it doesn't count as a 'theora' hit
+ $head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );
+
+ // This is an UGLY HACK, file should be parsed correctly
+ if ( strpos( $head, 'theora' ) !== false ) {
+ return MEDIATYPE_VIDEO;
+ } elseif ( strpos( $head, 'vorbis' ) !== false ) {
+ return MEDIATYPE_AUDIO;
+ } elseif ( strpos( $head, 'flac' ) !== false ) {
+ return MEDIATYPE_AUDIO;
+ } elseif ( strpos( $head, 'speex' ) !== false ) {
+ return MEDIATYPE_AUDIO;
+ } else {
+ return MEDIATYPE_MULTIMEDIA;
+ }
+ }
+
+ $type = null;
+ // Check for entry for full MIME type
+ if ( $mime ) {
+ $type = $this->findMediaType( $mime );
+ if ( $type !== MEDIATYPE_UNKNOWN ) {
+ return $type;
+ }
+ }
+
+ // Check for entry for file extension
+ if ( $path ) {
+ $i = strrpos( $path, '.' );
+ $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
+
+ // TODO: look at multi-extension if this fails, parse from full path
+ // The leading dot tells findMediaType() that this is an extension
+ $type = $this->findMediaType( '.' . $e );
+ if ( $type !== MEDIATYPE_UNKNOWN ) {
+ return $type;
+ }
+ }
+
+ // Check major MIME type
+ if ( $mime ) {
+ $i = strpos( $mime, '/' );
+ if ( $i !== false ) {
+ $major = substr( $mime, 0, $i );
+ $type = $this->findMediaType( $major );
+ if ( $type !== MEDIATYPE_UNKNOWN ) {
+ return $type;
+ }
+ }
+ }
+
+ if ( !$type ) {
+ $type = MEDIATYPE_UNKNOWN;
+ }
+
+ return $type;
+ }
+
+ /**
+ * Returns a media code matching the given MIME type or file extension.
+ * A string starting with a dot (.) is taken to be a file extension and is
+ * first translated to its MIME types with getTypesForExtension(); any other
+ * string is taken to be a MIME type and is resolved through
+ * $this->mimeTypeAliases.
+ *
+ * The media code is looked up in the mapping held by $this->mediaTypes;
+ * MEDIATYPE_UNKNOWN is returned when nothing matches.
+ * @access private
+ * @param string $extMime
+ * @return int|string
+ */
+ function findMediaType( $extMime ) {
+ if ( strpos( $extMime, '.' ) !== 0 ) {
+ // A MIME type: replace a known alias with the type it stands for
+ $canonical = isset( $this->mimeTypeAliases[$extMime] )
+ ? $this->mimeTypeAliases[$extMime]
+ : $extMime;
+
+ $candidates = [ $canonical ];
+ } else {
+ // An extension: every MIME type registered for it is a candidate
+ $known = $this->getTypesForExtension( substr( $extMime, 1 ) );
+ if ( !$known ) {
+ return MEDIATYPE_UNKNOWN;
+ }
+
+ $candidates = explode( ' ', $known );
+ }
+
+ // The first candidate listed under any media type wins
+ foreach ( $candidates as $candidate ) {
+ foreach ( $this->mediaTypes as $mediaType => $codes ) {
+ if ( in_array( $candidate, $codes, true ) ) {
+ return $mediaType;
+ }
+ }
+ }
+
+ return MEDIATYPE_UNKNOWN;
+ }
+
+ /**
+ * Get the MIME types that various versions of Internet Explorer would
+ * detect from a chunk of the content. The work is delegated to the shared
+ * IEContentAnalyzer instance.
+ *
+ * @param string $fileName The file name (unused at present)
+ * @param string $chunk The first 256 bytes of the file
+ * @param string $proposed The MIME type proposed by the server
+ * @return array
+ */
+ public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
+ return $this->getIEContentAnalyzer()
+ ->getRealMimesFromData( $fileName, $chunk, $proposed );
+ }
+
+ /**
+ * Get the IEContentAnalyzer instance, creating it on first use and
+ * reusing it afterwards.
+ *
+ * @return IEContentAnalyzer
+ */
+ protected function getIEContentAnalyzer() {
+ if ( $this->IEAnalyzer === null ) {
+ // Not created yet
+ $this->IEAnalyzer = new IEContentAnalyzer();
+ }
+
+ return $this->IEAnalyzer;
+ }
+}
--- /dev/null
+<?php
+/**
+ * XML syntax and type checker.
+ *
+ * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
+ * more control over the expansion of XML entities. When passed to the
+ * callback, entities will be fully expanded, but the XML may be reported as
+ * invalid if expanding the entities is likely to cause a DoS.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+class XmlTypeCheck {
+ /**
+ * Will be set to true or false to indicate whether the file is
+ * well-formed XML. Note that this doesn't check schema validity.
+ */
+ public $wellFormed = null;
+
+ /**
+ * Will be set to true if the optional element filter returned
+ * a match at some point.
+ */
+ public $filterMatch = false;
+
+ /**
+ * Will contain the type of filter hit if the optional element filter returned
+ * a match at some point.
+ * @var mixed
+ */
+ public $filterMatchType = false;
+
+ /**
+ * Name of the document's root element, including any namespace
+ * as an expanded URL.
+ */
+ public $rootElement = '';
+
+ /**
+ * A stack of strings containing the data of each xml element as it's processed. Append
+ * data to the top string of the stack, then pop off the string and process it when the
+ * element is closed.
+ */
+ protected $elementData = [];
+
+ /**
+ * A stack of element names and attributes, as we process them.
+ */
+ protected $elementDataContext = [];
+
+ /**
+ * Current depth of the data stack.
+ */
+ protected $stackDepth = 0;
+
+ /**
+ * Optional element filter. It is called from elementClose() with the
+ * expanded element name, the attribute array and the collected text.
+ * Declared explicitly instead of being created on the fly by the
+ * constructor; kept public so that existing readers keep working.
+ * @var callable|null
+ */
+ public $filterCallback = null;
+
+ /**
+ * Additional parsing options
+ */
+ private $parserOptions = [
+ 'processing_instruction_handler' => '',
+ ];
+
+ /**
+ * @param string $input a filename or string containing the XML element
+ * @param callable $filterCallback (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, attributes, and text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
+ * @param bool $isFile (optional) indicates if the first parameter is a
+ * filename (default, true) or if it is a string (false)
+ * @param array $options list of additional parsing options:
+ * processing_instruction_handler: Callback that is given the target and
+ * the data of every processing instruction; a true-ish return value
+ * toggles on $this->filterMatch
+ */
+ public function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
+ $this->filterCallback = $filterCallback;
+ $this->parserOptions = array_merge( $this->parserOptions, $options );
+ $this->validateFromInput( $input, $isFile );
+ }
+
+ /**
+ * Alternative constructor: from filename
+ *
+ * @param string $fname the filename of an XML document
+ * @param callable $filterCallback (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, attributes, and text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
+ * @return XmlTypeCheck
+ */
+ public static function newFromFilename( $fname, $filterCallback = null ) {
+ return new self( $fname, $filterCallback, true );
+ }
+
+ /**
+ * Alternative constructor: from string
+ *
+ * @param string $string a string containing an XML element
+ * @param callable $filterCallback (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, attributes, and text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
+ * @return XmlTypeCheck
+ */
+ public static function newFromString( $string, $filterCallback = null ) {
+ return new self( $string, $filterCallback, false );
+ }
+
+ /**
+ * Get the root element. Simple accessor to $rootElement
+ *
+ * @return string
+ */
+ public function getRootElement() {
+ return $this->rootElement;
+ }
+
+ /**
+ * Open the input with XMLReader and run the validation over it.
+ * Sets $this->wellFormed to false if the input cannot be opened.
+ *
+ * @param string $xml a filename or a string of XML, depending on $isFile
+ * @param bool $isFile true if $xml is a filename, false if it is XML text
+ * @throws Exception rethrows whatever is thrown while validating
+ */
+ private function validateFromInput( $xml, $isFile ) {
+ $reader = new XMLReader();
+ if ( $isFile ) {
+ $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+ } else {
+ $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+ }
+ if ( $s !== true ) {
+ // Couldn't open the XML
+ $this->wellFormed = false;
+ } else {
+ // Remember the previous setting so that it can be put back afterwards
+ $oldDisable = libxml_disable_entity_loader( true );
+ $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
+ try {
+ $this->validate( $reader );
+ } catch ( Exception $e ) {
+ // Calling this malformed, because we didn't parse the whole
+ // thing. Maybe just an external entity reference.
+ $this->wellFormed = false;
+ $reader->close();
+ libxml_disable_entity_loader( $oldDisable );
+ throw $e;
+ }
+ $reader->close();
+ libxml_disable_entity_loader( $oldDisable );
+ }
+ }
+
+ /**
+ * Advance the reader by one node. While reading, PHP errors are routed
+ * to XmlErrorHandler().
+ *
+ * @param XMLReader $reader
+ * @return bool the return value of XMLReader::read()
+ */
+ private function readNext( XMLReader $reader ) {
+ set_error_handler( [ $this, 'XmlErrorHandler' ] );
+ $ret = $reader->read();
+ restore_error_handler();
+ return $ret;
+ }
+
+ /**
+ * Error handler installed by readNext(): any error raised while reading
+ * marks the document as not well-formed.
+ *
+ * @param int $errno
+ * @param string $errstr
+ */
+ public function XmlErrorHandler( $errno, $errstr ) {
+ $this->wellFormed = false;
+ }
+
+ /**
+ * Walk the whole document, feeding elements, text and processing
+ * instructions to the handlers, and set $this->wellFormed at the end.
+ *
+ * @param XMLReader $reader
+ */
+ private function validate( $reader ) {
+ // First, move through anything that isn't an element, and
+ // handle any processing instructions with the callback
+ do {
+ if ( !$this->readNext( $reader ) ) {
+ // Hit the end of the document before any elements
+ $this->wellFormed = false;
+ return;
+ }
+ if ( $reader->nodeType === XMLReader::PI ) {
+ $this->processingInstructionHandler( $reader->name, $reader->value );
+ }
+ } while ( $reader->nodeType != XMLReader::ELEMENT );
+
+ // Process the rest of the document
+ do {
+ switch ( $reader->nodeType ) {
+ case XMLReader::ELEMENT:
+ $name = $this->expandNS(
+ $reader->name,
+ $reader->namespaceURI
+ );
+ if ( $this->rootElement === '' ) {
+ $this->rootElement = $name;
+ }
+ // Read this before the attributes are collected, which
+ // moves the reader off the element node
+ $empty = $reader->isEmptyElement;
+ $attrs = $this->getAttributesArray( $reader );
+ $this->elementOpen( $name, $attrs );
+ if ( $empty ) {
+ $this->elementClose();
+ }
+ break;
+
+ case XMLReader::END_ELEMENT:
+ $this->elementClose();
+ break;
+
+ case XMLReader::WHITESPACE:
+ case XMLReader::SIGNIFICANT_WHITESPACE:
+ case XMLReader::CDATA:
+ case XMLReader::TEXT:
+ $this->elementData( $reader->value );
+ break;
+
+ case XMLReader::ENTITY_REF:
+ // Unexpanded entity (maybe external?),
+ // don't send to the filter (xml_parse didn't)
+ break;
+
+ case XMLReader::COMMENT:
+ // Don't send to the filter (xml_parse didn't)
+ break;
+
+ case XMLReader::PI:
+ // Processing instructions can happen after the header too
+ $this->processingInstructionHandler(
+ $reader->name,
+ $reader->value
+ );
+ break;
+ default:
+ // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
+ // NOTATION, or XML_DECLARATION
+ // xml_parse didn't send these to the filter, so we won't.
+ }
+
+ } while ( $this->readNext( $reader ) );
+
+ if ( $this->stackDepth !== 0 ) {
+ // Some element was never closed
+ $this->wellFormed = false;
+ } elseif ( $this->wellFormed === null ) {
+ // Nothing flagged a problem along the way
+ $this->wellFormed = true;
+ }
+ }
+
+ /**
+ * Get all of the attributes for an XMLReader's current node
+ * @param XMLReader $r
+ * @return array of attributes, keyed by expanded attribute name
+ */
+ private function getAttributesArray( XMLReader $r ) {
+ $attrs = [];
+ while ( $r->moveToNextAttribute() ) {
+ if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
+ // XMLReader treats xmlns attributes as normal
+ // attributes, while xml_parse doesn't
+ continue;
+ }
+ $name = $this->expandNS( $r->name, $r->namespaceURI );
+ $attrs[$name] = $r->value;
+ }
+ return $attrs;
+ }
+
+ /**
+ * @param string $name element or attribute name, maybe with a full or short prefix
+ * @param string $namespaceURI the namespaceURI
+ * @return string the local name prefixed with namespaceURI and a colon,
+ * or $name unchanged when there is no namespace
+ */
+ private function expandNS( $name, $namespaceURI ) {
+ if ( $namespaceURI ) {
+ $parts = explode( ':', $name );
+ $localname = array_pop( $parts );
+ return "$namespaceURI:$localname";
+ }
+ return $name;
+ }
+
+ /**
+ * Push a newly opened element onto the stacks.
+ *
+ * @param string $name
+ * @param array $attribs
+ */
+ private function elementOpen( $name, $attribs ) {
+ $this->elementDataContext[] = [ $name, $attribs ];
+ $this->elementData[] = '';
+ $this->stackDepth++;
+ }
+
+ /**
+ * Pop the innermost element off the stacks and run the element filter,
+ * if there is one, on its name, attributes and collected text.
+ */
+ private function elementClose() {
+ list( $name, $attribs ) = array_pop( $this->elementDataContext );
+ $data = array_pop( $this->elementData );
+ $this->stackDepth--;
+ $callbackReturn = false;
+
+ if ( is_callable( $this->filterCallback ) ) {
+ $callbackReturn = call_user_func(
+ $this->filterCallback,
+ $name,
+ $attribs,
+ $data
+ );
+ }
+ if ( $callbackReturn ) {
+ // Filter hit!
+ $this->filterMatch = true;
+ $this->filterMatchType = $callbackReturn;
+ }
+ }
+
+ /**
+ * Append (trimmed) character data to the innermost open element.
+ *
+ * @param string $data
+ */
+ private function elementData( $data ) {
+ // Collect any data here, and we'll run the callback in elementClose
+ $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
+ }
+
+ /**
+ * Hand a processing instruction to the configured handler, if any.
+ *
+ * @param string $target
+ * @param string $data
+ */
+ private function processingInstructionHandler( $target, $data ) {
+ $callbackReturn = false;
+ if ( $this->parserOptions['processing_instruction_handler'] ) {
+ $callbackReturn = call_user_func(
+ $this->parserOptions['processing_instruction_handler'],
+ $target,
+ $data
+ );
+ }
+ if ( $callbackReturn ) {
+ // Filter hit!
+ $this->filterMatch = true;
+ $this->filterMatchType = $callbackReturn;
+ }
+ }
+}
--- /dev/null
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**@{
+ * Media types.
+ * This defines constants for the value returned by File::getMediaType()
+ */
+// unknown format
+define( 'MEDIATYPE_UNKNOWN', 'UNKNOWN' );
+// some bitmap image or image source (like psd, etc). Can't scale up.
+define( 'MEDIATYPE_BITMAP', 'BITMAP' );
+// some vector drawing (SVG, WMF, PS, ...) or image source (oo-draw, etc). Can scale up.
+define( 'MEDIATYPE_DRAWING', 'DRAWING' );
+// simple audio file (ogg, mp3, wav, midi, whatever)
+define( 'MEDIATYPE_AUDIO', 'AUDIO' );
+// simple video file (ogg, mpg, etc;
+// do not include formats here that may contain executable sections or scripts!)
+define( 'MEDIATYPE_VIDEO', 'VIDEO' );
+// Scriptable Multimedia (flash, advanced video container formats, etc)
+define( 'MEDIATYPE_MULTIMEDIA', 'MULTIMEDIA' );
+// Office Documents, Spreadsheets (office formats possibly containing applets, scripts, etc)
+define( 'MEDIATYPE_OFFICE', 'OFFICE' );
+// Plain text (possibly containing program code or scripts)
+define( 'MEDIATYPE_TEXT', 'TEXT' );
+// binary executable
+define( 'MEDIATYPE_EXECUTABLE', 'EXECUTABLE' );
+// archive file (zip, tar, etc)
+define( 'MEDIATYPE_ARCHIVE', 'ARCHIVE' );
+/**@}*/
--- /dev/null
+# MIME type info file.
+# the first MIME type in each line is the "main" MIME type,
+# the others are aliases for this type
+# the media type is given in upper case and square brackets,
+# like [BITMAP], and must indicate a media type as defined by
+# the MEDIATYPE_xxx constants in Defines.php
+
+
+image/gif [BITMAP]
+image/png image/x-png [BITMAP]
+image/ief [BITMAP]
+image/jpeg image/pjpeg [BITMAP]
+image/jp2 [BITMAP]
+image/xbm [BITMAP]
+image/tiff [BITMAP]
+image/x-icon image/x-ico image/vnd.microsoft.icon [BITMAP]
+image/x-rgb [BITMAP]
+image/x-portable-pixmap [BITMAP]
+image/x-portable-graymap image/x-portable-greymap [BITMAP]
+image/x-bmp image/x-ms-bmp image/bmp application/x-bmp application/bmp [BITMAP]
+image/x-photoshop image/psd image/x-psd image/photoshop image/vnd.adobe.photoshop [BITMAP]
+image/vnd.djvu image/x.djvu image/x-djvu [BITMAP]
+image/webp [BITMAP]
+
+image/svg+xml application/svg+xml application/svg image/svg [DRAWING]
+application/postscript [DRAWING]
+application/x-latex [DRAWING]
+application/x-tex [DRAWING]
+application/x-dia-diagram [DRAWING]
+
+
+audio/mpeg audio/mp3 audio/mpeg3 [AUDIO]
+audio/mp4 [AUDIO]
+audio/wav audio/x-wav audio/wave [AUDIO]
+audio/midi audio/mid [AUDIO]
+audio/basic [AUDIO]
+audio/ogg [AUDIO]
+audio/x-aiff [AUDIO]
+audio/x-pn-realaudio [AUDIO]
+audio/x-realaudio [AUDIO]
+audio/webm [AUDIO]
+audio/x-matroska [AUDIO]
+audio/x-flac [AUDIO]
+audio/flac [AUDIO]
+
+video/mpeg application/mpeg [VIDEO]
+video/ogg [VIDEO]
+video/x-sgi-video [VIDEO]
+video/x-flv [VIDEO]
+video/webm [VIDEO]
+video/x-matroska [VIDEO]
+video/mp4 [VIDEO]
+
+application/ogg application/x-ogg audio/ogg audio/x-ogg video/ogg video/x-ogg [MULTIMEDIA]
+
+application/x-shockwave-flash [MULTIMEDIA]
+audio/x-pn-realaudio-plugin [MULTIMEDIA]
+model/iges [MULTIMEDIA]
+model/mesh [MULTIMEDIA]
+model/vrml [MULTIMEDIA]
+video/quicktime [MULTIMEDIA]
+video/x-msvideo [MULTIMEDIA]
+
+text/plain [TEXT]
+text/html application/xhtml+xml [TEXT]
+application/xml text/xml [TEXT]
+text [TEXT]
+application/json [TEXT]
+text/csv [TEXT]
+text/tab-separated-values [TEXT]
+
+application/zip application/x-zip [ARCHIVE]
+application/x-gzip [ARCHIVE]
+application/x-bzip [ARCHIVE]
+application/x-bzip2 [ARCHIVE]
+application/x-tar [ARCHIVE]
+application/x-stuffit [ARCHIVE]
+application/x-opc+zip [ARCHIVE]
+application/x-7z-compressed [ARCHIVE]
+
+application/javascript text/javascript application/x-javascript application/x-ecmascript text/ecmascript [EXECUTABLE]
+application/x-bash [EXECUTABLE]
+application/x-sh [EXECUTABLE]
+application/x-csh [EXECUTABLE]
+application/x-tcsh [EXECUTABLE]
+application/x-tcl [EXECUTABLE]
+application/x-perl [EXECUTABLE]
+application/x-python [EXECUTABLE]
+
+application/pdf application/acrobat [OFFICE]
+application/msword [OFFICE]
+application/vnd.ms-excel [OFFICE]
+application/vnd.ms-powerpoint [OFFICE]
+application/x-director [OFFICE]
+text/rtf [OFFICE]
+
+application/vnd.openxmlformats-officedocument.wordprocessingml.document [OFFICE]
+application/vnd.openxmlformats-officedocument.wordprocessingml.template [OFFICE]
+application/vnd.ms-word.document.macroEnabled.12 [OFFICE]
+application/vnd.ms-word.template.macroEnabled.12 [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.template [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.slideshow [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.presentation [OFFICE]
+application/vnd.ms-powerpoint.addin.macroEnabled.12 [OFFICE]
+application/vnd.ms-powerpoint.presentation.macroEnabled.12 [OFFICE]
+application/vnd.ms-powerpoint.presentation.macroEnabled.12 [OFFICE]
+application/vnd.ms-powerpoint.slideshow.macroEnabled.12 [OFFICE]
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet [OFFICE]
+application/vnd.openxmlformats-officedocument.spreadsheetml.template [OFFICE]
+application/vnd.ms-excel.sheet.macroEnabled.12 [OFFICE]
+application/vnd.ms-excel.template.macroEnabled.12 [OFFICE]
+application/vnd.ms-excel.addin.macroEnabled.12 [OFFICE]
+application/vnd.ms-excel.sheet.binary.macroEnabled.12 [OFFICE]
+application/acad application/x-acad application/autocad_dwg image/x-dwg application/dwg application/x-dwg application/x-autocad image/vnd.dwg drawing/dwg [DRAWING]
+chemical/x-mdl-molfile [DRAWING]
+chemical/x-mdl-sdfile [DRAWING]
+chemical/x-mdl-rxnfile [DRAWING]
+chemical/x-mdl-rdfile [DRAWING]
+chemical/x-mdl-rgfile [DRAWING]
--- /dev/null
+application/acad dwg
+application/andrew-inset ez
+application/mac-binhex40 hqx
+application/mac-compactpro cpt
+application/mathml+xml mathml
+application/msword doc dot
+application/octet-stream bin dms lha lzh exe class so dll
+application/oda oda
+application/ogg ogx ogg ogm ogv oga spx opus
+application/pdf pdf
+application/postscript ai eps ps
+application/rdf+xml rdf
+application/smil smi smil
+application/srgs gram
+application/srgs+xml grxml
+application/vnd.mif mif
+application/vnd.ms-excel xls xlt xla
+application/vnd.ms-powerpoint ppt pot pps ppa
+application/vnd.wap.wbxml wbxml
+application/vnd.wap.wmlc wmlc
+application/vnd.wap.wmlscriptc wmlsc
+application/voicexml+xml vxml
+application/x-7z-compressed 7z
+application/x-bcpio bcpio
+application/x-bzip bz
+application/x-bzip2 bz2
+application/x-cdlink vcd
+application/x-chess-pgn pgn
+application/x-cpio cpio
+application/x-csh csh
+application/x-dia-diagram dia
+application/x-director dcr dir dxr
+application/x-dvi dvi
+application/x-futuresplash spl
+application/x-gtar gtar tar
+application/x-gzip gz
+application/x-hdf hdf
+application/x-jar jar
+application/javascript js
+application/json json
+application/x-koan skp skd skt skm
+application/x-latex latex
+application/x-netcdf nc cdf
+application/x-sh sh
+application/x-shar shar
+application/x-shockwave-flash swf
+application/x-stuffit sit
+application/x-sv4cpio sv4cpio
+application/x-sv4crc sv4crc
+application/x-tar tar
+application/x-tcl tcl
+application/x-tex tex
+application/x-texinfo texinfo texi
+application/x-troff t tr roff
+application/x-troff-man man
+application/x-troff-me me
+application/x-troff-ms ms
+application/x-ustar ustar
+application/x-wais-source src
+application/x-xpinstall xpi
+application/xhtml+xml xhtml xht
+application/xslt+xml xslt
+application/xml xml xsl xsd kml
+application/xml-dtd dtd
+application/zip zip jar xpi sxc stc sxd std sxi sti sxm stm sxw stw
+application/x-rar rar
+application/font-woff woff
+application/font-woff2 woff2
+application/vnd.ms-fontobject eot
+application/x-font-ttf ttf
+audio/basic au snd
+audio/midi mid midi kar
+audio/mpeg mpga mp2 mp3
+audio/ogg oga ogg spx opus
+video/webm webm
+audio/webm webm
+audio/x-aiff aif aiff aifc
+audio/x-matroska mka mkv
+audio/x-mpegurl m3u
+audio/x-ogg oga ogg spx opus
+audio/x-pn-realaudio ram rm
+audio/x-pn-realaudio-plugin rpm
+audio/x-realaudio ra
+audio/x-wav wav
+audio/wav wav
+audio/x-flac flac
+audio/flac flac
+chemical/x-pdb pdb
+chemical/x-xyz xyz
+image/bmp bmp
+image/cgm cgm
+image/gif gif
+image/ief ief
+image/jp2 j2k jp2 jpg2
+image/jpeg jpeg jpg jpe
+image/png png apng
+image/svg+xml svg
+image/tiff tiff tif
+image/vnd.djvu djvu djv
+image/vnd.microsoft.icon ico
+image/vnd.wap.wbmp wbmp
+image/webp webp
+image/x-cmu-raster ras
+image/x-icon ico
+image/x-ms-bmp bmp
+image/x-portable-anymap pnm
+image/x-portable-bitmap pbm
+image/x-portable-graymap pgm
+image/x-portable-pixmap ppm
+image/x-rgb rgb
+image/x-photoshop psd
+image/x-xbitmap xbm
+image/x-xpixmap xpm
+image/x-xwindowdump xwd
+model/iges igs iges
+model/mesh msh mesh silo
+model/vrml wrl vrml
+text/calendar ics ifb
+text/css css
+text/csv csv
+text/html html htm
+text/plain txt
+text/richtext rtx
+text/rtf rtf
+text/sgml sgml sgm
+text/tab-separated-values tsv
+text/vnd.wap.wml wml
+text/vnd.wap.wmlscript wmls
+text/xml xml xsl xslt rss rdf
+text/x-component htc
+text/x-setext etx
+text/x-sawfish jl
+video/mpeg mpeg mpg mpe
+video/mp4 mp4 m4a m4p m4b m4r m4v
+audio/mp4 m4a
+video/ogg ogv ogm ogg
+video/quicktime qt mov
+video/vnd.mpegurl mxu
+video/x-flv flv
+video/x-matroska mkv mka
+video/x-msvideo avi
+video/x-ogg ogv ogm ogg
+video/x-sgi-movie movie
+x-conference/x-cooltalk ice
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.database odb
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-master odm
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-web oth
+application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
+application/vnd.openxmlformats-officedocument.wordprocessingml.template dotx
+application/vnd.ms-word.document.macroEnabled.12 docm
+application/vnd.ms-word.template.macroEnabled.12 dotm
+application/vnd.openxmlformats-officedocument.presentationml.template potx
+application/vnd.openxmlformats-officedocument.presentationml.slideshow ppsx
+application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
+application/vnd.ms-powerpoint.addin.macroEnabled.12 ppam
+application/vnd.ms-powerpoint.presentation.macroEnabled.12 pptm
+application/vnd.ms-powerpoint.presentation.macroEnabled.12 potm
+application/vnd.ms-powerpoint.slideshow.macroEnabled.12 ppsm
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
+application/vnd.openxmlformats-officedocument.spreadsheetml.template xltx
+application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
+application/vnd.ms-excel.template.macroEnabled.12 xltm
+application/vnd.ms-excel.addin.macroEnabled.12 xlam
+application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
+model/vnd.dwfx+xps dwfx
+application/vnd.ms-xpsdocument xps
+application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb dwfx xps
+chemical/x-mdl-molfile mol
+chemical/x-mdl-sdfile sdf
+chemical/x-mdl-rxnfile rxn
+chemical/x-mdl-rdfile rd
+chemical/x-mdl-rgfile rg
+application/x-amf amf
+application/sla stl
+++ /dev/null
-# MIME type info file.
-# the first MIME type in each line is the "main" MIME type,
-# the others are aliases for this type
-# the media type is given in upper case and square brackets,
-# like [BITMAP], and must indicate a media type as defined by
-# the MEDIATYPE_xxx constants in Defines.php
-
-
-image/gif [BITMAP]
-image/png image/x-png [BITMAP]
-image/ief [BITMAP]
-image/jpeg image/pjpeg [BITMAP]
-image/jp2 [BITMAP]
-image/xbm [BITMAP]
-image/tiff [BITMAP]
-image/x-icon image/x-ico image/vnd.microsoft.icon [BITMAP]
-image/x-rgb [BITMAP]
-image/x-portable-pixmap [BITMAP]
-image/x-portable-graymap image/x-portable-greymap [BITMAP]
-image/x-bmp image/x-ms-bmp image/bmp application/x-bmp application/bmp [BITMAP]
-image/x-photoshop image/psd image/x-psd image/photoshop image/vnd.adobe.photoshop [BITMAP]
-image/vnd.djvu image/x.djvu image/x-djvu [BITMAP]
-image/webp [BITMAP]
-
-image/svg+xml application/svg+xml application/svg image/svg [DRAWING]
-application/postscript [DRAWING]
-application/x-latex [DRAWING]
-application/x-tex [DRAWING]
-application/x-dia-diagram [DRAWING]
-
-
-audio/mpeg audio/mp3 audio/mpeg3 [AUDIO]
-audio/mp4 [AUDIO]
-audio/wav audio/x-wav audio/wave [AUDIO]
-audio/midi audio/mid [AUDIO]
-audio/basic [AUDIO]
-audio/ogg [AUDIO]
-audio/x-aiff [AUDIO]
-audio/x-pn-realaudio [AUDIO]
-audio/x-realaudio [AUDIO]
-audio/webm [AUDIO]
-audio/x-matroska [AUDIO]
-audio/x-flac [AUDIO]
-audio/flac [AUDIO]
-
-video/mpeg application/mpeg [VIDEO]
-video/ogg [VIDEO]
-video/x-sgi-video [VIDEO]
-video/x-flv [VIDEO]
-video/webm [VIDEO]
-video/x-matroska [VIDEO]
-video/mp4 [VIDEO]
-
-application/ogg application/x-ogg audio/ogg audio/x-ogg video/ogg video/x-ogg [MULTIMEDIA]
-
-application/x-shockwave-flash [MULTIMEDIA]
-audio/x-pn-realaudio-plugin [MULTIMEDIA]
-model/iges [MULTIMEDIA]
-model/mesh [MULTIMEDIA]
-model/vrml [MULTIMEDIA]
-video/quicktime [MULTIMEDIA]
-video/x-msvideo [MULTIMEDIA]
-
-text/plain [TEXT]
-text/html application/xhtml+xml [TEXT]
-application/xml text/xml [TEXT]
-text [TEXT]
-application/json [TEXT]
-text/csv [TEXT]
-text/tab-separated-values [TEXT]
-
-application/zip application/x-zip [ARCHIVE]
-application/x-gzip [ARCHIVE]
-application/x-bzip [ARCHIVE]
-application/x-bzip2 [ARCHIVE]
-application/x-tar [ARCHIVE]
-application/x-stuffit [ARCHIVE]
-application/x-opc+zip [ARCHIVE]
-application/x-7z-compressed [ARCHIVE]
-
-application/javascript text/javascript application/x-javascript application/x-ecmascript text/ecmascript [EXECUTABLE]
-application/x-bash [EXECUTABLE]
-application/x-sh [EXECUTABLE]
-application/x-csh [EXECUTABLE]
-application/x-tcsh [EXECUTABLE]
-application/x-tcl [EXECUTABLE]
-application/x-perl [EXECUTABLE]
-application/x-python [EXECUTABLE]
-
-application/pdf application/acrobat [OFFICE]
-application/msword [OFFICE]
-application/vnd.ms-excel [OFFICE]
-application/vnd.ms-powerpoint [OFFICE]
-application/x-director [OFFICE]
-text/rtf [OFFICE]
-
-application/vnd.openxmlformats-officedocument.wordprocessingml.document [OFFICE]
-application/vnd.openxmlformats-officedocument.wordprocessingml.template [OFFICE]
-application/vnd.ms-word.document.macroEnabled.12 [OFFICE]
-application/vnd.ms-word.template.macroEnabled.12 [OFFICE]
-application/vnd.openxmlformats-officedocument.presentationml.template [OFFICE]
-application/vnd.openxmlformats-officedocument.presentationml.slideshow [OFFICE]
-application/vnd.openxmlformats-officedocument.presentationml.presentation [OFFICE]
-application/vnd.ms-powerpoint.addin.macroEnabled.12 [OFFICE]
-application/vnd.ms-powerpoint.presentation.macroEnabled.12 [OFFICE]
-application/vnd.ms-powerpoint.presentation.macroEnabled.12 [OFFICE]
-application/vnd.ms-powerpoint.slideshow.macroEnabled.12 [OFFICE]
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet [OFFICE]
-application/vnd.openxmlformats-officedocument.spreadsheetml.template [OFFICE]
-application/vnd.ms-excel.sheet.macroEnabled.12 [OFFICE]
-application/vnd.ms-excel.template.macroEnabled.12 [OFFICE]
-application/vnd.ms-excel.addin.macroEnabled.12 [OFFICE]
-application/vnd.ms-excel.sheet.binary.macroEnabled.12 [OFFICE]
-application/acad application/x-acad application/autocad_dwg image/x-dwg application/dwg application/x-dwg application/x-autocad image/vnd.dwg drawing/dwg [DRAWING]
-chemical/x-mdl-molfile [DRAWING]
-chemical/x-mdl-sdfile [DRAWING]
-chemical/x-mdl-rxnfile [DRAWING]
-chemical/x-mdl-rdfile [DRAWING]
-chemical/x-mdl-rgfile [DRAWING]
+++ /dev/null
-application/acad dwg
-application/andrew-inset ez
-application/mac-binhex40 hqx
-application/mac-compactpro cpt
-application/mathml+xml mathml
-application/msword doc dot
-application/octet-stream bin dms lha lzh exe class so dll
-application/oda oda
-application/ogg ogx ogg ogm ogv oga spx opus
-application/pdf pdf
-application/postscript ai eps ps
-application/rdf+xml rdf
-application/smil smi smil
-application/srgs gram
-application/srgs+xml grxml
-application/vnd.mif mif
-application/vnd.ms-excel xls xlt xla
-application/vnd.ms-powerpoint ppt pot pps ppa
-application/vnd.wap.wbxml wbxml
-application/vnd.wap.wmlc wmlc
-application/vnd.wap.wmlscriptc wmlsc
-application/voicexml+xml vxml
-application/x-7z-compressed 7z
-application/x-bcpio bcpio
-application/x-bzip bz
-application/x-bzip2 bz2
-application/x-cdlink vcd
-application/x-chess-pgn pgn
-application/x-cpio cpio
-application/x-csh csh
-application/x-dia-diagram dia
-application/x-director dcr dir dxr
-application/x-dvi dvi
-application/x-futuresplash spl
-application/x-gtar gtar tar
-application/x-gzip gz
-application/x-hdf hdf
-application/x-jar jar
-application/javascript js
-application/json json
-application/x-koan skp skd skt skm
-application/x-latex latex
-application/x-netcdf nc cdf
-application/x-sh sh
-application/x-shar shar
-application/x-shockwave-flash swf
-application/x-stuffit sit
-application/x-sv4cpio sv4cpio
-application/x-sv4crc sv4crc
-application/x-tar tar
-application/x-tcl tcl
-application/x-tex tex
-application/x-texinfo texinfo texi
-application/x-troff t tr roff
-application/x-troff-man man
-application/x-troff-me me
-application/x-troff-ms ms
-application/x-ustar ustar
-application/x-wais-source src
-application/x-xpinstall xpi
-application/xhtml+xml xhtml xht
-application/xslt+xml xslt
-application/xml xml xsl xsd kml
-application/xml-dtd dtd
-application/zip zip jar xpi sxc stc sxd std sxi sti sxm stm sxw stw
-application/x-rar rar
-application/font-woff woff
-application/font-woff2 woff2
-application/vnd.ms-fontobject eot
-application/x-font-ttf ttf
-audio/basic au snd
-audio/midi mid midi kar
-audio/mpeg mpga mp2 mp3
-audio/ogg oga ogg spx opus
-video/webm webm
-audio/webm webm
-audio/x-aiff aif aiff aifc
-audio/x-matroska mka mkv
-audio/x-mpegurl m3u
-audio/x-ogg oga ogg spx opus
-audio/x-pn-realaudio ram rm
-audio/x-pn-realaudio-plugin rpm
-audio/x-realaudio ra
-audio/x-wav wav
-audio/wav wav
-audio/x-flac flac
-audio/flac flac
-chemical/x-pdb pdb
-chemical/x-xyz xyz
-image/bmp bmp
-image/cgm cgm
-image/gif gif
-image/ief ief
-image/jp2 j2k jp2 jpg2
-image/jpeg jpeg jpg jpe
-image/png png apng
-image/svg+xml svg
-image/tiff tiff tif
-image/vnd.djvu djvu djv
-image/vnd.microsoft.icon ico
-image/vnd.wap.wbmp wbmp
-image/webp webp
-image/x-cmu-raster ras
-image/x-icon ico
-image/x-ms-bmp bmp
-image/x-portable-anymap pnm
-image/x-portable-bitmap pbm
-image/x-portable-graymap pgm
-image/x-portable-pixmap ppm
-image/x-rgb rgb
-image/x-photoshop psd
-image/x-xbitmap xbm
-image/x-xpixmap xpm
-image/x-xwindowdump xwd
-model/iges igs iges
-model/mesh msh mesh silo
-model/vrml wrl vrml
-text/calendar ics ifb
-text/css css
-text/csv csv
-text/html html htm
-text/plain txt
-text/richtext rtx
-text/rtf rtf
-text/sgml sgml sgm
-text/tab-separated-values tsv
-text/vnd.wap.wml wml
-text/vnd.wap.wmlscript wmls
-text/xml xml xsl xslt rss rdf
-text/x-component htc
-text/x-setext etx
-text/x-sawfish jl
-video/mpeg mpeg mpg mpe
-video/mp4 mp4 m4a m4p m4b m4r m4v
-audio/mp4 m4a
-video/ogg ogv ogm ogg
-video/quicktime qt mov
-video/vnd.mpegurl mxu
-video/x-flv flv
-video/x-matroska mkv mka
-video/x-msvideo avi
-video/x-ogg ogv ogm ogg
-video/x-sgi-movie movie
-x-conference/x-cooltalk ice
-application/vnd.oasis.opendocument.chart odc
-application/vnd.oasis.opendocument.chart-template otc
-application/vnd.oasis.opendocument.database odb
-application/vnd.oasis.opendocument.formula odf
-application/vnd.oasis.opendocument.formula-template otf
-application/vnd.oasis.opendocument.graphics odg
-application/vnd.oasis.opendocument.graphics-template otg
-application/vnd.oasis.opendocument.image odi
-application/vnd.oasis.opendocument.image-template oti
-application/vnd.oasis.opendocument.presentation odp
-application/vnd.oasis.opendocument.presentation-template otp
-application/vnd.oasis.opendocument.spreadsheet ods
-application/vnd.oasis.opendocument.spreadsheet-template ots
-application/vnd.oasis.opendocument.text odt
-application/vnd.oasis.opendocument.text-master odm
-application/vnd.oasis.opendocument.text-template ott
-application/vnd.oasis.opendocument.text-web oth
-application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
-application/vnd.openxmlformats-officedocument.wordprocessingml.template dotx
-application/vnd.ms-word.document.macroEnabled.12 docm
-application/vnd.ms-word.template.macroEnabled.12 dotm
-application/vnd.openxmlformats-officedocument.presentationml.template potx
-application/vnd.openxmlformats-officedocument.presentationml.slideshow ppsx
-application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
-application/vnd.ms-powerpoint.addin.macroEnabled.12 ppam
-application/vnd.ms-powerpoint.presentation.macroEnabled.12 pptm
-application/vnd.ms-powerpoint.presentation.macroEnabled.12 potm
-application/vnd.ms-powerpoint.slideshow.macroEnabled.12 ppsm
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
-application/vnd.openxmlformats-officedocument.spreadsheetml.template xltx
-application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
-application/vnd.ms-excel.template.macroEnabled.12 xltm
-application/vnd.ms-excel.addin.macroEnabled.12 xlam
-application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
-model/vnd.dwfx+xps dwfx
-application/vnd.ms-xpsdocument xps
-application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb dwfx xps
-chemical/x-mdl-molfile mol
-chemical/x-mdl-sdfile sdf
-chemical/x-mdl-rxnfile rxn
-chemical/x-mdl-rdfile rd
-chemical/x-mdl-rgfile rg
-application/x-amf amf
-application/sla stl
return true;
}
$mime = false;
-$lines = explode( "\n", file_get_contents( "includes/mime.types" ) );
+// Borrow mime type file from MimeAnalyzer
+$lines = explode( "\n", file_get_contents( "includes/libs/mime/mime.types" ) );
foreach ( $lines as $line ) {
$exts = explode( " ", $line );
$mime = array_shift( $exts );
'LinkRenderer' => [ 'LinkRenderer', LinkRenderer::class ],
'LinkRendererFactory' => [ 'LinkRendererFactory', LinkRendererFactory::class ],
'_MediaWikiTitleCodec' => [ '_MediaWikiTitleCodec', MediaWikiTitleCodec::class ],
+ 'MimeAnalyzer' => [ 'MimeAnalyzer', MimeAnalyzer::class ],
'TitleFormatter' => [ 'TitleFormatter', TitleFormatter::class ],
'TitleParser' => [ 'TitleParser', TitleParser::class ],
'ProxyLookup' => [ 'ProxyLookup', ProxyLookup::class ],
+++ /dev/null
-<?php
-class MimeMagicTest extends PHPUnit_Framework_TestCase {
-
- /** @var MimeMagic */
- private $mimeMagic;
-
- function setUp() {
- $this->mimeMagic = MimeMagic::singleton();
- parent::setUp();
- }
-
- /**
- * @dataProvider providerImproveTypeFromExtension
- * @param string $ext File extension (no leading dot)
- * @param string $oldMime Initially detected MIME
- * @param string $expectedMime MIME type after taking extension into account
- */
- function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ) {
- $actualMime = $this->mimeMagic->improveTypeFromExtension( $oldMime, $ext );
- $this->assertEquals( $expectedMime, $actualMime );
- }
-
- function providerImproveTypeFromExtension() {
- return [
- [ 'gif', 'image/gif', 'image/gif' ],
- [ 'gif', 'unknown/unknown', 'unknown/unknown' ],
- [ 'wrl', 'unknown/unknown', 'model/vrml' ],
- [ 'txt', 'text/plain', 'text/plain' ],
- [ 'csv', 'text/plain', 'text/csv' ],
- [ 'tsv', 'text/plain', 'text/tab-separated-values' ],
- [ 'js', 'text/javascript', 'application/javascript' ],
- [ 'js', 'application/x-javascript', 'application/javascript' ],
- [ 'json', 'text/plain', 'application/json' ],
- [ 'foo', 'application/x-opc+zip', 'application/zip' ],
- [ 'docx', 'application/x-opc+zip',
- 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ],
- [ 'djvu', 'image/x-djvu', 'image/vnd.djvu' ],
- [ 'wav', 'audio/wav', 'audio/wav' ],
- ];
- }
-
- /**
- * Test to make sure that encoder=ffmpeg2theora doesn't trigger
- * MEDIATYPE_VIDEO (bug 63584)
- */
- function testOggRecognize() {
- $oggFile = __DIR__ . '/../data/media/say-test.ogg';
- $actualType = $this->mimeMagic->getMediaType( $oggFile, 'application/ogg' );
- $this->assertEquals( $actualType, MEDIATYPE_AUDIO );
- }
-}
--- /dev/null
+<?php
+/**
+ * Tests for MimeAnalyzer, exercised against the mime.info and mime.types
+ * files that live next to it in includes/libs/mime.
+ *
+ * The analyzer is constructed directly with an explicit option array, so the
+ * result does not depend on how the MimeMagic singleton was configured.
+ */
+class MimeMagicTest extends PHPUnit_Framework_TestCase {
+	/** @var MimeAnalyzer Instance under test, rebuilt before every test */
+	private $mimeAnalyzer;
+
+	/**
+	 * Build a fresh MimeAnalyzer from the bundled data files.
+	 */
+	public function setUp() {
+		global $IP;
+
+		parent::setUp();
+
+		$mimeDir = $IP . "/includes/libs/mime";
+
+		$this->mimeAnalyzer = new MimeAnalyzer( [
+			'infoFile' => $mimeDir . "/mime.info",
+			'typeFile' => $mimeDir . "/mime.types",
+			// Presumably keyed by XML root element ("namespace:name" or a bare
+			// element name) -- confirm against MimeAnalyzer's handling of 'xmlTypes'.
+			'xmlTypes' => [
+				'http://www.w3.org/2000/svg:svg' => 'image/svg+xml',
+				'svg' => 'image/svg+xml',
+				'http://www.lysator.liu.se/~alla/dia/:diagram' => 'application/x-dia-diagram',
+				'http://www.w3.org/1999/xhtml:html' => 'text/html', // application/xhtml+xml?
+				'html' => 'text/html', // application/xhtml+xml?
+			]
+		] );
+	}
+
+	/**
+	 * @dataProvider providerImproveTypeFromExtension
+	 * @param string $ext File extension (no leading dot)
+	 * @param string $oldMime Initially detected MIME
+	 * @param string $expectedMime MIME type after taking extension into account
+	 */
+	public function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ) {
+		$actualMime = $this->mimeAnalyzer->improveTypeFromExtension( $oldMime, $ext );
+		$this->assertEquals( $expectedMime, $actualMime );
+	}
+
+	/**
+	 * Data sets for testImproveTypeFromExtension().
+	 *
+	 * @return array[] Each entry is [ extension, detected MIME, expected MIME ]
+	 */
+	public static function providerImproveTypeFromExtension() {
+		return [
+			'gif, already image/gif' => [ 'gif', 'image/gif', 'image/gif' ],
+			'gif, unknown stays unknown' => [ 'gif', 'unknown/unknown', 'unknown/unknown' ],
+			'wrl, unknown becomes vrml' => [ 'wrl', 'unknown/unknown', 'model/vrml' ],
+			'txt, already text/plain' => [ 'txt', 'text/plain', 'text/plain' ],
+			'csv refines text/plain' => [ 'csv', 'text/plain', 'text/csv' ],
+			'tsv refines text/plain' => [ 'tsv', 'text/plain', 'text/tab-separated-values' ],
+			'js from text/javascript' => [ 'js', 'text/javascript', 'application/javascript' ],
+			'js from application/x-javascript' =>
+				[ 'js', 'application/x-javascript', 'application/javascript' ],
+			'json refines text/plain' => [ 'json', 'text/plain', 'application/json' ],
+			'opc+zip, unrecognised extension' =>
+				[ 'foo', 'application/x-opc+zip', 'application/zip' ],
+			'opc+zip, docx extension' => [
+				'docx',
+				'application/x-opc+zip',
+				'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+			],
+			'djvu alias to main type' => [ 'djvu', 'image/x-djvu', 'image/vnd.djvu' ],
+			'wav, already audio/wav' => [ 'wav', 'audio/wav', 'audio/wav' ],
+		];
+	}
+
+	/**
+	 * Test to make sure that encoder=ffmpeg2theora doesn't trigger
+	 * MEDIATYPE_VIDEO (bug 63584)
+	 */
+	public function testOggRecognize() {
+		$oggFile = __DIR__ . '/../../../data/media/say-test.ogg';
+		$actualType = $this->mimeAnalyzer->getMediaType( $oggFile, 'application/ogg' );
+		// Expected value goes first so a failure reports the values the right way round.
+		$this->assertEquals( MEDIATYPE_AUDIO, $actualType );
+	}
+}