define('MM_WELL_KNOWN_MIME_TYPES',<<<END_STRING
application/ogg ogg ogm ogv
application/pdf pdf
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-web oth
application/x-javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
*/
define('MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
application/pdf [OFFICE]
+application/vnd.oasis.opendocument.chart [OFFICE]
+application/vnd.oasis.opendocument.chart-template [OFFICE]
+application/vnd.oasis.opendocument.formula [OFFICE]
+application/vnd.oasis.opendocument.formula-template [OFFICE]
+application/vnd.oasis.opendocument.graphics [OFFICE]
+application/vnd.oasis.opendocument.graphics-template [OFFICE]
+application/vnd.oasis.opendocument.image [OFFICE]
+application/vnd.oasis.opendocument.image-template [OFFICE]
+application/vnd.oasis.opendocument.presentation [OFFICE]
+application/vnd.oasis.opendocument.presentation-template [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
+application/vnd.oasis.opendocument.text [OFFICE]
+application/vnd.oasis.opendocument.text-template [OFFICE]
+application/vnd.oasis.opendocument.text-master [OFFICE]
+application/vnd.oasis.opendocument.text-web [OFFICE]
text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
* Mapping of media types to arrays of mime types.
* This is used by findMediaType and getMediaType, respectively
*/
- var $mMediaTypes= NULL;
+ var $mMediaTypes= null;
/** Map of mime type aliases
*/
- var $mMimeTypeAliases= NULL;
+ var $mMimeTypeAliases= null;
/** map of mime types to file extensions (as a space seprarated list)
*/
- var $mMimeToExt= NULL;
+ var $mMimeToExt= null;
/** map of file extensions types to mime types (as a space seprarated list)
*/
- var $mExtToMime= NULL;
+ var $mExtToMime= null;
+
+ /** IEContentAnalyzer instance
+ */
+ var $mIEAnalyzer;
/** The singleton instance
*/
*/
function guessTypesForExtension( $ext ) {
$m = $this->getTypesForExtension( $ext );
- if ( is_null( $m ) ) return NULL;
+ if ( is_null( $m ) ) return null;
$m = trim( $m );
$m = preg_replace( '/\s.*$/', '', $m );
$ext = $this->getExtensionsForType( $mime );
if ( !$ext ) {
- return NULL; //unknown
+ return null; //unknown
}
$ext = explode( ' ', $ext );
wfRestoreWarnings();
if( !$f ) return "unknown/unknown";
$head = fread( $f, 1024 );
+ fseek( $f, -65558, SEEK_END );
+ $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR
fclose( $f );
// Hardcode a few magic number checks...
}
/*
- * look for PHP
- * Check for this before HTML/XML...
- * Warning: this is a heuristic, and won't match a file with a lot of non-PHP before.
- * It will also match text files which could be PHP. :)
+ * Look for PHP. Check for this before HTML/XML... Warning: this is a
+ * heuristic, and won't match a file with a lot of non-PHP before. It
+ * will also match text files which could be PHP. :)
+ *
+ * FIXME: For this reason, the check is probably useless -- an attacker
+ * could almost certainly just pad the file with a lot of nonsense to
+ * circumvent the check in any case where it would be a security
+ * problem. On the other hand, it causes harmful false positives (bug
+ * 16583). The heuristic has been cut down to exclude three-character
+ * strings like "<? ", but should it be axed completely?
*/
if( ( strpos( $head, '<?php' ) !== false ) ||
- ( strpos( $head, '<? ' ) !== false ) ||
- ( strpos( $head, "<?\n" ) !== false ) ||
- ( strpos( $head, "<?\t" ) !== false ) ||
- ( strpos( $head, "<?=" ) !== false ) ||
( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
( strpos( $head, "<\x00?\x00 " ) !== false ) ||
*/
$xml = new XmlTypeCheck( $file );
if( $xml->wellFormed ) {
- $types = array(
- 'http://www.w3.org/2000/svg:svg' => 'image/svg+xml',
- 'svg' => 'image/svg+xml',
- 'http://www.lysator.liu.se/~alla/dia/:diagram' => 'application/x-dia-diagram',
- 'http://www.w3.org/1999/xhtml:html' => 'text/html', // application/xhtml+xml?
- 'html' => 'text/html', // application/xhtml+xml?
- );
- if( isset( $types[$xml->rootElement] ) ) {
- $mime = $types[$xml->rootElement];
- return $mime;
+ global $wgXMLMimeTypes;
+ if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
+ return $wgXMLMimeTypes[$xml->getRootElement()];
} else {
- /// Fixme -- this would be the place to allow additional XML type checks
return 'application/xml';
}
}
/*
* look for shell scripts
*/
- $script_type = NULL;
+ $script_type = null;
# detect by shebang
if ( substr( $head, 0, 2) == "#!" ) {
}
}
+ // Check for ZIP (before getimagesize)
+ if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
+ wfDebug( __METHOD__.": ZIP header present at end of $file\n" );
+ return $this->detectZipType( $head );
+ }
+
wfSuppressWarnings();
$gis = getimagesize( $file );
wfRestoreWarnings();
$mime = $gis['mime'];
wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" );
return $mime;
- } else {
- return false;
}
// Also test DjVu
wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" );
return 'image/vnd.djvu';
}
+
+ return false;
+ }
+
+ /**
+ * Detect application-specific file type of a given ZIP file from its
+ * header data. Currently works for OpenDocument types...
+ * If can't tell, returns 'application/zip'.
+ *
+ * @param string $header Some reasonably-sized chunk of file header
+ * @return string
+ */
+ function detectZipType( $header ) {
+ $opendocTypes = array(
+ 'chart-template',
+ 'chart',
+ 'formula-template',
+ 'formula',
+ 'graphics-template',
+ 'graphics',
+ 'image-template',
+ 'image',
+ 'presentation-template',
+ 'presentation',
+ 'spreadsheet-template',
+ 'spreadsheet',
+ 'text-template',
+ 'text-master',
+ 'text-web',
+ 'text' );
+
+ // http://lists.oasis-open.org/archives/office/200505/msg00006.html
+ $types = '(?:' . implode( '|', $opendocTypes ) . ')';
+ $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
+ wfDebug( __METHOD__.": $opendocRegex\n" );
+
+ if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
+ $mime = $matches[1];
+ wfDebug( __METHOD__.": detected $mime from ZIP archive\n" );
+ return $mime;
+ } else {
+ wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" );
+ return 'application/zip';
+ }
}
/** Internal mime type detection, please use guessMimeType() for application code instead.
function detectMimeType( $file, $ext = true ) {
global $wgMimeDetectorCommand;
- $m = NULL;
+ $m = null;
if ( $wgMimeDetectorCommand ) {
$fn = wfEscapeShellArg( $file );
$m = `$wgMimeDetectorCommand $fn`;
$m = strtolower( $m );
if ( strpos( $m, 'unknown' ) !== false ) {
- $m = NULL;
+ $m = null;
} else {
wfDebug( __METHOD__.": magic mime type of $file: $m\n" );
return $m;
*
* @return (int?string?) a value to be used with the MEDIATYPE_xxx constants.
*/
- function getMediaType( $path = NULL, $mime = NULL ) {
+ function getMediaType( $path = null, $mime = null ) {
if( !$mime && !$path ) return MEDIATYPE_UNKNOWN;
# If mime type is unknown, guess it
}
# Check for entry for file extension
- $e = NULL;
+ $e = null;
if ( $path ) {
$i = strrpos( $path, '.' );
$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
return MEDIATYPE_UNKNOWN;
}
+
+ /**
+ * Get the MIME types that various versions of Internet Explorer would
+ * detect from a chunk of the content.
+ *
+ * @param string $fileName The file name (unused at present)
+ * @param string $chunk The first 256 bytes of the file
+ * @param string $proposed The MIME type proposed by the server
+ */
+ public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
+ $ca = $this->getIEContentAnalyzer();
+ return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
+ }
+
+ /**
+ * Get a cached instance of IEContentAnalyzer
+ */
+ protected function getIEContentAnalyzer() {
+ if ( is_null( $this->mIEAnalyzer ) ) {
+ $this->mIEAnalyzer = new IEContentAnalyzer;
+ }
+ return $this->mIEAnalyzer;
+ }
}