From 08c96166ee3dd9644b03c96fde8156b962792c7a Mon Sep 17 00:00:00 2001 From: Derk-Jan Hartman Date: Sat, 19 Jun 2010 18:47:34 +0000 Subject: [PATCH] Bug 23642. Best effort mime detection for OpenXML files OpenXML files are Open Packaging Conventions (OPC) files. Internally, we use the custom mime application/x-opc+zip for these files. In the database, we store the 'proper' mime, which we guess from the file extension, or if not supported, application/zip. All OPC files are blacklisted by $wgMimeTypeBlacklist by default, just as other zip files. --- RELEASE-NOTES | 1 + includes/DefaultSettings.php | 3 +++ includes/MimeMagic.php | 30 ++++++++++++++++++++++++++---- includes/mime.info | 20 ++++++++++++++++++++ includes/mime.types | 4 ++++ 5 files changed, 54 insertions(+), 4 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index e38f341179..2b5359c30e 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -203,6 +203,7 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * (bug 23465) Don't ignore the predefined destination filename on Special:Upload after following a red link * Correct the escaping of the autosummary URI fragments. +* (bug 23642) Recognize mime types of MS OpenXML documents. === API changes in 1.17 === * (bug 22738) Allow filtering by action type on query=logevent. diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index b326067579..392191b501 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -520,6 +520,9 @@ $wgMimeTypeBlacklist = array( # A ZIP file may be a valid Java archive containing an applet which exploits the # same-origin policy to steal cookies 'application/zip', + # MS Office OpenXML and other Open Package Conventions files are zip files + # and thus blacklisted just as other zip files + 'application/x-opc+zip', ); /** This is a flag to determine whether or not to check file extensions on upload. 
*/ diff --git a/includes/MimeMagic.php b/includes/MimeMagic.php index 555cc11b51..18fc9e7747 100644 --- a/includes/MimeMagic.php +++ b/includes/MimeMagic.php @@ -548,7 +548,7 @@ class MimeMagic { // Check for ZIP (before getimagesize) if ( strpos( $tail, "PK\x05\x06" ) !== false ) { wfDebug( __METHOD__.": ZIP header present at end of $file\n" ); - return $this->detectZipType( $head ); + return $this->detectZipType( $head, $ext ); } wfSuppressWarnings(); @@ -577,9 +577,13 @@ class MimeMagic { * If can't tell, returns 'application/zip'. * * @param $header String: some reasonably-sized chunk of file header + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. + * * @return string */ - function detectZipType( $header ) { + function detectZipType( $header, $ext = false ) { + $mime = 'application/zip'; $opendocTypes = array( 'chart-template', 'chart', @@ -602,15 +606,33 @@ class MimeMagic { $types = '(?:' . implode( '|', $opendocTypes ) . 
')'; $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/"; wfDebug( __METHOD__.": $opendocRegex\n" ); + + $openxmlRegex = "/^\[Content_Types\].xml/"; if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) { $mime = $matches[1]; wfDebug( __METHOD__.": detected $mime from ZIP archive\n" ); - return $mime; + } elseif( preg_match( $openxmlRegex, substr( $header, 30 ), $matches ) ) { + $mime = "application/x-opc+zip"; + if( $ext !== true && $ext !== false ) { + /** This is the mode used by getPropsFromPath + * These mime's are stored in the database, where we don't really want + * x-opc+zip, because we use it only for internal purposes + */ + if( $this->isMatchingExtension( $ext, $mime) ) { + /* A known file extension for an OPC file, + * find the proper mime type for that file extension */ + $mime = $this->guessTypesForExtension( $ext ); + } else { + $mime = 'application/zip'; + } + + } + wfDebug( __METHOD__.": detected an Open Packaging Conventions archive: $mime\n" ); } else { wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" ); - return 'application/zip'; } + return $mime; } /** Internal mime type detection, please use guessMimeType() for application code instead. 
diff --git a/includes/mime.info b/includes/mime.info index a739981206..610f6ecf97 100644 --- a/includes/mime.info +++ b/includes/mime.info @@ -61,6 +61,7 @@ application/x-gzip [ARCHIVE] application/x-bzip [ARCHIVE] application/x-tar [ARCHIVE] application/x-stuffit [ARCHIVE] +application/x-opc+zip [ARCHIVE] text/javascript application/x-javascript application/x-ecmascript text/ecmascript [EXECUTABLE] @@ -78,3 +79,22 @@ application/vnd.ms-excel [OFFICE] application/vnd.ms-powerpoint [OFFICE] application/x-director [OFFICE] text/rtf [OFFICE] + +application/vnd.openxmlformats-officedocument.wordprocessingml.document [OFFICE] +application/vnd.openxmlformats-officedocument.wordprocessingml.template [OFFICE] +application/vnd.ms-word.document.macroEnabled.12 [OFFICE] +application/vnd.ms-word.template.macroEnabled.12 [OFFICE] +application/vnd.openxmlformats-officedocument.presentationml.template [OFFICE] +application/vnd.openxmlformats-officedocument.presentationml.slideshow [OFFICE] +application/vnd.openxmlformats-officedocument.presentationml.presentation [OFFICE] +application/vnd.ms-powerpoint.addin.macroEnabled.12 [OFFICE] +application/vnd.ms-powerpoint.presentation.macroEnabled.12 [OFFICE] +application/vnd.ms-powerpoint.presentation.macroEnabled.12 [OFFICE] +application/vnd.ms-powerpoint.slideshow.macroEnabled.12 [OFFICE] +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet [OFFICE] +application/vnd.openxmlformats-officedocument.spreadsheetml.template [OFFICE] +application/vnd.ms-excel.sheet.macroEnabled.12 [OFFICE] +application/vnd.ms-excel.template.macroEnabled.12 [OFFICE] +application/vnd.ms-excel.addin.macroEnabled.12 [OFFICE] +application/vnd.ms-excel.sheet.binary.macroEnabled.12 [OFFICE] + diff --git a/includes/mime.types b/includes/mime.types index ac3321d0c4..df27d1fa86 100644 --- a/includes/mime.types +++ b/includes/mime.types @@ -152,3 +152,7 @@ application/vnd.ms-excel.sheet.macroEnabled.12 xlsm application/vnd.ms-excel.template.macroEnabled.12 
xltm application/vnd.ms-excel.addin.macroEnabled.12 xlam application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb +application/epub+zip epub +model/vnd.dwfx+xps dwfx +application/vnd.ms-xpsdocument xps +application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb epub dwfx xps -- 2.20.1