Bug 23642. Best effort mime detection for OpenXML files
author Derk-Jan Hartman <hartman@users.mediawiki.org>
Sat, 19 Jun 2010 18:47:34 +0000 (18:47 +0000)
committer Derk-Jan Hartman <hartman@users.mediawiki.org>
Sat, 19 Jun 2010 18:47:34 +0000 (18:47 +0000)
OpenXML files are Open Packaging Conventions (OPC) files. Internally, we use the custom mime type application/x-opc+zip for these files. In the database, we store the 'proper' mime type, which we guess from the file extension, or, if the extension is not supported, application/zip. All OPC files are blacklisted by $wgMimeTypeBlacklist by default, just like other zip files.

RELEASE-NOTES
includes/DefaultSettings.php
includes/MimeMagic.php
includes/mime.info
includes/mime.types

index e38f341..2b5359c 100644 (file)
@@ -203,6 +203,7 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * (bug 23465) Don't ignore the predefined destination filename on 
   Special:Upload after following a red link
 * Correct the escaping of the autosummary URI fragments.
+* (bug 23642) Recognize mime types of MS OpenXML documents.
 
 === API changes in 1.17 ===
 * (bug 22738) Allow filtering by action type on query=logevent.
index b326067..392191b 100644 (file)
@@ -520,6 +520,9 @@ $wgMimeTypeBlacklist = array(
        # A ZIP file may be a valid Java archive containing an applet which exploits the
        # same-origin policy to steal cookies
        'application/zip',
+       # MS Office OpenXML and other Open Package Conventions files are zip files
+       # and thus blacklisted just as other zip files
+       'application/x-opc+zip',
 );
 
 /** This is a flag to determine whether or not to check file extensions on upload. */
index 555cc11..18fc9e7 100644 (file)
@@ -548,7 +548,7 @@ class MimeMagic {
                // Check for ZIP (before getimagesize)
                if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
                        wfDebug( __METHOD__.": ZIP header present at end of $file\n" );
-                       return $this->detectZipType( $head );
+                       return $this->detectZipType( $head, $ext );
                }
 
                wfSuppressWarnings();
@@ -577,9 +577,13 @@ class MimeMagic {
         * If can't tell, returns 'application/zip'.
         *
         * @param $header String: some reasonably-sized chunk of file header
+        * @param $ext Mixed: the file extension, or true to extract it from the filename.
+        *             Set it to false to ignore the extension.
+        *
         * @return string
         */
-       function detectZipType( $header ) {
+       function detectZipType( $header, $ext = false ) {
+               $mime = 'application/zip';
                $opendocTypes = array(
                        'chart-template',
                        'chart',
@@ -602,15 +606,33 @@ class MimeMagic {
                $types = '(?:' . implode( '|', $opendocTypes ) . ')';
                $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
                wfDebug( __METHOD__.": $opendocRegex\n" );
+
+               $openxmlRegex = "/^\[Content_Types\].xml/";
                
                if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
                        $mime = $matches[1];
                        wfDebug( __METHOD__.": detected $mime from ZIP archive\n" );
-                       return $mime;
+               } elseif( preg_match( $openxmlRegex, substr( $header, 30 ), $matches ) ) {
+                       $mime = "application/x-opc+zip";
+                       if( $ext !== true && $ext !== false ) {
+                               /** This is the mode used by getPropsFromPath
+                               * These mime's are stored in the database, where we don't really want
+                               * x-opc+zip, because we use it only for internal purposes
+                               */
+                               if( $this->isMatchingExtension( $ext, $mime) ) {
+                                       /* A known file extension for an OPC file,
+                                       * find the proper mime type for that file extension */
+                                       $mime = $this->guessTypesForExtension( $ext );
+                               } else {
+                                       $mime = 'application/zip';
+                               }
+                               
+                       }
+                       wfDebug( __METHOD__.": detected an Open Packaging Conventions archive: $mime\n" );
                } else {
                        wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" );
-                       return 'application/zip';
                }
+               return $mime;
        }
 
        /** Internal mime type detection, please use guessMimeType() for application code instead.
index a739981..610f6ec 100644 (file)
@@ -61,6 +61,7 @@ application/x-gzip    [ARCHIVE]
 application/x-bzip     [ARCHIVE]
 application/x-tar      [ARCHIVE]
 application/x-stuffit  [ARCHIVE]
+application/x-opc+zip  [ARCHIVE]
 
 
 text/javascript application/x-javascript application/x-ecmascript text/ecmascript      [EXECUTABLE]
@@ -78,3 +79,22 @@ application/vnd.ms-excel     [OFFICE]
 application/vnd.ms-powerpoint  [OFFICE]
 application/x-director         [OFFICE]
 text/rtf                       [OFFICE]
+
+application/vnd.openxmlformats-officedocument.wordprocessingml.document        [OFFICE]
+application/vnd.openxmlformats-officedocument.wordprocessingml.template                [OFFICE]
+application/vnd.ms-word.document.macroEnabled.12                               [OFFICE]
+application/vnd.ms-word.template.macroEnabled.12                               [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.template          [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.slideshow         [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.presentation      [OFFICE]
+application/vnd.ms-powerpoint.addin.macroEnabled.12                            [OFFICE]
+application/vnd.ms-powerpoint.presentation.macroEnabled.12                     [OFFICE]
+application/vnd.ms-powerpoint.template.macroEnabled.12                         [OFFICE]
+application/vnd.ms-powerpoint.slideshow.macroEnabled.12                                [OFFICE]
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet              [OFFICE]
+application/vnd.openxmlformats-officedocument.spreadsheetml.template           [OFFICE]
+application/vnd.ms-excel.sheet.macroEnabled.12                                 [OFFICE]
+application/vnd.ms-excel.template.macroEnabled.12                              [OFFICE]
+application/vnd.ms-excel.addin.macroEnabled.12                                 [OFFICE]
+application/vnd.ms-excel.sheet.binary.macroEnabled.12                          [OFFICE]
+
index ac3321d..df27d1f 100644 (file)
@@ -152,3 +152,7 @@ application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
 application/vnd.ms-excel.template.macroEnabled.12 xltm
 application/vnd.ms-excel.addin.macroEnabled.12 xlam
 application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
+application/epub+zip epub
+model/vnd.dwfx+xps dwfx
+application/vnd.ms-xpsdocument xps
+application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb epub dwfx xps