Merge "Fix mime detection of easily-confused-with text/plain formats"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Fri, 4 Jul 2014 07:43:42 +0000 (07:43 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Fri, 4 Jul 2014 07:43:42 +0000 (07:43 +0000)
1  2 
includes/MimeMagic.php
includes/mime.info
includes/mime.types
includes/upload/UploadBase.php

diff --combined includes/MimeMagic.php
@@@ -485,14 -485,6 +485,6 @@@ class MimeMagic 
         * by looking at the file extension. Typically, this method would be called on the
         * result of guessMimeType().
         *
-        * Currently, this method does the following:
-        *
-        * If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false,
-        * return the result of guessTypesForExtension($ext).
-        *
-        * If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime )
-        * gives true, return the result of guessTypesForExtension($ext).
-        *
         * @param string $mime The mime type, typically guessed from a file's content.
         * @param string $ext The file extension, as taken from the file name
         *
                                        ".$ext is not a known OPC extension.\n" );
                                $mime = 'application/zip';
                        }
+               } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
+                       // Textual types are sometimes not recognized properly.
+                       // If detected as text/plain, and has an extension which is textual
+                       // improve to the extension's type. For example, csv and json are often
+                       // misdetected as text/plain.
+                       $mime = $this->guessTypesForExtension( $ext );
                }
  
                if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
                        $head = fread( $f, 256 );
                        fclose( $f );
  
 -                      $head = strtolower( $head );
 +                      $head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );
  
                        // This is an UGLY HACK, file should be parsed correctly
                        if ( strpos( $head, 'theora' ) !== false ) {
diff --combined includes/mime.info
@@@ -65,6 -65,9 +65,9 @@@ text/plain    [TEXT
  text/html application/xhtml+xml       [TEXT]
  application/xml text/xml      [TEXT]
  text  [TEXT]
+ application/json      [TEXT]
+ text/csv      [TEXT]
+ text/tab-separated-values     [TEXT]
  
  application/zip application/x-zip     [ARCHIVE]
  application/x-gzip    [ARCHIVE]
@@@ -108,8 -111,3 +111,8 @@@ application/vnd.ms-excel.template.macro
  application/vnd.ms-excel.addin.macroEnabled.12                                        [OFFICE]
  application/vnd.ms-excel.sheet.binary.macroEnabled.12                         [OFFICE]
  application/acad application/x-acad application/autocad_dwg image/x-dwg application/dwg application/x-dwg application/x-autocad image/vnd.dwg drawing/dwg [DRAWING]
 +chemical/x-mdl-molfile     [DRAWING]
 +chemical/x-mdl-sdfile      [DRAWING]
 +chemical/x-mdl-rxnfile     [DRAWING]
 +chemical/x-mdl-rdfile      [DRAWING]
 +chemical/x-mdl-rgfile      [DRAWING]
diff --combined includes/mime.types
@@@ -35,6 -35,7 +35,7 @@@ application/x-gzip g
  application/x-hdf hdf
  application/x-jar jar
  application/x-javascript js
+ application/json json
  application/x-koan skp skd skt skm
  application/x-latex latex
  application/x-netcdf nc cdf
@@@ -109,6 -110,7 +110,7 @@@ model/mesh msh mesh sil
  model/vrml wrl vrml
  text/calendar ics ifb
  text/css css
+ text/csv csv
  text/html html htm
  text/plain txt
  text/richtext rtx
@@@ -169,8 -171,3 +171,8 @@@ application/vnd.ms-excel.sheet.binary.m
  model/vnd.dwfx+xps dwfx
  application/vnd.ms-xpsdocument xps
  application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb dwfx xps
 +chemical/x-mdl-molfile mol
 +chemical/x-mdl-sdfile sdf
 +chemical/x-mdl-rxnfile rxn
 +chemical/x-mdl-rdfile rd
 +chemical/x-mdl-rgfile rg
diff --combined includes/upload/UploadBase.php
@@@ -118,7 -118,7 +118,7 @@@ abstract class UploadBase 
         * Can be overridden by subclasses.
         *
         * @param User $user
 -       * @return bool
 +       * @return bool|string
         */
        public static function isAllowed( $user ) {
                foreach ( array( 'upload', 'edit' ) as $permission ) {
                }
  
                $this->mFileProps = FSFile::getPropsFromPath( $this->mTempPath, $this->mFinalExtension );
-               $mime = $this->mFileProps['file-mime'];
+               $mime = $this->mFileProps['mime'];
  
                if ( $wgVerifyMimeType ) {
                        # XXX: Missing extension will be caught by validateName() via getTitle()
                                return true;
                        }
  
 -                      # href with javascript target
 -                      if ( $stripped == 'href' && strpos( strtolower( $value ), 'javascript:' ) !== false ) {
 -                              wfDebug( __METHOD__
 -                                      . ": Found script in href attribute '$attrib'='$value' in uploaded file.\n" );
 +                      # href with non-local target (don't allow http://, javascript:, etc)
 +                      if ( $stripped == 'href'
 +                              && strpos( $value, 'data:' ) !== 0
 +                              && strpos( $value, '#' ) !== 0
 +                      ) {
 +                              if ( !( $strippedElement === 'a'
 +                                      && preg_match( '!^https?://!im', $value ) )
 +                              ) {
 +                                      wfDebug( __METHOD__ . ": Found href attribute <$strippedElement "
 +                                              . "'$attrib'='$value' in uploaded file.\n" );
  
 -                              return true;
 +                                      return true;
 +                              }
                        }
  
                        # href with embedded svg as target