More SVG detection fixes based on tests on files downloaded from commons...
author: Brion Vibber <brion@users.mediawiki.org>
Tue, 5 Feb 2008 22:50:21 +0000 (22:50 +0000)
committer: Brion Vibber <brion@users.mediawiki.org>
Tue, 5 Feb 2008 22:50:21 +0000 (22:50 +0000)
* Make the autodetection work for UTF-16LE and UTF-16BE XML, which never worked before because only the first 10 bytes of the header were compared against the 12-byte BOM + "<?xml" signatures
* Allow doctype strings to break over newlines
* Detect XML if there's a doctype even if there's no XML declaration (the "<?xml ... ?>" declaration isn't required for UTF-8 files)

includes/MimeMagic.php

index 90009f5..8ba9925 100644 (file)
@@ -460,25 +460,41 @@ class MimeMagic {
                        $xml_type = "ASCII";
                } elseif ( substr( $head, 0, 8 ) == "\xef\xbb\xbf<?xml") {
                        $xml_type = "UTF-8";
-               } elseif ( substr( $head, 0, 10 ) == "\xfe\xff\x00<\x00?\x00x\x00m\x00l" ) {
+               } elseif ( substr( $head, 0, 12 ) == "\xfe\xff\x00<\x00?\x00x\x00m\x00l" ) {
                        $xml_type = "UTF-16BE";
-               } elseif ( substr( $head, 0, 10 ) == "\xff\xfe<\x00?\x00x\x00m\x00l\x00") {
+               } elseif ( substr( $head, 0, 12 ) == "\xff\xfe<\x00?\x00x\x00m\x00l\x00") {
                        $xml_type = "UTF-16LE";
+               } else {
+                       /*
+                       echo "WARNING: Undetected xml_type ...\n";
+                       for( $i = 0; $i < 10; $i++ ) {
+                               $c = ord( $head{$i} );
+                               if( $c < 32 || $c > 126 ) {
+                                       printf( "\\x%02x", $c );
+                               } else {
+                                       print $head{$i};
+                               }
+                       }
+                       echo "\n";
+                       */
                }
 
                if ( $xml_type ) {
                        if ( $xml_type !== "UTF-8" && $xml_type !== "ASCII" ) {
                                $head = iconv( $xml_type, "ASCII//IGNORE", $head );
                        }
+               }
 
-                       $match = array();
-                       $doctype = "";
-                       $tag = "";
+               $match = array();
+               $doctype = "";
+               $tag = "";
 
-                       if ( preg_match( '%<!DOCTYPE\s+[\w-]+\s+PUBLIC\s+["'."'".'"](.*?)["'."'".'"].*>%sim', 
-                               $head, $match ) ) {
-                                       $doctype = $match[1];
-                               }
+               if ( preg_match( '%<!DOCTYPE\s+[\w-]+\s+PUBLIC\s+["'."'".'"](.*?)["'."'".'"].*>%siD', 
+                       $head, $match ) ) {
+                               $doctype = $match[1];
+                       }
+               
+               if( $xml_type || $doctype ) {
                        if ( preg_match( '%<(\w+)\b%si', $head, $match ) ) {
                                $tag = $match[1];
                        }