From: Brion Vibber Date: Thu, 6 Jun 2019 21:54:29 +0000 (-0700) Subject: Relax HTML sniffing checks on image upload X-Git-Tag: 1.34.0-rc.0~1491^2 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/pie.php?a=commitdiff_plain;h=2e83387666756039129fd4e8f667dc5fed0849f3;p=lhc%2Fweb%2Fwiklou.git Relax HTML sniffing checks on image upload Allows uploaded files to include some HTML tag strings that were previously forbidden in the first 1k or so of the file: * element in SVG files. + * Obsolete, no longer used. + * SVG file uploads now always allow elements. * - * MediaWiki will reject HTMLesque tags in uploaded files due to idiotic - * browsers which can not perform basic stuff like MIME detection and which are - * vulnerable to further idiots uploading crap files as images. - * - * When this directive is on, "<title>" will be allowed in files with an - * "image/svg+xml" MIME type. You should leave this disabled if your web server - * is misconfigured and doesn't send appropriate MIME types for SVG images. + * @deprecated 1.34 */ -$wgAllowTitlesInSVG = false; +$wgAllowTitlesInSVG = true; /** * Whether thumbnails should be generated in target language (usually, same as @@ -1390,6 +1385,16 @@ $wgAntivirusRequired = true; */ $wgVerifyMimeType = true; +/** + * Determines whether extra checks for IE type detection should be applied. + * This is a conservative check for exactly what IE 6 or so checked for, + * and shouldn't trigger on for instance JPEG files containing links in EXIF + * metadata. + * + * @since 1.34 + */ +$wgVerifyMimeTypeIE = true; + /** * Sets the MIME type definition file to use by includes/libs/mime/MimeAnalyzer.php. * Set to null, to use built-in defaults only. diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php index d905aa47d9..597c2777cf 100644 --- a/includes/upload/UploadBase.php +++ b/includes/upload/UploadBase.php @@ -404,7 +404,7 @@ abstract class UploadBase { * @return mixed True if the file is verified, an array otherwise */ protected function verifyMimeType( $mime ) { - global $wgVerifyMimeType; + global $wgVerifyMimeType, $wgVerifyMimeTypeIE; if ( $wgVerifyMimeType ) { wfDebug( "mime: <$mime> extension: <{$this->mFinalExtension}>\n" ); global $wgMimeTypeBlacklist; @@ -412,17 +412,19 @@ abstract class UploadBase { return [ 'filetype-badmime', $mime ]; } - # Check what Internet Explorer would detect - $fp = fopen( $this->mTempPath, 'rb' ); - $chunk = fread( $fp, 256 ); - fclose( $fp ); - - $magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer(); - $extMime = $magic->guessTypesForExtension( $this->mFinalExtension ); - $ieTypes = $magic->getIEMimeTypes( $this->mTempPath, $chunk, $extMime ); - foreach ( $ieTypes as $ieType ) { - if ( $this->checkFileExtension( $ieType, $wgMimeTypeBlacklist ) ) { - return [ 'filetype-bad-ie-mime', $ieType ]; + if ( $wgVerifyMimeTypeIE ) { + # Check what Internet Explorer would detect + $fp = fopen( $this->mTempPath, 'rb' ); + $chunk = fread( $fp, 256 ); + fclose( $fp ); + + $magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer(); + $extMime = $magic->guessTypesForExtension( $this->mFinalExtension ); + $ieTypes = $magic->getIEMimeTypes( $this->mTempPath, $chunk, $extMime ); + foreach ( $ieTypes as $ieType ) { + if ( $this->checkFileExtension( $ieType, $wgMimeTypeBlacklist ) ) { + return [ 'filetype-bad-ie-mime', $ieType ]; + } } } } @@ -1262,12 +1264,11 @@ abstract class UploadBase { * @return bool True if the file contains something looking like embedded scripts */ public static function detectScript( $file, $mime, $extension ) { - global $wgAllowTitlesInSVG; - # ugly hack: for text files, always look at the entire file. # For binary field, just check the first K. - if ( strpos( $mime, 'text/' ) === 0 ) { + $isText = strpos( $mime, 'text/' ) === 0; + if ( $isText ) { $chunk = file_get_contents( $file ); } else { $fp = fopen( $file, 'rb' ); @@ -1312,36 +1313,19 @@ abstract class UploadBase { } } - /** - * Internet Explorer for Windows performs some really stupid file type - * autodetection which can cause it to interpret valid image files as HTML - * and potentially execute JavaScript, creating a cross-site scripting - * attack vectors. - * - * Apple's Safari browser also performs some unsafe file type autodetection - * which can cause legitimate files to be interpreted as HTML if the - * web server is not correctly configured to send the right content-type - * (or if you're really uploading plain text and octet streams!) - * - * Returns true if IE is likely to mistake the given file for HTML. - * Also returns true if Safari would mistake the given file for HTML - * when served with a generic content-type. - */ + // Quick check for HTML heuristics in old IE and Safari. + // + // The exact heuristics IE uses are checked separately via verifyMimeType(), so we + // don't need them all here as it can cause many false positives. + // + // Check for `<script` and such still to forbid script tags and embedded HTML in SVG: $tags = [ - '<a href', '<body', '<head', '<html', # also in safari - '<img', - '<pre', '<script', # also in safari - '<table' ]; - if ( !$wgAllowTitlesInSVG && $extension !== 'svg' && $mime !== 'image/svg' ) { - $tags[] = '<title'; - } - foreach ( $tags as $tag ) { if ( strpos( $chunk, $tag ) !== false ) { wfDebug( __METHOD__ . ": found something that may make it be mistaken for html: $tag\n" ); diff --git a/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg b/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg new file mode 100644 index 0000000000..5438736027 Binary files /dev/null and b/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg differ diff --git a/tests/phpunit/data/upload/png-embedded-breaks-ie5.png b/tests/phpunit/data/upload/png-embedded-breaks-ie5.png new file mode 100644 index 0000000000..0af03fc608 Binary files /dev/null and b/tests/phpunit/data/upload/png-embedded-breaks-ie5.png differ diff --git a/tests/phpunit/data/upload/png-plain.png b/tests/phpunit/data/upload/png-plain.png new file mode 100644 index 0000000000..83e9130172 Binary files /dev/null and b/tests/phpunit/data/upload/png-plain.png differ diff --git a/tests/phpunit/includes/upload/UploadBaseTest.php b/tests/phpunit/includes/upload/UploadBaseTest.php index 58c69e3229..cafc846e9a 100644 --- a/tests/phpunit/includes/upload/UploadBaseTest.php +++ b/tests/phpunit/includes/upload/UploadBaseTest.php @@ -585,6 +585,42 @@ class UploadBaseTest extends MediaWikiTestCase { [ '<?xml version="1.0" encoding="WINDOWS-1252"?><svg></svg>', false ], ]; } + + /** + * @covers UploadBase::detectScript + * @dataProvider provideDetectScript + */ + public function testDetectScript( $filename, $mime, $extension, $expected, $message ) { + $result = $this->upload->detectScript( $filename, $mime, $extension ); + $this->assertSame( $expected, $result, $message ); + } + + public static function provideDetectScript() { + global $IP; + return [ + [ + "$IP/tests/phpunit/data/upload/png-plain.png", + 'image/png', + 'png', + false, + 'PNG with no suspicious things in it, should pass.' + ], + [ + "$IP/tests/phpunit/data/upload/png-embedded-breaks-ie5.png", + 'image/png', + 'png', + true, + 'PNG with embedded data that IE5/6 interprets as HTML; should be rejected.' + ], + [ + "$IP/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg", + 'image/jpeg', + 'jpeg', + false, + 'JPEG with innocuous HTML in metadata from a flickr photo; should pass (T27707).' + ], + ]; + } } class UploadTestHandler extends UploadBase {