From dac832e1253901e84223126769a81ccb76a50ed6 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 17 Nov 2008 19:03:37 +0000 Subject: [PATCH] * Improved scripting safety heuristics for IE 5/6 content-type detection. * Improved scripting safety heuristics on SVG uploads. --- RELEASE-NOTES | 2 + includes/XmlTypeCheck.php | 74 ++++++++++++----------------- includes/specials/SpecialUpload.php | 41 ++++++++++++++++ 3 files changed, 73 insertions(+), 44 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 502e4a61a5..50bc7d20d3 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -352,6 +352,8 @@ The following extensions are migrated into MediaWiki 1.14: * Add a .htaccess to deleted images directory for additional protection against exposure of deleted files with known SHA-1 hashes on default installations. +* Improved scripting safety heuristics for IE 5/6 content-type detection. +* Improved scripting safety heuristics on SVG uploads. === API changes in 1.14 === diff --git a/includes/XmlTypeCheck.php b/includes/XmlTypeCheck.php index 10a08547fb..a004ef4d7c 100644 --- a/includes/XmlTypeCheck.php +++ b/includes/XmlTypeCheck.php @@ -6,6 +6,12 @@ class XmlTypeCheck { * well-formed XML. Note that this doesn't check schema validity. */ public $wellFormed = false; + + /** + * Will be set to true if the optional element filter returned + * a match at some point. + */ + public $filterMatch = false; /** * Name of the document's root element, including any namespace @@ -13,19 +19,16 @@ class XmlTypeCheck { */ public $rootElement = ''; - private $softNamespaces; - private $namespaces = array(); - /** * @param $file string filename - * @param $softNamespaces bool - * If set to true, use of undeclared XML namespaces will be ignored. - * This matches the behavior of rsvg, but more compliant consumers - * such as Firefox will reject such files. - * Leave off for the default, stricter checks. + * @param $filterCallback callable (optional) + * Function to call to do additional custom validity checks from the + * SAX element handler event. This gives you access to the element + * namespace, name, and attributes, but not to text contents. + * Filter should return 'true' to toggle on $this->filterMatch */ - function __construct( $file, $softNamespaces=false ) { - $this->softNamespaces = $softNamespaces; + function __construct( $file, $filterCallback=null ) { + $this->filterCallback = $filterCallback; $this->run( $file ); } @@ -37,16 +40,12 @@ class XmlTypeCheck { } private function run( $fname ) { - if( $this->softNamespaces ) { - $parser = xml_parser_create( 'UTF-8' ); - } else { - $parser = xml_parser_create_ns( 'UTF-8' ); - } + $parser = xml_parser_create_ns( 'UTF-8' ); // case folding violates XML standard, turn it off xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); - xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false ); + xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false ); $file = fopen( $fname, "rb" ); do { @@ -66,35 +65,22 @@ class XmlTypeCheck { xml_parser_free( $parser ); } + private function rootElementOpen( $parser, $name, $attribs ) { + $this->rootElement = $name; + + if( is_callable( $this->filterCallback ) ) { + xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false ); + $this->elementOpen( $parser, $name, $attribs ); + } else { + // We only need the first open element + xml_set_element_handler( $parser, false, false ); + } + } + private function elementOpen( $parser, $name, $attribs ) { - if( $this->softNamespaces ) { - // Check namespaces manually, so expat doesn't throw - // errors on use of undeclared namespaces. - foreach( $attribs as $attrib => $val ) { - if( $attrib == 'xmlns' ) { - $this->namespaces[''] = $val; - } elseif( substr( $attrib, 0, strlen( 'xmlns:' ) ) == 'xmlns:' ) { - $this->namespaces[substr( $attrib, strlen( 'xmlns:' ) )] = $val; - } - } - - if( strpos( $name, ':' ) === false ) { - $ns = ''; - $subname = $name; - } else { - list( $ns, $subname ) = explode( ':', $name, 2 ); - } - - if( isset( $this->namespaces[$ns] ) ) { - $name = $this->namespaces[$ns] . ':' . $subname; - } else { - // Technically this is invalid for XML with Namespaces. - // But..... we'll just let it slide in soft mode. - } + if( call_user_func( $this->filterCallback, $name, $attribs ) ) { + // Filter hit! + $this->filterMatch = true; } - - // We only need the first open element - $this->rootElement = $name; - xml_set_element_handler( $parser, false, false ); } } diff --git a/includes/specials/SpecialUpload.php b/includes/specials/SpecialUpload.php index 9d6595d84c..1624e13b2a 100644 --- a/includes/specials/SpecialUpload.php +++ b/includes/specials/SpecialUpload.php @@ -1341,6 +1341,11 @@ wgUploadAutoFill = {$autofill}; if( $this->detectScript ( $tmpfile, $mime, $extension ) ) { return new WikiErrorMsg( 'uploadscripted' ); } + if( $extension == 'svg' || $mime == 'image/svg+xml' ) { + if( $this->detectScriptInSvg( $tmpfile ) ) { + return new WikiErrorMsg( 'uploadscripted' ); + } + } /** * Scan the uploaded file for viruses @@ -1452,6 +1457,7 @@ wgUploadAutoFill = {$autofill}; */ $tags = array( + 'filterMatch; + } + + /** + * @todo Replace this with a whitelist filter! + */ + function checkSvgScriptCallback( $element, $attribs ) { + $stripped = $this->stripXmlNamespace( $element ); + + if( $stripped == 'script' ) { + wfDebug( __METHOD__ . ": Found script element '$element' in uploaded file.\n" ); + return true; + } + + foreach( $attribs as $attrib => $value ) { + $stripped = $this->stripXmlNamespace( $attrib ); + if( substr( $stripped, 0, 2 ) == 'on' ) { + wfDebug( __METHOD__ . ": Found script attribute '$attrib'='value' in uploaded file.\n" ); + return true; + } + if( $stripped == 'href' && strpos( strtolower( $value ), 'javascript:' ) !== false ) { + wfDebug( __METHOD__ . ": Found script href attribute '$attrib'='$value' in uploaded file.\n" ); + return true; + } + } + } + + private function stripXmlNamespace( $name ) { + // 'http://www.w3.org/2000/svg:script' -> 'script' + $parts = explode( ':', strtolower( $name ) ); + return array_pop( $parts ); + } + /** * Generic wrapper function for a virus scanner program. * This relies on the $wgAntivirus and $wgAntivirusSetup variables. -- 2.20.1