* well-formed XML. Note that this doesn't check schema validity.
*/
public $wellFormed = false;
+
+ /**
+ * Will be set to true if the optional element filter returned
+ * a match at some point.
+ */
+ public $filterMatch = false;
/**
* Name of the document's root element, including any namespace
*/
public $rootElement = '';
- private $softNamespaces;
- private $namespaces = array();
-
/**
* @param $file string filename
- * @param $softNamespaces bool
- * If set to true, use of undeclared XML namespaces will be ignored.
- * This matches the behavior of rsvg, but more compliant consumers
- * such as Firefox will reject such files.
- * Leave off for the default, stricter checks.
+ * @param $filterCallback callable (optional)
+ * Function to call to do additional custom validity checks from the
+ * SAX element handler event. This gives you access to the element
+ * namespace, name, and attributes, but not to text contents.
+ * Filter should return 'true' to toggle on $this->filterMatch
*/
- function __construct( $file, $softNamespaces=false ) {
- $this->softNamespaces = $softNamespaces;
+ function __construct( $file, $filterCallback=null ) {
+ $this->filterCallback = $filterCallback;
$this->run( $file );
}
}
private function run( $fname ) {
- if( $this->softNamespaces ) {
- $parser = xml_parser_create( 'UTF-8' );
- } else {
- $parser = xml_parser_create_ns( 'UTF-8' );
- }
+ $parser = xml_parser_create_ns( 'UTF-8' );
// case folding violates XML standard, turn it off
xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
- xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );
+ xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
$file = fopen( $fname, "rb" );
do {
xml_parser_free( $parser );
}
+ private function rootElementOpen( $parser, $name, $attribs ) {
+ $this->rootElement = $name;
+
+ if( is_callable( $this->filterCallback ) ) {
+ xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );
+ $this->elementOpen( $parser, $name, $attribs );
+ } else {
+ // We only need the first open element
+ xml_set_element_handler( $parser, false, false );
+ }
+ }
+
private function elementOpen( $parser, $name, $attribs ) {
- if( $this->softNamespaces ) {
- // Check namespaces manually, so expat doesn't throw
- // errors on use of undeclared namespaces.
- foreach( $attribs as $attrib => $val ) {
- if( $attrib == 'xmlns' ) {
- $this->namespaces[''] = $val;
- } elseif( substr( $attrib, 0, strlen( 'xmlns:' ) ) == 'xmlns:' ) {
- $this->namespaces[substr( $attrib, strlen( 'xmlns:' ) )] = $val;
- }
- }
-
- if( strpos( $name, ':' ) === false ) {
- $ns = '';
- $subname = $name;
- } else {
- list( $ns, $subname ) = explode( ':', $name, 2 );
- }
-
- if( isset( $this->namespaces[$ns] ) ) {
- $name = $this->namespaces[$ns] . ':' . $subname;
- } else {
- // Technically this is invalid for XML with Namespaces.
- // But..... we'll just let it slide in soft mode.
- }
+ if( call_user_func( $this->filterCallback, $name, $attribs ) ) {
+ // Filter hit!
+ $this->filterMatch = true;
}
-
- // We only need the first open element
- $this->rootElement = $name;
- xml_set_element_handler( $parser, false, false );
}
}
if( $this->detectScript ( $tmpfile, $mime, $extension ) ) {
return new WikiErrorMsg( 'uploadscripted' );
}
+ if( $extension == 'svg' || $mime == 'image/svg+xml' ) {
+ if( $this->detectScriptInSvg( $tmpfile ) ) {
+ return new WikiErrorMsg( 'uploadscripted' );
+ }
+ }
/**
* Scan the uploaded file for viruses
*/
$tags = array(
+ '<a href',
'<body',
'<head',
'<html', #also in safari
return false;
}
+ function detectScriptInSvg( $filename ) {
+ $check = new XmlTypeCheck( $filename, array( $this, 'checkSvgScriptCallback' ) );
+ return $check->filterMatch;
+ }
+
+ /**
+ * @todo Replace this with a whitelist filter!
+ */
+ function checkSvgScriptCallback( $element, $attribs ) {
+ $stripped = $this->stripXmlNamespace( $element );
+
+ if( $stripped == 'script' ) {
+ wfDebug( __METHOD__ . ": Found script element '$element' in uploaded file.\n" );
+ return true;
+ }
+
+ foreach( $attribs as $attrib => $value ) {
+ $stripped = $this->stripXmlNamespace( $attrib );
+ if( substr( $stripped, 0, 2 ) == 'on' ) {
+ wfDebug( __METHOD__ . ": Found script attribute '$attrib'='value' in uploaded file.\n" );
+ return true;
+ }
+ if( $stripped == 'href' && strpos( strtolower( $value ), 'javascript:' ) !== false ) {
+ wfDebug( __METHOD__ . ": Found script href attribute '$attrib'='$value' in uploaded file.\n" );
+ return true;
+ }
+ }
+ }
+
+ private function stripXmlNamespace( $name ) {
+ // 'http://www.w3.org/2000/svg:script' -> 'script'
+ $parts = explode( ':', strtolower( $name ) );
+ return array_pop( $parts );
+ }
+
/**
* Generic wrapper function for a virus scanner program.
* This relies on the $wgAntivirus and $wgAntivirusSetup variables.