From d13482fdbe111c4f6b8f95c68bd48a079173116f Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 25 May 2011 23:59:43 +0000 Subject: [PATCH] * (bug 27465) Fix metadata extraction for SVG files using unusual namespace names The previous fix (in r82307) only checked explicitly for a namespace given the 'svg' prefix; this fix uses the XML namespace support in XMLReader to check for the actual namespace URI correctly. Fixed up a test case (for RDF extraction) and added trimming on the whitespace. Also added another test case file that doesn't use a namespace prefix on the root element. --- includes/media/SVGMetadataExtractor.php | 26 ++++++++++--------- .../media/SVGMetadataExtractorTest.php | 18 +++++++++++-- .../phpunit/includes/media/Wikimedia-logo.svg | 14 ++++++++++ 3 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 tests/phpunit/includes/media/Wikimedia-logo.svg diff --git a/includes/media/SVGMetadataExtractor.php b/includes/media/SVGMetadataExtractor.php index 32882634fe..db6b59866e 100644 --- a/includes/media/SVGMetadataExtractor.php +++ b/includes/media/SVGMetadataExtractor.php @@ -35,6 +35,7 @@ class SVGMetadataExtractor { class SVGReader { const DEFAULT_WIDTH = 512; const DEFAULT_HEIGHT = 512; + const NS_SVG = 'http://www.w3.org/2000/svg'; private $reader = null; private $mDebug = false; @@ -101,9 +102,9 @@ class SVGReader { $keepReading = $this->reader->read(); } - if ( $this->reader->name != 'svg' ) { + if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) { throw new MWException( "Expected tag, got ". - $this->reader->name ); + $this->reader->localName . " in NS " . $this->reader->namespaceURI ); } $this->debug( " tag is correct." 
); $this->handleSVGAttribs(); @@ -111,18 +112,19 @@ class SVGReader { $exitDepth = $this->reader->depth; $keepReading = $this->reader->read(); while ( $keepReading ) { - $tag = $this->reader->name; + $tag = $this->reader->localName; $type = $this->reader->nodeType; + $isSVG = ($this->reader->namespaceURI == self::NS_SVG); $this->debug( "$tag" ); - if ( $tag == 'svg' && $type == XmlReader::END_ELEMENT && $this->reader->depth <= $exitDepth ) { + if ( $isSVG && $tag == 'svg' && $type == XmlReader::END_ELEMENT && $this->reader->depth <= $exitDepth ) { break; - } elseif ( $tag == 'title' ) { + } elseif ( $isSVG && $tag == 'title' ) { $this->readField( $tag, 'title' ); - } elseif ( $tag == 'desc' ) { + } elseif ( $isSVG && $tag == 'desc' ) { $this->readField( $tag, 'description' ); - } elseif ( $tag == 'metadata' && $type == XmlReader::ELEMENT ) { + } elseif ( $isSVG && $tag == 'metadata' && $type == XmlReader::ELEMENT ) { $this->readXml( $tag, 'metadata' ); } elseif ( $tag !== '#text' ) { $this->debug( "Unhandled top-level XML tag $tag" ); @@ -155,7 +157,7 @@ class SVGReader { } $keepReading = $this->reader->read(); while( $keepReading ) { - if( $this->reader->name == $name && $this->reader->nodeType == XmlReader::END_ELEMENT ) { + if( $this->reader->localName == $name && $this->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::END_ELEMENT ) { break; } elseif( $this->reader->nodeType == XmlReader::TEXT ){ $this->metadata[$metafield] = trim( $this->reader->value ); @@ -175,7 +177,7 @@ class SVGReader { return; } // TODO: find and store type of xml snippet. 
metadata['metadataType'] = "rdf" - $this->metadata[$metafield] = $this->reader->readInnerXML(); + $this->metadata[$metafield] = trim( $this->reader->readInnerXML() ); $this->reader->next(); } @@ -195,11 +197,11 @@ class SVGReader { $exitDepth = $this->reader->depth; $keepReading = $this->reader->read(); while( $keepReading ) { - if( $this->reader->name == $name && $this->reader->depth <= $exitDepth + if( $this->reader->localName == $name && $this->reader->depth <= $exitDepth && $this->reader->nodeType == XmlReader::END_ELEMENT ) { break; - } elseif ( $this->reader->nodeType == XmlReader::ELEMENT ) { - switch( $this->reader->name ) { + } elseif ( $this->reader->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::ELEMENT ) { + switch( $this->reader->localName ) { case 'animate': case 'set': case 'animateMotion': diff --git a/tests/phpunit/includes/media/SVGMetadataExtractorTest.php b/tests/phpunit/includes/media/SVGMetadataExtractorTest.php index 0c2363a0e5..b1b5373361 100644 --- a/tests/phpunit/includes/media/SVGMetadataExtractorTest.php +++ b/tests/phpunit/includes/media/SVGMetadataExtractorTest.php @@ -25,6 +25,13 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { function providerSvgFiles() { $base = dirname( __FILE__ ); return array( + array( + "$base/Wikimedia-logo.svg", + array( + 'width' => 1024, + 'height' => 1024 + ) + ), array( "$base/QA_icon.svg", array( @@ -42,8 +49,15 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { array( "$base/US_states_by_total_state_tax_revenue.svg", array( - 'width' => 593, - 'height' => 959 + 'height' => 593, + 'metadata' => + ' + + image/svg+xml + + + ', + 'width' => 959 ) ), ); diff --git a/tests/phpunit/includes/media/Wikimedia-logo.svg b/tests/phpunit/includes/media/Wikimedia-logo.svg new file mode 100644 index 0000000000..1e17acbeb6 --- /dev/null +++ b/tests/phpunit/includes/media/Wikimedia-logo.svg @@ -0,0 +1,14 @@ + + + \ No newline at end of file -- 2.20.1