* (bug 27465) Fix metadata extraction for SVG files using unusual namespace names
author Brion Vibber <brion@users.mediawiki.org>
Wed, 25 May 2011 23:59:43 +0000 (23:59 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Wed, 25 May 2011 23:59:43 +0000 (23:59 +0000)
The previous fix (in r82307) only checked explicitly for a namespace given the 'svg' prefix; this fix uses XMLReader's XML namespace support to check the actual namespace URI instead.
Fixed up a test case (for RDF extraction) and added trimming of surrounding whitespace from the extracted metadata XML.
Also added another test case file that doesn't use a namespace prefix on the root element (the SVG namespace is declared as the default namespace).

includes/media/SVGMetadataExtractor.php
tests/phpunit/includes/media/SVGMetadataExtractorTest.php
tests/phpunit/includes/media/Wikimedia-logo.svg [new file with mode: 0644]

index 3288263..db6b598 100644 (file)
@@ -35,6 +35,7 @@ class SVGMetadataExtractor {
 class SVGReader {
        const DEFAULT_WIDTH = 512;
        const DEFAULT_HEIGHT = 512;
+       const NS_SVG = 'http://www.w3.org/2000/svg';
 
        private $reader = null;
        private $mDebug = false;
@@ -101,9 +102,9 @@ class SVGReader {
                        $keepReading = $this->reader->read();
                }
 
-               if ( $this->reader->name != 'svg' ) {
+               if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) {
                        throw new MWException( "Expected <svg> tag, got ".
-                               $this->reader->name );
+                               $this->reader->localName . " in NS " . $this->reader->namespaceURI );
                }
                $this->debug( "<svg> tag is correct." );
                $this->handleSVGAttribs();
@@ -111,18 +112,19 @@ class SVGReader {
                $exitDepth =  $this->reader->depth;
                $keepReading = $this->reader->read();
                while ( $keepReading ) {
-                       $tag = $this->reader->name;
+                       $tag = $this->reader->localName;
                        $type = $this->reader->nodeType;
+                       $isSVG = ($this->reader->namespaceURI == self::NS_SVG);
 
                        $this->debug( "$tag" );
 
-                       if ( $tag == 'svg' && $type == XmlReader::END_ELEMENT && $this->reader->depth <= $exitDepth ) {
+                       if ( $isSVG && $tag == 'svg' && $type == XmlReader::END_ELEMENT && $this->reader->depth <= $exitDepth ) {
                                break;
-                       } elseif ( $tag == 'title' ) {
+                       } elseif ( $isSVG && $tag == 'title' ) {
                                $this->readField( $tag, 'title' );
-                       } elseif ( $tag == 'desc' ) {
+                       } elseif ( $isSVG && $tag == 'desc' ) {
                                $this->readField( $tag, 'description' );
-                       } elseif ( $tag == 'metadata' && $type == XmlReader::ELEMENT ) {
+                       } elseif ( $isSVG && $tag == 'metadata' && $type == XmlReader::ELEMENT ) {
                                $this->readXml( $tag, 'metadata' );
                        } elseif ( $tag !== '#text' ) {
                                $this->debug( "Unhandled top-level XML tag $tag" );
@@ -155,7 +157,7 @@ class SVGReader {
                }
                $keepReading = $this->reader->read();
                while( $keepReading ) {
-                       if( $this->reader->name == $name && $this->reader->nodeType == XmlReader::END_ELEMENT ) {
+                       if( $this->reader->localName == $name && $this->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::END_ELEMENT ) {
                                break;
                        } elseif( $this->reader->nodeType == XmlReader::TEXT ){
                                $this->metadata[$metafield] = trim( $this->reader->value );
@@ -175,7 +177,7 @@ class SVGReader {
                        return;
                }
                // TODO: find and store type of xml snippet. metadata['metadataType'] = "rdf"
-               $this->metadata[$metafield] = $this->reader->readInnerXML();
+               $this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
                $this->reader->next();
        }
 
@@ -195,11 +197,11 @@ class SVGReader {
                $exitDepth =  $this->reader->depth;
                $keepReading = $this->reader->read();
                while( $keepReading ) {
-                       if( $this->reader->name == $name && $this->reader->depth <= $exitDepth
+                       if( $this->reader->localName == $name && $this->reader->depth <= $exitDepth
                                && $this->reader->nodeType == XmlReader::END_ELEMENT ) {
                                break;
-                       } elseif ( $this->reader->nodeType == XmlReader::ELEMENT ) {
-                               switch( $this->reader->name ) {
+                       } elseif ( $this->reader->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::ELEMENT ) {
+                               switch( $this->reader->localName ) {
                                        case 'animate':
                                        case 'set':
                                        case 'animateMotion':
index 0c2363a..b1b5373 100644 (file)
@@ -25,6 +25,13 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
        function providerSvgFiles() {
                $base = dirname( __FILE__ );
                return array(
+                       array(
+                               "$base/Wikimedia-logo.svg",
+                               array(
+                                       'width' => 1024,
+                                       'height' => 1024
+                               )
+                       ),
                        array(
                                "$base/QA_icon.svg",
                                array(
@@ -42,8 +49,15 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                        array(
                                "$base/US_states_by_total_state_tax_revenue.svg",
                                array(
-                                       'width' => 593,
-                                       'height' => 959
+                                       'height' => 593,
+                                       'metadata' =>
+    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+      <ns4:Work xmlns:ns4="http://creativecommons.org/ns#" rdf:about="">
+        <ns5:format xmlns:ns5="http://purl.org/dc/elements/1.1/">image/svg+xml</ns5:format>
+        <ns5:type xmlns:ns5="http://purl.org/dc/elements/1.1/" rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+      </ns4:Work>
+    </rdf:RDF>',
+                                       'width' => 959
                                )
                        ),
                );
diff --git a/tests/phpunit/includes/media/Wikimedia-logo.svg b/tests/phpunit/includes/media/Wikimedia-logo.svg
new file mode 100644 (file)
index 0000000..1e17acb
--- /dev/null
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="Wikimedia logo" viewBox="-599 -599 1198 1198" width="1024" height="1024">
+<defs>
+  <clipPath id="mask">
+    <path d="M 47.5,-87.5 v 425 h -95 v -425 l -552,-552 v 1250 h 1199 v -1250 z"/>
+  </clipPath>
+</defs>
+<g clip-path="url(#mask)">
+  <circle id="green parts" fill="#396" r="336.5"/>
+  <circle id="blue arc" fill="none" stroke="#069" r="480.25" stroke-width="135.5"/>
+</g>
+<circle fill="#900" cy="-379.5" r="184.5" id="red circle"/>
+</svg>
\ No newline at end of file