Extract list of languages svg is translated into.
author Brian Wolff <bawolff+wn@gmail.com>
Thu, 24 Oct 2013 22:11:59 +0000 (19:11 -0300)
committer Brian Wolff <bawolff+wn@gmail.com>
Fri, 6 Dec 2013 23:24:22 +0000 (19:24 -0400)
SVG files support having different renderings based on language.
We support changing the rendering language, but we offer no
discoverability of what languages are available.

Long term Jarry's TranslateSVG will hopefully be used and have
all the associated awesomeness. In the meantime, we should
probably have some indication on the image page what languages
are available for the SVG. This is step 1 (extract the language).
I expect to follow this up with a commit adding some sort of
simple selector on the image page.

As an aside, it should be noted that this only detects languages
if they are in the first $wgSVGMetadataCutoff (256 KB) of the file.
This is a compromise to avoid OOM on huge SVG files.

Change-Id: I2a462270fe63eb3e3023419eddc8b06f5a617ab5

includes/media/SVGMetadataExtractor.php
tests/phpunit/data/media/README
tests/phpunit/data/media/Speech_bubbles.svg [new file with mode: 0644]
tests/phpunit/includes/media/SVGMetadataExtractorTest.php

index db58c62..3f0edb3 100644 (file)
@@ -43,6 +43,8 @@ class SVGReader {
        const DEFAULT_WIDTH = 512;
        const DEFAULT_HEIGHT = 512;
        const NS_SVG = 'http://www.w3.org/2000/svg';
+       const LANG_PREFIX_MATCH = 1;
+       const LANG_FULL_MATCH = 2;
 
        /** @var null|XMLReader */
        private $reader = null;
@@ -52,6 +54,8 @@ class SVGReader {
 
        /** @var array */
        private $metadata = array();
+       private $languages = array();
+       private $languagePrefixes = array();
 
        /**
         * Constructor
@@ -172,10 +176,8 @@ class SVGReader {
                        } elseif ( $tag !== '#text' ) {
                                $this->debug( "Unhandled top-level XML tag $tag" );
 
-                               if ( !isset( $this->metadata['animated'] ) ) {
-                                       // Recurse into children of current tag, looking for animation.
-                                       $this->animateFilter( $tag );
-                               }
+                               // Recurse into children of current tag, looking for animation and languages.
+                               $this->animateFilterAndLang( $tag );
                        }
 
                        // Goto next element, which is sibling of current (Skip children).
@@ -184,6 +186,8 @@ class SVGReader {
 
                $this->reader->close();
 
+               $this->metadata['translations'] = $this->languages + $this->languagePrefixes;
+
                return true;
        }
 
@@ -235,11 +239,12 @@ class SVGReader {
        }
 
        /**
-        * Filter all children, looking for animate elements
+        * Filter all children, looking for animated elements.
+        * Also get a list of languages that can be targeted.
         *
         * @param string $name Name of the element that we are reading from
         */
-       private function animateFilter( $name ) {
+       private function animateFilterAndLang( $name ) {
                $this->debug( "animate filter for tag $name" );
                if ( $this->reader->nodeType != XmlReader::ELEMENT ) {
                        return;
@@ -254,9 +259,35 @@ class SVGReader {
                                && $this->reader->nodeType == XmlReader::END_ELEMENT
                        ) {
                                break;
-                       } elseif ( $this->reader->namespaceURI ==
-                               self::NS_SVG && $this->reader->nodeType == XmlReader::ELEMENT
+                       } elseif ( $this->reader->namespaceURI == self::NS_SVG
+                               && $this->reader->nodeType == XmlReader::ELEMENT
                        ) {
+
+                               $sysLang = $this->reader->getAttribute( 'systemLanguage' );
+                               if ( !is_null( $sysLang ) && $sysLang !== '' ) {
+                                       // See http://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
+                                       $langList = explode( ',', $sysLang );
+                                       foreach( $langList as $langItem ) {
+                                               $langItem = trim( $langItem );
+                                               if ( Language::isWellFormedLanguageTag( $langItem ) ) {
+                                                       $this->languages[$langItem] = self::LANG_FULL_MATCH;
+                                               }
+                                               // Note, the standard says that any prefix should work,
+                                               // here we do only the initial prefix, since that will catch
+                                               // 99% of cases, and we are going to compare against fallbacks.
+                                               // This differs mildly from how the spec says languages should be
+                                               // handled, however it matches better how the MediaWiki language
+                                               // preference is generally handled.
+                                               $dash = strpos( $langItem, '-' );
+                                               // Intentionally checking both !false and > 0 at the same time.
+                                               if ( $dash ) {
+                                                       $itemPrefix = substr( $langItem, 0, $dash );
+                                                       if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) {
+                                                               $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
+                                                       }
+                                               }
+                                       }
+                               }
                                switch ( $this->reader->localName ) {
                                        case 'script':
                                                // Normally we disallow files with
index 4b489a9..4fcd5fc 100644 (file)
@@ -41,3 +41,8 @@ https://commons.wikimedia.org/wiki/File:Tux.svg
 Larry Ewing, Simon Budig, Anja Gerwinski
 "The copyright holder of this file allows anyone to use it for any purpose, provided that the copyright holder is properly attributed. Redistribution, derivative work, commercial use, and all other use is permitted."
 
+Speech_bubbles.svg (Modified slightly)
+https://commons.wikimedia.org/wiki/File:Speech_bubbles.svg
+CC-BY-SA 3.0
+Jarry1250
+
diff --git a/tests/phpunit/data/media/Speech_bubbles.svg b/tests/phpunit/data/media/Speech_bubbles.svg
new file mode 100644 (file)
index 0000000..6b1ef7a
--- /dev/null
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" width="17.7cm" height="13cm" id="svg2" version="1.1" inkscape:version="0.48.2 r9819" sodipodi:docname="New document 1">
+  <defs id="defs4"/>
+  <sodipodi:namedview id="base" pagecolor="#ffffff" bordercolor="#666666" borderopacity="1.0" inkscape:pageopacity="0.0" inkscape:pageshadow="2" inkscape:zoom="0.7" inkscape:cx="296.43458" inkscape:cy="130.17435" inkscape:document-units="px" inkscape:current-layer="layer1" showgrid="false" fit-margin-top="0" fit-margin-left="0" fit-margin-right="0" fit-margin-bottom="0" inkscape:window-width="1366" inkscape:window-height="706" inkscape:window-x="-8" inkscape:window-y="-8" inkscape:window-maximized="1"/>
+  <g inkscape:label="Layer 1" inkscape:groupmode="layer" id="layer1" transform="translate(-0.28125,-1.21875)">
+    <switch style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"><text xml:space="preserve" x="90" y="108.07646" id="text2985-de" sodipodi:linespacing="125%" systemLanguage="de"><tspan text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2987-de">Hallo!</tspan></text><text xml:space="preserve" x="90" y="108.07646" id="text2985-fr" sodipodi:linespacing="125%" systemLanguage="fr"><tspan x="80" y="108.07646" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2987-fr">Bonjour</tspan></text><text xml:space="preserve" x="90" y="108.07646" id="text2985-nl" sodipodi:linespacing="125%" systemLanguage="nl, tlh-ca"><tspan x="90" y="108.07646" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2987-nl">Hallo!</tspan></text><text xml:space="preserve" x="90" y="108.07646" id="text2985" sodipodi:linespacing="125%"><tspan x="90" y="108.07646" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2987" sodipodi:role="line">Hello!</tspan></text></switch>
+    <switch style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"><text xml:space="preserve" x="330" y="188.07648" id="text2989-de" sodipodi:linespacing="125%" systemLanguage="de"><tspan x="323" y="188.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2991-de">Hallo! Wie</tspan><tspan x="350" y="238.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2993-de" sodipodi:role="line">geht's?</tspan></text><text xml:space="preserve" x="330" y="188.07648" id="text2989-fr" sodipodi:linespacing="125%" systemLanguage="fr"><tspan x="335" y="188.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2991-fr">Bonjour,</tspan><tspan x="350" y="238.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2993-fr">ça va?</tspan></text><text xml:space="preserve" x="330" y="188.07648" id="text2989-nl" sodipodi:linespacing="125%" systemLanguage="nl, tlh-ca"><tspan x="310" y="188.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2991-nl">Hallo! Hoe</tspan><tspan x="330" y="238.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2993-nl">gaat het?</tspan></text><text xml:space="preserve" x="330" y="188.07648" id="text2989" sodipodi:linespacing="125%"><tspan x="330" y="188.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2991" sodipodi:role="line">Hello! How</tspan><tspan x="330" y="238.07648" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2993" sodipodi:role="line">are you?</tspan></text></switch>
+    <switch style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"><text xml:space="preserve" x="101.42857" y="318.64789" id="text2995-fr" sodipodi:linespacing="125%" systemLanguage="fr"><tspan x="82" y="323" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2997-fr">Ça va bien,</tspan><tspan x="117.42857" y="368.64789" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2999-fr">et toi?</tspan></text><text xml:space="preserve" x="101.42857" y="318.64789" id="text2995-nl" sodipodi:linespacing="125%" systemLanguage="nl, tlh-ca"><tspan x="101.42857" y="318.64789" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2997-nl">Goed,</tspan><tspan x="101.42857" y="368.64789" font-size="90%" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2999-nl">met jou?</tspan></text><text xml:space="preserve" x="101.42857" y="318.64789" id="text2995" sodipodi:linespacing="125%"><tspan x="101.42857" y="318.64789" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2997" sodipodi:role="line">I'm well,</tspan><tspan x="101.42857" y="368.64789" text-decoration="normal" font-style="normal" font-weight="normal" id="tspan2999" sodipodi:role="line">   you?</tspan></text></switch>
+    <path style="color:#000000;fill:none;stroke:#808080;stroke-width:8.19999980999999960;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" d="m 145.41518,24.660714 c -54.439497,0 -98.562501,30.043022 -98.562501,67.125 0,9.936246 3.188468,19.358966 8.875,27.843746 -3.477405,24.25473 -24,58.71875 -24,58.71875 0,0 55.316401,-29.49598 68.544641,-28.55804 2.17169,0.15398 -0.660951,4.01645 -2.044641,0.93304 14.019951,5.22007 30.083661,8.21875 47.187501,8.21875 54.4395,0 98.59375,-30.07427 98.59375,-67.156246 0,-37.081978 -44.15425,-67.125 -98.59375,-67.125 z" id="path3769" inkscape:connector-curvature="0" sodipodi:nodetypes="ssccscsss"/>
+    <path style="color:#000000;fill:none;stroke:#808080;stroke-width:8;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" d="m 416.54255,99.214524 c 73.5252,0 133.11712,43.566276 133.11712,97.339926 0,14.40884 -4.3063,28.073 -11.98645,40.37703 4.69653,35.1725 32.41406,85.14978 32.41406,85.14978 0,0 -74.70955,-42.77297 -92.57542,-41.41284 -2.93306,0.22328 0.89266,5.82436 2.76145,1.35303 -18.93514,7.56977 -40.63057,11.91824 -63.73076,11.91824 -73.52523,0 -133.15935,-43.61157 -133.15935,-97.38524 0,-53.77365 59.63412,-97.339926 133.15935,-97.339926 z" id="path3769-1" inkscape:connector-curvature="0" sodipodi:nodetypes="ssccscsss"/>
+    <path style="color:#000000;fill:none;stroke:#808080;stroke-width:8;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" d="m 173.1621,250.34923 c -64.02996,0 -115.926026,34.29807 -115.926026,76.63201 0,11.34353 3.750173,22.1008 10.438488,31.7873 -4.090007,27.68997 -28.228023,67.03517 -28.228023,67.03517 0,0 65.061361,-33.67353 80.619991,-32.60275 2.55427,0.17578 -0.77738,4.5853 -2.40483,1.06519 16.4898,5.95939 35.38343,9.38278 55.5004,9.38278 64.02999,0 115.96279,-34.33373 115.96279,-76.66769 0,-42.33394 -51.9328,-76.63201 -115.96279,-76.63201 z" id="path3769-1-7" inkscape:connector-curvature="0" sodipodi:nodetypes="ssccscsss"/>
+  </g>
+</svg>
index d00a33d..ec7b0c9 100644 (file)
@@ -54,6 +54,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                                        'height' => 1024,
                                        'originalWidth' => '1024',
                                        'originalHeight' => '1024',
+                                       'translations' => array(),
                                )
                        ),
                        array(
@@ -63,6 +64,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                                        'height' => 60,
                                        'originalWidth' => '60',
                                        'originalHeight' => '60',
+                                       'translations' => array(),
                                )
                        ),
                        array(
@@ -72,6 +74,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                                        'height' => 60,
                                        'originalWidth' => '60.0000000',
                                        'originalHeight' => '60.0000000',
+                                       'translations' => array(),
                                )
                        ),
                        array(
@@ -82,6 +85,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                                        'height' => 385,
                                        'originalWidth' => '385',
                                        'originalHeight' => '385.0004883',
+                                       'translations' => array(),
                                )
                        ),
                        array(
@@ -92,9 +96,26 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                                        'originalWidth' => '100%',
                                        'originalHeight' => '100%',
                                        'title' => 'Tux',
+                                       'translations' => array(),
                                        'description' => 'For more information see: http://commons.wikimedia.org/wiki/Image:Tux.svg',
                                )
-                       )
+                       ),
+                       array(
+                               "$base/Speech_bubbles.svg",
+                               array(
+                                       'width' => 627,
+                                       'height' => 461,
+                                       'originalWidth' => '17.7cm',
+                                       'originalHeight' => '13cm',
+                                       'translations' => array(
+                                               'de' => SVGReader::LANG_FULL_MATCH,
+                                               'fr' => SVGReader::LANG_FULL_MATCH,
+                                               'nl' => SVGReader::LANG_FULL_MATCH,
+                                               'tlh-ca' => SVGReader::LANG_FULL_MATCH,
+                                               'tlh' => SVGReader::LANG_PREFIX_MATCH
+                                       ),
+                               )
+                       ),
                );
        }
 
@@ -116,6 +137,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase {
                                        'width' => 959,
                                        'originalWidth' => '958.69',
                                        'originalHeight' => '592.78998',
+                                       'translations' => array(),
                                )
                        ),
                );