From 960b23861890f2721a0a0f5fb6cbd5885684efa7 Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Thu, 24 Oct 2013 19:11:59 -0300 Subject: [PATCH] Extract list of languages svg is translated into. SVG files support having different renderings based on language. We support changing the rendering language, but we offer no discoverability of what languages are available. Long term Jarry's TranslateSVG will hopefully be used and have all the associated awesomeness. In the meantime, we should probably have some indication on the image page what languages are available for the SVG. This is step 1 (extract the language). I expect to follow this up with a commit adding some sort of simple selector on the image page. As an aside, it should be noted, this only detects languages if they are in the first $wgSVGMetadataCutoff (256 kb). This is a compromise to avoid OOM on huge SVG files. Change-Id: I2a462270fe63eb3e3023419eddc8b06f5a617ab5 --- includes/media/SVGMetadataExtractor.php | 47 +++++++++++++++---- tests/phpunit/data/media/README | 5 ++ tests/phpunit/data/media/Speech_bubbles.svg | 14 ++++++ .../media/SVGMetadataExtractorTest.php | 24 +++++++++- 4 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 tests/phpunit/data/media/Speech_bubbles.svg diff --git a/includes/media/SVGMetadataExtractor.php b/includes/media/SVGMetadataExtractor.php index db58c6200c..3f0edb3bdd 100644 --- a/includes/media/SVGMetadataExtractor.php +++ b/includes/media/SVGMetadataExtractor.php @@ -43,6 +43,8 @@ class SVGReader { const DEFAULT_WIDTH = 512; const DEFAULT_HEIGHT = 512; const NS_SVG = 'http://www.w3.org/2000/svg'; + const LANG_PREFIX_MATCH = 1; + const LANG_FULL_MATCH = 2; /** @var null|XMLReader */ private $reader = null; @@ -52,6 +54,8 @@ class SVGReader { /** @var array */ private $metadata = array(); + private $languages = array(); + private $languagePrefixes = array(); /** * Constructor @@ -172,10 +176,8 @@ class SVGReader { } elseif ( $tag !== '#text' ) { $this->debug(
"Unhandled top-level XML tag $tag" ); - if ( !isset( $this->metadata['animated'] ) ) { - // Recurse into children of current tag, looking for animation. - $this->animateFilter( $tag ); - } + // Recurse into children of current tag, looking for animation and languages. + $this->animateFilterAndLang( $tag ); } // Goto next element, which is sibling of current (Skip children). @@ -184,6 +186,8 @@ class SVGReader { $this->reader->close(); + $this->metadata['translations'] = $this->languages + $this->languagePrefixes; + return true; } @@ -235,11 +239,12 @@ class SVGReader { } /** - * Filter all children, looking for animate elements + * Filter all children, looking for animated elements. + * Also get a list of languages that can be targeted. * * @param string $name Name of the element that we are reading from */ - private function animateFilter( $name ) { + private function animateFilterAndLang( $name ) { $this->debug( "animate filter for tag $name" ); if ( $this->reader->nodeType != XmlReader::ELEMENT ) { return; @@ -254,9 +259,35 @@ class SVGReader { && $this->reader->nodeType == XmlReader::END_ELEMENT ) { break; - } elseif ( $this->reader->namespaceURI == - self::NS_SVG && $this->reader->nodeType == XmlReader::ELEMENT + } elseif ( $this->reader->namespaceURI == self::NS_SVG + && $this->reader->nodeType == XmlReader::ELEMENT ) { + + $sysLang = $this->reader->getAttribute( 'systemLanguage' ); + if ( !is_null( $sysLang ) && $sysLang !== '' ) { + // See http://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute + $langList = explode( ',', $sysLang ); + foreach( $langList as $langItem ) { + $langItem = trim( $langItem ); + if ( Language::isWellFormedLanguageTag( $langItem ) ) { + $this->languages[$langItem] = self::LANG_FULL_MATCH; + } + // Note, the standard says that any prefix should work, + // here we do only the initial prefix, since that will catch + // 99% of cases, and we are going to compare against fallbacks. 
+ // This differs mildly from how the spec says languages should be + // handled, however it matches better how the MediaWiki language + // preference is generally handled. + $dash = strpos( $langItem, '-' ); + // Intentionally checking both !false and > 0 at the same time. + if ( $dash ) { + $itemPrefix = substr( $langItem, 0, $dash ); + if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) { + $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH; + } + } + } + } switch ( $this->reader->localName ) { case 'script': // Normally we disallow files with diff --git a/tests/phpunit/data/media/README b/tests/phpunit/data/media/README index 4b489a92c9..4fcd5fc8cd 100644 --- a/tests/phpunit/data/media/README +++ b/tests/phpunit/data/media/README @@ -41,3 +41,8 @@ https://commons.wikimedia.org/wiki/File:Tux.svg Larry Ewing, Simon Budig, Anja Gerwinski "The copyright holder of this file allows anyone to use it for any purpose, provided that the copyright holder is properly attributed. Redistribution, derivative work, commercial use, and all other use is permitted." +Speech_bubbles.svg (Modified slightly) +https://commons.wikimedia.org/wiki/File:Speech_bubbles.svg +CC-BY-SA 3.0 +Jarry1250 + diff --git a/tests/phpunit/data/media/Speech_bubbles.svg b/tests/phpunit/data/media/Speech_bubbles.svg new file mode 100644 index 0000000000..6b1ef7a93e --- /dev/null +++ b/tests/phpunit/data/media/Speech_bubbles.svg @@ -0,0 +1,14 @@ + + + + + + + Hallo!BonjourHallo!Hello! + Hallo! Wiegeht's?Bonjour,ça va?Hallo! Hoegaat het?Hello! Howare you? + Ça va bien,et toi?Goed,met jou?I'm well, you? 
+ + + + + diff --git a/tests/phpunit/includes/media/SVGMetadataExtractorTest.php b/tests/phpunit/includes/media/SVGMetadataExtractorTest.php index d00a33d0a4..ec7b0c90d6 100644 --- a/tests/phpunit/includes/media/SVGMetadataExtractorTest.php +++ b/tests/phpunit/includes/media/SVGMetadataExtractorTest.php @@ -54,6 +54,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { 'height' => 1024, 'originalWidth' => '1024', 'originalHeight' => '1024', + 'translations' => array(), ) ), array( @@ -63,6 +64,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { 'height' => 60, 'originalWidth' => '60', 'originalHeight' => '60', + 'translations' => array(), ) ), array( @@ -72,6 +74,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { 'height' => 60, 'originalWidth' => '60.0000000', 'originalHeight' => '60.0000000', + 'translations' => array(), ) ), array( @@ -82,6 +85,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { 'height' => 385, 'originalWidth' => '385', 'originalHeight' => '385.0004883', + 'translations' => array(), ) ), array( @@ -92,9 +96,26 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { 'originalWidth' => '100%', 'originalHeight' => '100%', 'title' => 'Tux', + 'translations' => array(), 'description' => 'For more information see: http://commons.wikimedia.org/wiki/Image:Tux.svg', ) - ) + ), + array( + "$base/Speech_bubbles.svg", + array( + 'width' => 627, + 'height' => 461, + 'originalWidth' => '17.7cm', + 'originalHeight' => '13cm', + 'translations' => array( + 'de' => SVGReader::LANG_FULL_MATCH, + 'fr' => SVGReader::LANG_FULL_MATCH, + 'nl' => SVGReader::LANG_FULL_MATCH, + 'tlh-ca' => SVGReader::LANG_FULL_MATCH, + 'tlh' => SVGReader::LANG_PREFIX_MATCH + ), + ) + ), ); } @@ -116,6 +137,7 @@ class SVGMetadataExtractorTest extends MediaWikiTestCase { 'width' => 959, 'originalWidth' => '958.69', 'originalHeight' => '592.78998', + 'translations' => array(), ) ), ); -- 2.20.1