Protect -{...}- variant constructs in images.
author: C. Scott Ananian <cscott@cscott.net>
Tue, 13 Dec 2016 19:49:36 +0000 (14:49 -0500)
committer: Tim Starling <tstarling@wikimedia.org>
Tue, 20 Dec 2016 22:08:36 +0000 (22:08 +0000)
A protected version of explode is factored out as
`StringUtils::delimiterExplode`, since it will be used in follow-up
patches in this series.  The `delimiterExplode` implementation creates
an intermediate array of the exploded results, which is reasonable as
the number of image options is small; but since an Iterator is
returned the implementation can be upgraded in the future (at the cost
of additional complexity) to avoid this.  The additional code in that
case would be similar to ExplodeIterator.

Bug: T146305
Change-Id: I1327685e9e8c07ef476dceaa6f6dae4ba40989ef

includes/libs/StringUtils.php
includes/parser/Parser.php
tests/parser/parserTests.txt

index 6b10c09..26f3c4a 100644 (file)
@@ -54,6 +54,59 @@ class StringUtils {
                        ( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 );
        }
 
+       /**
+        * Explode a string, but ignore any instances of the separator inside
+        * the given start and end delimiters, which may optionally nest.
+        * The delimiters are literal strings, not regular expressions.
+        *
+        * An end delimiter that has no matching start delimiter is treated as
+        * ordinary text: it does not change how later separators are handled.
+        *
+        * @param string $startDelim Start delimiter (must not be empty)
+        * @param string $endDelim End delimiter (must not be empty)
+        * @param string $separator Separator string for the explode (must not
+        *   be empty).
+        * @param string $subject Subject string to explode.
+        * @param bool $nested True iff the delimiters are allowed to nest.
+        * @return ArrayIterator The pieces of $subject, in order of appearance.
+        * @throws InvalidArgumentException If a delimiter or the separator is
+        *   the empty string.
+        */
+       static function delimiterExplode( $startDelim, $endDelim, $separator,
+               $subject, $nested = false ) {
+               // An empty token would match at every offset without consuming any
+               // input, so the scan below would never advance.
+               if (
+                       (string)$startDelim === '' ||
+                       (string)$endDelim === '' ||
+                       (string)$separator === ''
+               ) {
+                       throw new InvalidArgumentException(
+                               __METHOD__ . ': delimiters and separator must be non-empty'
+                       );
+               }
+
+               $inputPos = 0;
+               $lastPos = 0;
+               $depth = 0;
+               $encStart = preg_quote( $startDelim, '!' );
+               $encEnd = preg_quote( $endDelim, '!' );
+               $encSep = preg_quote( $separator, '!' );
+               $len = strlen( $subject );
+               $m = [];
+               $exploded = [];
+               // Walk the subject from one token (start delimiter, end delimiter
+               // or separator) to the next, tracking how deeply we are inside
+               // delimited regions.
+               while (
+                       $inputPos < $len &&
+                       preg_match(
+                               "!$encStart|$encEnd|$encSep!S", $subject, $m,
+                               PREG_OFFSET_CAPTURE, $inputPos
+                       )
+               ) {
+                       $match = $m[0][0];
+                       $matchPos = $m[0][1];
+                       $inputPos = $matchPos + strlen( $match );
+                       if ( $match === $separator ) {
+                               // Only separators outside every delimited region split.
+                               if ( $depth === 0 ) {
+                                       $exploded[] = substr(
+                                               $subject, $lastPos, $matchPos - $lastPos
+                                       );
+                                       $lastPos = $inputPos;
+                               }
+                       } elseif ( $match === $startDelim ) {
+                               // Without nesting, a start delimiter seen inside a region
+                               // is ignored, so the first end delimiter closes the region.
+                               if ( $depth === 0 || $nested ) {
+                                       $depth++;
+                               }
+                       } elseif ( $depth > 0 ) {
+                               // End delimiter closing an open region. An unmatched end
+                               // delimiter must not push the depth below zero, otherwise
+                               // every later separator would be treated as protected.
+                               $depth--;
+                       }
+               }
+               $exploded[] = substr( $subject, $lastPos );
+               // This method could be rewritten in the future to avoid creating an
+               // intermediate array, since the return type is just an iterator.
+               return new ArrayIterator( $exploded );
+       }
+
        /**
         * Perform an operation equivalent to `preg_replace()`
         *
index 7418547..8f9830c 100644 (file)
@@ -5150,7 +5150,10 @@ class Parser {
                #  * bottom
                #  * text-bottom
 
-               $parts = StringUtils::explode( "|", $options );
+               # Protect LanguageConverter markup when splitting into parts
+               $parts = StringUtils::delimiterExplode(
+                       '-{', '}-', '|', $options, true /* allow nesting */
+               );
 
                # Give extensions a chance to select the file revision for us
                $options = [];
index 505bc2d..edcc2c4 100644 (file)
@@ -20617,16 +20617,30 @@ language=sr variant=sr-ec
 </p>
 !! end
 
-# FIXME: This test is currently broken in the PHP parser (bug 52661)
 !! test
-Don't break image parsing if language converter markup is in the caption.
+T146305: Don't break image parsing if language converter markup is in the caption.
 !! options
 language=sr
 !! wikitext
-[[File:Foobar.jpg|-{R|caption}-]]
+[[File:Foobar.jpg|thumb|-{R|caption:}-]]
+!! html/php
+<div class="thumb tright"><div class="thumbinner" style="width:182px;"><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" width="180" height="20" class="thumbimage" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/270px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/360px-Foobar.jpg 2x" /></a>  <div class="thumbcaption"><div class="magnify"><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="internal" title="Повећај"></a></div>caption:</div></div></div>
+
 !! html/parsoid
-<p><a href="/wiki/File:Foobar.jpg" class="image" title="caption"><img alt="caption" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220" /></a>
-</p>
+<figure class="mw-default-size" typeof="mw:Image/Thumb"><a href="./Датотека:Foobar.jpg"><img resource="./Датотека:Foobar.jpg" src="//example.com/images/thumb/3/3a/Foobar.jpg/220px-Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="25" width="220"/></a><figcaption><span typeof="mw:LanguageVariant" data-mw='{"disabled":true,"show":true,"text":"caption:"}'></span></figcaption></figure>
+!! end
+
+!! test
+T146305: Don't break image parsing if nested language converter markup is in the caption.
+!! options
+language=zh variant=zh-cn
+!! wikitext
+[[File:Foobar.jpg|thumb|-{zh-cn:blog (hk: -{zh-hans|WEBJOURNAL}-, tw: -{zh-hans|WEBLOG}-)}-]]
+!! html/php
+<div class="thumb tright"><div class="thumbinner" style="width:182px;"><a href="/wiki/File:Foobar.jpg" class="image"><img alt="" src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" width="180" height="20" class="thumbimage" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/270px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/360px-Foobar.jpg 2x" /></a>  <div class="thumbcaption"><div class="magnify"><a href="/wiki/File:Foobar.jpg" class="internal" title="放大"></a></div>blog (hk: WEBJOURNAL, tw: WEBLOG)</div></div></div>
+
+!! html/parsoid
+<figure class="mw-default-size" typeof="mw:Image/Thumb"><a href="File:Foobar.jpg"><img resource="./File:Foobar.jpg" src="//example.com/images/thumb/3/3a/Foobar.jpg/220px-Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="25" width="220"/></a><figcaption><span typeof="mw:LanguageVariant" data-mw='{"bidir":[{"l":"zh-cn","t":"blog (hk: &lt;span typeof=\"mw:LanguageVariant\" data-parsoid=&#39;{\"fl\":[\"zh-hans\"],\"dsr\":[42,64,null,2]}&#39; data-mw=&#39;{\"filter\":[\"zh-hans\"],\"text\":\"WEBJOURNAL\"}&#39;>&lt;/span>, tw: &lt;span typeof=\"mw:LanguageVariant\" data-parsoid=&#39;{\"fl\":[\"zh-hans\"],\"dsr\":[70,88,null,2]}&#39; data-mw=&#39;{\"filter\":[\"zh-hans\"],\"text\":\"WEBLOG\"}&#39;>&lt;/span>)"}],"show":true}'></span></figcaption></figure>
 !! end
 
 # FIXME: This test is currently broken in the PHP parser (bug 52661)