From 51d54b4b9150c32dec88e76e25ed90c3f026281d Mon Sep 17 00:00:00 2001
From: "C. Scott Ananian"
Date: Tue, 13 Dec 2016 14:49:36 -0500
Subject: [PATCH] Protect -{...}- variant constructs in images.
A protected version of explode is factored out as
`StringUtils::delimiterExplode`, since it will be used in follow-up
patches in this series. The `delimiterExplode` implementation creates
an intermediate array of the exploded results, which is reasonable as
the number of image options is small; but since an Iterator is
returned the implementation can be upgraded in the future (at the cost
of additional complexity) to avoid this. The additional code in that
case would be similar to ExplodeIterator.
Bug: T146305
Change-Id: I1327685e9e8c07ef476dceaa6f6dae4ba40989ef
---
includes/libs/StringUtils.php | 53 +++++++++++++++++++++++++++++++++++
includes/parser/Parser.php | 5 +++-
tests/parser/parserTests.txt | 24 ++++++++++++----
3 files changed, 76 insertions(+), 6 deletions(-)
diff --git a/includes/libs/StringUtils.php b/includes/libs/StringUtils.php
index 6b10c0998b..26f3c4ac61 100644
--- a/includes/libs/StringUtils.php
+++ b/includes/libs/StringUtils.php
@@ -54,6 +54,59 @@ class StringUtils {
( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 );
}
+ /**
+  * Explode a string, but ignore any instances of the separator inside
+  * the given start and end delimiters, which may optionally nest.
+  * The delimiters are literal strings, not regular expressions. An end
+  * delimiter with no open start delimiter is treated as ordinary text.
+  * @param string $startDelim Start delimiter
+  * @param string $endDelim End delimiter
+  * @param string $separator Separator string for the explode.
+  * @param string $subject Subject string to explode.
+  * @param bool $nested True iff the delimiters are allowed to nest.
+  * @return ArrayIterator Pieces of $subject, in order; always at least one.
+  */
+ static function delimiterExplode( $startDelim, $endDelim, $separator,
+     $subject, $nested = false ) {
+     $inputPos = 0; // offset at which the next token search starts
+     $lastPos = 0; // offset at which the piece being collected starts
+     $depth = 0; // number of currently open start delimiters
+     $encStart = preg_quote( $startDelim, '!' );
+     $encEnd = preg_quote( $endDelim, '!' );
+     $encSep = preg_quote( $separator, '!' );
+     $len = strlen( $subject );
+     $m = [];
+     $exploded = [];
+     while (
+         $inputPos < $len &&
+         preg_match(
+             "!$encStart|$encEnd|$encSep!S", $subject, $m,
+             PREG_OFFSET_CAPTURE, $inputPos
+         )
+     ) {
+         $match = $m[0][0];
+         $matchPos = $m[0][1];
+         $inputPos = $matchPos + strlen( $match );
+         if ( $match === $separator ) {
+             if ( $depth === 0 ) {
+                 $exploded[] = substr( $subject, $lastPos, $matchPos - $lastPos );
+                 $lastPos = $inputPos;
+             }
+         } elseif ( $match === $startDelim ) {
+             // With nesting off, a start delimiter inside an open one is plain text.
+             if ( $depth === 0 || $nested ) {
+                 $depth++;
+             }
+         } elseif ( $depth > 0 ) {
+             // Skip unmatched end delimiters so that $depth never goes negative.
+             $depth--;
+         }
+     }
+     $exploded[] = substr( $subject, $lastPos );
+     // The return type is just an iterator, so this intermediate array could be avoided in future.
+     return new ArrayIterator( $exploded );
+ }
+
/**
* Perform an operation equivalent to `preg_replace()`
*
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 741854749a..8f9830c188 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -5150,7 +5150,10 @@ class Parser {
# * bottom
# * text-bottom
- $parts = StringUtils::explode( "|", $options );
+ # Protect LanguageConverter markup when splitting into parts
+ $parts = StringUtils::delimiterExplode(
+ '-{', '}-', '|', $options, true /* allow nesting */
+ );
# Give extensions a chance to select the file revision for us
$options = [];
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 505bc2d25f..edcc2c46ea 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -20617,16 +20617,30 @@ language=sr variant=sr-ec
!! end
-# FIXME: This test is currently broken in the PHP parser (bug 52661)
!! test
-Don't break image parsing if language converter markup is in the caption.
+T146305: Don't break image parsing if language converter markup is in the caption.
!! options
language=sr
!! wikitext
-[[File:Foobar.jpg|-{R|caption}-]]
+[[File:Foobar.jpg|thumb|-{R|caption:}-]]
+!! html/php
+
caption:
+
!! html/parsoid
-
-
+
+!! end
+
+!! test
+T146305: Don't break image parsing if nested language converter markup is in the caption.
+!! options
+language=zh variant=zh-cn
+!! wikitext
+[[File:Foobar.jpg|thumb|-{zh-cn:blog (hk: -{zh-hans|WEBJOURNAL}-, tw: -{zh-hans|WEBLOG}-)}-]]
+!! html/php
+
blog (hk: WEBJOURNAL, tw: WEBLOG)
+
+!! html/parsoid
+
!! end
# FIXME: This test is currently broken in the PHP parser (bug 52661)
--
2.20.1