From 28774022769d2273be16c6c6e1cca710a1fd97ef Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Tue, 20 Sep 2016 18:26:32 -0400 Subject: [PATCH] Protect language converter markup in the preprocessor. This ensures that `{{echo|-{R|foo}-}}` is parsed correctly as a template invocation with a single argument, not as two separate arguments split by the `|`. Bug: T146304 Change-Id: I709d007c70a3fd19264790055042c615999b2f67 --- includes/parser/Preprocessor.php | 8 +++++- includes/parser/Preprocessor_DOM.php | 35 ++++++++++++++++++++++----- includes/parser/Preprocessor_Hash.php | 35 ++++++++++++++++++++++----- tests/parser/parserTests.txt | 11 +++++++++ 4 files changed, 76 insertions(+), 13 deletions(-) diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php index cc98abd5c0..426b5507a4 100644 --- a/includes/parser/Preprocessor.php +++ b/includes/parser/Preprocessor.php @@ -48,7 +48,13 @@ abstract class Preprocessor { 'names' => [ 2 => null ], 'min' => 2, 'max' => 2, - ] + ], + '-{' => [ + 'end' => '}-', + 'names' => [ 1 => null ], + 'min' => 1, + 'max' => 1, + ], ]; /** diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 5da7cd7441..950d66d7b7 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -193,6 +193,8 @@ class Preprocessor_DOM extends Preprocessor { * @return string */ public function preprocessToXml( $text, $flags = 0 ) { + global $wgDisableLangConversion; + $forInclusion = $flags & Parser::PTD_FOR_INCLUSION; $xmlishElements = $this->parser->getStripList(); @@ -220,6 +222,10 @@ class Preprocessor_DOM extends Preprocessor { $stack = new PPDStack; $searchBase = "[{<\n"; # } + if ( !$wgDisableLangConversion ) { + $searchBase .= '-'; + } + // For fast reverse searches $revText = strrev( $text ); $lengthText = strlen( $text ); @@ -298,7 +304,10 @@ class Preprocessor_DOM extends Preprocessor { break; } } else { - $curChar = $text[$i]; + $curChar = 
$curTwoChar = $text[$i]; + if ( ( $i + 1 ) < $lengthText ) { + $curTwoChar .= $text[$i + 1]; + } if ( $curChar == '|' ) { $found = 'pipe'; } elseif ( $curChar == '=' ) { @@ -311,11 +320,20 @@ class Preprocessor_DOM extends Preprocessor { } else { $found = 'line-start'; } + } elseif ( $curTwoChar == $currentClosing ) { + $found = 'close'; + $curChar = $curTwoChar; } elseif ( $curChar == $currentClosing ) { $found = 'close'; + } elseif ( isset( $this->rules[$curTwoChar] ) ) { + $curChar = $curTwoChar; + $found = 'open'; + $rule = $this->rules[$curChar]; } elseif ( isset( $this->rules[$curChar] ) ) { $found = 'open'; $rule = $this->rules[$curChar]; + } elseif ( $curChar == '-' ) { + $found = 'dash'; } else { # Some versions of PHP have a strcspn which stops on null characters # Ignore and continue @@ -595,7 +613,8 @@ class Preprocessor_DOM extends Preprocessor { // input pointer. } elseif ( $found == 'open' ) { # count opening brace characters - $count = strspn( $text, $curChar, $i ); + $curLen = strlen( $curChar ); + $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i ); # we need to add to stack only if opening brace count is enough for one of the rules if ( $count >= $rule['min'] ) { @@ -615,12 +634,13 @@ class Preprocessor_DOM extends Preprocessor { # Add literal brace(s) $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); } - $i += $count; + $i += $curLen * $count; } elseif ( $found == 'close' ) { $piece = $stack->top; # lets check if there are enough characters for closing brace $maxCount = $piece->count; - $count = strspn( $text, $curChar, $i, $maxCount ); + $curLen = strlen( $curChar ); + $count = ( $curLen > 1 ) ? 
1 : strspn( $text, $curChar, $i, $maxCount ); # check for maximum matching characters (if there are 5 closing # characters, we will probably need only 3 - depending on the rules) @@ -643,7 +663,7 @@ class Preprocessor_DOM extends Preprocessor { # No matching element found in callback array # Output a literal closing brace and continue $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); - $i += $count; + $i += $curLen * $count; continue; } $name = $rule['names'][$matchingCount]; @@ -682,7 +702,7 @@ class Preprocessor_DOM extends Preprocessor { } # Advance input pointer - $i += $matchingCount; + $i += $curLen * $matchingCount; # Unwind the stack $stack->pop(); @@ -716,6 +736,9 @@ class Preprocessor_DOM extends Preprocessor { $stack->getCurrentPart()->eqpos = strlen( $accum ); $accum .= '='; ++$i; + } elseif ( $found == 'dash' ) { + $accum .= '-'; + ++$i; } } diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 8a4637ea1e..1317e60e41 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -117,6 +117,8 @@ class Preprocessor_Hash extends Preprocessor { * @return PPNode_Hash_Tree */ public function preprocessToObj( $text, $flags = 0 ) { + global $wgDisableLangConversion; + $tree = $this->cacheGetTree( $text, $flags ); if ( $tree !== false ) { $store = json_decode( $tree ); @@ -152,6 +154,10 @@ class Preprocessor_Hash extends Preprocessor { $stack = new PPDStack_Hash; $searchBase = "[{<\n"; + if ( !$wgDisableLangConversion ) { + $searchBase .= '-'; + } + // For fast reverse searches $revText = strrev( $text ); $lengthText = strlen( $text ); @@ -229,7 +235,10 @@ class Preprocessor_Hash extends Preprocessor { break; } } else { - $curChar = $text[$i]; + $curChar = $curTwoChar = $text[$i]; + if ( ( $i + 1 ) < $lengthText ) { + $curTwoChar .= $text[$i + 1]; + } if ( $curChar == '|' ) { $found = 'pipe'; } elseif ( $curChar == '=' ) { @@ -242,11 +251,20 @@ class Preprocessor_Hash 
extends Preprocessor { } else { $found = 'line-start'; } + } elseif ( $curTwoChar == $currentClosing ) { + $found = 'close'; + $curChar = $curTwoChar; } elseif ( $curChar == $currentClosing ) { $found = 'close'; + } elseif ( isset( $this->rules[$curTwoChar] ) ) { + $curChar = $curTwoChar; + $found = 'open'; + $rule = $this->rules[$curChar]; } elseif ( isset( $this->rules[$curChar] ) ) { $found = 'open'; $rule = $this->rules[$curChar]; + } elseif ( $curChar == '-' ) { + $found = 'dash'; } else { # Some versions of PHP have a strcspn which stops on null characters # Ignore and continue @@ -538,7 +556,8 @@ class Preprocessor_Hash extends Preprocessor { // input pointer. } elseif ( $found == 'open' ) { # count opening brace characters - $count = strspn( $text, $curChar, $i ); + $curLen = strlen( $curChar ); + $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i ); # we need to add to stack only if opening brace count is enough for one of the rules if ( $count >= $rule['min'] ) { @@ -557,12 +576,13 @@ class Preprocessor_Hash extends Preprocessor { # Add literal brace(s) self::addLiteral( $accum, str_repeat( $curChar, $count ) ); } - $i += $count; + $i += $curLen * $count; } elseif ( $found == 'close' ) { $piece = $stack->top; # lets check if there are enough characters for closing brace $maxCount = $piece->count; - $count = strspn( $text, $curChar, $i, $maxCount ); + $curLen = strlen( $curChar ); + $count = ( $curLen > 1 ) ? 
1 : strspn( $text, $curChar, $i, $maxCount ); # check for maximum matching characters (if there are 5 closing # characters, we will probably need only 3 - depending on the rules) @@ -585,7 +605,7 @@ class Preprocessor_Hash extends Preprocessor { # No matching element found in callback array # Output a literal closing brace and continue self::addLiteral( $accum, str_repeat( $curChar, $count ) ); - $i += $count; + $i += $curLen * $count; continue; } $name = $rule['names'][$matchingCount]; @@ -627,7 +647,7 @@ class Preprocessor_Hash extends Preprocessor { } # Advance input pointer - $i += $matchingCount; + $i += $curLen * $matchingCount; # Unwind the stack $stack->pop(); @@ -661,6 +681,9 @@ class Preprocessor_Hash extends Preprocessor { $accum[] = [ 'equals', [ '=' ] ]; $stack->getCurrentPart()->eqpos = count( $accum ) - 1; ++$i; + } elseif ( $found == 'dash' ) { + self::addLiteral( $accum, '-' ); + ++$i; } } diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 07d50a8ce8..b18f1e7a34 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -20595,6 +20595,17 @@ language=sr variant=sr-ec

!! end +!! test +T146304: Don't break template parsing if language converter markup is in the parameter. +!! options +language=sr variant=sr-ec +!! wikitext +{{echo|-{R|foo}-}} +!! html/php +

<p>foo +</p>

+!! end + # FIXME: This test is currently broken in the PHP parser (bug 52661) !! test Don't break image parsing if language converter markup is in the caption. -- 2.20.1