const COLON_STATE_COMMENT = 5;
const COLON_STATE_COMMENTDASH = 6;
const COLON_STATE_COMMENTDASHDASH = 7;
+ const COLON_STATE_LC = 8;
/**
* Make lists from lines starting with ':', '*', '#', etc.
* @return int|false The position of the ':', or false if none found
*/
private function findColonNoLinks( $str, &$before, &$after ) {
- $colonPos = strpos( $str, ':' );
- if ( $colonPos === false ) {
+ if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) {
# Nothing to find!
return false;
}
- $ltPos = strpos( $str, '<' );
- if ( $ltPos === false || $ltPos > $colonPos ) {
+ if ( $m[0][0] === ':' ) {
# Easy; no tag nesting to worry about
+ $colonPos = $m[0][1];
$before = substr( $str, 0, $colonPos );
$after = substr( $str, $colonPos + 1 );
return $colonPos;
# Ugly state machine to walk through avoiding tags.
$state = self::COLON_STATE_TEXT;
- $level = 0;
+ $ltLevel = 0;
+ $lcLevel = 0;
$len = strlen( $str );
- for ( $i = 0; $i < $len; $i++ ) {
+ for ( $i = $m[0][1]; $i < $len; $i++ ) {
$c = $str[$i];
switch ( $state ) {
$state = self::COLON_STATE_TAGSTART;
break;
case ":":
- if ( $level === 0 ) {
+ if ( $ltLevel === 0 ) {
# We found it!
$before = substr( $str, 0, $i );
$after = substr( $str, $i + 1 );
break;
default:
# Skip ahead looking for something interesting
- $colonPos = strpos( $str, ':', $i );
- if ( $colonPos === false ) {
+ if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
# Nothing else interesting
return false;
}
- $ltPos = strpos( $str, '<', $i );
- if ( $level === 0 ) {
- if ( $ltPos === false || $colonPos < $ltPos ) {
- # We found it!
- $before = substr( $str, 0, $colonPos );
- $after = substr( $str, $colonPos + 1 );
- return $i;
- }
+ if ( $m[0][0] === '-{' ) {
+ $state = self::COLON_STATE_LC;
+ $lcLevel++;
+ $i = $m[0][1] + 1;
+ } else {
+ # Skip ahead to next interesting character.
+ $i = $m[0][1] - 1;
}
- if ( $ltPos === false ) {
- # Nothing else interesting to find; abort!
- # We're nested, but there's no close tags left. Abort!
- break 2;
+ break;
+ }
+ break;
+ case self::COLON_STATE_LC:
+ # In language converter markup -{ ... }-
+ if ( !preg_match( '/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
+ # No further "-{" or "}-" delimiters remain, so the language
+ # converter markup we are nested in is never closed.
+ # Nothing else interesting to find; abort!
+ break 2;
+ } elseif ( $m[0][0] === '-{' ) {
+ $i = $m[0][1] + 1;
+ $lcLevel++;
+ } elseif ( $m[0][0] === '}-' ) {
+ $i = $m[0][1] + 1;
+ $lcLevel--;
+ if ( $lcLevel === 0 ) {
+ $state = self::COLON_STATE_TEXT;
}
- # Skip ahead to next tag start
- $i = $ltPos;
- $state = self::COLON_STATE_TAGSTART;
}
break;
case self::COLON_STATE_TAG:
# In a <tag>
switch ( $c ) {
case ">":
- $level++;
+ $ltLevel++;
$state = self::COLON_STATE_TEXT;
break;
case "/":
case self::COLON_STATE_CLOSETAG:
# In a </tag>
if ( $c === ">" ) {
- $level--;
- if ( $level < 0 ) {
+ $ltLevel--;
+ if ( $ltLevel < 0 ) {
wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
return false;
}
throw new MWException( "State machine error in " . __METHOD__ );
}
}
- if ( $level > 0 ) {
- wfDebug( __METHOD__ . ": Invalid input; not enough close tags (level $level, state $state)\n" );
+ if ( $ltLevel > 0 || $lcLevel > 0 ) {
+ wfDebug(
+ __METHOD__ . ": Invalid input; not enough close tags " .
+ "(level $ltLevel/$lcLevel, state $state)\n"
+ );
return false;
}
return false;
!! end
-# FIXME: This test is currently broken in the PHP parser (bug 52661)
!! test
-Don't break list handling if language converter markup is in the item.
+T153135: Don't break list handling if language converter markup is in the item.
!! options
language=zh variant=zh-cn
!! wikitext
;-{zh-cn:AAA;zh-tw:BBB}-
+;-{R|foo:bar}-
!! html/php
-<dl><dt><span class="error">在手动语言转换规则中检测到错误</span></dd></dl>
+<dl><dt>AAA</dt>
+<dt>foo:bar</dt></dl>
!! html/parsoid
-<dl><dt>AAA
-</dt></dl>
+<dl>
+<dt><span typeof="mw:LanguageVariant" data-mw='{"bidir":[{"l":"zh-cn","t":"AAA"},{"l":"zh-tw","t":"BBB"}],"show":true}'></span></dt>
+<dt><span typeof="mw:LanguageVariant" data-mw='{"disabled":true,"show":true,"text":"foo:bar"}'></span></dt>
+</dl>
+!! end
+
+# Note that Parsoid does not protect colons unless the language converter
+# markup is properly nested (i.e. every -{ is closed by a matching }-),
+# because it is a backtracking parser.
+!! test
+T153135: Unclosed markup in definition list (code coverage)
+!! options
+language=zh variant=zh-cn
+!! wikitext
+;<b>foo:bar
+;-{zh-cn:AAA
+!! html/php
+<dl><dt><b>foo:bar</dt>
+<dt>-{zh-cn:AAA</b></dt></dl>
+
+!! html/parsoid
+<dl>
+<dt><b>foo:bar</b></dt>
+<b>
+<dt>-{zh-cn</dt>
+<dd>AAA</dd>
+</b></dl>
+!! end
+
+!! test
+T153135: Nested language converter markup in definition list (code coverage)
+!! options
+language=zh variant=zh-cn
+!! wikitext
+;-{zh-cn:AAA -{zh-hans|foo:bar}- -{R|bat:baz}-}-:def
+!! html/php
+<dl><dt>AAA foo:bar bat:baz</dt>
+<dd>def</dd></dl>
+
+!! html/parsoid
+<dl>
+<dt><span typeof="mw:LanguageVariant" data-mw='{"bidir":[{"l":"zh-cn","t":"AAA <span typeof=\"mw:LanguageVariant\" data-parsoid='{\"fl\":[\"zh-hans\"],\"dsr\":[13,32,null,2]}' data-mw='{\"filter\":[\"zh-hans\"],\"text\":\"bar\"}'></span> <span typeof=\"mw:LanguageVariant\" data-parsoid='{\"fl\":[\"R\"],\"dsr\":[33,46,null,2]}' data-mw='{\"disabled\":true,\"show\":true,\"text\":\"bat:baz\"}'></span>"}],"show":true}'></span></dt>
+<dd>def</dd>
+</dl>
!! end
!! test