From: Happy-melon Date: Sun, 16 Jan 2011 23:57:13 +0000 (+0000) Subject: (bug 529, bug 12974) alter behaviour of the table- and template-parser: X-Git-Tag: 1.31.0-rc.0~32506 X-Git-Url: https://git.cyclocoop.org/%28%28?a=commitdiff_plain;h=2d8ee839cb13d922731473ddc1d06e32e84b9bda;p=lhc%2Fweb%2Fwiklou.git (bug 529, bug 12974) alter behaviour of the table- and template-parser: * Remove the hack from bug 529 which inserts a newline when the template text begins with any block character; this breaks many things in many exciting ways. I've left it in for now when the text begins with a wikitable, as that markup can't be mistaken for anything else. * Instead, move the contents of a table cell onto a new line for parsing, so that linestart elements (including nested tables) will parse as normal. This means that structures like {| | {{template-containing-wikilist}} |} Will still work, but for the right reason, and structures like {| | style="color:{{template-containing-hexcode}}" | Foo | * Bar | {| | Look at me, I'm nested! |} |} Will all now start to work. Structures like * Foo {{template-containing-wikilist}} Will now not, but honestly, should they? --- diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 9db072aabb..3b0e3be73c 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -87,6 +87,11 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN members. * (bug 2585) Image pages should send 404 if no image, no shared image and no description page. +* (bug 12974) The behaviour of wikitable and template parsing was altered. Previously, + all templates were treated as being at the start of a block context for the purposes + of evaluating wikitext elements like #, :, *, and tables (bug 529). Now this + behaviour is only applied to wikitable-start {|, but the first line of a wikitable + cell is now treated as a linestart. === API changes in 1.18 === * (bug 26339) Throw warning when truncating an overlarge API result diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 7eadf397eb..2ea87e185a 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -825,7 +825,22 @@ class Parser { $has_opened_tr = array(); # Did this table open a element? $indent_level = 0; # indent level of the table - foreach ( $lines as $outLine ) { + # Keep pulling lines off the front of the array until they're all gone. + # we want to be able to push lines back on to the front of the stream, + # but StringUtils::explode() returns funky optimised Iterators which don't + # support insertion. So maintain a separate buffer and draw on that first if + # there's anything in it + $extraLines = array(); + $lines->rewind(); + do { + if( $extraLines ){ + $outLine = array_shift( $extraLines ); + } elseif( $lines->valid() ) { + $outLine = $lines->current(); + $lines->next(); + } else { + break; + } $line = trim( $outLine ); if ( $line === '' ) { # empty line, go to next line @@ -901,11 +916,10 @@ class Parser { } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) { # This might be cell elements, td, th or captions if ( substr( $line , 0 , 2 ) === '|+' ) { - $first_character = '+'; - $line = substr( $line , 1 ); + $first_character = '|+'; } - $line = substr( $line , 1 ); + $line = substr( $line , strlen( $first_character ) ); if ( $first_character === '!' ) { $line = str_replace( '!!' , '||' , $line ); @@ -916,62 +930,84 @@ class Parser { # by earlier parser steps, but should avoid splitting up eg # attribute values containing literal "||". $cells = StringUtils::explodeMarkup( '||' , $line ); - - $outLine = ''; - - # Loop through each table cell - foreach ( $cells as $cell ) { - $previous = ''; - if ( $first_character !== '+' ) { - $tr_after = array_pop( $tr_attributes ); - if ( !array_pop( $tr_history ) ) { - $previous = "\n"; - } - array_push( $tr_history , true ); - array_push( $tr_attributes , '' ); - array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + $cell = array_shift( $cells ); + + # Inject cells back into the stream to be dealt with later + # TODO: really we should do the whole thing as a stream... + # but that would be too much like a sensible implementation :P + if( count( $cells ) ){ + foreach( array_reverse( $cells ) as $extraCell ){ + array_unshift( $extraLines, $first_character . $extraCell ); } + } - $last_tag = array_pop( $last_tag_history ); + $outLine = ''; - if ( array_pop( $td_history ) ) { - $previous = "\n{$previous}"; + $previous = ''; + if ( $first_character !== '|+' ) { + $tr_after = array_pop( $tr_attributes ); + if ( !array_pop( $tr_history ) ) { + $previous = "\n"; } + array_push( $tr_history , true ); + array_push( $tr_attributes , '' ); + array_pop( $has_opened_tr ); + array_push( $has_opened_tr , true ); + } - if ( $first_character === '|' ) { - $last_tag = 'td'; - } elseif ( $first_character === '!' ) { - $last_tag = 'th'; - } elseif ( $first_character === '+' ) { - $last_tag = 'caption'; - } else { - $last_tag = ''; - } + $last_tag = array_pop( $last_tag_history ); - array_push( $last_tag_history , $last_tag ); + if ( array_pop( $td_history ) ) { + $previous = "\n{$previous}"; + } - # A cell could contain both parameters and data - $cell_data = explode( '|' , $cell , 2 ); + if ( $first_character === '|' ) { + $last_tag = 'td'; + } elseif ( $first_character === '!' ) { + $last_tag = 'th'; + } elseif ( $first_character === '|+' ) { + $last_tag = 'caption'; + } else { + $last_tag = ''; + } - # Bug 553: Note that a '|' inside an invalid link should not - # be mistaken as delimiting cell parameters - if ( strpos( $cell_data[0], '[[' ) !== false ) { - $cell = "{$previous}<{$last_tag}>{$cell}"; - } elseif ( count( $cell_data ) == 1 ) { - $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; - } else { - $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); - $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; - } + array_push( $last_tag_history , $last_tag ); + + # A cell could contain both parameters and data... but the pipe could + # also be the start of a nested table, or a raw pipe inside an invalid + # link (bug 553). + $cell_data = preg_split( '/(?"; + } elseif ( count( $cell_data ) == 1 ) { + $cell = "{$previous}<{$last_tag}>"; + $data = $cell_data[0]; + } else { + $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); + $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); + $cell = "{$previous}<{$last_tag}{$attributes}>"; + $data = $cell_data[1]; + } - $outLine .= $cell; - array_push( $td_history , true ); + # Bug 529: the start of a table cell should be a linestart context for + # processing other block markup, including nested tables. The original + # implementation of this was to add a newline before every brace construct, + # which broke all manner of other things. Instead, push the contents + # of the cell back into the stream and come back to it later. But don't + # do that if the first line is empty, or you may get extra whitespace + if( $data ){ + array_unshift( $extraLines, trim( $data ) ); } + + $outLine .= $cell; + array_push( $td_history , true ); } $out .= $outLine . "\n"; - } + } while( $lines->valid() || count( $extraLines ) ); # Closing open td, tr && table while ( count( $td_history ) > 0 ) { @@ -2235,6 +2271,7 @@ class Parser { '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. 'mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { + $paragraphStack = false; # TODO bug 5718: paragraph closed $output .= $this->closeParagraph(); @@ -3225,11 +3262,11 @@ class Parser { $text = wfEscapeWikiText( $text ); } elseif ( is_string( $text ) && !$piece['lineStart'] - && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) ) + && preg_match( '/^{\\|/', $text ) ) { - # Bug 529: if the template begins with a table or block-level - # element, it should be treated as beginning a new line. - # This behaviour is somewhat controversial. + # Bug 529: if the template begins with a table, it should be treated as + # beginning a new line. This previously handled other block-level elements + # such as #, :, etc, but these have many false-positives (bug 12974). $text = "\n" . $text; } @@ -3288,7 +3325,7 @@ class Parser { if ( !$title->equals( $cacheTitle ) ) { $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = - array( $title->getNamespace(), $cdb = $title->getDBkey() ); + array( $title->getNamespace(), $title->getDBkey() ); } return array( $dom, $title ); diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index c48be9267c..5736d8cbe3 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -1210,7 +1210,8 @@ A table with nothing but a caption |} !! result -
caption + +caption
!! end @@ -1226,12 +1227,22 @@ Simple table !! result - - + - - +
1 2 + +

1 +

+
+

2 +

3 4 + +

3 +

+
+

4 +

!! end @@ -1261,48 +1272,110 @@ Multiplication table |} !! result - - - - - + + + - - - - + + - - - - + + - - - - + + - - - - + + - - - - + +
Multiplication table + +Multiplication table
× 1 2 3 + +

× +

+
+

1 +

+
+

2 +

+
+

3 +

1 + +

1 +

1 2 3 + +

1 +

+
+

2 +

+
+

3 +

2 + +

2 +

2 4 6 + +

2 +

+
+

4 +

+
+

6 +

3 + +

3 +

3 6 9 + +

3 +

+
+

6 +

+
+

9 +

4 + +

4 +

4 8 12 + +

4 +

+
+

8 +

+
+

12 +

5 + +

5 +

5 10 15 + +

5 +

+
+

10 +

+
+

15 +

!! end @@ -1321,16 +1394,26 @@ Table rowspan !! result - - - - -
Cell 1, row 1 + +

Cell 1, row 1 +

Cell 2, row 1 (and 2) + +

Cell 2, row 1 (and 2) +

Cell 3, row 1 + +

Cell 3, row 1 +

Cell 1, row 2 + +

Cell 1, row 2 +

Cell 3, row 2 + +

Cell 3, row 2 +

!! end @@ -1351,18 +1434,26 @@ Nested table !! result - -
α + +

α +

- -
nested + +

nested +

table + +

table +

the original table again + +

the original table again +

!! end @@ -1376,7 +1467,9 @@ Invalid attributes in table cell (bug 1830) !! result -
broken + +

broken +

!! end @@ -1390,8 +1483,13 @@ Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitec !! result - - +
[ftp://%7Cx]" onmouseover="alert(document.cookie)">test + +

[ftp://%7Cx +

+
+

]" onmouseover="alert(document.cookie)">test +

@@ -2571,7 +2669,9 @@ BUG 553: link with two variables in a piped link !! result -
[[{{{1}}}|{{{2}}}]] + +

[[{{{1}}}|{{{2}}}]] +

!! end @@ -2681,12 +2781,22 @@ foo {{table}}

- - + - - +
1 2 + +

1 +

+
+

2 +

3 4 + +

3 +

+
+

4 +

!! end @@ -2701,12 +2811,22 @@ foo

- - + - - +
1 2 + +

1 +

+
+

2 +

3 4 + +

3 +

+
+

4 +

!! end @@ -4287,7 +4407,9 @@ Table multiple attributes correction !! result -
status + +

status +

!!end @@ -4741,7 +4863,9 @@ Table attribute legitimate extension !! result -
status + +

status +

!!end @@ -4755,7 +4879,9 @@ Table attribute safety !! result -
status + +

status +

!! end @@ -5424,9 +5550,14 @@ noxml !! result - - - + + + @@ -5443,7 +5574,9 @@ Fuzz testing: Parser21 !! result
https:// +

https:// +

+
+ +
- @@ -5489,7 +5622,9 @@ MOVE YOUR MOUSE CURSOR OVER THIS TEXT MOVE YOUR MOUSE CURSOR OVER THIS TEXT -
irc://{{ftp://a" onmouseover="alert('hello world');" + +

irc://{{ftp://a" onmouseover="alert('hello world');" +

+ +

+

@@ -7604,18 +7739,17 @@ Template:Bullet !!endarticle !! test -Bug 529: Uncovered bullet +Bug 529/12974: Uncovered bullet !! input * Foo {{bullet}} !! result -
  • Foo -
  • Bar +
    • Foo * Bar
    !! end !! test -Bug 529: Uncovered table already at line-start +Bug 529/12974: Uncovered table already at line-start !! input x @@ -7626,24 +7760,33 @@ y

    - - + - - +
    1 2 + +

    1 +

    +
    +

    2 +

    3 4 + +

    3 +

    +
    +

    4 +

    y

    !! end !! test -Bug 529: Uncovered bullet in parser function result +Bug 529/12974: Uncovered bullet in parser function result !! input * Foo {{lc:{{bullet}} }} !! result -
    • Foo -
    • bar +
      • Foo * bar
      !! end