(no commit message)
authorLeo Koppelkamm <diebuche@users.mediawiki.org>
Tue, 12 Apr 2011 21:27:24 +0000 (21:27 +0000)
committerLeo Koppelkamm <diebuche@users.mediawiki.org>
Tue, 12 Apr 2011 21:27:24 +0000 (21:27 +0000)
includes/Sanitizer.php
includes/parser/Parser.php
skins/common/wikibits.js
tests/parser/parserTests.txt

index 0fd593f..fa961cf 100644 (file)
@@ -369,7 +369,7 @@ class Sanitizer {
                                'strike', 'strong', 'tt', 'var', 'div', 'center',
                                'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
                                'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
-                               'kbd', 'samp'
+                               'kbd', 'samp', 'thead', 'tbody', 'tfoot'
                        );
                        $htmlsingle = array(
                                'br', 'hr', 'li', 'dt', 'dd'
index b14e366..7045690 100644 (file)
@@ -824,189 +824,283 @@ class Parser {
 
                $lines = StringUtils::explode( "\n", $text );
                $out = '';
-               $td_history = array(); # Is currently a td tag open?
-               $last_tag_history = array(); # Save history of last lag activated (td, th or caption)
-               $tr_history = array(); # Is currently a tr tag open?
-               $tr_attributes = array(); # history of tr attributes
-               $has_opened_tr = array(); # Did this table open a <tr> element?
-               $indent_level = 0; # indent level of the table
+               $output =& $out;
 
                foreach ( $lines as $outLine ) {
                        $line = trim( $outLine );
 
-                       if ( $line === '' ) { # empty line, go to next line
+                       if ( $line == '') { //empty line, go to next line
                                $out .= $outLine."\n";
                                continue;
                        }
-
-                       $first_character = $line[0];
+                       $first_chars = $line[0];
+                       if ( strlen($line) > 1) {
+                               $first_chars .= in_array($line[1], array('}', '+', '-')) ? $line[1] : '';
+                       }
                        $matches = array();
 
                        if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
-                               # First check if we are starting a new table
-                               $indent_level = strlen( $matches[1] );
+                               $tables[] = array();
+                               $table =& $this->last($tables);
+                               $table[0] = array(); //first row
+                               $current_row =& $table[0];
+
+                               $table['indent'] = strlen( $matches[1] );
 
                                $attributes = $this->mStripState->unstripBoth( $matches[2] );
                                $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' );
 
-                               $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
-                               array_push( $td_history , false );
-                               array_push( $last_tag_history , '' );
-                               array_push( $tr_history , false );
-                               array_push( $tr_attributes , '' );
-                               array_push( $has_opened_tr , false );
-                       } elseif ( count( $td_history ) == 0 ) {
-                               # Don't do any of the following
-                               $out .= $outLine."\n";
-                               continue;
-                       } elseif ( substr( $line , 0 , 2 ) === '|}' ) {
-                               # We are ending a table
-                               $line = '</table>' . substr( $line , 2 );
-                               $last_tag = array_pop( $last_tag_history );
-
-                               if ( !array_pop( $has_opened_tr ) ) {
-                                       $line = "<tr><td></td></tr>{$line}";
+                               if ( $attributes !== '' ) {
+                                       $table['attributes'] = $attributes;
                                }
+                       } else if ( !isset($tables[0]) ) {
+                               // we're outside the table
 
-                               if ( array_pop( $tr_history ) ) {
-                                       $line = "</tr>{$line}";
+                               $out .= $outLine."\n";
+                       } else if ( $first_chars === '|}' ) {
+                               // trim the |} code from the line
+                               $line = substr ( $line , 2 );
+
+                               // Shorthand for last row
+                               $last_row =& $this->last($table);
+
+                               // a thead at the end becomes a tfoot, unless there is only one row
+                               // Do this before deleting empty last lines to allow headers at the bottom of tables
+                               if ( isset($last_row['type'] ) && $last_row['type'] == 'thead' && isset($table[1])) {
+                                       $last_row['type'] = 'tfoot';
+                                       for($i = 0; isset($last_row[$i]); $i++ ) {
+                                               $last_row[$i]['type'] = 'td';
+                                       }
                                }
 
-                               if ( array_pop( $td_history ) ) {
-                                       $line = "</{$last_tag}>{$line}";
+                               // Delete empty last lines
+                               if ( empty($last_row) ) {
+                                       $last_row = NULL;
                                }
-                               array_pop( $tr_attributes );
-                               $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
-                       } elseif ( substr( $line , 0 , 2 ) === '|-' ) {
-                               # Now we have a table row
-                               $line = preg_replace( '#^\|-+#', '', $line );
+                               $o = $this->printTableHtml( array_pop($tables) ) . $line;
 
-                               # Whats after the tag is now only attributes
-                               $attributes = $this->mStripState->unstripBoth( $line );
-                               $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
-                               array_pop( $tr_attributes );
-                               array_push( $tr_attributes, $attributes );
+                               if ( count($tables) > 0 ) {
+                                       $table =& $this->last($tables);
+                                       $current_row =& $this->last($table);
+                                       $current_element =& $this->last($current_row);
 
-                               $line = '';
-                               $last_tag = array_pop( $last_tag_history );
-                               array_pop( $has_opened_tr );
-                               array_push( $has_opened_tr , true );
-
-                               if ( array_pop( $tr_history ) ) {
-                                       $line = '</tr>';
+                                       $output =& $current_element['content'];
+                               } else {
+                                       $output =& $out;
                                }
 
-                               if ( array_pop( $td_history ) ) {
-                                       $line = "</{$last_tag}>{$line}";
-                               }
+                               $output .= $o;
 
-                               $outLine = $line;
-                               array_push( $tr_history , false );
-                               array_push( $td_history , false );
-                               array_push( $last_tag_history , '' );
-                       } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 )  === '|+' ) {
-                               # This might be cell elements, td, th or captions
-                               if ( substr( $line , 0 , 2 ) === '|+' ) {
-                                       $first_character = '+';
-                                       $line = substr( $line , 1 );
+                       } else if ( $first_chars === '|-' ) {
+                               // start a new row element
+                               // but only when we haven't started one already
+                               if( count($current_row) != 0 ) {
+                                       $table[] = array();
+                                       $current_row =& $this->last($table);
                                }
-
-                               $line = substr( $line , 1 );
-
-                               if ( $first_character === '!' ) {
-                                       $line = str_replace( '!!' , '||' , $line );
+                               // Get the attributes, there's nothing else useful in $line now
+                               $line = substr ( $line , 2 );
+                               $attributes = $this->mStripState->unstripBoth( $line );
+                               $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
+                               if( $attributes !== '') {
+                                       $current_row['attributes'] = $attributes;
+                               }
+                               
+                       } else if ( $first_chars  === '|+' ) {
+                               // a table caption
+                               $line = substr ( $line , 2 );
+                               
+                               $c = $this->getCellAttr($line , 'caption');
+                               $table['caption'] = array();
+                               $table['caption']['content'] = $c[0];
+                               if(isset($c[1])) $table['caption']['attributes'] = $c[1];
+                               unset($c);
+
+                               $output =& $table['caption'];
+                       } else if ( $first_chars === '|' || $first_chars === '!' || $first_chars === '!+' ) {
+                               // Which kind of cells are we dealing with
+                               $this_tag = 'td';
+                               $line = substr ( $line , 1 );
+
+                               if ( $first_chars === '!'  || $first_chars === '!+' ) {
+                                       $line = str_replace ( '!!' , '||' , $line );
+                                       $this_tag = 'th';
                                }
 
-                               # Split up multiple cells on the same line.
-                               # FIXME : This can result in improper nesting of tags processed
-                               # by earlier parser steps, but should avoid splitting up eg
-                               # attribute values containing literal "||".
+                               // Split up multiple cells on the same line.
                                $cells = StringUtils::explodeMarkup( '||' , $line );
+                               $line = ''; // save memory
 
-                               $outLine = '';
+                               // decide whether this row belongs in a thead or a tbody
+                               if ( !array_key_exists('type', $current_row) ) {
+                                       $current_row['type'] = ( $first_chars === '!' ) ? 'thead' : 'tbody' ;
+                               } else if( $first_chars === '|' ) {
+                                       $current_row['type'] = 'tbody';
+                               }
 
-                               # Loop through each table cell
+                               // Loop through each table cell
                                foreach ( $cells as $cell ) {
-                                       $previous = '';
-                                       if ( $first_character !== '+' ) {
-                                               $tr_after = array_pop( $tr_attributes );
-                                               if ( !array_pop( $tr_history ) ) {
-                                                       $previous = "<tr{$tr_after}>\n";
-                                               }
-                                               array_push( $tr_history , true );
-                                               array_push( $tr_attributes , '' );
-                                               array_pop( $has_opened_tr );
-                                               array_push( $has_opened_tr , true );
-                                       }
+                                       // a new cell
+                                       $current_row[] = array();
+                                       $current_element =& $this->last($current_row);
 
-                                       $last_tag = array_pop( $last_tag_history );
+                                       $current_element['type'] = $this_tag;
 
-                                       if ( array_pop( $td_history ) ) {
-                                               $previous = "</{$last_tag}>\n{$previous}";
-                                       }
+                                       $c = $this->getCellAttr($cell , $this_tag);
+                                       $current_element['content'] = $c[0];
+                                       if(isset($c[1])) $current_element['attributes'] = $c[1];
+                                       unset($c);
+                               }
+                               $output =& $current_element['content'];
+                               
+                       } else {
+                               $output .= $outLine."\n";
+                       }
+               }
+               
+               # Remove trailing line-ending (b/c)
+               if ( substr( $out, -1 ) === "\n" ) {
+                       $out = substr( $out, 0, -1 );
+               }
+               
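+               # Close any unclosed tables. NOTE(review): count($tables) shrinks on every array_pop(), so with several open tables this loop stops early - confirm intent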
+               if (isset($tables) && count($tables) > 0 ) {
+                       for ($i = 0; $i < count($tables); $i++) {
+                               $out .= $this->printTableHtml( array_pop($tables) );
+                       }
+               }
+                       
+               wfProfileOut( __METHOD__ );
 
-                                       if ( $first_character === '|' ) {
-                                               $last_tag = 'td';
-                                       } elseif ( $first_character === '!' ) {
-                                               $last_tag = 'th';
-                                       } elseif ( $first_character === '+' ) {
-                                               $last_tag = 'caption';
-                                       } else {
-                                               $last_tag = '';
-                                       }
+               return $out;
+       }
 
-                                       array_push( $last_tag_history , $last_tag );
 
-                                       # A cell could contain both parameters and data
-                                       $cell_data = explode( '|' , $cell , 2 );
+       /**
+        * Helper function for doTableStuff() separating the contents of cells from
+        * attributes. Particularly useful as there's a possible bug and this action
+        * is performed in two places (once for captions, once for table cells).
+        *
+        * @private
+        */
+       function getCellAttr ($cell , $tag_name) {
+               $content = null;
+               $attributes = null;
 
-                                       # Bug 553: Note that a '|' inside an invalid link should not
-                                       # be mistaken as delimiting cell parameters
-                                       if ( strpos( $cell_data[0], '[[' ) !== false ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell}";
-                                       } elseif ( count( $cell_data ) == 1 ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
-                                       } else {
-                                               $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
-                                               $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
-                                               $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
-                                       }
+               $cell = trim ( $cell );
 
-                                       $outLine .= $cell;
-                                       array_push( $td_history , true );
-                               }
-                       }
-                       $out .= $outLine . "\n";
+               // A cell could contain both parameters and data
+               $cell_data = explode ( '|' , $cell , 2 );
+
+               // Bug 553: Note that a '|' inside an invalid link should not
+               // be mistaken as delimiting cell parameters
+               if ( strpos( $cell_data[0], '[[' ) !== false ) {
+                       $content = trim ( $cell );
                }
+               else if ( count ( $cell_data ) == 1 ) {
+                       $content = trim ( $cell_data[0] );
+               }
+               else {
+                       $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
+                       $attributes = Sanitizer::fixTagAttributes( $attributes , $tag_name );
 
-               # Closing open td, tr && table
-               while ( count( $td_history ) > 0 ) {
-                       if ( array_pop( $td_history ) ) {
-                               $out .= "</td>\n";
-                       }
-                       if ( array_pop( $tr_history ) ) {
-                               $out .= "</tr>\n";
+                       $content = trim ( $cell_data[1] );
+               }
+               return array($content, $attributes);
+       }
+
+
+       /**
+        * Helper function for doTableStuff(). This converts the structured array into html.
+        *
+        * @private
+        */
+       function printTableHtml (&$t) {
+               $r = "\n";
+               $r .= str_repeat( '<dl><dd>' , $t['indent'] );
+               $r .= '<table';
+               $r .= isset($t['attributes']) ? $t['attributes'] : '';
+               $r .= '>';
+               unset($t['attributes']);
+
+               if ( isset($t['caption']) ) {
+                       $r .= "\n<caption";
+                       $r .= isset($t['caption']['attributes']) ? $t['caption']['attributes'] : '';
+                       $r .= '>';
+                       $r .= $t['caption']['content'];
+                       $r .= '</caption>';
+               }
+               $last_section = '';
+               $empty = true;
+               $simple = true;
+               
+               // If every non-empty row has the same section type (e.g. only tbody rows), mark the table as simple
+               for($i = 0; isset($t[$i]); $i++ ) {
+                   if ( !count( $t[$i]) ) continue;
+                   if ( !$last_section ) {
+                       $last_section = $t[$i]['type'];
+                   } else if ($last_section != $t[$i]['type']) {
+                       $simple = false;
+                       break;
+                   }
+               }
+               $last_section = '';
+               for($i = 0; isset($t[$i]); $i++ ) {
+                       // Check for empty tables
+                       if ( count( $t[$i]) ) {
+                               $empty = false;
+                       } else {
+                           continue;
                        }
-                       if ( !array_pop( $has_opened_tr ) ) {
-                               $out .= "<tr><td></td></tr>\n" ;
+                       if( $t[$i]['type'] != $last_section && !$simple ) {
+                               $r .= "\n<" . $t[$i]['type'] . '>';
                        }
 
-                       $out .= "</table>\n";
-               }
+                       $r .= "\n<tr";
+                       $r .= isset($t[$i]['attributes']) ? $t[$i]['attributes'] : '';
+                       $r .= '>';
+                       for($j = 0; isset($t[$i][$j]); $j++ ) {
+                               $r .= "\n<" . $t[$i][$j]['type'];
+                               $r .= isset($t[$i][$j]['attributes']) ? $t[$i][$j]['attributes'] : '';
+                               $r .= '>';
 
-               # Remove trailing line-ending (b/c)
-               if ( substr( $out, -1 ) === "\n" ) {
-                       $out = substr( $out, 0, -1 );
-               }
+                               $r .= $t[$i][$j]['content'];
 
-               # special case: don't return empty table
-               if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
-                       $out = '';
+                               $r .= '</' . $t[$i][$j]['type'] . '>';
+                               unset($t[$i][$j]);
+                       }
+                       $r .= "\n</tr>";
+
+                       if( ( !isset($t[$i+1]) && !$simple )|| ( isset($t[$i+1]) && ($t[$i]['type'] != $t[$i+1]['type'])) ) {
+                               $r .= '</' . $t[$i]['type'] . '>';
+                       }
+                       $last_section = $t[$i]['type'];
+                       unset($t[$i]);
+               }
+               if ( $empty ) {
+                       if ( isset($t['caption']) ) {
+                               $r .= "\n<tr><td></td></tr>";
+                       } else {
+                               return '';
+                       }
                }
+               $r .= "\n</table>";
+               $r .= str_repeat( '</dd></dl>' , $t['indent'] );
 
-               wfProfileOut( __METHOD__ );
+               return $r;
+       }
 
-               return $out;
+       /**
+        * Like end(), but works only on the numeric array indexes and does not use PHP's
+        * internal array pointer. Returns a reference to the last element of an array, much
+        * like "\$arr[-1]" in Perl. Ignores associative elements, and will create a 0 key with a
+        * NULL value if there were no numeric elements (and the array itself if not previously defined).
+        *
+        * @private
+        */
+       function &last (&$arr) {
+               for($i = count($arr); (!isset($arr[$i]) && $i > 0); $i--) {  }
+               return $arr[$i];
        }
 
        /**
@@ -2239,7 +2333,7 @@ class Parser {
                                        '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
                                if ( $openmatch or $closematch ) {
                                        $paragraphStack = false;
-                                       # TODO bug 5718: paragraph closed
+                                       # TODO bug 5718: paragraph closed
                                        $output .= $this->closeParagraph();
                                        if ( $preOpenMatch and !$preCloseMatch ) {
                                                $this->mInPre = true;
index 3f9dc67..1db54f2 100644 (file)
@@ -551,14 +551,7 @@ window.sortables_init = function() {
 };
 
 window.ts_makeSortable = function( table ) {
-       var firstRow;
-       if ( table.rows && table.rows.length > 0 ) {
-               if ( table.tHead && table.tHead.rows.length > 0 ) {
-                       firstRow = table.tHead.rows[table.tHead.rows.length-1];
-               } else {
-                       firstRow = table.rows[0];
-               }
-       }
+       var firstRow = table.rows[0];
        if ( !firstRow ) {
                return;
        }
index bce99f6..6e45e0a 100644 (file)
@@ -1237,8 +1237,9 @@ A table with nothing but a caption
 |}
 !! result
 <table>
-<caption> caption
-</caption><tr><td></td></tr></table>
+<caption>caption</caption>
+<tr><td></td></tr>
+</table>
 
 !! end
 
@@ -1253,13 +1254,14 @@ Simple table
 !! result
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
-</td></tr>
+<td>1</td>
+<td>2</td>
+</tr>
 <tr>
-<td> 3 </td>
-<td> 4
-</td></tr></table>
+<td>3</td>
+<td>4</td>
+</tr>
+</table>
 
 !! end
 
@@ -1288,49 +1290,46 @@ Multiplication table
 |}
 !! result
 <table border="1" cellpadding="2">
-<caption>Multiplication table
-</caption>
+<caption>Multiplication table</caption>
+<thead>
 <tr>
-<th> &#215; </th>
-<th> 1 </th>
-<th> 2 </th>
-<th> 3
-</th></tr>
+<th>&#215;</th>
+<th>1</th>
+<th>2</th>
+<th>3</th>
+</tr></thead>
+<tbody>
 <tr>
-<th> 1
-</th>
-<td> 1 </td>
-<td> 2 </td>
-<td> 3
-</td></tr>
+<th>1</th>
+<td>1</td>
+<td>2</td>
+<td>3</td>
+</tr>
 <tr>
-<th> 2
-</th>
-<td> 2 </td>
-<td> 4 </td>
-<td> 6
-</td></tr>
+<th>2</th>
+<td>2</td>
+<td>4</td>
+<td>6</td>
+</tr>
 <tr>
-<th> 3
-</th>
-<td> 3 </td>
-<td> 6 </td>
-<td> 9
-</td></tr>
+<th>3</th>
+<td>3</td>
+<td>6</td>
+<td>9</td>
+</tr>
 <tr>
-<th> 4
-</th>
-<td> 4 </td>
-<td> 8 </td>
-<td> 12
-</td></tr>
+<th>4</th>
+<td>4</td>
+<td>8</td>
+<td>12</td>
+</tr>
 <tr>
-<th> 5
-</th>
-<td> 5 </td>
-<td> 10 </td>
-<td> 15
-</td></tr></table>
+<th>5</th>
+<td>5</td>
+<td>10</td>
+<td>15</td>
+</tr></tbody>
+</table>
 
 !! end
 
@@ -1348,17 +1347,15 @@ Table rowspan
 !! result
 <table align="right" border="1">
 <tr>
-<td> Cell 1, row 1
-</td>
-<td rowspan="2"> Cell 2, row 1 (and 2)
-</td>
-<td> Cell 3, row 1
-</td></tr>
+<td>Cell 1, row 1</td>
+<td rowspan="2">Cell 2, row 1 (and 2)</td>
+<td>Cell 3, row 1</td>
+</tr>
 <tr>
-<td> Cell 1, row 2
-</td>
-<td> Cell 3, row 2
-</td></tr></table>
+<td>Cell 1, row 2</td>
+<td>Cell 3, row 2</td>
+</tr>
+</table>
 
 !! end
 
@@ -1378,19 +1375,19 @@ Nested table
 !! result
 <table border="1">
 <tr>
-<td> &#945;
-</td>
+<td>&#945;</td>
 <td>
 <table bgcolor="#ABCDEF" border="2">
 <tr>
-<td>nested
-</td></tr>
+<td>nested</td>
+</tr>
 <tr>
-<td>table
-</td></tr></table>
-</td>
-<td>the original table again
-</td></tr></table>
+<td>table</td>
+</tr>
+</table></td>
+<td>the original table again</td>
+</tr>
+</table>
 
 !! end
 
@@ -1403,8 +1400,9 @@ Invalid attributes in table cell (bug 1830)
 !! result
 <table>
 <tr>
-<td>broken
-</td></tr></table>
+<td>broken</td>
+</tr>
+</table>
 
 !! end
 
@@ -1418,8 +1416,7 @@ Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitec
 <table>
 <tr>
 <td>[<a rel="nofollow" class="external free" href="ftp://%7Cx">ftp://%7Cx</a></td>
-<td>]" onmouseover="alert(document.cookie)"&gt;test
-</td>
+<td>]" onmouseover="alert(document.cookie)"&gt;test</td>
 </tr>
 </table>
 
@@ -2685,8 +2682,9 @@ BUG 553: link with two variables in a piped link
 !! result
 <table>
 <tr>
-<td>[[{{{1}}}|{{{2}}}]]
-</td></tr></table>
+<td>[[{{{1}}}|{{{2}}}]]</td>
+</tr>
+</table>
 
 !! end
 
@@ -2795,13 +2793,14 @@ foo {{table}}
 </p>
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
-</td></tr>
+<td>1</td>
+<td>2</td>
+</tr>
 <tr>
-<td> 3 </td>
-<td> 4
-</td></tr></table>
+<td>3</td>
+<td>4</td>
+</tr>
+</table>
 
 !! end
 
@@ -2815,13 +2814,14 @@ foo
 </p>
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
-</td></tr>
+<td>1</td>
+<td>2</td>
+</tr>
 <tr>
-<td> 3 </td>
-<td> 4
-</td></tr></table>
+<td>3</td>
+<td>4</td>
+</tr>
+</table>
 
 !! end
 
@@ -4370,8 +4370,9 @@ Table multiple attributes correction
 !! result
 <table>
 <tr>
-<th class="awesome"> status
-</th></tr></table>
+<th class="awesome">status</th>
+</tr>
+</table>
 
 !!end
 
@@ -4815,8 +4816,9 @@ Table attribute legitimate extension
 !! result
 <table>
 <tr>
-<th style="color:blue"> status
-</th></tr></table>
+<th style="color:blue">status</th>
+</tr>
+</table>
 
 !!end
 
@@ -4829,8 +4831,9 @@ Table attribute safety
 !! result
 <table>
 <tr>
-<th style="/* insecure input */"> status
-</th></tr></table>
+<th style="/* insecure input */">status</th>
+</tr>
+</table>
 
 !! end
 
@@ -5452,8 +5455,7 @@ Fuzz testing: Parser13
 !! result
 <table>
 <tr>
-<td>
-</td>
+<td></td>
 </tr>
 </table>
 
@@ -5501,8 +5503,7 @@ noxml
 <th>https://</th>
 <th></th>
 <th></th>
-<th>
-</td>
+<th></th>
 </tr>
 </table>
 
@@ -5517,10 +5518,8 @@ Fuzz testing: Parser21
 !! result
 <table>
 <tr>
-<th> <a rel="nofollow" class="external free" href="irc://{{ftp://a">irc://{{ftp://a</a>" onmouseover="alert('hello world');"
-</th>
-<td>
-</td>
+<th><a rel="nofollow" class="external free" href="irc://{{ftp://a">irc://{{ftp://a</a>" onmouseover="alert('hello world');"</th>
+<td></td>
 </tr>
 </table>
 
@@ -5542,6 +5541,22 @@ http://===r:::https://b
 !! end
 
 # Known to produce bad XML for now
+
+# Note: the current result listed for this is not what the original one was,
+# but the original bug was JavaScript injection, which is fixed in any case.
+# It's not clear that the original result listed was any more correct than the
+# current one.  Original result:
+# <table>
+# {{{|
+# <u class="&#124;">}}}} &gt;
+# <br style="onmouseover=&#39;alert(document.cookie);&#39;" />
+# 
+# MOVE YOUR MOUSE CURSOR OVER THIS TEXT
+# <tr>
+# <td></u>
+# </td>
+# </tr>
+# </table>
 !! test
 Fuzz testing: Parser24
 !! options
@@ -5556,15 +5571,14 @@ noxml
 MOVE YOUR MOUSE CURSOR OVER THIS TEXT
 |
 !! result
-<table>
-{{{|
+<p>{{{|
 <u class="&#124;">}}}} &gt;
 <br style="onmouseover=&#39;alert(document.cookie);&#39;" />
-
-MOVE YOUR MOUSE CURSOR OVER THIS TEXT
+</p><p>MOVE YOUR MOUSE CURSOR OVER THIS TEXT
+</p>
+<table>
 <tr>
-<td></u>
-</td>
+<td></u></td>
 </tr>
 </table>
 
 </p>
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
-</td></tr>
+<td>1</td>
+<td>2</td>
+</tr>
 <tr>
-<td> 3 </td>
-<td> 4
-</td></tr></table>
+<td>3</td>
+<td>4</td>
+</tr></table>
 <p>y
 </p>
 !! end