Bug 6171: Sanitizing of HTML-elements with optional end tags.
author Anders Wegge Jakobsen <wegge@users.mediawiki.org>
Sat, 3 Jun 2006 00:01:01 +0000 (00:01 +0000)
committer Anders Wegge Jakobsen <wegge@users.mediawiki.org>
Sat, 3 Jun 2006 00:01:01 +0000 (00:01 +0000)
RELEASE-NOTES
includes/Sanitizer.php
maintenance/parserTests.txt

index 792dda3..113f33e 100644 (file)
@@ -412,7 +412,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * New message sp-newimages-showfrom replaces rclistfrom on special:newimages
 * Improve handling of ;: definition list construct with overlapping or
   nested HTML tags
-
+* (bug 6171) Fix sanitizing of HTML-elements with an optional closing
+  tag. The sanitizer still needs to learn how to make well-formed XML
+  in this case.
 
 == Compatibility ==
 
index 30c3a21..f017592 100644 (file)
@@ -367,8 +367,8 @@ class Sanitizer {
                        $tabletags = array();
                }
 
-               $htmlsingle = array_merge( $tabletags, $htmlsingle );
-               $htmlelements = array_merge( $htmlsingle, $htmlpairs );
+               $htmlsingleallowed = array_merge( $htmlsingle, $tabletags );
+               $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest );
 
                # Remove HTML comments
                $text = Sanitizer::removeHTMLcomments( $text );
@@ -391,10 +391,28 @@ class Sanitizer {
                                                if( in_array( $t, $htmlsingleonly ) ) {
                                                        $badtag = 1;
                                                } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) {
-                                                       @array_push( $tagstack, $ot );
-                                                       # <li> can be nested in <ul> or <ol>, skip those cases:
-                                                       if(!(in_array($ot, $htmllist) && in_array($t, $listtags) )) {
-                                                               $badtag = 1;
+                                                       if ( in_array($ot, $htmlsingleallowed) ) {
+                                                               # Pop all elements with an optional close tag
+                                                               # and see if we find a match below them
+                                                               $optstack = array();
+                                                               array_push ($optstack, $ot);
+                                                               while ( ( ( $ot = @array_pop( $tagstack ) ) != $t ) &&
+                                                                                               in_array($ot, $htmlsingleallowed) ) {
+                                                                       array_push ($optstack, $ot);
+                                                               }
+                                                               if ( $t != $ot ) {
+                                                                       # No match. Push the optinal elements back again
+                                                                       $badtag = 1;
+                                                                       while ( $ot = @array_pop( $optstack ) ) {
+                                                                               array_push( $tagstack, $ot );
+                                                                       }
+                                                               }
+                                                       } else {
+                                                               @array_push( $tagstack, $ot );
+                                                               # <li> can be nested in <ul> or <ol>, skip those cases:
+                                                               if(!(in_array($ot, $htmllist) && in_array($t, $listtags) )) {
+                                                                       $badtag = 1;
+                                                               }
                                                        }
                                                } else {
                                                        if ( $t == 'table' ) {
index 4abf3a8..05b67ba 100644 (file)
@@ -4293,6 +4293,49 @@ http://example.com<pre>junk</pre>
 
 !!end
 
+!! test
+Parsing optional HTML elements (Bug 6171)
+!! options
+!! input
+<table>
+  <tr>
+    <td> Some tabular data</td>
+    <td> More tabular data ...
+    <td> And yet som tabular data</td>
+  </tr>
+</table>
+!! result
+<table>
+  <tr>
+    <td> Some tabular data</td>
+    <td> More tabular data ...</td>
+    <td> And yet som tabular data</td>
+  </tr>
+</table>
+
+!! end
+
+!! test
+Correct handling of <td>, <tr> (Bug 6171)
+!! options
+!! input
+<table>
+  <tr>
+    <td> Some tabular data</td>
+    <td> More tabular data ...</td>
+    <td> And yet som tabular data</td>
+  </tr>
+</table>
+!! result
+<table>
+  <tr>
+    <td> Some tabular data</td>
+    <td> More tabular data ...</td>
+    <td> And yet som tabular data</td>
+  </tr>
+</table>
+
+!! end
 #
 #
 #