Bug 11748: Handle optionally-closed HTML tags
author: Gabriel Wicke <gwicke@wikimedia.org>
Tue, 17 Apr 2012 16:58:52 +0000 (18:58 +0200)
committer: Antoine Musso <hashar@free.fr>
Thu, 25 Oct 2012 17:31:29 +0000 (19:31 +0200)
* Fixes bug 11748 (Parser issue for HTML definition list) and similar
  issues for nested unordered / ordered lists

* Stops wrapping HTML-syntax definition lists into paragraphs
  for consistency with their wikitext variants

* Enables one previously disabled test and adds another for nested
  definition lists with HTML syntax

Change-Id: If75ed54e11452dbcf5e6213cc20923064f811715

RELEASE-NOTES-1.21
includes/Sanitizer.php
includes/parser/Parser.php
tests/parser/parserTests.txt

index 29bef9f..07cba04 100644 (file)
@@ -49,7 +49,8 @@ production.
   to support Squid configured in forward-proxy mode, set 
   $wgSquidPurgeUseHostHeader to false.
 * (bug 37020) sql.php with readline eats semicolon
-
+* (bug 11748) Properly handle optionally-closed HTML tags when Tidy is
+  disabled, and don't wrap HTML-syntax definition lists in paragraphs.
 
 === API changes in 1.21 ===
 * prop=revisions can now report the contentmodel and contentformat, see docs/contenthandler.txt
index 8919f10..2c4ea47 100644 (file)
@@ -387,9 +387,9 @@ class Sanitizer {
                        $htmlsingleonly = array( # Elements that cannot have close tags
                                'br', 'hr'
                        );
-                       $htmlnest = array( # Tags that can be nested--??
+                       $htmlnest = array( # Tags that can be nested directly or indirectly
                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
-                               'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
+                               'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span'
                        );
                        $tabletags = array( # Can only appear inside table, we will close them
                                'td', 'th', 'tr',
@@ -510,6 +510,10 @@ class Sanitizer {
                                                } elseif ( isset( $htmlsingle[$t] ) ) {
                                                        # Hack to not close $htmlsingle tags
                                                        $brace = null;
+                                                       # Still need to push this optionally-closed tag to
+                                                       # the tag stack so that we can match end tags
+                                                       # instead of marking them as bad.
+                                                       array_push( $tagstack, $t );
                                                } elseif ( isset( $tabletags[$t] )
                                                && in_array( $t, $tagstack ) ) {
                                                        // New table tag but forgot to close the previous one
index 8671665..b31288f 100644 (file)
@@ -2377,10 +2377,10 @@ class Parser {
                                wfProfileIn( __METHOD__."-paragraph" );
                                # No prefix (not in list)--go to paragraph mode
                                # XXX: use a stack for nestable elements like span, table and div
-                               $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
+                               $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
                                $closematch = preg_match(
                                        '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
-                                       '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
+                                       '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t );
                                if ( $openmatch or $closematch ) {
                                        $paragraphStack = false;
                                        # TODO bug 5718: paragraph closed
index dbbcd47..27843a3 100644 (file)
@@ -1202,7 +1202,6 @@ Definition lists: colon in HTML attribute
 
 !! end
 
-
 !! test
 Definition lists: self-closed tag
 !! input
@@ -1215,8 +1214,6 @@ Definition lists: self-closed tag
 
 !! test
 Bug 11748: Literal closing tags
-!! options
-disabled
 !! input
 <dl>
 <dt>test 1</dt>
@@ -1231,6 +1228,7 @@ disabled
 <dt>test 2</dt>
 <dd>test test test test test</dd>
 </dl>
+
 !! end
 
 !! test
@@ -1253,6 +1251,7 @@ Definition and unordered list using wiki syntax nested in unordered list using h
 !! end
 
 !! test
+
 Definition list with empty definition and following paragraph
 !! input
 ; term:
@@ -1264,6 +1263,23 @@ Paragraph text
 </p>
 !! end
 
+!! test
+Nested definition lists using html syntax
+!! input
+<dl><dd>
+<dl>
+<dd>Foo</dd>
+</dl>
+</dd></dl>
+!! result
+<dl><dd>
+<dl>
+<dd>Foo</dd>
+</dl>
+</dd></dl>
+
+!! end
+
 !! test
 Definition Lists: No nesting: Multiple dd's
 !! input