From 31625b54a03781f7504d67c3dba414e6ed285707 Mon Sep 17 00:00:00 2001 From: Gabriel Wicke Date: Tue, 17 Apr 2012 18:58:52 +0200 Subject: [PATCH] Bug 11748: Handle optionally-closed HTML tags * Fixes bug 11748 (Parser issue for HTML definition list) and similar issues for nested unordered / ordered lists * Stops wrapping HTML-syntax definition lists into paragraphs for consistency with their wikitext variants * Enables one previously disabled test and adds another for nested definition lists with HTML syntax Change-Id: If75ed54e11452dbcf5e6213cc20923064f811715 --- RELEASE-NOTES-1.21 | 3 ++- includes/Sanitizer.php | 8 ++++++-- includes/parser/Parser.php | 4 ++-- tests/parser/parserTests.txt | 22 +++++++++++++++++++--- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/RELEASE-NOTES-1.21 b/RELEASE-NOTES-1.21 index 29bef9f087..07cba04d40 100644 --- a/RELEASE-NOTES-1.21 +++ b/RELEASE-NOTES-1.21 @@ -49,7 +49,8 @@ production. to support Squid configured in forward-proxy mode, set $wgSquidPurgeUseHostHeader to false. * (bug 37020) sql.php with readline eats semicolon - +* (bug 11748) Properly handle optionally-closed HTML tags when Tidy is + disabled, and don't wrap HTML-syntax definition lists in paragraphs. === API changes in 1.21 === * prop=revisions can now report the contentmodel and contentformat, see docs/contenthandler.txt diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 8919f10ad6..2c4ea47372 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -387,9 +387,9 @@ class Sanitizer { $htmlsingleonly = array( # Elements that cannot have close tags 'br', 'hr' ); - $htmlnest = array( # Tags that can be nested--?? + $htmlnest = array( # Tags that can be nested directly or indirectly 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', - 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' + 'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span' ); $tabletags = array( # Can only appear inside table, we will close them 'td', 'th', 'tr', @@ -510,6 +510,10 @@ class Sanitizer { } elseif ( isset( $htmlsingle[$t] ) ) { # Hack to not close $htmlsingle tags $brace = null; + # Still need to push this optionally-closed tag to + # the tag stack so that we can match end tags + # instead of marking them as bad. + array_push( $tagstack, $t ); } elseif ( isset( $tabletags[$t] ) && in_array( $t, $tagstack ) ) { // New table tag but forgot to close the previous one diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 8671665ffc..b31288f58d 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -2377,10 +2377,10 @@ class Parser { wfProfileIn( __METHOD__."-paragraph" ); # No prefix (not in list)--go to paragraph mode # XXX: use a stack for nestable elements like span, table and div - $openmatch = preg_match('/(?:mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); + 'mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; # TODO bug 5718: paragraph closed diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index dbbcd478cf..27843a31de 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -1202,7 +1202,6 @@ Definition lists: colon in HTML attribute !! end - !! test Definition lists: self-closed tag !! input @@ -1215,8 +1214,6 @@ Definition lists: self-closed tag !! test Bug 11748: Literal closing tags -!! options -disabled !! input
test 1
@@ -1231,6 +1228,7 @@ disabled
test 2
test test test test test
+ !! end !! test @@ -1253,6 +1251,7 @@ Definition and unordered list using wiki syntax nested in unordered list using h !! end !! test + Definition list with empty definition and following paragraph !! input ; term: @@ -1264,6 +1263,23 @@ Paragraph text

!! end +!! test +Nested definition lists using html syntax +!! input +
+
+
Foo
+
+
+!! result +
+
+
Foo
+
+
+ +!! end + !! test Definition Lists: No nesting: Multiple dd's !! input -- 2.20.1