Cleanup the element matches in doBlockLevels a bit
authorArlo Breault <abreault@wikimedia.org>
Fri, 20 Apr 2018 00:00:57 +0000 (20:00 -0400)
committerArlo Breault <abreault@wikimedia.org>
Wed, 25 Apr 2018 17:22:40 +0000 (13:22 -0400)
The MARKER_PREFIX is removed since `unstripGeneral()` happens before we
get here.

Change-Id: Ic668784fd8bbaa8395cd5449c83a993abda141eb

includes/parser/BlockLevelPass.php

index 1173dd2..c366903 100644 (file)
@@ -291,19 +291,34 @@ class BlockLevelPass {
                        if ( 0 == $prefixLength ) {
                                # No prefix (not in list)--go to paragraph mode
                                # @todo consider using a stack for nestable elements like span, table and div
+
+                               // P-wrapping and indent-pre are suppressed inside, not outside
+                               $blockElems = 'table|h1|h2|h3|h4|h5|h6|pre|p|ul|ol|dl|li';
+                               // P-wrapping and indent-pre are suppressed outside, not inside
+                               $antiBlockElems = 'td|th';
+
                                $openMatch = preg_match(
-                                       '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
-                                               . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)\\b/iS',
+                                       '/<('
+                                               . "({$blockElems})|\\/({$antiBlockElems})|"
+                                               // Always suppresses
+                                               . '\\/?(tr)'
+                                               . ')\\b/iS',
                                        $t
                                );
                                $closeMatch = preg_match(
-                                       '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
-                                               . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
-                                               . Parser::MARKER_PREFIX
-                                               . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)\\b/iS',
+                                       '/<('
+                                               . "\\/({$blockElems})|({$antiBlockElems})|"
+                                               // Never suppresses
+                                               . '\\/?(center|blockquote|div|hr|mw:)'
+                                               . ')\\b/iS',
                                        $t
                                );
 
+                               // Any match closes the paragraph, but only when `!$closeMatch`
+                               // do we enter block mode.  The oddities with table rows and
+                               // cells are to avoid paragraph wrapping in interstitial spaces
+                               // leading to fostered content.
+
                                if ( $openMatch || $closeMatch ) {
                                        $pendingPTag = false;
                                        // Only close the paragraph if we're not inside a <pre> tag, or if