/**
 * Convert wikitext heading lines (e.g. "== Title ==") into HTML heading
 * elements (<h1> .. <h6>).
 *
 * Levels are handled from 6 down to 1, so a line delimited by a longer run
 * of "=" is converted at that level before any shorter delimiter is tried.
 * Spaces and tabs between the "=" delimiters and the heading content are
 * dropped; the content itself is copied through unchanged.
 *
 * @param string $text Wikitext to process
 * @return string Text with heading lines replaced by <hN>...</hN>
 */
public function doHeadings( $text ) {
	for ( $i = 6; $i >= 1; --$i ) {
		$h = str_repeat( '=', $i );
		// Trim non-newline whitespace from headings.
		// Only [ \t] is used around the content: using \s* there would let
		// the match run across line breaks, so "==\n===\n" would be parsed
		// as <h2>=</h2>.
		$text = preg_replace( "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m", "<h$i>\\1</h$i>", $text );
	}
	return $text;
}
# Get all headlines for numbering them and adding funky stuff like [edit]
# links - this is for later, but we need the number of headlines right now
- # This regexp also trims whitespace in the heading's content
+ # NOTE: whitespace in headings has been trimmed in doHeadings. It shouldn't
+ # be trimmed here since whitespace in HTML headings is significant.
$matches = [];
$numMatches = preg_match_all(
- '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
+ '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
$text,
$matches
);
<h1><span class="mw-headline" id="Header_1">Header 1</span></h1>
<h2><span class="mw-headline" id="Header_1.1">Header 1.1</span></h2>
<h2><span class="mw-headline" id="Header_1.2">Header 1.2</span></h2>
-<h1><span class="mw-headline" id="Header_2">Header 2</span></h1>
+<h1><span class="mw-headline" id="Header_2">Header 2
+</span></h1>
<h2><span class="mw-headline" id="Header_2.1">Header 2.1</span></h2>
<h2><span class="mw-headline" id="Header_2.2">Header 2.2</span></h2>
Trim whitespace in wikitext headings, list items, table captions, headings, and cells
!! wikitext
__NOTOC__
-== <!--c1--> <!--c2--> Heading <!--c3--> <!--c4--> ==
+== <!--c1--> <!--c2--> Spaces <!--c3--> <!--c4--> ==
+== <!--c2--> <!--c2--> Tabs <!--c3--><!--c4--> ==
* <!--c1--> <!--c2--> List item <!--c3--> <!--c4-->
; <!--term to define--> term : <!--term's definition--> definition
{|
| <!--c1--> <!--c2--> Table Cell 1 <!--c3--> <!--c4--> || Table Cell 2 <!--c5-->
|} foo <!--c1-->
!! html/php+tidy
-<h2><span class="mw-headline" id="Heading">Heading</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Heading">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Spaces">Spaces</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Spaces">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Tabs">Tabs</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: Tabs">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<ul><li>List item</li></ul>
<dl><dt>term </dt>
<dd>definition</dd></dl>
Do not trim whitespace in HTML headings, list items, table captions, headings, and cells
!! wikitext
__NOTOC__
-<h2> <!--c1--> <!--c2--> Heading <!--c3--> <!--c4--> <h2>
+<h2> <!--c1--> <!--c2--> Heading <!--c3--> <!--c4--> </h2>
<ul><li> <!--c1--> <!--c2--> List item <!--c3--> <!--c4--> </li></ul>
<table>
<tr><th> <!--c1--> <!--c2--> Table Heading <!--c3--> <!--c4--> <th></tr>
<tr><td> <!--c1--> <!--c2--> Table Cell <!--c3--> <!--c4--> <th></tr>
</table>
!! html/php+tidy
-<h2> Heading </h2><h2>
+<h2><span class="mw-headline" id="Heading"> Heading </span></h2>
<ul><li> List item </li></ul>
<table>
<tbody><tr><th> Table Heading </th><th></th></tr>
<tr><td> Table Cell </td><th></th></tr>
</tbody></table>
-</h2>
!! end
!! test