* (bug 49694) $wgSpamRegex is now also applied on the new section headline text
when adding a new topic on a page
* (bug 6200) line breaks in <blockquote> are handled like they are in <div>
+* (bug 41756) Improve treatment of multiple comments on a blank line.
=== API changes in 1.22 ===
* (bug 25553) The JSON output formatter now leaves forward slashes unescaped
}
// Handle comments
if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
- // To avoid leaving blank lines, when a comment is both preceded
- // and followed by a newline (ignoring spaces), trim leading and
- // trailing spaces and one of the newlines.
+
+ // To avoid leaving blank lines, when a sequence of
+ // space-separated comments is both preceded and followed by
+ // a newline (ignoring spaces), then
+ // trim leading and trailing spaces and the trailing newline.
// Find the end
$endPos = strpos( $text, '-->', $i + 4 );
} else {
// Search backwards for leading whitespace
$wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0;
+
// Search forwards for trailing whitespace
// $wsEnd will be the position of the last space (or the '>' if there's none)
$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
+
+ // Keep looking forward as long as we're finding more
+ // comments.
+ $comments = array( array( $wsStart, $wsEnd ) );
+ while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
+ $c = strpos( $text, '-->', $wsEnd + 4 );
+ if ( $c === false ) {
+ break;
+ }
+ $c = $c + 2 + strspn( $text, ' ', $c + 3 );
+ $comments[] = array( $wsEnd + 1, $c );
+ $wsEnd = $c;
+ }
+
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
&& substr( $text, $wsEnd + 1, 1 ) == "\n" )
{
- $startPos = $wsStart;
- $endPos = $wsEnd + 1;
// Remove leading whitespace from the end of the accumulator
// Sanity check first though
$wsLength = $i - $wsStart;
if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
$accum = substr( $accum, 0, -$wsLength );
}
+
+ // Dump all but the last comment to the accumulator
+ foreach ( $comments as $j => $com ) {
+ $startPos = $com[0];
+ $endPos = $com[1] + 1;
+ if ( $j == ( count( $comments ) - 1) ) {
+ break;
+ }
+ $inner = substr( $text, $startPos, $endPos - $startPos);
+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
+ }
+
// Do a line-start run next time to look for headings after the comment
$fakeLineStart = true;
} else {
}
// Handle comments
if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
- // To avoid leaving blank lines, when a comment is both preceded
- // and followed by a newline (ignoring spaces), trim leading and
- // trailing spaces and one of the newlines.
+
+ // To avoid leaving blank lines, when a sequence of
+ // space-separated comments is both preceded and followed by
+ // a newline (ignoring spaces), then
+ // trim leading and trailing spaces and the trailing newline.
// Find the end
$endPos = strpos( $text, '-->', $i + 4 );
} else {
// Search backwards for leading whitespace
$wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0;
+
// Search forwards for trailing whitespace
// $wsEnd will be the position of the last space (or the '>' if there's none)
$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
+
+ // Keep looking forward as long as we're finding more
+ // comments.
+ $comments = array( array( $wsStart, $wsEnd ) );
+ while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
+ $c = strpos( $text, '-->', $wsEnd + 4 );
+ if ( $c === false ) {
+ break;
+ }
+ $c = $c + 2 + strspn( $text, ' ', $c + 3 );
+ $comments[] = array( $wsEnd + 1, $c );
+ $wsEnd = $c;
+ }
+
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
&& substr( $text, $wsEnd + 1, 1 ) == "\n" )
{
- $startPos = $wsStart;
- $endPos = $wsEnd + 1;
// Remove leading whitespace from the end of the accumulator
// Sanity check first though
$wsLength = $i - $wsStart;
{
$accum->lastNode->value = substr( $accum->lastNode->value, 0, -$wsLength );
}
+
+ // Dump all but the last comment to the accumulator
+ foreach ( $comments as $j => $com ) {
+ $startPos = $com[0];
+ $endPos = $com[1] + 1;
+ if ( $j == ( count( $comments ) - 1) ) {
+ break;
+ }
+ $inner = substr( $text, $startPos, $endPos - $startPos);
+ $accum->addNodeWithText( 'comment', $inner );
+ }
+
// Do a line-start run next time to look for headings after the comment
$fakeLineStart = true;
} else {
b
----
a
-<!--foo--><!--More than 1 comment disables stripping of this line!-->
+<!--foo--><!--More than 1 comment, still stripped-->
+b
+----
+a
+ <!--foo--> <!----> <!-- bar -->
b
----
a
</p>
<hr />
<p>a
-</p><p>b
+b
+</p>
+<hr />
+<p>a
+b
</p>
<hr />
<p>a
b
----
a
- <!--foo--><!--More than 1 comment disables stripping of this line!-->
+ <!--foo--><!--More than 1 comment doesn't disable stripping of this line!-->
b
----
a
</p>
<hr />
<p>a
-</p><p>b
+b
</p>
<hr />
<p>a
!!end
!!test
-Single-comment whitespace lines dont break lists, but multi-comment whitespace lines do
+Single-comment whitespace lines dont break lists, and neither do multi-comment whitespace lines
!!input
*a
<!--This line will NOT split the list-->
*b
<!--This line will NOT split the list either-->
*c
- <!--foo--> <!--This line with more than 1 comment will split the list-->
+ <!--foo--> <!----> <!--This line will NOT split the list either-->
*d
!!result
<ul><li>a
</li><li>b
</li><li>c
-</li></ul>
-<ul><li>d
+</li><li>d
</li></ul>
!!end