From: Mark A. Hershberger
Date: Mon, 21 Nov 2011 01:45:23 +0000 (+0000)
Subject: Fixes Bug 31865 - Tag for discarding whitespaces.
X-Git-Tag: 1.31.0-rc.0~26374
X-Git-Url: http://git.cyclocoop.org/%28%5B%5E/404?a=commitdiff_plain;h=5bd4f88169e2fc5a45224d1518f989974ddf6f5d;p=lhc%2Fweb%2Fwiklou.git
Fixes Bug 31865 - Tag for discarding whitespaces.
Patch with parser tests from Van de Bugger
---
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php
index 066589f685..7803a70ec0 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -211,6 +211,9 @@ class Preprocessor_DOM implements Preprocessor {
$ignoredElements = array( 'includeonly' );
$xmlishElements[] = 'includeonly';
}
+ // `dws' stands for "discard white spaces". `<dws>' and all the whitespace after it are
+ // discarded.
+ $xmlishElements[] = 'dws';
$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
@@ -406,6 +409,20 @@ class Preprocessor_DOM implements Preprocessor {
}
$tagStartPos = $i;
+
+ // Handle tag `dws'.
+ if ( $name == 'dws' ) {
+ $i = $tagEndPos + 1;
+ if ( preg_match( '/\s*/', $text, $matches, 0, $i ) ) {
+ $i += strlen( $matches[0] );
+ }
+ $accum .=
+ '<ignore>' .
+ htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) ) .
+ '</ignore>';
+ continue;
+ }
+
if ( $text[$tagEndPos-1] == '/' ) {
$attrEnd = $tagEndPos - 1;
$inner = null;
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php
index 2934181a50..ad5155b8e2 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -153,6 +153,9 @@ class Preprocessor_Hash implements Preprocessor {
$ignoredElements = array( 'includeonly' );
$xmlishElements[] = 'includeonly';
}
+ // `dws' stands for "discard white spaces". `<dws>' and all the whitespace after it are
+ // discarded.
+ $xmlishElements[] = 'dws';
$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
@@ -350,6 +353,17 @@ class Preprocessor_Hash implements Preprocessor {
}
$tagStartPos = $i;
+
+ // Handle tag dws.
+ if ( $name == 'dws' ) {
+ $i = $tagEndPos + 1;
+ if ( preg_match( '/\s*/', $text, $matches, 0, $i ) ) {
+ $i += strlen( $matches[0] );
+ }
+ $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) );
+ continue;
+ }
+
if ( $text[$tagEndPos-1] == '/' ) {
// Short end tag
$attrEnd = $tagEndPos - 1;
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 9a7fd40dfb..c331dd31a2 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -8930,6 +8930,97 @@ title=[[MediaWiki:bug32450.css]]
!! end
+!! test
+Bug 31865: HTML-style tag is recognized and discarded.
+!! input
+one<dws>two
+!! result
+onetwo
+
+!! end
+
+!! test
+Bug 31865: XML-style tag is recognized and discarded.
+!! input
+one<dws/>two
+!! result
+onetwo
+
+!! end
+
+!! test
+Bug 31865: Spaces after tag are discarded.
+!! input
+one<dws> two
+!! result
+onetwo
+
+!! end
+
+!! test
+Bug 31865: Tabs after tag are discarded too.
+!! input
+one two
+!! result
+onetwo
+
+!! end
+
+!! test
+Bug 31865: Newlines after tag are discarded too.
+!! input
+one
+
+
+two
+!! result
+onetwo
+
+!! end
+
+!! test
+Bug 31865: Spaces before tag are not discarded.
+!! input
+one <dws>two
+!! result
+one two
+
+!! end
+
+!! test
+Bug 31865: Continuation is indented.
+!! input
+one
+ two
+!! result
+onetwo
+
+!! end
+
+!! test
+Bug 31865: List item continuation.
+!! input
+* one
+ two
+* three
+!! result
+
+
+!! end
+
+!! test
+Bug 31865: XML-style; asterisk after the tag does not start list item.
+!! input
+* one
+* two
+!! result
+
+
+!! end
+
TODO:
more images
more tables