Allow lines empty but for tabs and comments to be ignored.
author: C. Scott Ananian <cscott@cscott.net>
Thu, 8 Aug 2013 23:48:16 +0000 (19:48 -0400)
committer: C. Scott Ananian <cscott@cscott.net>
Tue, 13 Aug 2013 19:36:57 +0000 (15:36 -0400)
We originally allowed only spaces around comments.  Now allow tabs as
well.  This ought to affect very few pages, but it improves predictability
and helps maintain consistency between the PHP preprocessor and Parsoid.

Change-Id: Icb3ff6eec08aaa83ae332d03c910c13995c9c9ee

includes/parser/Preprocessor_DOM.php
includes/parser/Preprocessor_Hash.php
tests/parser/parserTests.txt
tests/phpunit/includes/parser/PreprocessorTest.php

index c9e16b3..48318b3 100644 (file)
@@ -376,11 +376,11 @@ class Preprocessor_DOM implements Preprocessor {
                                                $i = $lengthText;
                                        } else {
                                                // Search backwards for leading whitespace
-                                               $wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0;
+                                               $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;
 
                                                // Search forwards for trailing whitespace
                                                // $wsEnd will be the position of the last space (or the '>' if there's none)
-                                               $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
+                                               $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
 
                                                // Keep looking forward as long as we're finding more
                                                // comments.
@@ -390,7 +390,7 @@ class Preprocessor_DOM implements Preprocessor {
                                                        if ( $c === false ) {
                                                                break;
                                                        }
-                                                       $c = $c + 2 + strspn( $text, ' ', $c + 3 );
+                                                       $c = $c + 2 + strspn( $text, " \t", $c + 3 );
                                                        $comments[] = array( $wsEnd + 1, $c );
                                                        $wsEnd = $c;
                                                }
@@ -405,7 +405,9 @@ class Preprocessor_DOM implements Preprocessor {
                                                        // Remove leading whitespace from the end of the accumulator
                                                        // Sanity check first though
                                                        $wsLength = $i - $wsStart;
-                                                       if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
+                                                       if ( $wsLength > 0
+                                                               && strspn( $accum, " \t", -$wsLength ) === $wsLength )
+                                                       {
                                                                $accum = substr( $accum, 0, -$wsLength );
                                                        }
 
index 333b70d..3f5ca8e 100644 (file)
@@ -302,11 +302,11 @@ class Preprocessor_Hash implements Preprocessor {
                                                $i = $lengthText;
                                        } else {
                                                // Search backwards for leading whitespace
-                                               $wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0;
+                                               $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;
 
                                                // Search forwards for trailing whitespace
                                                // $wsEnd will be the position of the last space (or the '>' if there's none)
-                                               $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
+                                               $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
 
                                                // Keep looking forward as long as we're finding more
                                                // comments.
@@ -316,7 +316,7 @@ class Preprocessor_Hash implements Preprocessor {
                                                        if ( $c === false ) {
                                                                break;
                                                        }
-                                                       $c = $c + 2 + strspn( $text, ' ', $c + 3 );
+                                                       $c = $c + 2 + strspn( $text, " \t", $c + 3 );
                                                        $comments[] = array( $wsEnd + 1, $c );
                                                        $wsEnd = $c;
                                                }
@@ -333,7 +333,7 @@ class Preprocessor_Hash implements Preprocessor {
                                                        $wsLength = $i - $wsStart;
                                                        if ( $wsLength > 0
                                                                && $accum->lastNode instanceof PPNode_Hash_Text
-                                                               && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) )
+                                                               && strspn( $accum->lastNode->value, " \t", -$wsLength ) === $wsLength )
                                                        {
                                                                $accum->lastNode->value = substr( $accum->lastNode->value, 0, -$wsLength );
                                                        }
index 128e25d..c648ff0 100644 (file)
@@ -5432,7 +5432,7 @@ Multiple list tags generated by templates
 !!end
 
 !!test
-Single-comment whitespace lines dont break lists, and so do multi-comment whitespace lines
+Single-comment whitespace lines dont break lists, and neither do multi-comment whitespace lines
 !!input
 *a
 <!--This line will NOT split the list-->
@@ -5450,6 +5450,26 @@ Single-comment whitespace lines dont break lists, and so do multi-comment whites
 
 !!end
 
+!!test
+Replacing whitespace with tabs still doesn't break the list (gerrit 78327)
+!!input
+*a
+<!--This line will NOT split the list-->       
+*b
+       <!--This line will NOT split the list either-->         
+*c
+       <!--foo--> <!---->      <!--This line NOT split the list
+        either-->       
+*d
+!!result
+<ul><li>a
+</li><li>b
+</li><li>c
+</li><li>d
+</li></ul>
+
+!!end
+
 !!test
 Test the li-hack
 (Cannot test this with PHP parser since it relies on Tidy for the hack)
index fb13118..7e9c9d4 100644 (file)
@@ -209,11 +209,11 @@ class PreprocessorTest extends MediaWikiTestCase {
                        array( "== h ==  <!--c1-->  <!--c2--><!--c3-->  ", "<root><h level=\"2\" i=\"1\">== h ==  <comment>&lt;!--c1--&gt;</comment>  <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment>  </h></root>" ),
                        array( "== h ==  <!--c1--><!--c2-->  <!--c3-->  ", "<root><h level=\"2\" i=\"1\">== h ==  <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment>  <comment>&lt;!--c3--&gt;</comment>  </h></root>" ),
                        array( "== h ==  <!--c1-->  <!--c2-->  <!--c3-->  ", "<root><h level=\"2\" i=\"1\">== h ==  <comment>&lt;!--c1--&gt;</comment>  <comment>&lt;!--c2--&gt;</comment>  <comment>&lt;!--c3--&gt;</comment>  </h></root>" ),
+                       array( "== h ==<!--c1-->        <!--c2-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment>     <comment>&lt;!--c2--&gt;</comment></h></root>" ),
+                       array( "== h ==         <!--c1-->       <!--c2-->", "<root><h level=\"2\" i=\"1\">== h ==       <comment>&lt;!--c1--&gt;</comment>      <comment>&lt;!--c2--&gt;</comment></h></root>" ),
+                       array( "== h ==<!--c1-->        <!--c2-->       ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment>      <comment>&lt;!--c2--&gt;</comment>      </h></root>" ),
 
                        /* These are not working: */
-                       array( "== h ==<!--c1-->        <!--c2-->", "<root>== h ==<comment>&lt;!--c1--&gt;</comment>    <comment>&lt;!--c2--&gt;</comment></root>" ),
-                       array( "== h ==         <!--c1-->       <!--c2-->", "<root>== h ==      <comment>&lt;!--c1--&gt;</comment>      <comment>&lt;!--c2--&gt;</comment></root>" ),
-                       array( "== h ==<!--c1-->        <!--c2-->       ", "<root>== h ==<comment>&lt;!--c1--&gt;</comment>     <comment>&lt;!--c2--&gt;</comment>      </root>" ),
                        array( "== h == x <!--c1--><!--c2--><!--c3-->  ", "<root>== h == x <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment>  </root>" ),
                        array( "== h ==<!--c1--> x <!--c2--><!--c3-->  ", "<root>== h ==<comment>&lt;!--c1--&gt;</comment> x <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment>  </root>" ),
                        array( "== h ==<!--c1--><!--c2--><!--c3--> x ", "<root>== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> x </root>" ),