Create Parser::stripOuterParagraph to avoid code duplication
authorBartosz Dziewoński <matma.rex@gmail.com>
Sat, 22 Feb 2014 15:21:36 +0000 (16:21 +0100)
committerBrad Jorsch <bjorsch@wikimedia.org>
Thu, 15 May 2014 16:20:19 +0000 (12:20 -0400)
We've had the logic for stripping the outer <p/> element in three
separate places. The version in OutputPage was missing the '$' at the
end of the regex, that was most likely a mistake caused by the
duplication.

Also, extend the logic in order not to generate invalid HTML if the
input contains more than one <p/> tag. Added tests for this and the
previous behaviour.

https://www.mail-archive.com/mediawiki-api@lists.wikimedia.org/msg03188.html

Change-Id: I6bb3597898324556df912a23a7ffc9ff250b8f58

includes/GlobalFunctions.php
includes/Message.php
includes/OutputPage.php
includes/parser/Parser.php
tests/phpunit/includes/parser/ParserMethodsTest.php

index ed99aa9..9c6feb2 100644 (file)
@@ -1726,10 +1726,7 @@ function wfMsgExt( $key, $options ) {
                }
 
                if ( $parseInline ) {
-                       $m = array();
-                       if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $string, $m ) ) {
-                               $string = $m[1];
-                       }
+                       $string = Parser::stripOuterParagraph( $string );
                }
        } elseif ( in_array( 'parsemag', $options, true ) ) {
                $string = $messageCache->transform( $string,
index 8d4058e..826d55b 100644 (file)
@@ -659,10 +659,7 @@ class Message {
                # Maybe transform using the full parser
                if ( $this->format === 'parse' ) {
                        $string = $this->parseText( $string );
-                       $m = array();
-                       if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $string, $m ) ) {
-                               $string = $m[1];
-                       }
+                       $string = Parser::stripOuterParagraph( $string );
                } elseif ( $this->format === 'block-parse' ) {
                        $string = $this->parseText( $string );
                } elseif ( $this->format === 'text' ) {
index 16e070c..72869e4 100644 (file)
@@ -1748,13 +1748,7 @@ class OutputPage extends ContextSource {
         */
        public function parseInline( $text, $linestart = true, $interface = false ) {
                $parsed = $this->parse( $text, $linestart, $interface );
-
-               $m = array();
-               if ( preg_match( '/^<p>(.*)\n?<\/p>\n?/sU', $parsed, $m ) ) {
-                       $parsed = $m[1];
-               }
-
-               return $parsed;
+               return Parser::stripOuterParagraph( $parsed );
        }
 
        /**
index 2066580..ef6c079 100644 (file)
@@ -6400,4 +6400,25 @@ class Parser {
 
                return $recursiveCheck;
        }
+
+       /**
+        * Strip outer <p></p> tag from the HTML source of a single paragraph.
+        *
+        * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
+        * or if there is more than one <p/> tag in the input HTML.
+        *
+        * @param string $html
+        * @return string
+        * @since 1.23
+        */
+       public static function stripOuterParagraph( $html ) {
+               $m = array();
+               if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
+                       if ( strpos( $m[1], '</p>' ) === false ) {
+                               $html = $m[1];
+                       }
+               }
+
+               return $html;
+       }
 }
index 4ea8fc6..29af2c2 100644 (file)
@@ -29,6 +29,42 @@ class ParserMethodsTest extends MediaWikiLangTestCase {
                $this->assertEquals( $expected, $text );
        }
 
+       public static function provideStripOuterParagraph() {
+               // This mimics the most common use case (stripping paragraphs generated by the parser).
+               $message = new RawMessage( "Message text." );
+
+               return array(
+                       array(
+                               "<p>Text.</p>",
+                               "Text.",
+                       ),
+                       array(
+                               "<p class='foo'>Text.</p>",
+                               "<p class='foo'>Text.</p>",
+                       ),
+                       array(
+                               "<p>Text.\n</p>\n",
+                               "Text.",
+                       ),
+                       array(
+                               "<p>Text.</p><p>More text.</p>",
+                               "<p>Text.</p><p>More text.</p>",
+                       ),
+                       array(
+                               $message->parse(),
+                               "Message text.",
+                       ),
+               );
+       }
+
+       /**
+        * @dataProvider provideStripOuterParagraph
+        * @covers Parser::stripOuterParagraph
+        */
+       public function testStripOuterParagraph( $text, $expected ) {
+               $this->assertEquals( $expected, Parser::stripOuterParagraph( $text ) );
+       }
+
        /**
         * @expectedException MWException
         * @expectedExceptionMessage Parser state cleared while parsing. Did you call Parser::parse recursively?