* (bug 2587) Fix for section editing with comment prefix
author Brion Vibber <brion@users.mediawiki.org>
Tue, 6 Jun 2006 00:51:34 +0000 (00:51 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Tue, 6 Jun 2006 00:51:34 +0000 (00:51 +0000)
* (bug 2607) Fix for section editing with mix of wiki and HTML headings
* (bug 3342) Fix for section editing with headings wrapped in <noinclude>
* (bug 3476) Fix for section editing with faux headings in extensions
* (bug 5272) Fix for section editing with HTML-heading subsections
* Fix for bogus wiki headings improperly detected with following text
* Fix for HTML headings improperly not detected with preceding/following text
* Section extraction and replacement functions merged into one implementation
  on the Parser object, so they can't get out of sync with each other.

RELEASE-NOTES
includes/Article.php
includes/Parser.php
maintenance/parserTests.inc
maintenance/parserTests.txt

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index c742a4b..dbd708f 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -429,6 +429,16 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * Add 'SiteNoticeBefore' and 'SiteNoticeAfter' hooks
 * (bug 6182) Date passed in "sp-newimages-showfrom" not adjusted to user time
   preferences
+* (bug 2587) Fix for section editing with comment prefix
+* (bug 2607) Fix for section editing with mix of wiki and HTML headings
+* (bug 3342) Fix for section editing with headings wrapped in <noinclude>
+* (bug 3476) Fix for section editing with faux headings in extensions
+* (bug 5272) Fix for section editing with HTML-heading subsections
+* Fix for bogus wiki headings improperly detected with following text
+* Fix for HTML headings improperly not detected with preceding/following text
+* Section extraction and replacement functions merged into one implementation
+  on the Parser object, so they can't get out of sync with each other.
+
 
 == Compatibility ==
 
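diff --git a/includes/Article.php b/includes/Article.php
index d6890a0..2b32885 100644
--- a/includes/Article.php
+++ b/includes/Article.php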
@@ -242,62 +242,8 @@ class Article {
         * @return string text of the requested section
         */
        function getSection($text,$section) {
-
-               # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
-               # comments to be stripped as well)
-               $striparray=array();
-               $parser=new Parser();
-               $parser->mOutputType=OT_WIKI;
-               $parser->mOptions = new ParserOptions();
-               $striptext=$parser->strip($text, $striparray, true);
-
-               # now that we can be sure that no pseudo-sections are in the source,
-               # split it up by section
-               $secs =
-                 preg_split(
-                 '/(^=+.+?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)(?!\S)/mi',
-                 $striptext, -1,
-                 PREG_SPLIT_DELIM_CAPTURE);
-               if($section==0) {
-                       $rv=$secs[0];
-               } else {
-                       $headline=$secs[$section*2-1];
-                       preg_match( '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$headline,$matches);
-                       $hlevel=$matches[1];
-
-                       # translate wiki heading into level
-                       if(strpos($hlevel,'=')!==false) {
-                               $hlevel=strlen($hlevel);
-                       }
-
-                       $rv=$headline. $secs[$section*2];
-                       $count=$section+1;
-
-                       $break=false;
-                       while(!empty($secs[$count*2-1]) && !$break) {
-
-                               $subheadline=$secs[$count*2-1];
-                               preg_match( '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$subheadline,$matches);
-                               $subhlevel=$matches[1];
-                               if(strpos($subhlevel,'=')!==false) {
-                                       $subhlevel=strlen($subhlevel);
-                               }
-                               if($subhlevel > $hlevel) {
-                                       $rv.=$subheadline.$secs[$count*2];
-                               }
-                               if($subhlevel <= $hlevel) {
-                                       $break=true;
-                               }
-                               $count++;
-
-                       }
-               }
-               # reinsert stripped tags
-               $rv=$parser->unstrip($rv,$striparray);
-               $rv=$parser->unstripNoWiki($rv,$striparray);
-               $rv=trim($rv);
-               return $rv;
-
+               global $wgParser;
+               return $wgParser->getSection( $text, $section );
        }
 
        /**
@@ -1288,8 +1234,10 @@ class Article {
        function replaceSection($section, $text, $summary = '', $edittime = NULL) {
                $fname = 'Article::replaceSection';
                wfProfileIn( $fname );
-
-               if ($section != '') {
+               
+               if( $section == '' ) {
+                       // Whole-page edit; let the text through unmolested.
+               } else {
                        if( is_null( $edittime ) ) {
                                $rev = Revision::newFromTitle( $this->mTitle );
                        } else {
@@ -1307,71 +1255,11 @@ class Article {
                                if($summary) $subject="== {$summary} ==\n\n";
                                $text=$oldtext."\n\n".$subject.$text;
                        } else {
-
-                               # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
-                               # comments to be stripped as well)
-                               $striparray=array();
-                               $parser=new Parser();
-                               $parser->mOutputType=OT_WIKI;
-                               $parser->mOptions = new ParserOptions();
-                               $oldtext=$parser->strip($oldtext, $striparray, true);
-
-                               # now that we can be sure that no pseudo-sections are in the source,
-                               # split it up
-                               # Unfortunately we can't simply do a preg_replace because that might
-                               # replace the wrong section, so we have to use the section counter instead
-                               $secs=preg_split('/(^=+.+?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)(?!\S)/mi',
-                                 $oldtext,-1,PREG_SPLIT_DELIM_CAPTURE);
-                               $secs[$section*2]=$text."\n\n"; // replace with edited
-
-                               # section 0 is top (intro) section
-                               if($section!=0) {
-
-                                       # headline of old section - we need to go through this section
-                                       # to determine if there are any subsections that now need to
-                                       # be erased, as the mother section has been replaced with
-                                       # the text of all subsections.
-                                       $headline=$secs[$section*2-1];
-                                       preg_match( '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$headline,$matches);
-                                       $hlevel=$matches[1];
-
-                                       # determine headline level for wikimarkup headings
-                                       if(strpos($hlevel,'=')!==false) {
-                                               $hlevel=strlen($hlevel);
-                                       }
-
-                                       $secs[$section*2-1]=''; // erase old headline
-                                       $count=$section+1;
-                                       $break=false;
-                                       while(!empty($secs[$count*2-1]) && !$break) {
-
-                                               $subheadline=$secs[$count*2-1];
-                                               preg_match(
-                                                '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$subheadline,$matches);
-                                               $subhlevel=$matches[1];
-                                               if(strpos($subhlevel,'=')!==false) {
-                                                       $subhlevel=strlen($subhlevel);
-                                               }
-                                               if($subhlevel > $hlevel) {
-                                                       // erase old subsections
-                                                       $secs[$count*2-1]='';
-                                                       $secs[$count*2]='';
-                                               }
-                                               if($subhlevel <= $hlevel) {
-                                                       $break=true;
-                                               }
-                                               $count++;
-
-                                       }
-
-                               }
-                               $text=join('',$secs);
-                               # reinsert the stuff that we stripped out earlier
-                               $text=$parser->unstrip($text,$striparray);
-                               $text=$parser->unstripNoWiki($text,$striparray);
+                               global $wgParser;
+                               $text = $wgParser->replaceSection( $oldtext, $section, $text );
                        }
-
                }
+
                wfProfileOut( $fname );
                return $text;
        }
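diff --git a/includes/Parser.php b/includes/Parser.php
index 4afa23e..f141df2 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php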
@@ -367,7 +367,7 @@ class Parser
                                $inside     = $p[4];
                        }
 
-                       $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++);
+                       $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
                        $stripped .= $marker;
 
                        if ( $close === '/>' ) {
@@ -883,7 +883,7 @@ class Parser
                wfProfileIn( $fname );
                for ( $i = 6; $i >= 1; --$i ) {
                        $h = str_repeat( '=', $i );
-                       $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
+                       $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
                          "<h{$i}>\\1</h{$i}>\\2", $text );
                }
                wfProfileOut( $fname );
@@ -4209,6 +4209,165 @@ class Parser
         */
        function getTags() { return array_keys( $this->mTagHooks ); }
        /**#@-*/
+
+
+       /**
+        * Break wikitext input into sections, and either pull or replace
+        * some particular section's text.
+        *
+        * External callers should use the getSection and replaceSection methods.
+        *
+        * @param $text Page wikitext
+        * @param $section Numbered section. 0 pulls the text before the first
+        *                 heading; other numbers will pull the given section
+        *                 along with its lower-level subsections.
+        * @param $mode One of "get" or "replace"
+        * @param $newtext Replacement text for section data.
+        * @return string for "get", the extracted section text.
+        *                for "replace", the whole page with the section replaced.
+        */
+       private function extractSections( $text, $section, $mode, $newtext='' ) {
+               # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+               # comments to be stripped as well)
+               $striparray = array();
+               
+               $oldOutputType = $this->mOutputType;
+               $oldOptions = $this->mOptions;
+               $this->mOptions = new ParserOptions();
+               $this->mOutputType = OT_WIKI;
+               
+               $striptext = $this->strip( $text, $striparray, true );
+               
+               $this->mOutputType = $oldOutputType;
+               $this->mOptions = $oldOptions;
+
+               # now that we can be sure that no pseudo-sections are in the source,
+               # split it up by section
+               $uniq = preg_quote( $this->uniqPrefix(), '/' );
+               $comment = "(?:$uniq-!--.*?QINU)";
+               $secs = preg_split(
+               /*
+                       "/
+                       ^(
+                       (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+                       (?:
+                               (=+) # Should this be limited to 6?
+                               .+?  # Section title...
+                               \\2  # Ending = count must match start
+                       |
+                               ^
+                               <h([1-6])\b.*?>
+                               .*?
+                               <\/h\\3\s*>
+                       )
+                       (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+                       )$
+                       /mix",
+               */
+                       "/
+                       (
+                               ^
+                               (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+                               (=+) # Should this be limited to 6?
+                               .+?  # Section title...
+                               \\2  # Ending = count must match start
+                               (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+                               $
+                       |
+                               <h([1-6])\b.*?>
+                               .*?
+                               <\/h\\3\s*>
+                       )
+                       /mix",
+                       $striptext, -1,
+                       PREG_SPLIT_DELIM_CAPTURE);
+               
+               if( $mode == "get" ) {
+                       if( $section == 0 ) {
+                               // "Section 0" returns the content before any other section.
+                               $rv = $secs[0];
+                       } else {
+                               $rv = "";
+                       }
+               } elseif( $mode == "replace" ) {
+                       if( $section == 0 ) {
+                               $rv = $newtext . "\n\n";
+                               $remainder = true;
+                       } else {
+                               $rv = $secs[0];
+                               $remainder = false;
+                       }
+               }
+               $count = 0;
+               $sectionLevel = 0;
+               for( $index = 1; $index < count( $secs ); ) {
+                       $headerLine = $secs[$index++];
+                       if( $secs[$index] ) {
+                               // A wiki header
+                               $headerLevel = strlen( $secs[$index++] );
+                       } else {
+                               // An HTML header
+                               $index++;
+                               $headerLevel = intval( $secs[$index++] );
+                       }
+                       $content = $secs[$index++];
+
+                       $count++;
+                       if( $mode == "get" ) {
+                               if( $count == $section ) {
+                                       $rv = $headerLine . $content;
+                                       $sectionLevel = $headerLevel;
+                               } elseif( $count > $section ) {
+                                       if( $sectionLevel && $headerLevel > $sectionLevel ) {
+                                               $rv .= $headerLine . $content;
+                                       } else {
+                                               // Broke out to a higher-level section
+                                               break;
+                                       }
+                               }
+                       } elseif( $mode == "replace" ) {
+                               if( $count < $section ) {
+                                       $rv .= $headerLine . $content;
+                               } elseif( $count == $section ) {
+                                       $rv .= $newtext . "\n\n";
+                                       $sectionLevel = $headerLevel;
+                               } elseif( $count > $section ) {
+                                       if( $headerLevel <= $sectionLevel ) {
+                                               // Passed the section's sub-parts.
+                                               $remainder = true;
+                                       }
+                                       if( $remainder ) {
+                                               $rv .= $headerLine . $content;
+                                       }
+                               }
+                       }
+               }
+               # reinsert stripped tags
+               $rv = $this->unstrip( $rv, $striparray );
+               $rv = $this->unstripNoWiki( $rv, $striparray );
+               $rv = trim( $rv );
+               return $rv;
+       }
+       
+       /**
+        * This function returns the text of a section, specified by a number ($section).
+        * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
+        * the first section before any such heading (section 0).
+        *
+        * If a section contains subsections, these are also returned.
+        *
+        * @param $text String: text to look in
+        * @param $section Integer: section number
+        * @return string text of the requested section
+        */
+       function getSection( $text, $section ) {
+               return $this->extractSections( $text, $section, "get" );
+       }
+       
+       function replaceSection( $oldtext, $section, $text ) {
+               return $this->extractSections( $oldtext, $section, "replace", $text );
+       }
+
 }
 
 /**
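diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc
index 22ddb8d..e8dfb8f 100644
--- a/maintenance/parserTests.inc
+++ b/maintenance/parserTests.inc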
@@ -269,9 +269,13 @@ class ParserTest {
                        $out = $parser->preSaveTransform( $input, $title, $user, $options );
                } elseif (preg_match('/\\bmsg\\b/i', $opts)) {
                        $out = $parser->transformMsg( $input, $options );
-               } elseif( preg_match( '/\\bsection=(\d+)\b/', $opts, $matches ) ) {
+               } elseif( preg_match( '/\\bsection=(\d+)\b/i', $opts, $matches ) ) {
                        $section = intval( $matches[1] );
-                       $out = Article::getSection( $input, $section );
+                       $out = $parser->getSection( $input, $section );
+               } elseif( preg_match( '/\\breplace=(\d+),"(.*?)"/i', $opts, $matches ) ) {
+                       $section = intval( $matches[1] );
+                       $replace = $matches[2];
+                       $out = $parser->replaceSection( $input, $section, $replace );
                } else {
                        $output = $parser->parse( $input, $title, $options, true, true, 1337 );
                        $out = $output->getText();
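diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index 7662627..72ee3ed 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt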
@@ -4839,6 +4839,437 @@ section=1
 <h3>aa</h3>
 !! end
 
+!! test
+Section extraction, HTML headings should be ignored in extensions (bug 3476)
+!! options
+section=2
+!! input
+<h2>a</h2>
+<tag>
+<h2>not b</h2>
+</tag>
+<h2>b</h2>
+!! result
+<h2>b</h2>
+!! end
+
+!! test
+Section replacement test (section 0)
+!! options
+replace=0,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+xxx
+
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 1)
+!! options
+replace=1,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+xxx
+
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 2)
+!! options
+replace=2,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+xxx
+
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 3)
+!! options
+replace=3,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+xxx
+
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 4)
+!! options
+replace=4,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+xxx
+
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 5)
+!! options
+replace=5,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+xxx
+
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 6)
+!! options
+replace=6,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+xxx
+
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 7)
+!! options
+replace=7,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+xxx
+
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 8)
+!! options
+replace=8,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+xxx
+
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 9)
+!! options
+replace=9,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+xxx
+!! end
+
+!! test
+Section replacement test (section 10)
+!! options
+replace=10,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+xxx
+!! end
+
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 0)
+!! options
+section=0
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+!! end
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 1)
+!! options
+section=1
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+!! end
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 2)
+!! options
+section=2
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i>
+!! end
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 3)
+!! options
+section=3
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+<h2>Extra nasty</h2>
+
+extra nasty
+!! end
 
 #
 #