From: Brion Vibber <brion@users.mediawiki.org>
Date: Tue, 6 Jun 2006 00:51:34 +0000 (+0000)
Subject: * (bug 2587) Fix for section editing with comment prefix
X-Git-Tag: 1.31.0-rc.0~56890
X-Git-Url: http://git.cyclocoop.org/%24image?a=commitdiff_plain;h=939ddd8793c4417b86f21b5ae7a16a9320972b53;p=lhc%2Fweb%2Fwiklou.git

* (bug 2587) Fix for section editing with comment prefix
* (bug 2607) Fix for section editing with mix of wiki and HTML headings
* (bug 3342) Fix for section editing with headings wrapped in <noinclude>
* (bug 3476) Fix for section editing with faux headings in extensions
* (bug 5272) Fix for section editing with HTML-heading subsections
* Fix for bogus wiki headings improperly detected with following text
* Fix for HTML headings improperly not detected with preceding/following text
* Section extraction and replacement functions merged into one implementation
  on the Parser object, so they can't get out of sync with each other.
---

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index c742a4b766..dbd708ff05 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -429,6 +429,16 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * Add 'SiteNoticeBefore' and 'SiteNoticeAfter' hooks
 * (bug 6182) Date passed in "sp-newimages-showfrom" not adjusted to user time
   preferences
+* (bug 2587) Fix for section editing with comment prefix
+* (bug 2607) Fix for section editing with mix of wiki and HTML headings
+* (bug 3342) Fix for section editing with headings wrapped in <noinclude>
+* (bug 3476) Fix for section editing with faux headings in extensions
+* (bug 5272) Fix for section editing with HTML-heading subsections
+* Fix for bogus wiki headings improperly detected with following text
+* Fix for HTML headings improperly not detected with preceding/following text
+* Section extraction and replacement functions merged into one implementation
+  on the Parser object, so they can't get out of sync with each other.
+
 
 == Compatibility ==
 
diff --git a/includes/Article.php b/includes/Article.php
index d6890a0d18..2b32885997 100644
--- a/includes/Article.php
+++ b/includes/Article.php
@@ -242,62 +242,8 @@ class Article {
 	 * @return string text of the requested section
 	 */
 	function getSection($text,$section) {
-
-		# strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
-		# comments to be stripped as well)
-		$striparray=array();
-		$parser=new Parser();
-		$parser->mOutputType=OT_WIKI;
-		$parser->mOptions = new ParserOptions();
-		$striptext=$parser->strip($text, $striparray, true);
-
-		# now that we can be sure that no pseudo-sections are in the source,
-		# split it up by section
-		$secs =
-		  preg_split(
-		  '/(^=+.+?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)(?!\S)/mi',
-		  $striptext, -1,
-		  PREG_SPLIT_DELIM_CAPTURE);
-		if($section==0) {
-			$rv=$secs[0];
-		} else {
-			$headline=$secs[$section*2-1];
-			preg_match( '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$headline,$matches);
-			$hlevel=$matches[1];
-
-			# translate wiki heading into level
-			if(strpos($hlevel,'=')!==false) {
-				$hlevel=strlen($hlevel);
-			}
-
-			$rv=$headline. $secs[$section*2];
-			$count=$section+1;
-
-			$break=false;
-			while(!empty($secs[$count*2-1]) && !$break) {
-
-				$subheadline=$secs[$count*2-1];
-				preg_match( '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$subheadline,$matches);
-				$subhlevel=$matches[1];
-				if(strpos($subhlevel,'=')!==false) {
-					$subhlevel=strlen($subhlevel);
-				}
-				if($subhlevel > $hlevel) {
-					$rv.=$subheadline.$secs[$count*2];
-				}
-				if($subhlevel <= $hlevel) {
-					$break=true;
-				}
-				$count++;
-
-			}
-		}
-		# reinsert stripped tags
-		$rv=$parser->unstrip($rv,$striparray);
-		$rv=$parser->unstripNoWiki($rv,$striparray);
-		$rv=trim($rv);
-		return $rv;
-
+		global $wgParser;
+		return $wgParser->getSection( $text, $section );
 	}
 
 	/**
@@ -1288,8 +1234,10 @@ class Article {
 	function replaceSection($section, $text, $summary = '', $edittime = NULL) {
 		$fname = 'Article::replaceSection';
 		wfProfileIn( $fname );
-
-		if ($section != '') {
+		
+		if( $section == '' ) {
+			// Whole-page edit; let the text through unmolested.
+		} else {
 			if( is_null( $edittime ) ) {
 				$rev = Revision::newFromTitle( $this->mTitle );
 			} else {
@@ -1307,71 +1255,11 @@ class Article {
 				if($summary) $subject="== {$summary} ==\n\n";
 				$text=$oldtext."\n\n".$subject.$text;
 			} else {
-
-				# strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
-				# comments to be stripped as well)
-				$striparray=array();
-				$parser=new Parser();
-				$parser->mOutputType=OT_WIKI;
-				$parser->mOptions = new ParserOptions();
-				$oldtext=$parser->strip($oldtext, $striparray, true);
-
-				# now that we can be sure that no pseudo-sections are in the source,
-				# split it up
-				# Unfortunately we can't simply do a preg_replace because that might
-				# replace the wrong section, so we have to use the section counter instead
-				$secs=preg_split('/(^=+.+?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)(?!\S)/mi',
-				  $oldtext,-1,PREG_SPLIT_DELIM_CAPTURE);
-				$secs[$section*2]=$text."\n\n"; // replace with edited
-
-				# section 0 is top (intro) section
-				if($section!=0) {
-
-					# headline of old section - we need to go through this section
-					# to determine if there are any subsections that now need to
-					# be erased, as the mother section has been replaced with
-					# the text of all subsections.
-					$headline=$secs[$section*2-1];
-					preg_match( '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$headline,$matches);
-					$hlevel=$matches[1];
-
-					# determine headline level for wikimarkup headings
-					if(strpos($hlevel,'=')!==false) {
-						$hlevel=strlen($hlevel);
-					}
-
-					$secs[$section*2-1]=''; // erase old headline
-					$count=$section+1;
-					$break=false;
-					while(!empty($secs[$count*2-1]) && !$break) {
-
-						$subheadline=$secs[$count*2-1];
-						preg_match(
-						 '/^(=+).+?=+|^<h([1-6]).*?>.*?<\/h[1-6].*?>(?!\S)/mi',$subheadline,$matches);
-						$subhlevel=$matches[1];
-						if(strpos($subhlevel,'=')!==false) {
-							$subhlevel=strlen($subhlevel);
-						}
-						if($subhlevel > $hlevel) {
-							// erase old subsections
-							$secs[$count*2-1]='';
-							$secs[$count*2]='';
-						}
-						if($subhlevel <= $hlevel) {
-							$break=true;
-						}
-						$count++;
-
-					}
-
-				}
-				$text=join('',$secs);
-				# reinsert the stuff that we stripped out earlier
-				$text=$parser->unstrip($text,$striparray);
-				$text=$parser->unstripNoWiki($text,$striparray);
+				global $wgParser;
+				$text = $wgParser->replaceSection( $oldtext, $section, $text );
 			}
-
 		}
+
 		wfProfileOut( $fname );
 		return $text;
 	}
diff --git a/includes/Parser.php b/includes/Parser.php
index 4afa23e68d..f141df2fa8 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -367,7 +367,7 @@ class Parser
 				$inside     = $p[4];
 			}
 
-			$marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++);
+			$marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
 			$stripped .= $marker;
 
 			if ( $close === '/>' ) {
@@ -883,7 +883,7 @@ class Parser
 		wfProfileIn( $fname );
 		for ( $i = 6; $i >= 1; --$i ) {
 			$h = str_repeat( '=', $i );
-			$text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
+			$text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
 			  "<h{$i}>\\1</h{$i}>\\2", $text );
 		}
 		wfProfileOut( $fname );
@@ -4209,6 +4209,165 @@ class Parser
 	 */
 	function getTags() { return array_keys( $this->mTagHooks ); }
 	/**#@-*/
+
+
+	/**
+	 * Break wikitext input into sections, and either pull or replace
+	 * some particular section's text.
+	 *
+	 * External callers should use the getSection and replaceSection methods.
+	 *
+	 * @param $text Page wikitext
+	 * @param $section Numbered section. 0 pulls the text before the first
+	 *                 heading; other numbers will pull the given section
+	 *                 along with its lower-level subsections.
+	 * @param $mode One of "get" or "replace"
+	 * @param $newtext Replacement text for section data.
+	 * @return string for "get", the extracted section text.
+	 *                for "replace", the whole page with the section replaced.
+	 */
+	private function extractSections( $text, $section, $mode, $newtext='' ) {
+		# strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+		# comments to be stripped as well)
+		$striparray = array();
+		
+		$oldOutputType = $this->mOutputType;
+		$oldOptions = $this->mOptions;
+		$this->mOptions = new ParserOptions();
+		$this->mOutputType = OT_WIKI;
+		
+		$striptext = $this->strip( $text, $striparray, true );
+		
+		$this->mOutputType = $oldOutputType;
+		$this->mOptions = $oldOptions;
+
+		# now that we can be sure that no pseudo-sections are in the source,
+		# split it up by section
+		$uniq = preg_quote( $this->uniqPrefix(), '/' );
+		$comment = "(?:$uniq-!--.*?QINU)";
+		$secs = preg_split(
+		/*
+			"/
+			^(
+			(?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+			(?:
+				(=+) # Should this be limited to 6?
+				.+?  # Section title...
+				\\2  # Ending = count must match start
+			|
+				^
+				<h([1-6])\b.*?>
+				.*?
+				<\/h\\3\s*>
+			)
+			(?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+			)$
+			/mix",
+		*/
+			"/
+			(
+				^
+				(?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+				(=+) # Should this be limited to 6?
+				.+?  # Section title...
+				\\2  # Ending = count must match start
+				(?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+				$
+			|
+				<h([1-6])\b.*?>
+				.*?
+				<\/h\\3\s*>
+			)
+			/mix",
+			$striptext, -1,
+			PREG_SPLIT_DELIM_CAPTURE);
+		
+		if( $mode == "get" ) {
+			if( $section == 0 ) {
+				// "Section 0" returns the content before any other section.
+				$rv = $secs[0];
+			} else {
+				$rv = "";
+			}
+		} elseif( $mode == "replace" ) {
+			if( $section == 0 ) {
+				$rv = $newtext . "\n\n";
+				$remainder = true;
+			} else {
+				$rv = $secs[0];
+				$remainder = false;
+			}
+		}
+		$count = 0;
+		$sectionLevel = 0;
+		for( $index = 1; $index < count( $secs ); ) {
+			$headerLine = $secs[$index++];
+			if( $secs[$index] ) {
+				// A wiki header
+				$headerLevel = strlen( $secs[$index++] );
+			} else {
+				// An HTML header
+				$index++;
+				$headerLevel = intval( $secs[$index++] );
+			}
+			$content = $secs[$index++];
+
+			$count++;
+			if( $mode == "get" ) {
+				if( $count == $section ) {
+					$rv = $headerLine . $content;
+					$sectionLevel = $headerLevel;
+				} elseif( $count > $section ) {
+					if( $sectionLevel && $headerLevel > $sectionLevel ) {
+						$rv .= $headerLine . $content;
+					} else {
+						// Hit a heading at the same or a higher level (or section 0 was requested); stop
+						break;
+					}
+				}
+			} elseif( $mode == "replace" ) {
+				if( $count < $section ) {
+					$rv .= $headerLine . $content;
+				} elseif( $count == $section ) {
+					$rv .= $newtext . "\n\n";
+					$sectionLevel = $headerLevel;
+				} elseif( $count > $section ) {
+					if( $headerLevel <= $sectionLevel ) {
+						// Passed the section's sub-parts.
+						$remainder = true;
+					}
+					if( $remainder ) {
+						$rv .= $headerLine . $content;
+					}
+				}
+			}
+		}
+		# reinsert stripped tags
+		$rv = $this->unstrip( $rv, $striparray );
+		$rv = $this->unstripNoWiki( $rv, $striparray );
+		$rv = trim( $rv );
+		return $rv;
+	}
+	
+	/**
+	 * This function returns the text of a section, specified by a number ($section).
+	 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
+	 * the first section before any such heading (section 0).
+	 *
+	 * If a section contains subsections, these are also returned.
+	 *
+	 * @param $text String: text to look in
+	 * @param $section Integer: section number
+	 * @return string text of the requested section
+	 */
+	function getSection( $text, $section ) {
+		return $this->extractSections( $text, $section, "get" );
+	}
+	
+	function replaceSection( $oldtext, $section, $text ) {
+		return $this->extractSections( $oldtext, $section, "replace", $text );
+	}
+
 }
 
 /**
diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc
index 22ddb8d564..e8dfb8f69c 100644
--- a/maintenance/parserTests.inc
+++ b/maintenance/parserTests.inc
@@ -269,9 +269,13 @@ class ParserTest {
 			$out = $parser->preSaveTransform( $input, $title, $user, $options );
 		} elseif (preg_match('/\\bmsg\\b/i', $opts)) {
 			$out = $parser->transformMsg( $input, $options );
-		} elseif( preg_match( '/\\bsection=(\d+)\b/', $opts, $matches ) ) {
+		} elseif( preg_match( '/\\bsection=(\d+)\b/i', $opts, $matches ) ) {
 			$section = intval( $matches[1] );
-			$out = Article::getSection( $input, $section );
+			$out = $parser->getSection( $input, $section );
+		} elseif( preg_match( '/\\breplace=(\d+),"(.*?)"/i', $opts, $matches ) ) {
+			$section = intval( $matches[1] );
+			$replace = $matches[2];
+			$out = $parser->replaceSection( $input, $section, $replace );
 		} else {
 			$output = $parser->parse( $input, $title, $options, true, true, 1337 );
 			$out = $output->getText();
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index 7662627dc0..72ee3ed259 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -4839,6 +4839,437 @@ section=1
 <h3>aa</h3>
 !! end
 
+!! test
+Section extraction, HTML headings should be ignored in extensions (bug 3476)
+!! options
+section=2
+!! input
+<h2>a</h2>
+<tag>
+<h2>not b</h2>
+</tag>
+<h2>b</h2>
+!! result
+<h2>b</h2>
+!! end
+
+!! test
+Section replacement test (section 0)
+!! options
+replace=0,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+xxx
+
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 1)
+!! options
+replace=1,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+xxx
+
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 2)
+!! options
+replace=2,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+xxx
+
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 3)
+!! options
+replace=3,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+xxx
+
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 4)
+!! options
+replace=4,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+xxx
+
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 5)
+!! options
+replace=5,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+xxx
+
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 6)
+!! options
+replace=6,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+xxx
+
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 7)
+!! options
+replace=7,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+xxx
+
+===bc===
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 8)
+!! options
+replace=8,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+xxx
+
+==c==
+===ca===
+!! end
+
+!! test
+Section replacement test (section 9)
+!! options
+replace=9,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+xxx
+!! end
+
+!! test
+Section replacement test (section 10)
+!! options
+replace=10,"xxx"
+!! input
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+===ca===
+!! result
+start
+==a==
+===aa===
+====aaa====
+==b==
+===ba===
+===bb===
+====bba====
+===bc===
+==c==
+xxx
+!! end
+
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 0)
+!! options
+section=0
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+!! end
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 1)
+!! options
+section=1
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+!! end
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 2)
+!! options
+section=2
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i>
+!! end
+
+!! test
+Section extraction, HTML headings not at line boundaries (section 3)
+!! options
+section=3
+!! input
+<h2>Evil</h2><i>blah blah blah</i>
+
+evil blah
+
+<h2>Nice</h2>
+
+nice blah
+
+<i>extra evil</i><h2>Extra nasty</h2>
+
+extra nasty
+!! result
+<h2>Extra nasty</h2>
+
+extra nasty
+!! end
 
 #
 #