From dfecc217abfb45c3a77100c4c4b58dac7b01d6e0 Mon Sep 17 00:00:00 2001 From: Erik Moeller Date: Wed, 28 Apr 2004 04:50:35 +0000 Subject: [PATCH] fix longstanding bug with section editing where section headers within or tags would cause it to miscount section numbers. Parser::extractTags modified to allow stripping HTML comments as well. Note: Presently HTML comments are completeley removed from the output using preg_replace. Should they ever be rendered instead, Parser::extractTags should be used. --- includes/Article.php | 65 +++++++++++++++++++++++++++----------- includes/Parser.php | 74 +++++++++++++++++++++++++++++--------------- 2 files changed, 96 insertions(+), 43 deletions(-) diff --git a/includes/Article.php b/includes/Article.php index 3289dd14b0..ef285db046 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -70,7 +70,7 @@ class Article { $action = $wgRequest->getText( 'action', 'view' ); $section = $wgRequest->getText( 'section' ); - $fname = "Article::getContent"; + $fname = "Article::getContent"; wfProfileIn( $fname ); if ( 0 == $this->getID() ) { @@ -82,34 +82,50 @@ class Article { return wfMsg( "noarticletext" ); } else { $this->loadContent( $noredir ); - + if( # check if we're displaying a [[User talk:x.x.x.x]] anonymous talk page ( $this->mTitle->getNamespace() == Namespace::getTalk( Namespace::getUser()) ) && preg_match("/^\d{1,3}\.\d{1,3}.\d{1,3}\.\d{1,3}$/",$this->mTitle->getText()) && $action=="view" - ) + ) { wfProfileOut( $fname ); return $this->mContent . "\n" .wfMsg("anontalkpagetext"); } - else { + else { if($action=="edit") { if($section!="") { - if($section=="new") { + if($section=="new") { wfProfileOut( $fname ); - return ""; + return ""; } - $secs=preg_split("/(^=+.*?=+|^.*?<\/h[1-6].*?>)/mi", - $this->mContent, -1, - PREG_SPLIT_DELIM_CAPTURE); + # strip NOWIKI etc. 
to avoid confusion (true-parameter causes HTML + # comments to be stripped as well) + $striparray=array(); + $parser=new Parser(); + $parser->mOutputType=OT_WIKI; + $striptext=$parser->strip($this->mContent, $striparray, true); + + # now that we can be sure that no pseudo-sections are in the source, + # split it up by section + $secs = + preg_split( + "/(^=+.*?=+|^.*?<\/h[1-6].*?>)/mi", + $striptext, -1, + PREG_SPLIT_DELIM_CAPTURE); + if($section==0) { - wfProfileOut( $fname ); - return trim($secs[0]); + $rv=$secs[0]; } else { - wfProfileOut( $fname ); - return trim($secs[$section*2-1] . $secs[$section*2]); + $rv=$secs[$section*2-1] . $secs[$section*2]; } + + # reinsert stripped tags + $rv=$parser->unstrip($rv,$striparray); + $rv=trim($rv); + wfProfileOut( $fname ); + return $rv; } } wfProfileOut( $fname ); @@ -117,12 +133,12 @@ class Article { } } } - + # Load the revision (including cur_text) into this object function loadContent( $noredir = false ) { global $wgOut, $wgMwRedir, $wgRequest; - + # Query variables :P $oldid = $wgRequest->getVal( 'oldid' ); $redirect = $wgRequest->getVal( 'redirect' ); @@ -131,12 +147,12 @@ class Article { $fname = "Article::loadContent"; # Pre-fill content with error message so that if something - # fails we'll have something telling us what we intended. + # fails we'll have something telling us what we intended. $t = $this->mTitle->getPrefixedText(); if ( isset( $oldid ) ) { $oldid = IntVal( $oldid ); - $t .= ",oldid={$oldid}"; + $t .= ",oldid={$oldid}"; } if ( isset( $redirect ) ) { $redirect = ($redirect == "no") ? "no" : "yes"; @@ -558,11 +574,24 @@ class Article { if($summary) $subject="== {$summary} ==\n\n"; $text=$oldtext."\n\n".$subject.$text; } else { + + # strip NOWIKI etc. 
to avoid confusion (true-parameter causes HTML + # comments to be stripped as well) + $striparray=array(); + $parser=new Parser(); + $parser->mOutputType=OT_WIKI; + $oldtext=$parser->strip($oldtext, $striparray, true); + + # now that we can be sure that no pseudo-sections are in the source, + # split it up $secs=preg_split("/(^=+.*?=+|^.*?<\/h[1-6].*?>)/mi", $oldtext,-1,PREG_SPLIT_DELIM_CAPTURE); $secs[$section*2]=$text."\n\n"; // replace with edited if($section) { $secs[$section*2-1]=""; } // erase old headline - $text=join("",$secs); + $text=join("",$secs); + + # reinsert the stuff that we stripped out earlier + $text=$parser->unstrip($text,$striparray,true); } } return $text; diff --git a/includes/Parser.php b/includes/Parser.php index 5f1043a007..942621c1eb 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -44,6 +44,12 @@ define( "OT_HTML", 1 ); define( "OT_WIKI", 2 ); define( "OT_MSG", 3 ); +# string parameter for extractTags which will cause it +# to strip HTML comments in addition to regular +# -style tags. This should not be anything we +# may want to use in wikisyntax +define( "STRIP_COMMENTS", "HTMLCommentStrip" ); + # prefix for escaping, used in two functions at least define( "UNIQ_PREFIX", "NaodW29"); @@ -127,6 +133,9 @@ class Parser # If $content is already set, the additional entries will be appended + # If $tag is set to STRIP_COMMENTS, the function will extract + # + /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){ $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString(); if ( !$content ) { @@ -136,12 +145,20 @@ class Parser $stripped = ""; while ( "" != $text ) { - $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 ); + if($tag==STRIP_COMMENTS) { + $p = preg_split( "//i", $p[1], 2 ); + } else { + $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 ); + } $marker = $rnd . sprintf("%08X", $n++); $content[$marker] = $q[0]; $stripped .= $marker; @@ -151,18 +168,23 @@ class Parser return $stripped; } - # Strips ,
 and 
+	# Strips and renders <nowiki>, <pre>, <math>, <hiero>
+	# If $render is set, performs necessary rendering operations on plugins
 	# Returns the text, and fills an array with data needed in unstrip()
 	# If the $state is already a valid strip state, it adds to the state
-	#
-	function strip( $text, &$state )
+
+	# When $stripcomments is set, HTML comments <!-- like this -->
+	# will be stripped in addition to other tags. This is important
+	# for section editing, where these comments cause confusion when
+	# counting the sections in the wikisource
+	function strip( $text, &$state, $stripcomments = false )
 	{
 		$render = ($this->mOutputType == OT_HTML);
 		$nowiki_content = array();
 		$hiero_content = array();
 		$math_content = array();
 		$pre_content = array();
-		$item_content = array();
+		$comment_content = array();
 
 		# Replace any instances of the placeholders
 		$uniq_prefix = UNIQ_PREFIX;
@@ -177,25 +199,21 @@ class Parser
 			}
 		}
 
-		if( $GLOBALS['wgUseWikiHiero'] ){
-			$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
-			foreach( $hiero_content as $marker => $content ){
-				if( $render ){
-					$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
-				} else {
-					$hiero_content[$marker] = "$content";
-				}
+		$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+		foreach( $hiero_content as $marker => $content ){
+			if( $render && $GLOBALS['wgUseWikiHiero']){
+				$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+			} else {
+				$hiero_content[$marker] = "$content";
 			}
 		}
 
-		if( $this->mOptions->getUseTeX() ){
-			$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
-			foreach( $math_content as $marker => $content ){
-				if( $render ){
-					$math_content[$marker] = renderMath( $content );
-				} else {
-					$math_content[$marker] = "$content";
-				}
+		$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+		foreach( $math_content as $marker => $content ){
+			if( $render && $this->mOptions->getUseTeX() ){
+				$math_content[$marker] = renderMath( $content );
+			} else {
+				$math_content[$marker] = "$content";
 			}
 		}
 
@@ -207,6 +225,12 @@ class Parser
 				$pre_content[$marker] = "
$content
"; } } + if($stripcomments) { + $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix); + foreach( $comment_content as $marker => $content ){ + $comment_content[$marker] = ""; + } + } # Merge state with the pre-existing state, if there is one if ( $state ) { @@ -214,13 +238,14 @@ class Parser $state['hiero'] = $state['hiero'] + $hiero_content; $state['math'] = $state['math'] + $math_content; $state['pre'] = $state['pre'] + $pre_content; + $state['comment'] = $state['comment'] + $comment_content; } else { $state = array( 'nowiki' => $nowiki_content, 'hiero' => $hiero_content, 'math' => $math_content, 'pre' => $pre_content, - 'item' => $item_content + 'comment' => $comment_content ); } return $text; @@ -251,8 +276,7 @@ class Parser 'nowiki' => array(), 'hiero' => array(), 'math' => array(), - 'pre' => array(), - 'item' => array() + 'pre' => array() ); } $state['item'][$rnd] = $text; -- 2.20.1