fix longstanding bug with section editing where section headers within <nowiki>, <pre>, <math>, <hiero> or HTML comments were counted as real sections

author Erik Moeller <erik@users.mediawiki.org>

Wed, 28 Apr 2004 04:50:35 +0000 (04:50 +0000)

committer Erik Moeller <erik@users.mediawiki.org>

Wed, 28 Apr 2004 04:50:35 +0000 (04:50 +0000)
author Erik Moeller <erik@users.mediawiki.org>
Wed, 28 Apr 2004 04:50:35 +0000 (04:50 +0000)
committer Erik Moeller <erik@users.mediawiki.org>
Wed, 28 Apr 2004 04:50:35 +0000 (04:50 +0000)
diff --git a/includes/Article.php b/includes/Article.php

index 3289dd1..ef285db 100644 (file)
--- a/includes/Article.php
+++ b/includes/Article.php
@@ -70,7 +70,7 @@ class Article {
                 $action = $wgRequest->getText( 'action', 'view' );
                 $section = $wgRequest->getText( 'section' );
  
-               $fname =  "Article::getContent"; 
+               $fname =  "Article::getContent";
                 wfProfileIn( $fname );
  
                 if ( 0 == $this->getID() ) {
@@ -82,34 +82,50 @@ class Article {
                         return wfMsg( "noarticletext" );
                 } else {
                         $this->loadContent( $noredir );
-                                               
+
                         if(
                                 # check if we're displaying a [[User talk:x.x.x.x]] anonymous talk page
                                 ( $this->mTitle->getNamespace() == Namespace::getTalk( Namespace::getUser()) ) &&
                                   preg_match("/^\d{1,3}\.\d{1,3}.\d{1,3}\.\d{1,3}$/",$this->mTitle->getText()) &&
                                   $action=="view"
-                               ) 
+                               )
                                 {
                                 wfProfileOut( $fname );
                                 return $this->mContent . "\n" .wfMsg("anontalkpagetext"); }
-                       else {                          
+                       else {
                                 if($action=="edit") {
                                         if($section!="") {
-                                               if($section=="new") { 
+                                               if($section=="new") {
                                                         wfProfileOut( $fname );
-                                                       return ""; 
+                                                       return "";
                                                 }
  
-                                               $secs=preg_split("/(^=+.*?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)/mi",
-                                                $this->mContent, -1,
-                                                PREG_SPLIT_DELIM_CAPTURE);
+                                               # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+                                               # comments to be stripped as well)
+                                               $striparray=array();
+                                               $parser=new Parser();
+                                               $parser->mOutputType=OT_WIKI;
+                                               $striptext=$parser->strip($this->mContent, $striparray, true);
+
+                                               # now that we can be sure that no pseudo-sections are in the source,
+                                               # split it up by section
+                                               $secs =
+                                                 preg_split(
+                                                 "/(^=+.*?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)/mi",
+                                                 $striptext, -1,
+                                                 PREG_SPLIT_DELIM_CAPTURE);
+
                                                 if($section==0) {
-                                                       wfProfileOut( $fname );
-                                                       return trim($secs[0]);
+                                                       $rv=$secs[0];
                                                 } else {
-                                                       wfProfileOut( $fname );
-                                                       return trim($secs[$section*2-1] . $secs[$section*2]);
+                                                       $rv=$secs[$section*2-1] . $secs[$section*2];
                                                 }
+
+                                               # reinsert stripped tags
+                                               $rv=$parser->unstrip($rv,$striparray);
+                                               $rv=trim($rv);
+                                               wfProfileOut( $fname );
+                                               return $rv;
                                         }
                                 }
                                 wfProfileOut( $fname );
@@ -117,12 +133,12 @@ class Article {
                         }
                 }
         }
-       
+
         # Load the revision (including cur_text) into this object
         function loadContent( $noredir = false )
         {
                 global $wgOut, $wgMwRedir, $wgRequest;
-               
+
                 # Query variables :P
                 $oldid = $wgRequest->getVal( 'oldid' );
                 $redirect = $wgRequest->getVal( 'redirect' );
@@ -131,12 +147,12 @@ class Article {
                 $fname = "Article::loadContent";
                 
                 # Pre-fill content with error message so that if something       
-               # fails we'll have something telling us what we intended.        
+               # fails we'll have something telling us what we intended.
  
                 $t = $this->mTitle->getPrefixedText();   
                 if ( isset( $oldid ) ) {         
                         $oldid = IntVal( $oldid );       
-                       $t .= ",oldid={$oldid}";         
+                       $t .= ",oldid={$oldid}";
                 }        
                 if ( isset( $redirect ) ) {      
                         $redirect = ($redirect == "no") ? "no" : "yes";          
@@ -558,11 +574,24 @@ class Article {
                                 if($summary) $subject="== {$summary} ==\n\n";
                                 $text=$oldtext."\n\n".$subject.$text;
                         } else {
+
+                               # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+                               # comments to be stripped as well)
+                               $striparray=array();
+                               $parser=new Parser();
+                               $parser->mOutputType=OT_WIKI;
+                               $oldtext=$parser->strip($oldtext, $striparray, true);
+
+                               # now that we can be sure that no pseudo-sections are in the source,
+                               # split it up
                                 $secs=preg_split("/(^=+.*?=+|^<h[1-6].*?>.*?<\/h[1-6].*?>)/mi",
                                   $oldtext,-1,PREG_SPLIT_DELIM_CAPTURE);
                                 $secs[$section*2]=$text."\n\n"; // replace with edited
                                 if($section) { $secs[$section*2-1]=""; } // erase old headline
-                               $text=join("",$secs);           
+                               $text=join("",$secs);
+
+                               # reinsert the stuff that we stripped out earlier
+                               $text=$parser->unstrip($text,$striparray,true);
                         }
                 }
                 return $text;
diff --git a/includes/Parser.php b/includes/Parser.php

index 5f1043a..942621c 100644 (file)
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -44,6 +44,12 @@ define( "OT_HTML", 1 );
  define( "OT_WIKI", 2 );
  define( "OT_MSG", 3 );
  
+# string parameter for extractTags which will cause it
+# to strip HTML comments in addition to regular
+# <XML>-style tags. This should not be anything we
+# may want to use in wikisyntax
+define( "STRIP_COMMENTS", "HTMLCommentStrip" );
+
  # prefix for escaping, used in two functions at least
  define( "UNIQ_PREFIX", "NaodW29");
  
@@ -127,6 +133,9 @@ class Parser
  
         # If $content is already set, the additional entries will be appended
  
+       # If $tag is set to STRIP_COMMENTS, the function will extract
+       # <!-- HTML comments -->
+
         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
                 if ( !$content ) {
@@ -136,12 +145,20 @@ class Parser
                 $stripped = "";
  
                 while ( "" != $text ) {
-                       $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+                       if($tag==STRIP_COMMENTS) {
+                               $p = preg_split( "/<!--/i", $text, 2 );
+                       } else {
+                               $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+                       }
                         $stripped .= $p[0];
                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                                 $text = "";
                         } else {
-                               $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+                               if($tag==STRIP_COMMENTS) {
+                                       $q = preg_split( "/-->/i", $p[1], 2 );
+                               } else {
+                                       $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+                               }
                                 $marker = $rnd . sprintf("%08X", $n++);
                                 $content[$marker] = $q[0];
                                 $stripped .= $marker;
@@ -151,18 +168,23 @@ class Parser
                 return $stripped;
         }
  
-       # Strips <nowiki>, <pre> and <math>
+       # Strips and renders <nowiki>, <pre>, <math>, <hiero>
+       # If $render is set, performs necessary rendering operations on plugins
         # Returns the text, and fills an array with data needed in unstrip()
         # If the $state is already a valid strip state, it adds to the state
-       #
-       function strip( $text, &$state )
+
+       # When $stripcomments is set, HTML comments <!-- like this -->
+       # will be stripped in addition to other tags. This is important
+       # for section editing, where these comments cause confusion when
+       # counting the sections in the wikisource
+       function strip( $text, &$state, $stripcomments = false )
         {
                 $render = ($this->mOutputType == OT_HTML);
                 $nowiki_content = array();
                 $hiero_content = array();
                 $math_content = array();
                 $pre_content = array();
-               $item_content = array();
+               $comment_content = array();
  
                 # Replace any instances of the placeholders
                 $uniq_prefix = UNIQ_PREFIX;
@@ -177,25 +199,21 @@ class Parser
                         }
                 }
  
-               if( $GLOBALS['wgUseWikiHiero'] ){
-                       $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
-                       foreach( $hiero_content as $marker => $content ){
-                               if( $render ){
-                                       $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
-                               } else {
-                                       $hiero_content[$marker] = "<hiero>$content</hiero>";
-                               }
+               $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+               foreach( $hiero_content as $marker => $content ){
+                       if( $render && $GLOBALS['wgUseWikiHiero']){
+                               $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+                       } else {
+                               $hiero_content[$marker] = "<hiero>$content</hiero>";
                         }
                 }
  
-               if( $this->mOptions->getUseTeX() ){
-                       $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
-                       foreach( $math_content as $marker => $content ){
-                               if( $render ){
-                                       $math_content[$marker] = renderMath( $content );
-                               } else {
-                                       $math_content[$marker] = "<math>$content</math>";
-                               }
+               $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+               foreach( $math_content as $marker => $content ){
+                       if( $render && $this->mOptions->getUseTeX() ){
+                               $math_content[$marker] = renderMath( $content );
+                       } else {
+                               $math_content[$marker] = "<math>$content</math>";
                         }
                 }
  
@@ -207,6 +225,12 @@ class Parser
                                 $pre_content[$marker] = "<pre>$content</pre>";
                         }
                 }
+               if($stripcomments) {
+                       $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
+                       foreach( $comment_content as $marker => $content ){
+                               $comment_content[$marker] = "<!--$content-->";
+                       }
+               }
  
                 # Merge state with the pre-existing state, if there is one
                 if ( $state ) {
@@ -214,13 +238,14 @@ class Parser
                         $state['hiero'] = $state['hiero'] + $hiero_content;
                         $state['math'] = $state['math'] + $math_content;
                         $state['pre'] = $state['pre'] + $pre_content;
+                       $state['comment'] = $state['comment'] + $comment_content;
                 } else {
                         $state = array(
                           'nowiki' => $nowiki_content,
                           'hiero' => $hiero_content,
                           'math' => $math_content,
                           'pre' => $pre_content,
-                         'item' => $item_content
+                         'comment' => $comment_content
                         );
                 }
                 return $text;
@@ -251,8 +276,7 @@ class Parser
                           'nowiki' => array(),
                           'hiero' => array(),
                           'math' => array(),
-                         'pre' => array(),
-                         'item' => array()
+                         'pre' => array()
                         );
                 }
                 $state['item'][$rnd] = $text;
author	Erik Moeller <erik@users.mediawiki.org>
	Wed, 28 Apr 2004 04:50:35 +0000 (04:50 +0000)
committer	Erik Moeller <erik@users.mediawiki.org>
	Wed, 28 Apr 2004 04:50:35 +0000 (04:50 +0000)
includes/Article.php		patch | blob | history
includes/Parser.php		patch | blob | history