From: Brion Vibber Date: Tue, 6 Jun 2006 00:51:34 +0000 (+0000) Subject: * (bug 2587) Fix for section editing with comment prefix X-Git-Tag: 1.31.0-rc.0~56890 X-Git-Url: http://git.cyclocoop.org/%24image?a=commitdiff_plain;h=939ddd8793c4417b86f21b5ae7a16a9320972b53;p=lhc%2Fweb%2Fwiklou.git * (bug 2587) Fix for section editing with comment prefix * (bug 2607) Fix for section editing with mix of wiki and HTML headings * (bug 3342) Fix for section editing with headings wrapped in <noinclude> * (bug 3476) Fix for section editing with faux headings in extensions * (bug 5272) Fix for section editing with HTML-heading subsections * Fix for bogus wiki headings improperly detected with following text * Fix for HTML headings improperly not detected with preceding/following text * Section extraction and replacement functions merged into one implementation on the Parser object, so they can't get out of sync with each other. --- diff --git a/RELEASE-NOTES b/RELEASE-NOTES index c742a4b766..dbd708ff05 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -429,6 +429,16 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * Add 'SiteNoticeBefore' and 'SiteNoticeAfter' hooks * (bug 6182) Date passed in "sp-newimages-showfrom" not adjusted to user time preferences +* (bug 2587) Fix for section editing with comment prefix +* (bug 2607) Fix for section editing with mix of wiki and HTML headings +* (bug 3342) Fix for section editing with headings wrapped in <noinclude> +* (bug 3476) Fix for section editing with faux headings in extensions +* (bug 5272) Fix for section editing with HTML-heading subsections +* Fix for bogus wiki headings improperly detected with following text +* Fix for HTML headings improperly not detected with preceding/following text +* Section extraction and replacement functions merged into one implementation + on the Parser object, so they can't get out of sync with each other. 
+ == Compatibility == diff --git a/includes/Article.php b/includes/Article.php index d6890a0d18..2b32885997 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -242,62 +242,8 @@ class Article { * @return string text of the requested section */ function getSection($text,$section) { - - # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML - # comments to be stripped as well) - $striparray=array(); - $parser=new Parser(); - $parser->mOutputType=OT_WIKI; - $parser->mOptions = new ParserOptions(); - $striptext=$parser->strip($text, $striparray, true); - - # now that we can be sure that no pseudo-sections are in the source, - # split it up by section - $secs = - preg_split( - '/(^=+.+?=+|^.*?<\/h[1-6].*?>)(?!\S)/mi', - $striptext, -1, - PREG_SPLIT_DELIM_CAPTURE); - if($section==0) { - $rv=$secs[0]; - } else { - $headline=$secs[$section*2-1]; - preg_match( '/^(=+).+?=+|^.*?<\/h[1-6].*?>(?!\S)/mi',$headline,$matches); - $hlevel=$matches[1]; - - # translate wiki heading into level - if(strpos($hlevel,'=')!==false) { - $hlevel=strlen($hlevel); - } - - $rv=$headline. 
$secs[$section*2]; - $count=$section+1; - - $break=false; - while(!empty($secs[$count*2-1]) && !$break) { - - $subheadline=$secs[$count*2-1]; - preg_match( '/^(=+).+?=+|^.*?<\/h[1-6].*?>(?!\S)/mi',$subheadline,$matches); - $subhlevel=$matches[1]; - if(strpos($subhlevel,'=')!==false) { - $subhlevel=strlen($subhlevel); - } - if($subhlevel > $hlevel) { - $rv.=$subheadline.$secs[$count*2]; - } - if($subhlevel <= $hlevel) { - $break=true; - } - $count++; - - } - } - # reinsert stripped tags - $rv=$parser->unstrip($rv,$striparray); - $rv=$parser->unstripNoWiki($rv,$striparray); - $rv=trim($rv); - return $rv; - + global $wgParser; + return $wgParser->getSection( $text, $section ); } /** @@ -1288,8 +1234,10 @@ class Article { function replaceSection($section, $text, $summary = '', $edittime = NULL) { $fname = 'Article::replaceSection'; wfProfileIn( $fname ); - - if ($section != '') { + + if( $section == '' ) { + // Whole-page edit; let the text through unmolested. + } else { if( is_null( $edittime ) ) { $rev = Revision::newFromTitle( $this->mTitle ); } else { @@ -1307,71 +1255,11 @@ class Article { if($summary) $subject="== {$summary} ==\n\n"; $text=$oldtext."\n\n".$subject.$text; } else { - - # strip NOWIKI etc. 
to avoid confusion (true-parameter causes HTML - # comments to be stripped as well) - $striparray=array(); - $parser=new Parser(); - $parser->mOutputType=OT_WIKI; - $parser->mOptions = new ParserOptions(); - $oldtext=$parser->strip($oldtext, $striparray, true); - - # now that we can be sure that no pseudo-sections are in the source, - # split it up - # Unfortunately we can't simply do a preg_replace because that might - # replace the wrong section, so we have to use the section counter instead - $secs=preg_split('/(^=+.+?=+|^.*?<\/h[1-6].*?>)(?!\S)/mi', - $oldtext,-1,PREG_SPLIT_DELIM_CAPTURE); - $secs[$section*2]=$text."\n\n"; // replace with edited - - # section 0 is top (intro) section - if($section!=0) { - - # headline of old section - we need to go through this section - # to determine if there are any subsections that now need to - # be erased, as the mother section has been replaced with - # the text of all subsections. - $headline=$secs[$section*2-1]; - preg_match( '/^(=+).+?=+|^.*?<\/h[1-6].*?>(?!\S)/mi',$headline,$matches); - $hlevel=$matches[1]; - - # determine headline level for wikimarkup headings - if(strpos($hlevel,'=')!==false) { - $hlevel=strlen($hlevel); - } - - $secs[$section*2-1]=''; // erase old headline - $count=$section+1; - $break=false; - while(!empty($secs[$count*2-1]) && !$break) { - - $subheadline=$secs[$count*2-1]; - preg_match( - '/^(=+).+?=+|^.*?<\/h[1-6].*?>(?!\S)/mi',$subheadline,$matches); - $subhlevel=$matches[1]; - if(strpos($subhlevel,'=')!==false) { - $subhlevel=strlen($subhlevel); - } - if($subhlevel > $hlevel) { - // erase old subsections - $secs[$count*2-1]=''; - $secs[$count*2]=''; - } - if($subhlevel <= $hlevel) { - $break=true; - } - $count++; - - } - - } - $text=join('',$secs); - # reinsert the stuff that we stripped out earlier - $text=$parser->unstrip($text,$striparray); - $text=$parser->unstripNoWiki($text,$striparray); + global $wgParser; + $text = $wgParser->replaceSection( $oldtext, $section, $text ); } - } + 
wfProfileOut( $fname ); return $text; } diff --git a/includes/Parser.php b/includes/Parser.php index 4afa23e68d..f141df2fa8 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -367,7 +367,7 @@ class Parser $inside = $p[4]; } - $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++); + $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU'; $stripped .= $marker; if ( $close === '/>' ) { @@ -883,7 +883,7 @@ class Parser wfProfileIn( $fname ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); - $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m", + $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m", "\\1\\2", $text ); } wfProfileOut( $fname ); @@ -4209,6 +4209,165 @@ class Parser */ function getTags() { return array_keys( $this->mTagHooks ); } /**#@-*/ + + + /** + * Break wikitext input into sections, and either pull or replace + * some particular section's text. + * + * External callers should use the getSection and replaceSection methods. + * + * @param $text Page wikitext + * @param $section Numbered section. 0 pulls the text before the first + * heading; other numbers will pull the given section + * along with its lower-level subsections. + * @param $mode One of "get" or "replace" + * @param $newtext Replacement text for section data. + * @return string for "get", the extracted section text. + * for "replace", the whole page with the section replaced. + */ + private function extractSections( $text, $section, $mode, $newtext='' ) { + # strip NOWIKI etc. 
to avoid confusion (true-parameter causes HTML + # comments to be stripped as well) + $striparray = array(); + + $oldOutputType = $this->mOutputType; + $oldOptions = $this->mOptions; + $this->mOptions = new ParserOptions(); + $this->mOutputType = OT_WIKI; + + $striptext = $this->strip( $text, $striparray, true ); + + $this->mOutputType = $oldOutputType; + $this->mOptions = $oldOptions; + + # now that we can be sure that no pseudo-sections are in the source, + # split it up by section + $uniq = preg_quote( $this->uniqPrefix(), '/' ); + $comment = "(?:$uniq-!--.*?QINU)"; + $secs = preg_split( + /* + "/ + ^( + (?:$comment|<\/?noinclude>)* # Initial comments will be stripped + (?: + (=+) # Should this be limited to 6? + .+? # Section title... + \\2 # Ending = count must match start + | + ^ + <h([1-6])\b.*?> + .*? + <\/h\\3\s*> + ) + (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok + )$ + /mix", + */ + "/ + ( + ^ + (?:$comment|<\/?noinclude>)* # Initial comments will be stripped + (=+) # Should this be limited to 6? + .+? # Section title... + \\2 # Ending = count must match start + (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok + $ + | + <h([1-6])\b.*?> + .*? + <\/h\\3\s*> + ) + /mix", + $striptext, -1, + PREG_SPLIT_DELIM_CAPTURE); + + if( $mode == "get" ) { + if( $section == 0 ) { + // "Section 0" returns the content before any other section. + $rv = $secs[0]; + } else { + $rv = ""; + } + } elseif( $mode == "replace" ) { + if( $section == 0 ) { + $rv = $newtext . "\n\n"; + $remainder = true; + } else { + $rv = $secs[0]; + $remainder = false; + } + } + $count = 0; + $sectionLevel = 0; + for( $index = 1; $index < count( $secs ); ) { + $headerLine = $secs[$index++]; + if( $secs[$index] ) { + // A wiki header + $headerLevel = strlen( $secs[$index++] ); + } else { + // An HTML header + $index++; + $headerLevel = intval( $secs[$index++] ); + } + $content = $secs[$index++]; + + $count++; + if( $mode == "get" ) { + if( $count == $section ) { + $rv = $headerLine . 
$content; + $sectionLevel = $headerLevel; + } elseif( $count > $section ) { + if( $sectionLevel && $headerLevel > $sectionLevel ) { + $rv .= $headerLine . $content; + } else { + // Broke out to a higher-level section + break; + } + } + } elseif( $mode == "replace" ) { + if( $count < $section ) { + $rv .= $headerLine . $content; + } elseif( $count == $section ) { + $rv .= $newtext . "\n\n"; + $sectionLevel = $headerLevel; + } elseif( $count > $section ) { + if( $headerLevel <= $sectionLevel ) { + // Passed the section's sub-parts. + $remainder = true; + } + if( $remainder ) { + $rv .= $headerLine . $content; + } + } + } + } + # reinsert stripped tags + $rv = $this->unstrip( $rv, $striparray ); + $rv = $this->unstripNoWiki( $rv, $striparray ); + $rv = trim( $rv ); + return $rv; + } + + /** + * This function returns the text of a section, specified by a number ($section). + * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or + * the first section before any such heading (section 0). + * + * If a section contains subsections, these are also returned. 
+ * + * @param $text String: text to look in + * @param $section Integer: section number + * @return string text of the requested section + */ + function getSection( $text, $section ) { + return $this->extractSections( $text, $section, "get" ); + } + + function replaceSection( $oldtext, $section, $text ) { + return $this->extractSections( $oldtext, $section, "replace", $text ); + } + } /** diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc index 22ddb8d564..e8dfb8f69c 100644 --- a/maintenance/parserTests.inc +++ b/maintenance/parserTests.inc @@ -269,9 +269,13 @@ class ParserTest { $out = $parser->preSaveTransform( $input, $title, $user, $options ); } elseif (preg_match('/\\bmsg\\b/i', $opts)) { $out = $parser->transformMsg( $input, $options ); - } elseif( preg_match( '/\\bsection=(\d+)\b/', $opts, $matches ) ) { + } elseif( preg_match( '/\\bsection=(\d+)\b/i', $opts, $matches ) ) { $section = intval( $matches[1] ); - $out = Article::getSection( $input, $section ); + $out = $parser->getSection( $input, $section ); + } elseif( preg_match( '/\\breplace=(\d+),"(.*?)"/i', $opts, $matches ) ) { + $section = intval( $matches[1] ); + $replace = $matches[2]; + $out = $parser->replaceSection( $input, $section, $replace ); } else { $output = $parser->parse( $input, $title, $options, true, true, 1337 ); $out = $output->getText(); diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 7662627dc0..72ee3ed259 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -4839,6 +4839,437 @@ section=1

aa

!! end +!! test +Section extraction, HTML headings should be ignored in extensions (bug 3476) +!! options +section=2 +!! input +

<h2>a</h2>
+<tag>
+<h2>not b</h2>
+</tag>
+<h2>b</h2>
+!! result +

<h2>b</h2>
+!! end + +!! test +Section replacement test (section 0) +!! options +replace=0,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +xxx + +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 1) +!! options +replace=1,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +xxx + +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 2) +!! options +replace=2,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +xxx + +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 3) +!! options +replace=3,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +xxx + +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 4) +!! options +replace=4,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +xxx + +==c== +===ca=== +!! end + +!! test +Section replacement test (section 5) +!! options +replace=5,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +xxx + +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 6) +!! options +replace=6,"xxx" +!! 
input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +xxx + +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 7) +!! options +replace=7,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +xxx + +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 8) +!! options +replace=8,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +xxx + +==c== +===ca=== +!!end + +!! test +Section replacement test (section 9) +!! options +replace=9,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +xxx +!! end + +!! test +Section replacement test (section 10) +!! options +replace=10,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +xxx +!! end + + +!! test +Section extraction, HTML headings not at line boundaries (section 0) +!! options +section=0 +!! input +

<h2>Evil</h2>blah blah blah + +evil blah + +
<h2>Nice</h2>
+ +nice blah + +extra evil<h2>Extra nasty</h2>
+ +extra nasty +!! result +!! end + +!! test +Section extraction, HTML headings not at line boundaries (section 1) +!! options +section=1 +!! input +

<h2>Evil</h2>blah blah blah + +evil blah + +
<h2>Nice</h2>
+ +nice blah + +extra evil<h2>Extra nasty</h2>
+ +extra nasty +!! result +

<h2>Evil</h2>blah blah blah + +evil blah +!! end + +!! test +Section extraction, HTML headings not at line boundaries (section 2) +!! options +section=2 +!! input +

<h2>Evil</h2>blah blah blah + +evil blah + +
<h2>Nice</h2>
+ +nice blah + +extra evil<h2>Extra nasty</h2>
+ +extra nasty +!! result +

<h2>Nice</h2>
+ +nice blah + +extra evil +!! end + +!! test +Section extraction, HTML headings not at line boundaries (section 3) +!! options +section=3 +!! input +

<h2>Evil</h2>blah blah blah + +evil blah + +
<h2>Nice</h2>
+ +nice blah + +extra evil<h2>Extra nasty</h2>
+ +extra nasty +!! result +

<h2>Extra nasty</h2>
+ +extra nasty +!! end # #