From 54005e7a9d98e608e898d0fc5f8e670e15dbac2a Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 23 May 2006 07:19:01 +0000 Subject: [PATCH] * Reordered wiki table handling and __TOC__ extraction in the parser to better handle some overlapping tag cases. * Only the first __TOC__ is now turned into a TOC. The table change doesn't disrupt either the parser tests or the en.wikipedia main page. Hopefully it won't break other real content... --- RELEASE-NOTES | 5 +- includes/MagicWord.php | 4 +- includes/Parser.php | 74 +++++++++++++------ includes/Sanitizer.php | 1 + maintenance/parserTests.txt | 141 ++++++++++++++++++++++++++++++++++++ 5 files changed, 198 insertions(+), 27 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 9fa2822953..8ba8452e0b 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -315,7 +315,10 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * (bug 6046) Update to Indonesian localisation (id) #15 * (bug 5523) $wgNoFollowNsExceptions to allow disabling rel="nofollow" in specially-selected namespaces. -* Fix for HTML/JS injection bug in variable handler (found by Nick Jenkins) +* (bug 6055) Fix for HTML/JS injection bug in variable handler (found by Nick Jenkins) +* Reordered wiki table handling and __TOC__ extraction in the parser to better + handle some overlapping tag cases. +* Only the first __TOC__ is now turned into a TOC. == Compatibility == diff --git a/includes/MagicWord.php b/includes/MagicWord.php index c747b7f975..895c0e0261 100644 --- a/includes/MagicWord.php +++ b/includes/MagicWord.php @@ -329,8 +329,8 @@ class MagicWord { /** * Replaces the word with something else */ - function replace( $replacement, $subject ) { - $res = preg_replace( $this->getRegex(), wfRegexReplacement( $replacement ), $subject ); + function replace( $replacement, $subject, $limit=-1 ) { + $res = preg_replace( $this->getRegex(), wfRegexReplacement( $replacement ), $subject, $limit ); $this->mModified = !($res === $subject); return $res; } diff --git a/includes/Parser.php b/includes/Parser.php index cb6d692e07..b71b275f17 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -157,6 +157,9 @@ class Parser $this->mTemplates = array(); $this->mTemplatePath = array(); + $this->mShowToc = true; + $this->mForceTocPosition = false; + wfRunHooks( 'ParserClearState', array( &$this ) ); } @@ -873,12 +876,20 @@ class Parser $text = strtr( $text, array( '' => '' , '' => '' ) ); $text = strtr( $text, array( '' => '', '' => '') ); $text = preg_replace( '/.*?<\/includeonly>/s', '', $text ); - + $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) ); + $text = $this->replaceVariables( $text, $args ); + // Tables need to come after variable replacement for things to work + // properly; putting them before other transformations should keep + // exciting things like link expansions from showing up in surprising + // places. + $text = $this->doTableStuff( $text ); + $text = preg_replace( '/(^|\n)-----*/', '\\1
', $text ); + $text = $this->stripToc( $text ); $text = $this->doHeadings( $text ); if($this->mOptions->getUseDynamicDates()) { $df =& DateFormatter::getInstance(); @@ -893,7 +904,6 @@ class Parser $text = str_replace($this->mUniqPrefix."NOPARSE", "", $text); $text = $this->doMagicLinks( $text ); - $text = $this->doTableStuff( $text ); $text = $this->formatHeadings( $text, $isMain ); wfProfileOut( $fname ); @@ -2411,7 +2421,7 @@ class Parser wfProfileOut( $fname ); return $text; } - + /** * Replace magic variables * @private @@ -3049,6 +3059,31 @@ class Parser } } + /** + * Detect __TOC__ magic word and set a placeholder + */ + function stripToc( $text ) { + # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, + # do not add TOC + $mw = MagicWord::get( MAG_NOTOC ); + if( $mw->matchAndRemove( $text ) ) { + $this->mShowToc = false; + } + + $mw = MagicWord::get( MAG_TOC ); + if( $mw->match( $text ) ) { + $this->mShowToc = true; + $this->mForceTocPosition = true; + + // Set a placeholder. At the end we'll fill it in with the TOC. + $text = $mw->replace( '', $text, 1 ); + + // Only keep the first one. + $text = $mw->replace( '', $text ); + } + return $text; + } + /** * This function accomplishes several tasks: * 1) Auto-number headings if that option is enabled @@ -3067,8 +3102,6 @@ class Parser global $wgMaxTocLevel, $wgContLang; $doNumberHeadings = $this->mOptions->getNumberHeadings(); - $doShowToc = true; - $forceTocHere = false; if( !$this->mTitle->userCanEdit() ) { $showEditLink = 0; } else { @@ -3080,12 +3113,6 @@ class Parser if( $esw->matchAndRemove( $text ) ) { $showEditLink = 0; } - # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, - # do not add TOC - $mw =& MagicWord::get( MAG_NOTOC ); - if( $mw->matchAndRemove( $text ) ) { - $doShowToc = false; - } # Get all headlines for numbering them and adding funky stuff like [edit] # links - this is for later, but we need the number of headlines right now @@ -3093,7 +3120,7 @@ class Parser # if there are fewer than 4 headlines in the article, do not show TOC if( $numMatches < 4 ) { - $doShowToc = false; + $this->mShowToc = false; } # Allow user to stipulate that a page should have a "new section" @@ -3107,20 +3134,20 @@ class Parser $mw =& MagicWord::get( MAG_TOC ); if($mw->match( $text ) ) { - $doShowToc = true; - $forceTocHere = true; + $this->mShowToc = true; + $this->mForceTocPosition = true; } else { # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, # override above conditions and always show TOC above first header $mw =& MagicWord::get( MAG_FORCETOC ); if ($mw->matchAndRemove( $text ) ) { - $doShowToc = true; + $this->mShowToc = true; } } # Never ever show TOC if no headers if( $numMatches < 1 ) { - $doShowToc = false; + $this->mShowToc = false; } # We need this to perform operations on the HTML @@ -3162,7 +3189,7 @@ class Parser } $level = $matches[1][$headlineCount]; - if( $doNumberHeadings || $doShowToc ) { + if( $doNumberHeadings || $this->mShowToc ) { if ( $level > $prevlevel ) { # Increase TOC level @@ -3258,7 +3285,7 @@ class Parser if($refcount[$headlineCount] > 1 ) { $anchor .= '_' . $refcount[$headlineCount]; } - if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { + if( $this->mShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); } if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) { @@ -3279,7 +3306,7 @@ class Parser $sectionCount++; } - if( $doShowToc ) { + if( $this->mShowToc ) { if( $toclevel<$wgMaxTocLevel ) { $toc .= $sk->tocUnindent( $toclevel - 1 ); } @@ -3301,8 +3328,8 @@ class Parser # $full .= $sk->editSectionLink(0); } $full .= $block; - if( $doShowToc && !$i && $isMain && !$forceTocHere) { - # Top anchor now in skin + if( $this->mShowToc && !$i && $isMain && !$this->mForceTocPosition ) { + # Top anchor now in skin $full = $full.$toc; } @@ -3311,9 +3338,8 @@ class Parser } $i++; } - if($forceTocHere) { - $mw =& MagicWord::get( MAG_TOC ); - return $mw->replace( $toc, $full ); + if( $this->mForceTocPosition ) { + return str_replace( '', $toc, $full ); } else { return $full; } diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 93745c7e32..25f4d57409 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -603,6 +603,7 @@ class Sanitizer { 'RFC' => 'RFC', 'PMID' => 'PMID', '|' => '|', + '__' => '__', ) ); # Stupid hack diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index ee6e133785..22a25054d2 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -3970,6 +3970,147 @@ mailto:inline@mail.tld

!! end + +# +# Security and HTML correctness +# From Nick Jenkins' fuzz testing +# + +!! test +Fuzz testing: Parser13 +!! input +{| +| http://a| +!! result + + + + +
+
+ +!! end + +!! test +Fuzz testing: Parser14 +!! input +== onmouseover= == +http://__TOC__ +!! result +

onmouseover=

+

http:// +

+!! end + +!! test +Fuzz testing: Parser14-table +!! input +==a== +{| STYLE=__TOC__ +!! result +

a

+ + +
+ +!! end + +# Known to produce bogus xml (extra ) +!! test +Fuzz testing: Parser16 +!! options +noxml +!! input +{| +!https://|||||| +!! result + + + +
https:// + +
+ +!! end + +!! test +Fuzz testing: Parser21 +!! input +{| +! irc://{{ftp://a" onmouseover="alert('hello world');" +| +!! result + + + + +
irc://{{ftp://a" onmouseover="alert('hello world');" + +
+ +!! end + +!! test +Fuzz testing: Parser22 +!! input +http://===r:::https://b + +{| +!!result +

http://===r:::https://b +

+ + +
+ +!! end + +# Known to produce bad XML for now +!! test +Fuzz testing: Parser24 +!! options +noxml +!! input +{| +{{{| +}}}} > +
+ +MOVE YOUR MOUSE CURSOR OVER THIS TEXT +| +!! result + + +} > +
+ +MOVE YOUR MOUSE CURSOR OVER THIS TEXT +
+ + +
+
+ +!! end + +# Known to produce bad XML for now +!!test +Fuzz testing: Parser25 (bug 6055) +!! options +noxml +!! input +{{{ +| +
  • +}}}blah" onmouseover="alert('hello world');" align="left"'''MOVE MOUSE CURSOR OVER HERE +!! result +
  • +blah" onmouseover="alert('hello world');" align="left"MOVE MOUSE CURSOR OVER HERE + +!! end + # # # -- 2.20.1