From: Brion Vibber
Date: Tue, 23 May 2006 07:19:01 +0000 (+0000)
Subject: * Reordered wiki table handling and __TOC__ extraction in the parser to better handle...
X-Git-Tag: 1.31.0-rc.0~57056
X-Git-Url: http://git.cyclocoop.org//%27%40script%40/%27?a=commitdiff_plain;h=54005e7a9d98e608e898d0fc5f8e670e15dbac2a;p=lhc%2Fweb%2Fwiklou.git
* Reordered wiki table handling and __TOC__ extraction in the parser to better handle some overlapping tag cases.
* Only the first __TOC__ is now turned into a TOC.
The table change doesn't disrupt either the parser tests or the en.wikipedia main page. Hopefully it won't break other real content...
---
diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 9fa2822953..8ba8452e0b 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -315,7 +315,10 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
* (bug 6046) Update to Indonesian localisation (id) #15
* (bug 5523) $wgNoFollowNsExceptions to allow disabling rel="nofollow" in
specially-selected namespaces.
-* Fix for HTML/JS injection bug in variable handler (found by Nick Jenkins)
+* (bug 6055) Fix for HTML/JS injection bug in variable handler (found by Nick Jenkins)
+* Reordered wiki table handling and __TOC__ extraction in the parser to better
+ handle some overlapping tag cases.
+* Only the first __TOC__ is now turned into a TOC.
== Compatibility ==
diff --git a/includes/MagicWord.php b/includes/MagicWord.php
index c747b7f975..895c0e0261 100644
--- a/includes/MagicWord.php
+++ b/includes/MagicWord.php
@@ -329,8 +329,8 @@ class MagicWord {
/**
* Replaces the word with something else
*/
- function replace( $replacement, $subject ) {
- $res = preg_replace( $this->getRegex(), wfRegexReplacement( $replacement ), $subject );
+ function replace( $replacement, $subject, $limit=-1 ) {
+ $res = preg_replace( $this->getRegex(), wfRegexReplacement( $replacement ), $subject, $limit );
$this->mModified = !($res === $subject);
return $res;
}
diff --git a/includes/Parser.php b/includes/Parser.php
index cb6d692e07..b71b275f17 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -157,6 +157,9 @@ class Parser
$this->mTemplates = array();
$this->mTemplatePath = array();
+ $this->mShowToc = true;
+ $this->mForceTocPosition = false;
+
wfRunHooks( 'ParserClearState', array( &$this ) );
}
@@ -873,12 +876,20 @@ class Parser
$text = strtr( $text, array( '' => '' , '' => '' ) );
$text = strtr( $text, array( '' => '', '' => '') );
$text = preg_replace( '/.*?<\/includeonly>/s', '', $text );
-
+
$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) );
+
$text = $this->replaceVariables( $text, $args );
+ // Tables need to come after variable replacement for things to work
+ // properly; putting them before other transformations should keep
+ // exciting things like link expansions from showing up in surprising
+ // places.
+ $text = $this->doTableStuff( $text );
+
$text = preg_replace( '/(^|\n)-----*/', '\\1
', $text );
+ $text = $this->stripToc( $text );
$text = $this->doHeadings( $text );
if($this->mOptions->getUseDynamicDates()) {
$df =& DateFormatter::getInstance();
@@ -893,7 +904,6 @@ class Parser
$text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);
$text = $this->doMagicLinks( $text );
- $text = $this->doTableStuff( $text );
$text = $this->formatHeadings( $text, $isMain );
wfProfileOut( $fname );
@@ -2411,7 +2421,7 @@ class Parser
wfProfileOut( $fname );
return $text;
}
-
+
/**
* Replace magic variables
* @private
@@ -3049,6 +3059,31 @@ class Parser
}
}
+ /**
+ * Detect __TOC__ magic word and set a placeholder
+ */
+ function stripToc( $text ) {
+ # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
+ # do not add TOC
+ $mw = MagicWord::get( MAG_NOTOC );
+ if( $mw->matchAndRemove( $text ) ) {
+ $this->mShowToc = false;
+ }
+
+ $mw = MagicWord::get( MAG_TOC );
+ if( $mw->match( $text ) ) {
+ $this->mShowToc = true;
+ $this->mForceTocPosition = true;
+
+ // Set a placeholder. At the end we'll fill it in with the TOC.
+ $text = $mw->replace( '', $text, 1 );
+
+ // Only keep the first one.
+ $text = $mw->replace( '', $text );
+ }
+ return $text;
+ }
+
/**
* This function accomplishes several tasks:
* 1) Auto-number headings if that option is enabled
@@ -3067,8 +3102,6 @@ class Parser
global $wgMaxTocLevel, $wgContLang;
$doNumberHeadings = $this->mOptions->getNumberHeadings();
- $doShowToc = true;
- $forceTocHere = false;
if( !$this->mTitle->userCanEdit() ) {
$showEditLink = 0;
} else {
@@ -3080,12 +3113,6 @@ class Parser
if( $esw->matchAndRemove( $text ) ) {
$showEditLink = 0;
}
- # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
- # do not add TOC
- $mw =& MagicWord::get( MAG_NOTOC );
- if( $mw->matchAndRemove( $text ) ) {
- $doShowToc = false;
- }
# Get all headlines for numbering them and adding funky stuff like [edit]
# links - this is for later, but we need the number of headlines right now
@@ -3093,7 +3120,7 @@ class Parser
# if there are fewer than 4 headlines in the article, do not show TOC
if( $numMatches < 4 ) {
- $doShowToc = false;
+ $this->mShowToc = false;
}
# Allow user to stipulate that a page should have a "new section"
@@ -3107,20 +3134,20 @@ class Parser
$mw =& MagicWord::get( MAG_TOC );
if($mw->match( $text ) ) {
- $doShowToc = true;
- $forceTocHere = true;
+ $this->mShowToc = true;
+ $this->mForceTocPosition = true;
} else {
# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
# override above conditions and always show TOC above first header
$mw =& MagicWord::get( MAG_FORCETOC );
if ($mw->matchAndRemove( $text ) ) {
- $doShowToc = true;
+ $this->mShowToc = true;
}
}
# Never ever show TOC if no headers
if( $numMatches < 1 ) {
- $doShowToc = false;
+ $this->mShowToc = false;
}
# We need this to perform operations on the HTML
@@ -3162,7 +3189,7 @@ class Parser
}
$level = $matches[1][$headlineCount];
- if( $doNumberHeadings || $doShowToc ) {
+ if( $doNumberHeadings || $this->mShowToc ) {
if ( $level > $prevlevel ) {
# Increase TOC level
@@ -3258,7 +3285,7 @@ class Parser
if($refcount[$headlineCount] > 1 ) {
$anchor .= '_' . $refcount[$headlineCount];
}
- if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
+ if( $this->mShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
$toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel);
}
if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
@@ -3279,7 +3306,7 @@ class Parser
$sectionCount++;
}
- if( $doShowToc ) {
+ if( $this->mShowToc ) {
if( $toclevel<$wgMaxTocLevel ) {
$toc .= $sk->tocUnindent( $toclevel - 1 );
}
@@ -3301,8 +3328,8 @@ class Parser
# $full .= $sk->editSectionLink(0);
}
$full .= $block;
- if( $doShowToc && !$i && $isMain && !$forceTocHere) {
- # Top anchor now in skin
+ if( $this->mShowToc && !$i && $isMain && !$this->mForceTocPosition ) {
+ # Top anchor now in skin
$full = $full.$toc;
}
@@ -3311,9 +3338,8 @@ class Parser
}
$i++;
}
- if($forceTocHere) {
- $mw =& MagicWord::get( MAG_TOC );
- return $mw->replace( $toc, $full );
+ if( $this->mForceTocPosition ) {
+ return str_replace( '', $toc, $full );
} else {
return $full;
}
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 93745c7e32..25f4d57409 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -603,6 +603,7 @@ class Sanitizer {
'RFC' => 'RFC',
'PMID' => 'PMID',
'|' => '|',
+ '__' => '__',
) );
# Stupid hack
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index ee6e133785..22a25054d2 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -3970,6 +3970,147 @@ mailto:inline@mail.tld
!! end
+
+#
+# Security and HTML correctness
+# From Nick Jenkins' fuzz testing
+#
+
+!! test
+Fuzz testing: Parser13
+!! input
+{|
+| http://a|
+!! result
+
+
+!! end
+
+!! test
+Fuzz testing: Parser14
+!! input
+== onmouseover= ==
+http://__TOC__
+!! result
+ onmouseover=
+http://
+
+!! end
+
+!! test
+Fuzz testing: Parser14-table
+!! input
+==a==
+{| STYLE=__TOC__
+!! result
+a
+
+
+!! end
+
+# Known to produce bogus xml (extra )
+!! test
+Fuzz testing: Parser16
+!! options
+noxml
+!! input
+{|
+!https://||||||
+!! result
+
+
+!! end
+
+!! test
+Fuzz testing: Parser21
+!! input
+{|
+! irc://{{ftp://a" onmouseover="alert('hello world');"
+|
+!! result
+
+
+!! end
+
+!! test
+Fuzz testing: Parser22
+!! input
+http://===r:::https://b
+
+{|
+!!result
+http://===r:::https://b
+
+
+
+!! end
+
+# Known to produce bad XML for now
+!! test
+Fuzz testing: Parser24
+!! options
+noxml
+!! input
+{|
+{{{|
+}}}} >
+
+
+MOVE YOUR MOUSE CURSOR OVER THIS TEXT
+|
+!! result
+
+
+} >
+
+
+MOVE YOUR MOUSE CURSOR OVER THIS TEXT
+
+
+ |
+
+
+
+!! end
+
+# Known to produce bad XML for now
+!!test
+Fuzz testing: Parser25 (bug 6055)
+!! options
+noxml
+!! input
+{{{
+|
+
+}}}blah" onmouseover="alert('hello world');" align="left"'''MOVE MOUSE CURSOR OVER HERE
+!! result
+
+blah" onmouseover="alert('hello world');" align="left"MOVE MOUSE CURSOR OVER HERE
+
+!! end
+
#
#
#