From 13b514edaec25ff24cf37d58487c0ff598ec4e36 Mon Sep 17 00:00:00 2001
From: Tim Starling <tstarling@users.mediawiki.org>
Date: Tue, 20 Mar 2012 04:39:09 +0000
Subject: [PATCH] Fixed a few "strip tag exposed" bugs. * Introduced
 Parser::killMarkers() based on the concept from StringFunctions. Used it in
 cases where markerSkipCallback() doesn't make sense semantically, namely
 grammar, padleft, padright and anchorencode. Used markerSkipCallback() in
 other cases. * Changed headline unstrip order as suggested by P.Copp on bug
 18295 * In CPF::lc() and CPF::uc(), removed the is_callable() check. This was a
 temporary testing hack committed by me in r30109, which allowed me to do
 differential testing against a copy of the parser from before that revision.

---
 RELEASE-NOTES-1.19                      |  4 ++
 includes/parser/CoreParserFunctions.php | 40 +++++------
 includes/parser/Parser.php              | 18 ++++-
 includes/parser/StripState.php          | 10 +++
 tests/parser/parserTests.txt            | 90 +++++++++++++++++++++++++
 5 files changed, 139 insertions(+), 23 deletions(-)

diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19
index a7e50063e1..396bdf2a18 100644
--- a/RELEASE-NOTES-1.19
+++ b/RELEASE-NOTES-1.19
@@ -27,6 +27,10 @@ production.
 * (bug 35303) Proxy and DNS blacklist blocking works again
 * (bug 35294) jquery.byteLimit shouldn't set element specific variables outside
   the "return this.each" loop.
+* (bug 21054) Remove or skip strip markers from tag hooks like &lt;nowiki&gt; in 
+  core parser functions which operate on strings, such as formatnum.
+* (bug 18295) Don't expose strip markers when a tag appears inside a link 
+  inside a heading.
 
 === Configuration changes in 1.19 ===
 * Removed SkinTemplateSetupPageCss hook; use BeforePageDisplay instead.
diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php
index 91dab0e942..0abfcef849 100644
--- a/includes/parser/CoreParserFunctions.php
+++ b/includes/parser/CoreParserFunctions.php
@@ -165,17 +165,21 @@ class CoreParserFunctions {
 
 			// Encode as though it's a wiki page, '_' for ' '.
 			case 'url_wiki':
-				return wfUrlencode( str_replace( ' ', '_', $s ) );
+				$func = 'wfUrlencode';
+				$s = str_replace( ' ', '_', $s );
+				break;
 
 			// Encode for an HTTP Path, '%20' for ' '.
 			case 'url_path':
-				return rawurlencode( $s );
+				$func = 'rawurlencode';
+				break;
 
 			// Encode for HTTP query, '+' for ' '.
 			case 'url_query':
 			default:
-				return urlencode( $s );
+				$func = 'urlencode';
 		}
+		return $parser->markerSkipCallback( $s, $func );
 	}
 
 	static function lcfirst( $parser, $s = '' ) {
@@ -195,11 +199,7 @@ class CoreParserFunctions {
 	 */
 	static function lc( $parser, $s = '' ) {
 		global $wgContLang;
-		if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
-			return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) );
-		} else {
-			return $wgContLang->lc( $s );
-		}
+		return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) );
 	}
 
 	/**
@@ -209,11 +209,7 @@ class CoreParserFunctions {
 	 */
 	static function uc( $parser, $s = '' ) {
 		global $wgContLang;
-		if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
-			return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) );
-		} else {
-			return $wgContLang->uc( $s );
-		}
+		return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) );
 	}
 
 	static function localurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getLocalURL', $s, $arg ); }
@@ -253,12 +249,13 @@ class CoreParserFunctions {
 	 * @param null $raw
 	 * @return
 	 */
-	static function formatNum( $parser, $num = '', $raw = null) {
-		if ( self::israw( $raw ) ) {
-			return $parser->getFunctionLang()->parseFormattedNumber( $num );
+	static function formatnum( $parser, $num = '', $raw = null) {
+		if ( self::isRaw( $raw ) ) {
+			$func = array( $parser->getFunctionLang(), 'parseFormattedNumber' );
 		} else {
-			return $parser->getFunctionLang()->formatNum( $num );
+			$func = array( $parser->getFunctionLang(), 'formatNum' );
 		}
+		return $parser->markerSkipCallback( $num, $func );
 	}
 
 	/**
@@ -268,6 +265,7 @@ class CoreParserFunctions {
 	 * @return
 	 */
 	static function grammar( $parser, $case = '', $word = '' ) {
+		$word = $parser->killMarkers( $word );
 		return $parser->getFunctionLang()->convertGrammar( $word, $case );
 	}
 
@@ -637,7 +635,8 @@ class CoreParserFunctions {
 	 * Unicode-safe str_pad with the restriction that $length is forced to be <= 500
 	 * @return string
 	 */
-	static function pad( $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
+	static function pad( $parser, $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
+		$padding = $parser->killMarkers( $padding );
 		$lengthOfPadding = mb_strlen( $padding );
 		if ( $lengthOfPadding == 0 ) return $string;
 
@@ -661,11 +660,11 @@ class CoreParserFunctions {
 	}
 
 	static function padleft( $parser, $string = '', $length = 0, $padding = '0' ) {
-		return self::pad( $string, $length, $padding, STR_PAD_LEFT );
+		return self::pad( $parser, $string, $length, $padding, STR_PAD_LEFT );
 	}
 
 	static function padright( $parser, $string = '', $length = 0, $padding = '0' ) {
-		return self::pad( $string, $length, $padding );
+		return self::pad( $parser, $string, $length, $padding );
 	}
 
 	/**
@@ -674,6 +673,7 @@ class CoreParserFunctions {
 	 * @return string
 	 */
 	static function anchorencode( $parser, $text ) {
+		$text = $parser->killMarkers( $text );
 		return substr( $parser->guessSectionNameFromWikiText( $text ), 1);
 	}
 
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index cff7217782..998286c9f1 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4069,14 +4069,16 @@ class Parser {
 			}
 
 			# The safe header is a version of the header text safe to use for links
-			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
-			$safeHeadline = $this->mStripState->unstripBoth( $headline );
 
 			# Remove link placeholders by the link text.
 			#     <!--LINK number-->
 			# turns into
 			#     link text with suffix
-			$safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
+			# Do this before unstrip since link text can contain strip markers
+			$safeHeadline = $this->replaceLinkHoldersText( $headline );
+
+			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
+			$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
 
 			# Strip out HTML (first regex removes any tag not allowed)
 			# Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284)
@@ -5646,6 +5648,16 @@ class Parser {
 		return $out;
 	}
 
+	/**
+	 * Remove any strip markers found in the given text.
+	 *
+	 * @param $text Input string
+	 * @return string
+	 */
+	function killMarkers( $text ) {
+		return $this->mStripState->killMarkers( $text );
+	}
+
 	/**
 	 * Save the parser state required to convert the given half-parsed text to
 	 * HTML. "Half-parsed" in this context means the output of
diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php
index aac2257a16..7ad80fa16f 100644
--- a/includes/parser/StripState.php
+++ b/includes/parser/StripState.php
@@ -181,5 +181,15 @@ class StripState {
 		$key = $m[1];
 		return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
 	}
+
+	/**
+	 * Remove any strip markers found in the given text.
+	 *
+	 * @param $text Input string
+	 * @return string
+	 */
+	function killMarkers( $text ) {
+		return preg_replace( $this->regex, '', $text );
+	}
 }
 
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 65bd8258dc..d304b19c76 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -9096,6 +9096,96 @@ title=[[Main Page]]
 
 !! end
 
+!! test
+Strip marker in urlencode
+!! input
+{{urlencode:x<nowiki/>y}}
+{{urlencode:x<nowiki/>y|wiki}}
+{{urlencode:x<nowiki/>y|path}}
+!! result
+<p>xy
+xy
+xy
+</p>
+!! end
+
+!! test
+Strip marker in lc
+!! input
+{{lc:x<nowiki/>y}}
+!! result
+<p>xy
+</p>
+!! end
+
+!! test
+Strip marker in uc
+!! input
+{{uc:x<nowiki/>y}}
+!! result
+<p>XY
+</p>
+!! end
+
+!! test
+Strip marker in formatNum
+!! input
+{{formatnum:1<nowiki/>2}}
+{{formatnum:1<nowiki/>2|R}}
+!! result
+<p>12
+12
+</p>
+!! end
+
+!! test
+Strip marker in grammar
+!! options
+language=fi
+!! input
+{{grammar:elative|foo<nowiki/>bar}}
+!! result
+<p>foobarista
+</p>
+!! end
+
+!! test
+Strip marker in padleft
+!! input
+{{padleft:|2|x<nowiki/>y}}
+!! result
+<p>xy
+</p>
+!! end
+
+!! test
+Strip marker in padright
+!! input
+{{padright:|2|x<nowiki/>y}}
+!! result
+<p>xy
+</p>
+!! end
+
+!! test
+Strip marker in anchorencode
+!! input
+{{anchorencode:x<nowiki/>y}}
+!! result
+<p>xy
+</p>
+!! end
+
+!! test
+nowiki inside link inside heading (bug 18295)
+!! input
+==[[foo|x<nowiki>y</nowiki>z]]==
+!! result
+<h2><span class="editsection">[<a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: xyz">edit</a>]</span> <span class="mw-headline" id="xyz"><a href="/index.php?title=Foo&amp;action=edit&amp;redlink=1" class="new" title="Foo (page does not exist)">xyz</a></span></h2>
+
+!! end
+
+
 TODO:
 more images
 more tables
-- 
2.20.1