From 13b514edaec25ff24cf37d58487c0ff598ec4e36 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Tue, 20 Mar 2012 04:39:09 +0000 Subject: [PATCH] Fixed a few "strip tag exposed" bugs. * Introduced Parser::killMarkers() based on the concept from StringFunctions. Used it in cases where markerSkipCallback() doesn't make sense semantically, namely grammar, padleft, padright and anchorencode. Used markerSkipCallback() in other cases. * Changed headline unstrip order as suggested by P.Copp on bug 18295 * In CPF::lc() and CPF::uc(), removed the is_callable(). This was a temporary testing hack committed by me in r30109, which allowed me to do differential testing against a copy of the parser from before that revision. --- RELEASE-NOTES-1.19 | 4 ++ includes/parser/CoreParserFunctions.php | 40 +++++------ includes/parser/Parser.php | 18 ++++- includes/parser/StripState.php | 10 +++ tests/parser/parserTests.txt | 90 +++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 23 deletions(-) diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index a7e50063e1..396bdf2a18 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -27,6 +27,10 @@ production. * (bug 35303) Proxy and DNS blacklist blocking works again * (bug 35294) jquery.byteLimit shouldn't set element specific variables outside the "return this.each" loop. +* (bug 21054) Remove or skip strip markers from tag hooks like <nowiki> in + core parser functions which operate on strings, such as formatnum. +* (bug 18295) Don't expose strip markers when a tag appears inside a link + inside a heading. === Configuration changes in 1.19 === * Removed SkinTemplateSetupPageCss hook; use BeforePageDisplay instead. 
diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index 91dab0e942..0abfcef849 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -165,17 +165,21 @@ class CoreParserFunctions { // Encode as though it's a wiki page, '_' for ' '. case 'url_wiki': - return wfUrlencode( str_replace( ' ', '_', $s ) ); + $func = 'wfUrlencode'; + $s = str_replace( ' ', '_', $s ); + break; // Encode for an HTTP Path, '%20' for ' '. case 'url_path': - return rawurlencode( $s ); + $func = 'rawurlencode'; + break; // Encode for HTTP query, '+' for ' '. case 'url_query': default: - return urlencode( $s ); + $func = 'urlencode'; } + return $parser->markerSkipCallback( $s, $func ); } static function lcfirst( $parser, $s = '' ) { @@ -195,11 +199,7 @@ class CoreParserFunctions { */ static function lc( $parser, $s = '' ) { global $wgContLang; - if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) { - return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) ); - } else { - return $wgContLang->lc( $s ); - } + return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) ); } /** @@ -209,11 +209,7 @@ class CoreParserFunctions { */ static function uc( $parser, $s = '' ) { global $wgContLang; - if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) { - return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) ); - } else { - return $wgContLang->uc( $s ); - } + return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) ); } static function localurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getLocalURL', $s, $arg ); } @@ -253,12 +249,13 @@ class CoreParserFunctions { * @param null $raw * @return */ - static function formatNum( $parser, $num = '', $raw = null) { - if ( self::israw( $raw ) ) { - return $parser->getFunctionLang()->parseFormattedNumber( $num ); + static function formatnum( $parser, $num = '', $raw = null) { + if ( self::isRaw( $raw ) 
) { + $func = array( $parser->getFunctionLang(), 'parseFormattedNumber' ); } else { - return $parser->getFunctionLang()->formatNum( $num ); + $func = array( $parser->getFunctionLang(), 'formatNum' ); } + return $parser->markerSkipCallback( $num, $func ); } /** @@ -268,6 +265,7 @@ class CoreParserFunctions { * @return */ static function grammar( $parser, $case = '', $word = '' ) { + $word = $parser->killMarkers( $word ); return $parser->getFunctionLang()->convertGrammar( $word, $case ); } @@ -637,7 +635,8 @@ class CoreParserFunctions { * Unicode-safe str_pad with the restriction that $length is forced to be <= 500 * @return string */ - static function pad( $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) { + static function pad( $parser, $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) { + $padding = $parser->killMarkers( $padding ); $lengthOfPadding = mb_strlen( $padding ); if ( $lengthOfPadding == 0 ) return $string; @@ -661,11 +660,11 @@ class CoreParserFunctions { } static function padleft( $parser, $string = '', $length = 0, $padding = '0' ) { - return self::pad( $string, $length, $padding, STR_PAD_LEFT ); + return self::pad( $parser, $string, $length, $padding, STR_PAD_LEFT ); } static function padright( $parser, $string = '', $length = 0, $padding = '0' ) { - return self::pad( $string, $length, $padding ); + return self::pad( $parser, $string, $length, $padding ); } /** @@ -674,6 +673,7 @@ class CoreParserFunctions { * @return string */ static function anchorencode( $parser, $text ) { + $text = $parser->killMarkers( $text ); return substr( $parser->guessSectionNameFromWikiText( $text ), 1); } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index cff7217782..998286c9f1 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4069,14 +4069,16 @@ class Parser { } # The safe header is a version of the header text safe to use for links - # Avoid insertion of weird stuff like by expanding the 
relevant sections - $safeHeadline = $this->mStripState->unstripBoth( $headline ); # Remove link placeholders by the link text. # # turns into # link text with suffix - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); + # Do this before unstrip since link text can contain strip markers + $safeHeadline = $this->replaceLinkHoldersText( $headline ); + + # Avoid insertion of weird stuff like by expanding the relevant sections + $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); # Strip out HTML (first regex removes any tag not allowed) # Allowed tags are and (bug 8393), (bug 26375) and (r105284) @@ -5646,6 +5648,16 @@ class Parser { return $out; } + /** + * Remove any strip markers found in the given text. + * + * @param $text Input string + * @return string + */ + function killMarkers( $text ) { + return $this->mStripState->killMarkers( $text ); + } + /** * Save the parser state required to convert the given half-parsed text to * HTML. "Half-parsed" in this context means the output of diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php index aac2257a16..7ad80fa16f 100644 --- a/includes/parser/StripState.php +++ b/includes/parser/StripState.php @@ -181,5 +181,15 @@ class StripState { $key = $m[1]; return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX; } + + /** + * Remove any strip markers found in the given text. + * + * @param $text Input string + * @return string + */ + function killMarkers( $text ) { + return preg_replace( $this->regex, '', $text ); + } } diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 65bd8258dc..d304b19c76 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -9096,6 +9096,96 @@ title=[[Main Page]] !! end +!! test +Strip marker in urlencode +!! input +{{urlencode:xy}} +{{urlencode:xy|wiki}} +{{urlencode:xy|path}} +!! result +

xy +xy +xy +

+!! end + +!! test +Strip marker in lc +!! input +{{lc:xy}} +!! result +

xy +

+!! end + +!! test +Strip marker in uc +!! input +{{uc:xy}} +!! result +

XY +

+!! end + +!! test +Strip marker in formatNum +!! input +{{formatnum:12}} +{{formatnum:12|R}} +!! result +

12 +12 +

+!! end + +!! test +Strip marker in grammar +!! options +language=fi +!! input +{{grammar:elative|foobar}} +!! result +

foobarista +

+!! end + +!! test +Strip marker in padleft +!! input +{{padleft:|2|xy}} +!! result +

xy +

+!! end + +!! test +Strip marker in padright +!! input +{{padright:|2|xy}} +!! result +

xy +

+!! end + +!! test +Strip marker in anchorencode +!! input +{{anchorencode:xy}} +!! result +

xy +

+!! end + +!! test +nowiki inside link inside heading (bug 18295) +!! input +==[[foo|xyz]]== +!! result +

[edit] xyz

+ +!! end + + TODO: more images more tables -- 2.20.1