From 267b582e1620db8d13684001389c4f2e541f9d39 Mon Sep 17 00:00:00 2001 From: MatmaRex Date: Wed, 22 May 2013 10:48:14 +0200 Subject: [PATCH] displaytitle: reject some CSS if $wgRestrictDisplayTitle set $wgRestrictDisplayTitle is intended to make it possible to simply copy-and-paste the title text even if it requires some styling like subscript or superscript. Using a broke that expectation, as the text hidden in such way becomes completely invisible and unselectable. This patch rejects such styles. Also disallowed 'user-select' and 'visibility', since they both prevent the user from selecting and/or copying the text as well. Minor changes in Sanitizer: * checkCss() was made to pass through values which consist of nothing but a single comment, to allow this rejection to display some sort of a notification to the user. * encodeTagAttributes() was added as a counterpart to decodeTagAttributes(), pulling some code out of fixTagAttributes(). Bug: 26547 Change-Id: Ie162535b6bcbebce4ee69f6dcc1957ccccc3c672 --- includes/DefaultSettings.php | 5 +- includes/Sanitizer.php | 65 +++++++++++++++--------- includes/parser/CoreParserFunctions.php | 37 +++++++++++--- tests/parser/parserTests.txt | 34 +++++++++++++ tests/phpunit/includes/SanitizerTest.php | 10 ++-- 5 files changed, 114 insertions(+), 37 deletions(-) diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 9221784939..173605cceb 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -3497,8 +3497,9 @@ $wgNoFollowDomainExceptions = array(); $wgAllowDisplayTitle = true; /** - * For consistency, restrict DISPLAYTITLE to titles that normalize to the same - * canonical DB key. + * For consistency, restrict DISPLAYTITLE to text that normalizes to the same + * canonical DB key. Also disallow some inline CSS rules like display: none; + * which can cause the text to be hidden or unselectable. */ $wgRestrictDisplayTitle = true; diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index ed01235da7..b4a1c62402 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -813,9 +813,10 @@ class Sanitizer { /** * Pick apart some CSS and check it for forbidden or unsafe structures. * Returns a sanitized string. This sanitized string will have - * character references and escape sequences decoded, and comments - * stripped. If the input is just too evil, only a comment complaining - * about evilness will be returned. + * character references and escape sequences decoded and comments + * stripped (unless it is itself one valid comment, in which case the value + * will be passed through). If the input is just too evil, only a comment + * complaining about evilness will be returned. * * Currently URL references, 'expression', 'tps' are forbidden. * @@ -856,19 +857,24 @@ class Sanitizer { $value = preg_replace_callback( $decodeRegex, array( __CLASS__, 'cssDecodeCallback' ), $value ); - // Remove any comments; IE gets token splitting wrong - // This must be done AFTER decoding character references and - // escape sequences, because those steps can introduce comments - // This step cannot introduce character references or escape - // sequences, because it replaces comments with spaces rather - // than removing them completely. - $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); - - // Remove anything after a comment-start token, to guard against - // incorrect client implementations. - $commentPos = strpos( $value, '/*' ); - if ( $commentPos !== false ) { - $value = substr( $value, 0, $commentPos ); + // Let the value through if it's nothing but a single comment, to + // allow other functions which may reject it to pass some error + // message through. + if ( !preg_match( '! ^ \s* /\* [^*\\/]* \*/ \s* $ !x', $value ) ) { + // Remove any comments; IE gets token splitting wrong + // This must be done AFTER decoding character references and + // escape sequences, because those steps can introduce comments + // This step cannot introduce character references or escape + // sequences, because it replaces comments with spaces rather + // than removing them completely. + $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); + + // Remove anything after a comment-start token, to guard against + // incorrect client implementations. + $commentPos = strpos( $value, '/*' ); + if ( $commentPos !== false ) { + $value = substr( $value, 0, $commentPos ); + } } // Reject problematic keywords and control characters @@ -932,14 +938,7 @@ class Sanitizer { $decoded = Sanitizer::decodeTagAttributes( $text ); $stripped = Sanitizer::validateTagAttributes( $decoded, $element ); - $attribs = array(); - foreach ( $stripped as $attribute => $value ) { - $encAttribute = htmlspecialchars( $attribute ); - $encValue = Sanitizer::safeEncodeAttribute( $value ); - - $attribs[] = "$encAttribute=\"$encValue\""; - } - return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : ''; + return Sanitizer::safeEncodeTagAttributes( $stripped ); } /** @@ -1139,6 +1138,24 @@ class Sanitizer { return $attribs; } + /** + * Build a partial tag string from an associative array of attribute + * names and values as returned by decodeTagAttributes. + * + * @param $assoc_array Array + * @return String + */ + public static function safeEncodeTagAttributes( $assoc_array ) { + $attribs = array(); + foreach ( $assoc_array as $attribute => $value ) { + $encAttribute = htmlspecialchars( $attribute ); + $encValue = Sanitizer::safeEncodeAttribute( $value ); + + $attribs[] = "$encAttribute=\"$encValue\""; + } + return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : ''; + } + /** * Pick the appropriate attribute value from a match set from the * attribs regex matches. diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index be945f7aee..375ff2b9bf 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -363,22 +363,43 @@ class CoreParserFunctions { static function displaytitle( $parser, $text = '' ) { global $wgRestrictDisplayTitle; - #parse a limited subset of wiki markup (just the single quote items) + // parse a limited subset of wiki markup (just the single quote items) $text = $parser->doQuotes( $text ); - #remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever + // remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever $text = preg_replace( '/' . preg_quote( $parser->uniqPrefix(), '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' ) . '/', '', $text ); - #list of disallowed tags for DISPLAYTITLE - #these will be escaped even though they are allowed in normal wiki text + // list of disallowed tags for DISPLAYTITLE + // these will be escaped even though they are allowed in normal wiki text $bad = array( 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr', 'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rp', 'br' ); - #only requested titles that normalize to the actual title are allowed through - #if $wgRestrictDisplayTitle is true (it is by default) - #mimic the escaping process that occurs in OutputPage::setPageTitle - $text = Sanitizer::normalizeCharReferences( Sanitizer::removeHTMLtags( $text, null, array(), array(), $bad ) ); + // disallow some styles that could be used to bypass $wgRestrictDisplayTitle + if ( $wgRestrictDisplayTitle ) { + $htmlTagsCallback = function ( $params ) { + $decoded = Sanitizer::decodeTagAttributes( $params ); + + if ( isset( $decoded['style'] ) ) { + // this is called later anyway, but we need it right now for the regexes below to be safe + // calling it twice doesn't hurt + $decoded['style'] = Sanitizer::checkCss( $decoded['style'] ); + + if ( preg_match( '/(display|user-select|visibility)\s*:/i', $decoded['style'] ) ) { + $decoded['style'] = '/* attempt to bypass $wgRestrictDisplayTitle */'; + } + } + + $params = Sanitizer::safeEncodeTagAttributes( $decoded ); + }; + } else { + $htmlTagsCallback = null; + } + + // only requested titles that normalize to the actual title are allowed through + // if $wgRestrictDisplayTitle is true (it is by default) + // mimic the escaping process that occurs in OutputPage::setPageTitle + $text = Sanitizer::normalizeCharReferences( Sanitizer::removeHTMLtags( $text, $htmlTagsCallback, array(), array(), $bad ) ); $title = Title::newFromText( Sanitizer::stripAllTags( $text ) ); if ( !$wgRestrictDisplayTitle ) { diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index d5e221a92d..8995593141 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -13289,6 +13289,40 @@ Screen

!! end +!! test +Verify that displaytitle handles inline CSS styles (bug 26547) - rejected value +!! options +showtitle +title=[[Screen]] +!! config +wgAllowDisplayTitle=true +wgRestrictDisplayTitle=true +!! input +this is not the the title +{{DISPLAYTITLE:screen}} +!! result +screen +

this is not the the title +

+!! end + +!! test +Verify that displaytitle handles inline CSS styles (bug 26547) - accepted value +!! options +showtitle +title=[[Screen]] +!! config +wgAllowDisplayTitle=true +wgRestrictDisplayTitle=true +!! input +this is not the the title +{{DISPLAYTITLE:screen}} +!! result +screen +

this is not the the title +

+!! end + !! test preload: check and !! options diff --git a/tests/phpunit/includes/SanitizerTest.php b/tests/phpunit/includes/SanitizerTest.php index b74542309b..38c15eef07 100644 --- a/tests/phpunit/includes/SanitizerTest.php +++ b/tests/phpunit/includes/SanitizerTest.php @@ -227,10 +227,14 @@ class SanitizerTest extends MediaWikiTestCase { public static function provideCssCommentsFixtures() { /** array( , , [message] ) */ return array( - array( ' ', '/**/' ), + // Valid comments spanning entire input + array( '/**/', '/**/' ), + array( '/* comment */', '/* comment */' ), + // Weird stuff array( ' ', '/****/' ), - array( ' ', '/* comment */' ), - array( ' ', "\\2f\\2a foo \\2a\\2f", + array( ' ', '/* /* */' ), + array( 'display: block;', "display:/* foo */block;" ), + array( 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;", 'Backslash-escaped comments must be stripped (bug 28450)' ), array( '', '/* unfinished comment structure', 'Remove anything after a comment-start token' ), -- 2.20.1