From 2fec5a79e17daa3df6ba1e78dfa2fe4797103b5e Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 9 Jun 2006 21:21:00 +0000 Subject: [PATCH] * Whitespace now normalized more or less properly in HTML attributes --- RELEASE-NOTES | 3 ++ includes/Sanitizer.php | 83 +++++++++++++++++++++++++++---------- maintenance/parserTests.txt | 40 +++++++++++++++++- 3 files changed, 101 insertions(+), 25 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index cc6ac370e0..fa0058d9f5 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -467,6 +467,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * (bug 2069) Merge the LanguageUtf8 class into the Language class * Update to Yiddish localization (yi) * (bug 6254) Update to Indonesian translation (id) #20 +* (bug 6255) Fix transclusions starting with "#" or "*" in HTML attributes +* Whitespace now normalized more or less properly in HTML attributes + == Compatibility == diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 00b96908dc..6d731c2366 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -618,35 +618,66 @@ class Sanitizer { $attribs = array(); foreach( $stripped as $attribute => $value ) { $encAttribute = htmlspecialchars( $attribute ); - - $encValue = htmlspecialchars( $value ); - # Templates and links may be expanded in later parsing, - # creating invalid or dangerous output. Suppress this. - $encValue = strtr( $encValue, array( - '<' => '&lt;', // This should never happen, - '>' => '&gt;', // we've received invalid input - '"' => '&quot;', // which should have been escaped. - '{' => '&#123;', - '[' => '&#91;', - "''" => '&#39;&#39;', - 'ISBN' => '&#73;SBN', - 'RFC' => '&#82;FC', - 'PMID' => '&#80;MID', - '|' => '&#124;', - '__' => '&#95;_', - ) ); - - # Stupid hack - $encValue = preg_replace_callback( - '/(' . wfUrlProtocols() . 
')/', - array( 'Sanitizer', 'armorLinksCallback' ), - $encValue ); + $encValue = Sanitizer::safeEncodeAttribute( $value ); $attribs[] = "$encAttribute=\"$encValue\""; } return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : ''; } + /** + * Encode an attribute value for HTML output. + * @param $text + * @return HTML-encoded text fragment + */ + function encodeAttribute( $text ) { + $encValue = htmlspecialchars( $text ); + + // Whitespace is normalized during attribute decoding, + // so if we've been passed non-spaces we must encode them + // ahead of time or they won't be preserved. + $encValue = strtr( $encValue, array( + "\n" => '&#10;', + "\r" => '&#13;', + "\t" => '&#9;', + ) ); + + return $encValue; + } + + /** + * Encode an attribute value for HTML tags, with extra armoring + * against further wiki processing. + * @param $text + * @return HTML-encoded text fragment + */ + function safeEncodeAttribute( $text ) { + $encValue = Sanitizer::encodeAttribute( $text ); + + # Templates and links may be expanded in later parsing, + # creating invalid or dangerous output. Suppress this. + $encValue = strtr( $encValue, array( + '<' => '&lt;', // This should never happen, + '>' => '&gt;', // we've received invalid input + '"' => '&quot;', // which should have been escaped. + '{' => '&#123;', + '[' => '&#91;', + "''" => '&#39;&#39;', + 'ISBN' => '&#73;SBN', + 'RFC' => '&#82;FC', + 'PMID' => '&#80;MID', + '|' => '&#124;', + '__' => '&#95;_', + ) ); + + # Stupid hack + $encValue = preg_replace_callback( + '/(' . wfUrlProtocols() . 
')/', + array( 'Sanitizer', 'armorLinksCallback' ), + $encValue ); + return $encValue; + } + /** * Given a value escape it so that it can be used in an id attribute and * return it, this does not validate the value however (see first link) @@ -711,6 +742,12 @@ class Sanitizer { foreach( $pairs as $set ) { $attribute = strtolower( $set[1] ); $value = Sanitizer::getTagAttributeCallback( $set ); + + // Normalize whitespace + $value = preg_replace( '/[\t\r\n ]+/', ' ', $value ); + $value = trim( $value ); + + // Decode character references $attribs[$attribute] = Sanitizer::decodeCharReferences( $value ); } return $attribs; diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 904156147c..489729ba21 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -3497,7 +3497,7 @@ Bug 2304: HTML attribute safety (unsafe breakout parameter 2; 2309) !! input {{div style|" >}} !! result -
Magic div
+
Magic div
!! end @@ -3668,6 +3668,42 @@ Table attribute safety !! end + +!! article +Template:Identity +!! text +{{{1}}} +!! endarticle + +!! test +Expansion of multi-line templates in attribute values (bug 6255) +!! input +
-
+!! result +
-
+ +!! end + + +!! test +Expansion of multi-line templates in attribute values (bug 6255 sanity check) +!! input +
-
+!! result +
-
+ +!! end + +!! test +Expansion of multi-line templates in attribute values (bug 6255 sanity check) +!! input +
-
+!! result +
-
+ +!! end + ### ### Parser hooks (see maintenance/parserTestsParserHook.php for the extension) ### @@ -4290,7 +4326,7 @@ MOVE YOUR MOUSE CURSOR OVER THIS TEXT } > -
+
MOVE YOUR MOUSE CURSOR OVER THIS TEXT
-- 2.20.1