From 4cf29ca48fdc44c0672fc4c3215c70885f427672 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 20 Apr 2007 20:26:15 +0000 Subject: [PATCH] Change Sanitizer::stripAllTags() to return plaintext instead of encoded HTML literals; this fits in better with updated code and fixes a regression in image alt text encoding. The only other remaining use I see of it is in ChemFunctions, which seems to be assuming plaintext output already and applies extra encoding. --- includes/Sanitizer.php | 27 ++++++++++++++------------- maintenance/parserTests.txt | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index ebb0dcccae..572dafddec 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -847,11 +847,16 @@ class Sanitizer { */ private static function normalizeAttributeValue( $text ) { return str_replace( '"', '&quot;', - preg_replace( - '/\r\n|[\x20\x0d\x0a\x09]/', - ' ', + self::normalizeWhitespace( Sanitizer::normalizeCharReferences( $text ) ) ); } + + private static function normalizeWhitespace( $text ) { + return preg_replace( + '/\r\n|[\x20\x0d\x0a\x09]/', + ' ', + $text ); + } /** * Ensure that any entities and character references are legal @@ -1170,8 +1175,10 @@ class Sanitizer { /** * Take a fragment of (potentially invalid) HTML and return - * a version with any tags removed, encoded suitably for literal - * inclusion in an attribute value. + * a version with any tags removed, encoded as plain text. + * + * Warning: this return value must be further escaped for literal + * inclusion in HTML output as of 1.10! * * @param string $text HTML fragment * @return string @@ -1181,14 +1188,8 @@ class Sanitizer { $text = StringUtils::delimiterReplace( '<', '>', '', $text ); # Normalize &entities and whitespace - $text = Sanitizer::normalizeAttributeValue( $text ); - - # Will be placed into "double-quoted" attributes, - # make sure remaining bits are safe. 
- $text = str_replace( - array('<', '>', '"'), - array('&lt;', '&gt;', '&quot;'), - $text ); + $text = self::decodeCharReferences( $text ); + $text = self::normalizeWhitespace( $text ); return $text; } diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 69fbe391ae..dd1a6c4c4e 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -3105,7 +3105,7 @@ BUG 499: Alt text should have Ӓ, not &1234; !! input [[Image:foobar.jpg|♀]] !! result -

♀ +

♀

!! end -- 2.20.1