From 7551bdca4e9b7d96e2dd1172595a6b0f2086081c Mon Sep 17 00:00:00 2001
From: "This, that and the other"
Date: Sun, 1 Jan 2017 15:04:56 +1100
Subject: [PATCH] Replace invalid UTF-8 sequences with U+FFFD in edit summaries

Bug: T95353
Change-Id: Ib22b563e7bc4022754752632f0eebfad00e8eb01
---
 includes/Sanitizer.php | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 44e4e3eb91..6779189486 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -1262,8 +1262,9 @@ class Sanitizer {
 	static function escapeHtmlAllowEntities( $html ) {
 		$html = Sanitizer::decodeCharReferences( $html );
 		# It seems wise to escape ' as well as ", as a matter of course. Can't
-		# hurt.
-		$html = htmlspecialchars( $html, ENT_QUOTES );
+		# hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
+		# don't cause the entire string to disappear.
+		$html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
 		return $html;
 	}
 
-- 
2.20.1