From: This, that and the other
Date: Sun, 1 Jan 2017 04:04:56 +0000 (+1100)
Subject: Replace invalid UTF-8 sequences with U+FFFD in edit summaries
X-Git-Tag: 1.31.0-rc.0~4398^2
X-Git-Url: https://git.cyclocoop.org/%27.%24link.%27?a=commitdiff_plain;h=7551bdca4e9b7d96e2dd1172595a6b0f2086081c;p=lhc%2Fweb%2Fwiklou.git

Replace invalid UTF-8 sequences with U+FFFD in edit summaries

Bug: T95353
Change-Id: Ib22b563e7bc4022754752632f0eebfad00e8eb01
---

diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 44e4e3eb91..6779189486 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -1262,8 +1262,9 @@ class Sanitizer {
 	static function escapeHtmlAllowEntities( $html ) {
 		$html = Sanitizer::decodeCharReferences( $html );
 		# It seems wise to escape ' as well as ", as a matter of course. Can't
-		# hurt.
-		$html = htmlspecialchars( $html, ENT_QUOTES );
+		# hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
+		# don't cause the entire string to disappear.
+		$html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
 		return $html;
 	}
 