Replace invalid UTF-8 sequences with U+FFFD in edit summaries
author: This, that and the other <at.light@live.com.au>
Sun, 1 Jan 2017 04:04:56 +0000 (15:04 +1100)
committer: Bartosz Dziewoński <matma.rex@gmail.com>
Wed, 4 Jan 2017 21:18:48 +0000 (21:18 +0000)
Bug: T95353
Change-Id: Ib22b563e7bc4022754752632f0eebfad00e8eb01

includes/Sanitizer.php

index 44e4e3e..6779189 100644 (file)
@@ -1262,8 +1262,9 @@ class Sanitizer {
        static function escapeHtmlAllowEntities( $html ) {
                $html = Sanitizer::decodeCharReferences( $html );
                # It seems wise to escape ' as well as ", as a matter of course.  Can't
-               # hurt.
-               $html = htmlspecialchars( $html, ENT_QUOTES );
+               # hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters
+               # don't cause the entire string to disappear.
+               $html = htmlspecialchars( $html, ENT_QUOTES | ENT_SUBSTITUTE );
                return $html;
        }