From e4d6238c00466c3c374313def97b32b55070661d Mon Sep 17 00:00:00 2001 From: Roan Kattouw Date: Mon, 26 Oct 2015 20:17:37 -0700 Subject: [PATCH] Language::truncate(): don't chop up multibyte characters when input contains newlines To detect whether the truncation had chopped up a multibyte character after the first byte, a regex was used. But in this regex, the dot (.) didn't match newlines, so it failed to detect chopped multibyte characters (after the first byte) if there was a newline preceding the chopped character. Bug: T116693 Change-Id: I66e4fd451acac0a1019da7060d5a37d70963a15a --- languages/Language.php | 3 ++- tests/phpunit/languages/LanguageTest.php | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/languages/Language.php b/languages/Language.php index 50ed513734..3ea2693598 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -3691,8 +3691,9 @@ class Language { # We got the first byte only of a multibyte char; remove it. $string = substr( $string, 0, -1 ); } elseif ( $char >= 0x80 && + // Use the /s modifier (PCRE_DOTALL) so (.*) also matches newlines preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . - '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) + '[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) ) { # We chopped in the middle of a character; remove it $string = $m[1]; diff --git a/tests/phpunit/languages/LanguageTest.php b/tests/phpunit/languages/LanguageTest.php index 4fca00237a..77c3c02368 100644 --- a/tests/phpunit/languages/LanguageTest.php +++ b/tests/phpunit/languages/LanguageTest.php @@ -261,6 +261,16 @@ class LanguageTest extends LanguageClassesTestCase { $this->getLang()->truncate( "1234567890", 5, 'XXX', false ), 'truncate without adjustment' ); + $this->assertEquals( + "泰乐菌...", + $this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ), + 'truncate does not chop Unicode characters in half' + ); + $this->assertEquals( + "\n泰乐菌...", + $this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ), + 'truncate does not chop Unicode characters in half if there is a preceding newline' + ); } /** -- 2.20.1