To detect whether the truncation had chopped up a multibyte
character after the first byte, a regex was used. But in this
regex, the dot (.) didn't match newlines, so it failed to
detect chopped multibyte characters (after the first byte)
if there was a newline preceding the chopped character.
Bug: T116693
Change-Id: I66e4fd451acac0a1019da7060d5a37d70963a15a
# We got the first byte only of a multibyte char; remove it.
$string = substr( $string, 0, -1 );
} elseif ( $char >= 0x80 &&
+ // Use the /s modifier (PCRE_DOTALL) so (.*) also matches newlines
preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
- '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m )
+ '[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
) {
# We chopped in the middle of a character; remove it
$string = $m[1];
$this->getLang()->truncate( "1234567890", 5, 'XXX', false ),
'truncate without adjustment'
);
+ $this->assertEquals(
+ "泰乐菌...",
+ $this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ),
+ 'truncate does not chop Unicode characters in half'
+ );
+ $this->assertEquals(
+ "\n泰乐菌...",
+ $this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ),
+ 'truncate does not chop Unicode characters in half if there is a preceding newline'
+ );
}
/**