From 2d2575852c01a180ccd75c8a4fa60d8db323eeb6 Mon Sep 17 00:00:00 2001 From: petarpetkovic Date: Wed, 6 Dec 2017 16:59:30 +0100 Subject: [PATCH] Truncate tag filter descriptions Introduce truncateInternal() method in Language class, based on existing truncate() method. New method abstracts string truncation, allowing users to specify callable functions for text length measurement and string truncation. New method, truncateInternal(), is used to provide two options for text truncation: * For DB usage: truncateForDatabase() method is truncating text by number of bytes. * For UI usage: truncateForVisual() method is truncating text by number of characters, using multibyte string PHP methods. Old truncate() method is deprecated and just returns the results of truncateForDatabase() method. Newly introduced truncateForVisual() method is used for truncation of long tag descriptions in RCFilters menu. Bug: T179626 Change-Id: Ib01a8c303304064dde3ce983b817d93a88a5affd --- includes/changetags/ChangeTags.php | 22 ++++ .../specialpage/ChangesListSpecialPage.php | 14 ++- languages/Language.php | 104 +++++++++++++++--- tests/phpunit/languages/LanguageTest.php | 60 +++++++--- 4 files changed, 170 insertions(+), 30 deletions(-) diff --git a/includes/changetags/ChangeTags.php b/includes/changetags/ChangeTags.php index 7e4dd006ac..b30b82df1d 100644 --- a/includes/changetags/ChangeTags.php +++ b/includes/changetags/ChangeTags.php @@ -181,6 +181,28 @@ class ChangeTags { return $msg; } + /** + * Get truncated message for the tag's long description. + * + * @param string $tag Tag name. + * @param int $length Maximum length of truncated message, including ellipsis. + * @param IContextSource $context + * + * @return string Truncated long tag description. + */ + public static function truncateTagDescription( $tag, $length, IContextSource $context ) { + $originalDesc = self::tagLongDescriptionMessage( $tag, $context ); + // If there is no tag description, return empty string + if ( !$originalDesc ) { + return ''; + } + + $taglessDesc = Sanitizer::stripAllTags( $originalDesc->parse() ); + $escapedDesc = Sanitizer::escapeHtmlAllowEntities( $taglessDesc ); + + return $context->getLanguage()->truncateForVisual( $escapedDesc, $length ); + } + /** * Add tags to a change given its rc_id, rev_id and/or log_id * diff --git a/includes/specialpage/ChangesListSpecialPage.php b/includes/specialpage/ChangesListSpecialPage.php index cf990c2839..5aa1c6bc31 100644 --- a/includes/specialpage/ChangesListSpecialPage.php +++ b/includes/specialpage/ChangesListSpecialPage.php @@ -33,6 +33,12 @@ use Wikimedia\Rdbms\IDatabase; * @ingroup SpecialPage */ abstract class ChangesListSpecialPage extends SpecialPage { + /** + * Maximum length of a tag description in UTF-8 characters. + * Longer descriptions will be truncated. + */ + const TAG_DESC_CHARACTER_LIMIT = 120; + /** * Preference name for saved queries. Subclasses that use saved queries should override this. * @var string @@ -794,15 +800,15 @@ abstract class ChangesListSpecialPage extends SpecialPage { isset( $explicitlyDefinedTags[ $tagName ] ) || isset( $softwareActivatedTags[ $tagName ] ) ) { - // Parse description - $desc = ChangeTags::tagLongDescriptionMessage( $tagName, $context ); - $result[] = [ 'name' => $tagName, 'label' => Sanitizer::stripAllTags( ChangeTags::tagDescription( $tagName, $context ) ), - 'description' => $desc ? Sanitizer::stripAllTags( $desc->parse() ) : '', + 'description' => + ChangeTags::truncateTagDescription( + $tagName, self::TAG_DESC_CHARACTER_LIMIT, $context + ), 'cssClass' => Sanitizer::escapeClass( 'mw-tag-' . $tagName ), 'hits' => $hits, ]; diff --git a/languages/Language.php b/languages/Language.php index 27c9faf307..252ce47ae0 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -3472,27 +3472,103 @@ class Language { } /** - * Truncate a string to a specified length in bytes, appending an optional - * string (e.g. for ellipses) + * This method is deprecated since 1.31 and kept as alias for truncateForDatabase, which + * has replaced it. This method provides truncation suitable for DB. * * The database offers limited byte lengths for some columns in the database; * multi-byte character sets mean we need to ensure that only whole characters - * are included, otherwise broken characters can be passed to the user + * are included, otherwise broken characters can be passed to the user. * - * If $length is negative, the string will be truncated from the beginning + * @deprecated since 1.31, use truncateForDatabase or truncateForVisual as appropriate. * * @param string $string String to truncate - * @param int $length Maximum length (including ellipses) + * @param int $length Maximum length (including ellipsis) * @param string $ellipsis String to append to the truncated text * @param bool $adjustLength Subtract length of ellipsis from $length. * $adjustLength was introduced in 1.18, before that behaved as if false. * @return string */ function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) { + return $this->truncateForDatabase( $string, $length, $ellipsis, $adjustLength ); + } + + /** + * Truncate a string to a specified length in bytes, appending an optional + * string (e.g. for ellipsis) + * + * If $length is negative, the string will be truncated from the beginning + * + * @since 1.31 + * + * @param string $string String to truncate + * @param int $length Maximum length in bytes + * @param string $ellipsis String to append to the end of truncated text + * @param bool $adjustLength Subtract length of ellipsis from $length + * + * @return string + */ + function truncateForDatabase( $string, $length, $ellipsis = '...', $adjustLength = true ) { + return $this->truncateInternal( + $string, $length, $ellipsis, $adjustLength, 'strlen', 'substr' + ); + } + + /** + * Truncate a string to a specified number of characters, appending an optional + * string (e.g. for ellipsis). + * + * This provides multibyte version of truncate() method of this class, suitable for truncation + * based on number of characters, instead of number of bytes. + * + * If $length is negative, the string will be truncated from the beginning. + * + * @since 1.31 + * + * @param string $string String to truncate + * @param int $length Maximum number of characters + * @param string $ellipsis String to append to the end of truncated text + * @param bool $adjustLength Subtract length of ellipsis from $length + * + * @return string + */ + function truncateForVisual( $string, $length, $ellipsis = '...', $adjustLength = true ) { + // Passing encoding to mb_strlen and mb_substr is optional. + // Encoding defaults to mb_internal_encoding(), which is set to UTF-8 in Setup.php, so + // explicit specification of encoding is skipped. + // Note: Both multibyte methods are callables invoked in truncateInternal. + return $this->truncateInternal( + $string, $length, $ellipsis, $adjustLength, 'mb_strlen', 'mb_substr' + ); + } + + /** + * Internal method used for truncation. This method abstracts text truncation into + * one common method, allowing users to provide length measurement function and + * function for finding substring. + * + * For usages, see truncateForDatabase and truncateForVisual. + * + * @param string $string String to truncate + * @param int $length Maximum length of final text + * @param string $ellipsis String to append to the end of truncated text + * @param bool $adjustLength Subtract length of ellipsis from $length + * @param callable $measureLength Callable function used for determining the length of text + * @param callable $getSubstring Callable function used for getting the substrings + * + * @return string + */ + private function truncateInternal( + $string, $length, $ellipsis = '...', $adjustLength = true, $measureLength, $getSubstring + ) { + if ( !is_callable( $measureLength ) || !is_callable( $getSubstring ) ) { + throw new InvalidArgumentException( 'Invalid callback provided' ); + } + # Check if there is no need to truncate - if ( strlen( $string ) <= abs( $length ) ) { + if ( $measureLength( $string ) <= abs( $length ) ) { return $string; // no need to truncate } + # Use the localized ellipsis character if ( $ellipsis == '...' ) { $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped(); @@ -3500,31 +3576,33 @@ class Language { if ( $length == 0 ) { return $ellipsis; // convention } + $stringOriginal = $string; # If ellipsis length is >= $length then we can't apply $adjustLength - if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) { + if ( $adjustLength && $measureLength( $ellipsis ) >= abs( $length ) ) { $string = $ellipsis; // this can be slightly unexpected # Otherwise, truncate and add ellipsis... } else { - $eLength = $adjustLength ? strlen( $ellipsis ) : 0; + $ellipsisLength = $adjustLength ? $measureLength( $ellipsis ) : 0; if ( $length > 0 ) { - $length -= $eLength; - $string = substr( $string, 0, $length ); // xyz... + $length -= $ellipsisLength; + $string = $getSubstring( $string, 0, $length ); // xyz... $string = $this->removeBadCharLast( $string ); $string = rtrim( $string ); $string = $string . $ellipsis; } else { - $length += $eLength; - $string = substr( $string, $length ); // ...xyz + $length += $ellipsisLength; + $string = $getSubstring( $string, $length ); // ...xyz $string = $this->removeBadCharFirst( $string ); $string = ltrim( $string ); $string = $ellipsis . $string; } } + # Do not truncate if the ellipsis makes the string longer/equal (T24181). # This check is *not* redundant if $adjustLength, due to the single case where # LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string. - if ( strlen( $string ) < strlen( $stringOriginal ) ) { + if ( $measureLength( $string ) < $measureLength( $stringOriginal ) ) { return $string; } else { return $stringOriginal; diff --git a/tests/phpunit/languages/LanguageTest.php b/tests/phpunit/languages/LanguageTest.php index 5cb560299f..050ed83bed 100644 --- a/tests/phpunit/languages/LanguageTest.php +++ b/tests/phpunit/languages/LanguageTest.php @@ -209,70 +209,104 @@ class LanguageTest extends LanguageClassesTestCase { } /** - * @covers Language::truncate + * @covers Language::truncateForDatabase + * @covers Language::truncateInternal */ - public function testTruncate() { + public function testTruncateForDatabase() { $this->assertEquals( "XXX", - $this->getLang()->truncate( "1234567890", 0, 'XXX' ), + $this->getLang()->truncateForDatabase( "1234567890", 0, 'XXX' ), 'truncate prefix, len 0, small ellipsis' ); $this->assertEquals( "12345XXX", - $this->getLang()->truncate( "1234567890", 8, 'XXX' ), + $this->getLang()->truncateForDatabase( "1234567890", 8, 'XXX' ), 'truncate prefix, small ellipsis' ); $this->assertEquals( "123456789", - $this->getLang()->truncate( "123456789", 5, 'XXXXXXXXXXXXXXX' ), + $this->getLang()->truncateForDatabase( "123456789", 5, 'XXXXXXXXXXXXXXX' ), 'truncate prefix, large ellipsis' ); $this->assertEquals( "XXX67890", - $this->getLang()->truncate( "1234567890", -8, 'XXX' ), + $this->getLang()->truncateForDatabase( "1234567890", -8, 'XXX' ), 'truncate suffix, small ellipsis' ); $this->assertEquals( "123456789", - $this->getLang()->truncate( "123456789", -5, 'XXXXXXXXXXXXXXX' ), + $this->getLang()->truncateForDatabase( "123456789", -5, 'XXXXXXXXXXXXXXX' ), 'truncate suffix, large ellipsis' ); $this->assertEquals( "123XXX", - $this->getLang()->truncate( "123 ", 9, 'XXX' ), + $this->getLang()->truncateForDatabase( "123 ", 9, 'XXX' ), 'truncate prefix, with spaces' ); $this->assertEquals( "12345XXX", - $this->getLang()->truncate( "12345 8", 11, 'XXX' ), + $this->getLang()->truncateForDatabase( "12345 8", 11, 'XXX' ), 'truncate prefix, with spaces and non-space ending' ); $this->assertEquals( "XXX234", - $this->getLang()->truncate( "1 234", -8, 'XXX' ), + $this->getLang()->truncateForDatabase( "1 234", -8, 'XXX' ), 'truncate suffix, with spaces' ); $this->assertEquals( "12345XXX", - $this->getLang()->truncate( "1234567890", 5, 'XXX', false ), + $this->getLang()->truncateForDatabase( "1234567890", 5, 'XXX', false ), 'truncate without adjustment' ); $this->assertEquals( "泰乐菌...", - $this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ), + $this->getLang()->truncateForDatabase( "泰乐菌素123456789", 11, '...', false ), 'truncate does not chop Unicode characters in half' ); $this->assertEquals( "\n泰乐菌...", - $this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ), + $this->getLang()->truncateForDatabase( "\n泰乐菌素123456789", 12, '...', false ), 'truncate does not chop Unicode characters in half if there is a preceding newline' ); } + /** + * @dataProvider provideTruncateData + * @covers Language::truncateForVisual + * @covers Language::truncateInternal + */ + public function testTruncateForVisual( + $expected, $string, $length, $ellipsis = '...', $adjustLength = true + ) { + $this->assertEquals( + $expected, + $this->getLang()->truncateForVisual( $string, $length, $ellipsis, $adjustLength ) + ); + } + + /** + * @return array Format is ($expected, $string, $length, $ellipsis, $adjustLength) + */ + public static function provideTruncateData() { + return [ + [ "XXX", "тестирам да ли ради", 0, "XXX" ], + [ "testnXXX", "testni scenarij", 8, "XXX" ], + [ "حالة اختبار", "حالة اختبار", 5, "XXXXXXXXXXXXXXX" ], + [ "XXXедент", "прецедент", -8, "XXX" ], + [ "XXപിൾ", "ആപ്പിൾ", -5, "XX" ], + [ "神秘XXX", "神秘 ", 9, "XXX" ], + [ "ΔημιουργXXX", "Δημιουργία Σύμπαντος", 11, "XXX" ], + [ "XXXの家です", "地球は私たちの唯 の家です", -8, "XXX" ], + [ "زندگیXXX", "زندگی زیباست", 6, "XXX", false ], + [ "ცხოვრება...", "ცხოვრება არის საოცარი", 8, "...", false ], + [ "\nທ່ານ...", "\nທ່ານບໍ່ຮູ້ຫນັງສື", 5, "...", false ], + ]; + } + /** * @dataProvider provideHTMLTruncateData * @covers Language::truncateHTML -- 2.20.1