return $msg;
}
+ /**
+  * Get truncated message for the tag's long description.
+  *
+  * The description is reduced to plain text and truncated first, and only then
+  * HTML-escaped. Escaping before truncating would make the character limit count
+  * the characters of entities (e.g. "&amp;" as five) and could cut an entity in half.
+  *
+  * @param string $tag Tag name.
+  * @param int $length Maximum length of truncated message, including ellipsis.
+  * @param IContextSource $context
+  *
+  * @return string Truncated long tag description, HTML-escaped; empty string if
+  *  the tag has no long description.
+  */
+ public static function truncateTagDescription( $tag, $length, IContextSource $context ) {
+     $originalDesc = self::tagLongDescriptionMessage( $tag, $context );
+     // If there is no tag description, return empty string
+     if ( !$originalDesc ) {
+         return '';
+     }
+
+     // Measure and cut the text as the reader will see it, not its escaped form.
+     $taglessDesc = Sanitizer::stripAllTags( $originalDesc->parse() );
+     $truncatedDesc = $context->getLanguage()->truncateForVisual( $taglessDesc, $length );
+
+     // NOTE(review): truncateForVisual() may append an already-escaped localized
+     // ellipsis; this relies on escapeHtmlAllowEntities() leaving existing entities
+     // intact (as its name suggests) so that the ellipsis is not double-escaped.
+     return Sanitizer::escapeHtmlAllowEntities( $truncatedDesc );
+ }
+
/**
* Add tags to a change given its rc_id, rev_id and/or log_id
*
* @ingroup SpecialPage
*/
abstract class ChangesListSpecialPage extends SpecialPage {
+ /**
+ * Maximum length of a tag description in UTF-8 characters.
+ * Longer descriptions will be truncated.
+ */
+ const TAG_DESC_CHARACTER_LIMIT = 120;
+
/**
* Preference name for saved queries. Subclasses that use saved queries should override this.
* @var string
isset( $explicitlyDefinedTags[ $tagName ] ) ||
isset( $softwareActivatedTags[ $tagName ] )
) {
- // Parse description
- $desc = ChangeTags::tagLongDescriptionMessage( $tagName, $context );
-
$result[] = [
'name' => $tagName,
'label' => Sanitizer::stripAllTags(
ChangeTags::tagDescription( $tagName, $context )
),
- 'description' => $desc ? Sanitizer::stripAllTags( $desc->parse() ) : '',
+ 'description' =>
+ ChangeTags::truncateTagDescription(
+ $tagName, self::TAG_DESC_CHARACTER_LIMIT, $context
+ ),
'cssClass' => Sanitizer::escapeClass( 'mw-tag-' . $tagName ),
'hits' => $hits,
];
}
/**
- * Truncate a string to a specified length in bytes, appending an optional
- * string (e.g. for ellipses)
+ * This method is deprecated since 1.31 and kept as alias for truncateForDatabase, which
+ * has replaced it. This method provides truncation suitable for DB.
+ * All four arguments are forwarded unchanged to truncateForDatabase().
*
* The database offers limited byte lengths for some columns in the database;
* multi-byte character sets mean we need to ensure that only whole characters
- * are included, otherwise broken characters can be passed to the user
+ * are included, otherwise broken characters can be passed to the user.
*
- * If $length is negative, the string will be truncated from the beginning
+ * @deprecated since 1.31, use truncateForDatabase or truncateForVisual as appropriate.
+ * @see Language::truncateForDatabase()
*
* @param string $string String to truncate
- * @param int $length Maximum length (including ellipses)
+ * @param int $length Maximum length in bytes (including ellipsis)
* @param string $ellipsis String to append to the truncated text
* @param bool $adjustLength Subtract length of ellipsis from $length.
* $adjustLength was introduced in 1.18, before that behaved as if false.
* @return string
*/
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
+ return $this->truncateForDatabase( $string, $length, $ellipsis, $adjustLength );
+ }
+
+ /**
+  * Byte-based truncation: shorten a string so it fits a length given in bytes,
+  * adding an optional marker string (e.g. an ellipsis) where text was cut.
+  *
+  * A negative $length cuts from the beginning of the string instead of the end.
+  *
+  * @since 1.31
+  *
+  * @param string $string String to truncate
+  * @param int $length Maximum length in bytes
+  * @param string $ellipsis String to append to the end of truncated text
+  * @param bool $adjustLength Subtract length of ellipsis from $length
+  *
+  * @return string
+  */
+ function truncateForDatabase( $string, $length, $ellipsis = '...', $adjustLength = true ) {
+     // Byte-oriented primitives, invoked as callables by truncateInternal().
+     $byteLength = 'strlen';
+     $byteSlice = 'substr';
+
+     return $this->truncateInternal(
+         $string, $length, $ellipsis, $adjustLength, $byteLength, $byteSlice
+     );
+ }
+
+ /**
+  * Character-based truncation: shorten a string to a given number of characters,
+  * adding an optional marker string (e.g. an ellipsis) where text was cut.
+  *
+  * This is the multibyte counterpart of truncate(): lengths are counted in
+  * characters rather than bytes.
+  *
+  * A negative $length cuts from the beginning of the string instead of the end.
+  *
+  * @since 1.31
+  *
+  * @param string $string String to truncate
+  * @param int $length Maximum number of characters
+  * @param string $ellipsis String to append to the end of truncated text
+  * @param bool $adjustLength Subtract length of ellipsis from $length
+  *
+  * @return string
+  */
+ function truncateForVisual( $string, $length, $ellipsis = '...', $adjustLength = true ) {
+     // No encoding argument is given to the mb_* functions: they fall back to
+     // mb_internal_encoding(), which is set to UTF-8 in Setup.php.
+     // Both names are passed as callables and invoked inside truncateInternal().
+     $charLength = 'mb_strlen';
+     $charSlice = 'mb_substr';
+
+     return $this->truncateInternal(
+         $string, $length, $ellipsis, $adjustLength, $charLength, $charSlice
+     );
+ }
+
+ /**
+ * Internal method used for truncation. This method abstracts text truncation into
+ * one common method, allowing users to provide length measurement function and
+ * function for finding substring.
+ *
+ * For usages, see truncateForDatabase and truncateForVisual.
+ *
+ * No parameter has a default value: the two callables are mandatory, and PHP
+ * (deprecated since 8.0) does not allow optional parameters to precede required ones.
+ *
+ * @param string $string String to truncate
+ * @param int $length Maximum length of final text
+ * @param string $ellipsis String to append to the end of truncated text;
+ *  the literal '...' is replaced by the localized 'ellipsis' message
+ * @param bool $adjustLength Subtract length of ellipsis from $length
+ * @param callable $measureLength Callable function used for determining the length of text
+ * @param callable $getSubstring Callable function used for getting the substrings
+ *
+ * @throws InvalidArgumentException If either callback is not callable
+ * @return string
+ */
+ private function truncateInternal(
+ $string, $length, $ellipsis, $adjustLength, $measureLength, $getSubstring
+ ) {
+ if ( !is_callable( $measureLength ) || !is_callable( $getSubstring ) ) {
+ throw new InvalidArgumentException( 'Invalid callback provided' );
+ }
+
# Check if there is no need to truncate
- if ( strlen( $string ) <= abs( $length ) ) {
+ if ( $measureLength( $string ) <= abs( $length ) ) {
return $string; // no need to truncate
}
+
# Use the localized ellipsis character
if ( $ellipsis == '...' ) {
$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
if ( $length == 0 ) {
return $ellipsis; // convention
}
+
$stringOriginal = $string;
# If ellipsis length is >= $length then we can't apply $adjustLength
- if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) {
+ if ( $adjustLength && $measureLength( $ellipsis ) >= abs( $length ) ) {
$string = $ellipsis; // this can be slightly unexpected
# Otherwise, truncate and add ellipsis...
} else {
- $eLength = $adjustLength ? strlen( $ellipsis ) : 0;
+ $ellipsisLength = $adjustLength ? $measureLength( $ellipsis ) : 0;
if ( $length > 0 ) {
- $length -= $eLength;
- $string = substr( $string, 0, $length ); // xyz...
+ $length -= $ellipsisLength;
+ $string = $getSubstring( $string, 0, $length ); // xyz...
$string = $this->removeBadCharLast( $string );
$string = rtrim( $string );
$string = $string . $ellipsis;
} else {
- $length += $eLength;
- $string = substr( $string, $length ); // ...xyz
+ $length += $ellipsisLength;
+ $string = $getSubstring( $string, $length ); // ...xyz
$string = $this->removeBadCharFirst( $string );
$string = ltrim( $string );
$string = $ellipsis . $string;
}
}
+
# Do not truncate if the ellipsis makes the string longer/equal (T24181).
# This check is *not* redundant if $adjustLength, due to the single case where
# LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string.
- if ( strlen( $string ) < strlen( $stringOriginal ) ) {
+ if ( $measureLength( $string ) < $measureLength( $stringOriginal ) ) {
return $string;
} else {
return $stringOriginal;
}
/**
+ * Byte-based truncation checks. Inputs that need a run of spaces build it with
+ * str_repeat() so that the padding (and therefore the input length, which must
+ * exceed the limit for truncation to happen) is explicit.
+ *
- * @covers Language::truncate
+ * @covers Language::truncateForDatabase
+ * @covers Language::truncateInternal
*/
- public function testTruncate() {
+ public function testTruncateForDatabase() {
$this->assertEquals(
"XXX",
- $this->getLang()->truncate( "1234567890", 0, 'XXX' ),
+ $this->getLang()->truncateForDatabase( "1234567890", 0, 'XXX' ),
'truncate prefix, len 0, small ellipsis'
);
$this->assertEquals(
"12345XXX",
- $this->getLang()->truncate( "1234567890", 8, 'XXX' ),
+ $this->getLang()->truncateForDatabase( "1234567890", 8, 'XXX' ),
'truncate prefix, small ellipsis'
);
$this->assertEquals(
"123456789",
- $this->getLang()->truncate( "123456789", 5, 'XXXXXXXXXXXXXXX' ),
+ $this->getLang()->truncateForDatabase( "123456789", 5, 'XXXXXXXXXXXXXXX' ),
'truncate prefix, large ellipsis'
);
$this->assertEquals(
"XXX67890",
- $this->getLang()->truncate( "1234567890", -8, 'XXX' ),
+ $this->getLang()->truncateForDatabase( "1234567890", -8, 'XXX' ),
'truncate suffix, small ellipsis'
);
$this->assertEquals(
"123456789",
- $this->getLang()->truncate( "123456789", -5, 'XXXXXXXXXXXXXXX' ),
+ $this->getLang()->truncateForDatabase( "123456789", -5, 'XXXXXXXXXXXXXXX' ),
'truncate suffix, large ellipsis'
);
$this->assertEquals(
"123XXX",
- $this->getLang()->truncate( "123 ", 9, 'XXX' ),
+ // 21 bytes in total: longer than the limit, so the trailing spaces are cut and trimmed.
+ $this->getLang()->truncateForDatabase( "123" . str_repeat( ' ', 18 ), 9, 'XXX' ),
'truncate prefix, with spaces'
);
$this->assertEquals(
"12345XXX",
- $this->getLang()->truncate( "12345 8", 11, 'XXX' ),
+ // 18 bytes in total: the cut lands inside the run of spaces, before the final "8".
+ $this->getLang()->truncateForDatabase( "12345" . str_repeat( ' ', 12 ) . "8", 11, 'XXX' ),
'truncate prefix, with spaces and non-space ending'
);
$this->assertEquals(
"XXX234",
- $this->getLang()->truncate( "1 234", -8, 'XXX' ),
+ // 21 bytes in total: the kept suffix starts inside the run of spaces and is left-trimmed.
+ $this->getLang()->truncateForDatabase( "1" . str_repeat( ' ', 17 ) . "234", -8, 'XXX' ),
'truncate suffix, with spaces'
);
$this->assertEquals(
"12345XXX",
- $this->getLang()->truncate( "1234567890", 5, 'XXX', false ),
+ $this->getLang()->truncateForDatabase( "1234567890", 5, 'XXX', false ),
'truncate without adjustment'
);
$this->assertEquals(
"泰乐菌...",
- $this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ),
+ $this->getLang()->truncateForDatabase( "泰乐菌素123456789", 11, '...', false ),
'truncate does not chop Unicode characters in half'
);
$this->assertEquals(
"\n泰乐菌...",
- $this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ),
+ $this->getLang()->truncateForDatabase( "\n泰乐菌素123456789", 12, '...', false ),
'truncate does not chop Unicode characters in half if there is a preceding newline'
);
}
+ /**
+  * Run truncateForVisual() on one data set and compare with the expected text.
+  *
+  * @dataProvider provideTruncateData
+  * @covers Language::truncateForVisual
+  * @covers Language::truncateInternal
+  */
+ public function testTruncateForVisual(
+     $expected, $string, $length, $ellipsis = '...', $adjustLength = true
+ ) {
+     $language = $this->getLang();
+     $actual = $language->truncateForVisual( $string, $length, $ellipsis, $adjustLength );
+
+     $this->assertEquals( $expected, $actual );
+ }
+
+ /**
+  * Data sets for testTruncateForVisual(); lengths are counted in characters.
+  *
+  * @return array Format is ($expected, $string, $length, $ellipsis, $adjustLength)
+  */
+ public static function provideTruncateData() {
+     return [
+         // Length 0 yields just the ellipsis.
+         [ "XXX", "тестирам да ли ради", 0, "XXX" ],
+         [ "testnXXX", "testni scenarij", 8, "XXX" ],
+         // Ellipsis longer than the limit: the original string is returned.
+         [ "حالة اختبار", "حالة اختبار", 5, "XXXXXXXXXXXXXXX" ],
+         // Negative length keeps the end of the string.
+         [ "XXXедент", "прецедент", -8, "XXX" ],
+         [ "XXപിൾ", "ആപ്പിൾ", -5, "XX" ],
+         // The input must be longer than the limit for truncation to happen, so the
+         // trailing padding is spelled out: 39 characters, cut inside the spaces and trimmed.
+         [ "神秘XXX", "神秘" . str_repeat( ' ', 37 ), 9, "XXX" ],
+         [ "ΔημιουργXXX", "Δημιουργία Σύμπαντος", 11, "XXX" ],
+         [ "XXXの家です", "地球は私たちの唯 の家です", -8, "XXX" ],
+         // $adjustLength = false: the ellipsis is not counted against the limit.
+         [ "زندگیXXX", "زندگی زیباست", 6, "XXX", false ],
+         [ "ცხოვრება...", "ცხოვრება არის საოცარი", 8, "...", false ],
+         [ "\nທ່ານ...", "\nທ່ານບໍ່ຮູ້ຫນັງສື", 5, "...", false ],
+     ];
+ }
+
/**
* @dataProvider provideHTMLTruncateData
* @covers Language::truncateHTML