From 62f7cdc33169d8abf590e8e2f70c8ddf43aeec14 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bartosz=20Dziewo=C5=84ski?= Date: Mon, 30 Jul 2018 21:37:45 +0200 Subject: [PATCH] Use multibyte-aware truncation to avoid invalid UTF-8 in cl_sortkey_prefix The invalid UTF-8 could cause incorrect sorting of affected pages in category lists on wikis using UCA collations. On my local testing wiki, the generated cl_sortkey was just 0x30 regardless of the value of cl_sortkey_prefix. This doesn't fix existing bad data in the database. It will only be updated when the affected page is edited (or null-edited). The cl_timestamp field will also be updated when that happens, which apparently may affect Wikinews' DynamicPageList extension, according to comments on T27254. This is not easily avoidable. Bug: T200623 Change-Id: I4baa9ea3c7f831ff3c9c51e6b8e5d66e7da42a91 --- includes/deferred/LinksUpdate.php | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 39e8bd989e..141888cda6 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -141,14 +141,9 @@ class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate { } foreach ( $this->mCategories as &$sortkey ) { - # If the sortkey is longer then 255 bytes, - # it truncated by DB, and then doesn't get - # matched when comparing existing vs current - # categories, causing T27254. - # Also. substr behaves weird when given "". - if ( $sortkey !== '' ) { - $sortkey = substr( $sortkey, 0, 255 ); - } + # If the sortkey is longer than 255 bytes, it is truncated by DB, and then doesn't match + # when comparing existing vs current categories, causing T27254. + $sortkey = mb_strcut( $sortkey, 0, 255 ); } $this->mRecursive = $recursive; -- 2.20.1