From: Bartosz Dziewoński Date: Mon, 30 Jul 2018 19:37:45 +0000 (+0200) Subject: Use multibyte-aware truncation to avoid invalid UTF-8 in cl_sortkey_prefix X-Git-Tag: 1.34.0-rc.0~4622^2 X-Git-Url: http://git.cyclocoop.org/?a=commitdiff_plain;h=62f7cdc33169d8abf590e8e2f70c8ddf43aeec14;p=lhc%2Fweb%2Fwiklou.git Use multibyte-aware truncation to avoid invalid UTF-8 in cl_sortkey_prefix The invalid UTF-8 could cause incorrect sorting of affected pages in category lists on wikis using UCA collations. On my local testing wiki, the generated cl_sortkey was just 0x30 regardless of the value of cl_sortkey_prefix. This doesn't fix existing bad data in the database. It will only be updated when the affected page is edited (or null-edited). The cl_timestamp field will also be updated when that happens, which apparently may affect Wikinews' DynamicPageList extension, according to comments on T27254. This is not easily avoidable. Bug: T200623 Change-Id: I4baa9ea3c7f831ff3c9c51e6b8e5d66e7da42a91 --- diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 39e8bd989e..141888cda6 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -141,14 +141,9 @@ class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate { } foreach ( $this->mCategories as &$sortkey ) { - # If the sortkey is longer then 255 bytes, - # it truncated by DB, and then doesn't get - # matched when comparing existing vs current - # categories, causing T27254. - # Also. substr behaves weird when given "". - if ( $sortkey !== '' ) { - $sortkey = substr( $sortkey, 0, 255 ); - } + # If the sortkey is longer then 255 bytes, it is truncated by DB, and then doesn't match + # when comparing existing vs current categories, causing T27254. + $sortkey = mb_strcut( $sortkey, 0, 255 ); } $this->mRecursive = $recursive;