From e5c07464786df7ceec4252fad9962dd669ed754e Mon Sep 17 00:00:00 2001
From: Tim Starling
Date: Fri, 25 Sep 2009 15:15:33 +0000
Subject: [PATCH] (r55382) Removed U+200B, zero-width space, from the list of
 characters to be converted to an underscore. This character is used in
 Khmer, Burmese and Thai (km, my, th) as an invisible word break indicator
 for the purposes of line wrapping. This is especially necessary for Khmer
 and Burmese since browsers do not have automatic word segmentation for
 those languages, like they do for Thai. It's not appropriate for these word
 break hints to be displayed as spaces.

---
 includes/Title.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/includes/Title.php b/includes/Title.php
index 3ceb7b1a1d..4ec6b8e680 100644
--- a/includes/Title.php
+++ b/includes/Title.php
@@ -2252,7 +2252,7 @@ class Title {
 		# input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
 		# conveniently disabling them.
 		#
-		$dbkey = preg_replace( '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200B}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u', '_', $dbkey );
+		$dbkey = preg_replace( '/[ _\xA0\x{1680}\x{180E}\x{2000}-\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}]+/u', '_', $dbkey );
 		$dbkey = trim( $dbkey, '_' );
 
 		if ( '' == $dbkey ) {
-- 
2.20.1