From 0313128b1038de8f2ee52a181eafdee8c5e430f7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bartosz=20Dziewo=C5=84ski?= Date: Sat, 7 Oct 2017 02:26:23 +0200 Subject: [PATCH] Use PHP 7 "\u{NNNN}" Unicode codepoint escapes in string literals In cases where we're operating on text data (and not binary data), use e.g. "\u{00A0}" to refer directly to the Unicode character 'NO-BREAK SPACE' instead of "\xc2\xa0" to specify the bytes C2h A0h (which correspond to the UTF-8 encoding of that character). This makes it easier to look up those mysterious sequences, as not all are as recognizable as the no-break space. This is not enforced by PHP, but I think we should write those in uppercase and zero-padded to at least four characters, like the Unicode standard does. Note that not all "\xNN" escapes can be automatically replaced: * We can't use Unicode escapes for binary data that is not UTF-8 (e.g. in code converting from legacy encodings or testing the handling of invalid UTF-8 byte sequences). * '\xNN' escapes in regular expressions in single-quoted strings are actually handled by PCRE and have to be dealt with carefully (those regexps should probably be changed to use the /u modifier). * "\xNN" referring to ASCII characters ("\x7F" and lower) should probably be left as-is. 
The replacements in this commit were done semi-manually by piping the existing "\xNN" escapes through the following terrible Ruby script I devised: chars = eval('"' + ARGV[0] + '"').force_encoding('utf-8') puts chars.split('').map{|char| '\\u{' + char.ord.to_s(16).upcase.rjust(4, '0') + '}' }.join('') Change-Id: Idc3dee3a7fb5ebfaef395754d8859b18f1f8769a --- includes/cache/MessageCache.php | 6 +- includes/collation/IcuCollation.php | 36 +++++------ includes/installer/Installer.php | 2 +- includes/json/FormatJson.php | 4 +- includes/libs/CSSMin.php | 2 +- includes/specials/formfields/Licenses.php | 2 +- includes/tidy/RemexCompatFormatter.php | 4 +- languages/Language.php | 38 ++++++------ languages/data/Names.php | 60 +++++++++---------- languages/messages/MessagesAf.php | 2 +- languages/messages/MessagesBe.php | 2 +- languages/messages/MessagesBe_tarask.php | 2 +- languages/messages/MessagesBg.php | 2 +- languages/messages/MessagesBr.php | 2 +- languages/messages/MessagesCs.php | 2 +- languages/messages/MessagesEo.php | 2 +- languages/messages/MessagesEs.php | 2 +- languages/messages/MessagesEt.php | 2 +- languages/messages/MessagesFi.php | 2 +- languages/messages/MessagesFr.php | 2 +- languages/messages/MessagesFrp.php | 2 +- languages/messages/MessagesFur.php | 2 +- languages/messages/MessagesHu.php | 2 +- languages/messages/MessagesHy.php | 2 +- languages/messages/MessagesIa.php | 2 +- languages/messages/MessagesIt.php | 2 +- languages/messages/MessagesKaa.php | 2 +- languages/messages/MessagesKk_cyrl.php | 2 +- languages/messages/MessagesKk_latn.php | 2 +- languages/messages/MessagesKsh.php | 2 +- languages/messages/MessagesLa.php | 2 +- languages/messages/MessagesLbe.php | 2 +- languages/messages/MessagesLn.php | 2 +- languages/messages/MessagesLt.php | 2 +- languages/messages/MessagesLv.php | 2 +- languages/messages/MessagesMr.php | 2 +- languages/messages/MessagesNb.php | 2 +- languages/messages/MessagesNn.php | 2 +- languages/messages/MessagesOc.php | 2 +- 
languages/messages/MessagesPl.php | 2 +- languages/messages/MessagesPt.php | 2 +- languages/messages/MessagesPt_br.php | 2 +- languages/messages/MessagesRu.php | 2 +- languages/messages/MessagesSe.php | 2 +- languages/messages/MessagesSk.php | 2 +- languages/messages/MessagesSv.php | 2 +- languages/messages/MessagesTa.php | 2 +- languages/messages/MessagesTe.php | 2 +- languages/messages/MessagesUdm.php | 2 +- languages/messages/MessagesUk.php | 2 +- languages/messages/MessagesUz.php | 2 +- languages/messages/MessagesWa.php | 2 +- maintenance/generateSitemap.php | 2 +- maintenance/language/languages.inc | 24 ++++---- .../CustomUppercaseCollationTest.php | 10 ++-- tests/phpunit/includes/libs/CSSMinTest.php | 6 +- tests/phpunit/languages/LanguageTest.php | 6 +- 57 files changed, 144 insertions(+), 144 deletions(-) diff --git a/includes/cache/MessageCache.php b/includes/cache/MessageCache.php index 23a5da5f80..b6213c1d32 100644 --- a/includes/cache/MessageCache.php +++ b/includes/cache/MessageCache.php @@ -836,9 +836,9 @@ class MessageCache { ], [ ' ', - "\xc2\xa0", - "\xc2\xa0", - "\xc2\xad" + "\u{00A0}", + "\u{00A0}", + "\u{00AD}" ], $message ); diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index 3fb7d8b579..d92c215e11 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -94,7 +94,7 @@ class IcuCollation extends Collation { 'af' => [], 'am' => [], 'ar' => [], - 'as' => [ "\xe0\xa6\x82", "\xe0\xa6\x81", "\xe0\xa6\x83", "\xe0\xa7\x8e", "ক্ষ " ], + 'as' => [ "\u{0982}", "\u{0981}", "\u{0983}", "\u{09CE}", "ক্ষ " ], 'ast' => [ "Ch", "Ll", "Ñ" ], // not in libicu 'az' => [ "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ], 'be' => [ "Ё" ], @@ -144,11 +144,11 @@ class IcuCollation extends Collation { 'ga' => [], 'gd' => [], // not in libicu 'gl' => [ "Ch", "Ll", "Ñ" ], - 'gu' => [ "\xe0\xaa\x82", "\xe0\xaa\x83", "\xe0\xaa\x81", "\xe0\xaa\xb3" ], + 'gu' => [ "\u{0A82}", "\u{0A83}", "\u{0A81}", "\u{0AB3}" 
], 'ha' => [ 'Ɓ', 'Ɗ', 'Ƙ', 'Sh', 'Ts', 'Ƴ' ], 'haw' => [ 'ʻ' ], 'he' => [], - 'hi' => [ "\xe0\xa4\x82", "\xe0\xa4\x83" ], + 'hi' => [ "\u{0902}", "\u{0903}" ], 'hr' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ], 'hsb' => [ "Č", "Dź", "Ě", "Ch", "Ł", "Ń", "Ř", "Š", "Ć", "Ž" ], 'hu' => [ "Cs", "Dz", "Dzs", "Gy", "Ly", "Ny", "Ö", "Sz", "Ty", "Ü", "Zs" ], @@ -161,13 +161,13 @@ class IcuCollation extends Collation { 'kk' => [ "Ү", "І" ], 'kl' => [ "Æ", "Ø", "Å" ], 'km' => [ - "រ", "ឫ", "ឬ", "ល", "ឭ", "ឮ", "\xe1\x9e\xbb\xe1\x9f\x86", - "\xe1\x9f\x86", "\xe1\x9e\xb6\xe1\x9f\x86", "\xe1\x9f\x87", - "\xe1\x9e\xb7\xe1\x9f\x87", "\xe1\x9e\xbb\xe1\x9f\x87", - "\xe1\x9f\x81\xe1\x9f\x87", "\xe1\x9f\x84\xe1\x9f\x87", + "រ", "ឫ", "ឬ", "ល", "ឭ", "ឮ", "\u{17BB}\u{17C6}", + "\u{17C6}", "\u{17B6}\u{17C6}", "\u{17C7}", + "\u{17B7}\u{17C7}", "\u{17BB}\u{17C7}", + "\u{17C1}\u{17C7}", "\u{17C4}\u{17C7}", ], - 'kn' => [ "\xe0\xb2\x81", "\xe0\xb2\x83", "\xe0\xb3\xb1", "\xe0\xb3\xb2" ], - 'kok' => [ "\xe0\xa4\x82", "\xe0\xa4\x83", "ळ", "क्ष" ], + 'kn' => [ "\u{0C81}", "\u{0C83}", "\u{0CF1}", "\u{0CF2}" ], + 'kok' => [ "\u{0902}", "\u{0903}", "ळ", "क्ष" ], 'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ], // not in libicu 'ky' => [ "Ё" ], 'la' => [], // not in libicu @@ -181,7 +181,7 @@ class IcuCollation extends Collation { 'ml' => [], 'mn' => [], 'mo' => [ "Ă", "Â", "Î", "Ș", "Ț" ], // not in libicu - 'mr' => [ "\xe0\xa4\x82", "\xe0\xa4\x83", "ळ", "क्ष", "ज्ञ" ], + 'mr' => [ "\u{0902}", "\u{0903}", "ळ", "क्ष", "ज्ञ" ], 'ms' => [], 'mt' => [ "Ċ", "Ġ", "Għ", "Ħ", "Ż" ], 'nb' => [ "Æ", "Ø", "Å" ], @@ -191,8 +191,8 @@ class IcuCollation extends Collation { 'no' => [ "Æ", "Ø", "Å" ], // not in libicu. You should probably use nb or nn instead. 
'oc' => [], // not in libicu 'om' => [ 'Ch', 'Dh', 'Kh', 'Ny', 'Ph', 'Sh' ], - 'or' => [ "\xe0\xac\x81", "\xe0\xac\x82", "\xe0\xac\x83", "କ୍ଷ" ], - 'pa' => [ "\xe0\xa9\x8d" ], + 'or' => [ "\u{0B01}", "\u{0B02}", "\u{0B03}", "କ୍ଷ" ], + 'pa' => [ "\u{0A4D}" ], 'pl' => [ "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ], 'pt' => [], 'rm' => [], // not in libicu @@ -204,7 +204,7 @@ class IcuCollation extends Collation { 'Á', 'Č', 'Ʒ', 'Ǯ', 'Đ', 'Ǧ', 'Ǥ', 'Ǩ', 'Ŋ', 'Š', 'Ŧ', 'Ž', 'Ø', 'Æ', 'Ȧ', 'Ä', 'Ö' ], - 'si' => [ "\xe0\xb6\x82", "\xe0\xb6\x83", "\xe0\xb6\xa4" ], + 'si' => [ "\u{0D82}", "\u{0D83}", "\u{0DA4}" ], 'sk' => [ "Ä", "Č", "Ch", "Ô", "Š", "Ž" ], 'sl' => [ "Č", "Š", "Ž" ], 'smn' => [ "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ], @@ -215,12 +215,12 @@ class IcuCollation extends Collation { 'sv@collation=standard' => [ "Å", "Ä", "Ö" ], 'sw' => [], 'ta' => [ - "\xE0\xAE\x82", "ஃ", "க்ஷ", "க்", "ங்", "ச்", "ஞ்", "ட்", "ண்", "த்", "ந்", + "\u{0B82}", "ஃ", "க்ஷ", "க்", "ங்", "ச்", "ஞ்", "ட்", "ண்", "த்", "ந்", "ப்", "ம்", "ய்", "ர்", "ல்", "வ்", "ழ்", "ள்", "ற்", "ன்", "ஜ்", "ஶ்", "ஷ்", "ஸ்", "ஹ்", "க்ஷ்" ], - 'te' => [ "\xe0\xb0\x81", "\xe0\xb0\x82", "\xe0\xb0\x83" ], - 'th' => [ "ฯ", "\xe0\xb9\x86", "\xe0\xb9\x8d", "\xe0\xb8\xba" ], + 'te' => [ "\u{0C01}", "\u{0C02}", "\u{0C03}" ], + 'th' => [ "ฯ", "\u{0E46}", "\u{0E4D}", "\u{0E3A}" ], 'tk' => [ "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ], 'tl' => [ "Ñ", "Ng" ], // not in libicu 'to' => [ "Ng", "ʻ" ], @@ -231,8 +231,8 @@ class IcuCollation extends Collation { 'vi' => [ "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ], 'vo' => [ "Ä", "Ö", "Ü" ], 'yi' => [ - "\xd7\x91\xd6\xbf", "\xd7\x9b\xd6\xbc", "\xd7\xa4\xd6\xbc", - "\xd7\xa9\xd7\x82", "\xd7\xaa\xd6\xbc" + "\u{05D1}\u{05BF}", "\u{05DB}\u{05BC}", "\u{05E4}\u{05BC}", + "\u{05E9}\u{05C2}", "\u{05EA}\u{05BC}" ], 'yo' => [ "Ẹ", "Gb", "Ọ", "Ṣ" ], 'zu' => [], diff --git a/includes/installer/Installer.php b/includes/installer/Installer.php index 
284d5dd2c1..00bd4d0605 100644 --- a/includes/installer/Installer.php +++ b/includes/installer/Installer.php @@ -811,7 +811,7 @@ abstract class Installer { // with utf8 support, but not unicode property support. // check that \p{Zs} (space separators) matches // U+3000 (Ideographic space) - $regexprop = preg_replace( '/\p{Zs}/u', '', "-\xE3\x80\x80-" ); + $regexprop = preg_replace( '/\p{Zs}/u', '', "-\u{3000}-" ); Wikimedia\restoreWarnings(); if ( $regexd != '--' || $regexprop != '--' ) { $this->showError( 'config-pcre-no-utf8' ); diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php index acbbf2604f..b99b0b826c 100644 --- a/includes/json/FormatJson.php +++ b/includes/json/FormatJson.php @@ -84,8 +84,8 @@ class FormatJson { * and U+000D (CR). However, PHP already escapes LF and CR according to RFC 4627. */ private static $badChars = [ - "\xe2\x80\xa8", // U+2028 LINE SEPARATOR - "\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR + "\u{2028}", // U+2028 LINE SEPARATOR + "\u{2029}", // U+2029 PARAGRAPH SEPARATOR ]; /** diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php index da75ed4a75..74e8b54e2a 100644 --- a/includes/libs/CSSMin.php +++ b/includes/libs/CSSMin.php @@ -179,7 +179,7 @@ class CSSMin { * @return string */ public static function serializeStringValue( $value ) { - $value = strtr( $value, [ "\0" => "\xEF\xBF\xBD", '\\' => '\\\\', '"' => '\\"' ] ); + $value = strtr( $value, [ "\0" => "\u{FFFD}", '\\' => '\\\\', '"' => '\\"' ] ); $value = preg_replace_callback( '/[\x01-\x1f\x7f]/', function ( $match ) { return '\\' . base_convert( ord( $match[0] ), 10, 16 ) . 
' '; }, $value ); diff --git a/includes/specials/formfields/Licenses.php b/includes/specials/formfields/Licenses.php index a2f3128462..0b6c1659fb 100644 --- a/includes/specials/formfields/Licenses.php +++ b/includes/specials/formfields/Licenses.php @@ -186,7 +186,7 @@ class Licenses extends HTMLFormField { $attribs['selected'] = 'selected'; } - $val = str_repeat( /*   */ "\xc2\xa0", $depth * 2 ) . $text; + $val = str_repeat( /*   */ "\u{00A0}", $depth * 2 ) . $text; return str_repeat( "\t", $depth ) . Xml::element( 'option', $attribs, $val ) . "\n"; } diff --git a/includes/tidy/RemexCompatFormatter.php b/includes/tidy/RemexCompatFormatter.php index c8a715b183..4e933827c2 100644 --- a/includes/tidy/RemexCompatFormatter.php +++ b/includes/tidy/RemexCompatFormatter.php @@ -18,9 +18,9 @@ class RemexCompatFormatter extends HtmlFormatter { public function __construct( $options = [] ) { parent::__construct( $options ); - $this->attributeEscapes["\xc2\xa0"] = ' '; + $this->attributeEscapes["\u{00A0}"] = ' '; unset( $this->attributeEscapes["&"] ); - $this->textEscapes["\xc2\xa0"] = ' '; + $this->textEscapes["\u{00A0}"] = ' '; unset( $this->textEscapes["&"] ); } diff --git a/languages/Language.php b/languages/Language.php index 9d2bdb2e50..da7bc94b21 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -154,9 +154,9 @@ class Language { /** * Unicode directional formatting characters, for embedBidi() */ - static private $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING - static private $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING - static private $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING + static private $lre = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING + static private $rle = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING + static private $pdf = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING /** * Directionality test regex for embedBidi(). 
Matches the first strong directionality codepoint: @@ -2915,33 +2915,33 @@ class Language { if ( $code < 0xac00 || 0xd7a4 <= $code ) { return $matches[1]; } elseif ( $code < 0xb098 ) { - return "\xe3\x84\xb1"; + return "\u{3131}"; } elseif ( $code < 0xb2e4 ) { - return "\xe3\x84\xb4"; + return "\u{3134}"; } elseif ( $code < 0xb77c ) { - return "\xe3\x84\xb7"; + return "\u{3137}"; } elseif ( $code < 0xb9c8 ) { - return "\xe3\x84\xb9"; + return "\u{3139}"; } elseif ( $code < 0xbc14 ) { - return "\xe3\x85\x81"; + return "\u{3141}"; } elseif ( $code < 0xc0ac ) { - return "\xe3\x85\x82"; + return "\u{3142}"; } elseif ( $code < 0xc544 ) { - return "\xe3\x85\x85"; + return "\u{3145}"; } elseif ( $code < 0xc790 ) { - return "\xe3\x85\x87"; + return "\u{3147}"; } elseif ( $code < 0xcc28 ) { - return "\xe3\x85\x88"; + return "\u{3148}"; } elseif ( $code < 0xce74 ) { - return "\xe3\x85\x8a"; + return "\u{314A}"; } elseif ( $code < 0xd0c0 ) { - return "\xe3\x85\x8b"; + return "\u{314B}"; } elseif ( $code < 0xd30c ) { - return "\xe3\x85\x8c"; + return "\u{314C}"; } elseif ( $code < 0xd558 ) { - return "\xe3\x85\x8d"; + return "\u{314D}"; } else { - return "\xe3\x85\x8e"; + return "\u{314E}"; } } else { return ''; @@ -3088,8 +3088,8 @@ class Language { * @return string */ function getDirMark( $opposite = false ) { - $lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM - $rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM + $lrm = "\u{200E}"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM + $rlm = "\u{200F}"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM if ( $opposite ) { return $this->isRTL() ? $lrm : $rlm; } diff --git a/languages/data/Names.php b/languages/data/Names.php index 3d0ee8ea0c..af844ee5e3 100644 --- a/languages/data/Names.php +++ b/languages/data/Names.php @@ -29,8 +29,8 @@ namespace MediaWiki\Languages\Data; * These determine things like interwikis, language selectors, and so on. 
* Safe to change without running scripts on the respective sites. * - * \xE2\x80\x8E is the left-to-right marker and - * \xE2\x80\x8F is the right-to-left marker. + * \u{200E} is the left-to-right marker and + * \u{200F} is the right-to-left marker. * They are required for ensuring the correct display of brackets in * mixed rtl/ltr environment. * @@ -88,8 +88,8 @@ class Names { 'bcc' => 'جهلسری بلوچی', # Southern Balochi 'bcl' => 'Bikol Central', # Bikol: Central Bicolano language 'be' => 'беларуская', # Belarusian normative - 'be-tarask' => "беларуская (тарашкевіца)\xE2\x80\x8E", # Belarusian in Taraskievica orthography - 'be-x-old' => "беларуская (тарашкевіца)\xE2\x80\x8E", # (be-tarask compat) + 'be-tarask' => "беларуская (тарашкевіца)\u{200E}", # Belarusian in Taraskievica orthography + 'be-x-old' => "беларуская (тарашкевіца)\u{200E}", # (be-tarask compat) 'bg' => 'български', # Bulgarian 'bgn' => 'روچ کپتین بلوچی', # Western Balochi 'bh' => 'भोजपुरी', # Bihari macro language. Falls back to Bhojpuri (bho) @@ -122,8 +122,8 @@ class Names { 'cps' => 'Capiceño', # Capiznon 'cr' => 'Nēhiyawēwin / ᓀᐦᐃᔭᐍᐏᐣ', # Cree 'crh' => 'qırımtatarca', # Crimean Tatar (multiple scripts - defaults to Latin) - 'crh-latn' => "qırımtatarca (Latin)\xE2\x80\x8E", # Crimean Tatar (Latin) - 'crh-cyrl' => "къырымтатарджа (Кирилл)\xE2\x80\x8E", # Crimean Tatar (Cyrillic) + 'crh-latn' => "qırımtatarca (Latin)\u{200E}", # Crimean Tatar (Latin) + 'crh-cyrl' => "къырымтатарджа (Кирилл)\u{200E}", # Crimean Tatar (Cyrillic) 'cs' => 'čeština', # Czech 'csb' => 'kaszëbsczi', # Cassubian 'cu' => 'словѣньскъ / ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ', # Old Church Slavonic (ancient language) @@ -133,7 +133,7 @@ class Names { 'de' => 'Deutsch', # German ("Du") 'de-at' => 'Österreichisches Deutsch', # Austrian German 'de-ch' => 'Schweizer Hochdeutsch', # Swiss Standard German - 'de-formal' => "Deutsch (Sie-Form)\xE2\x80\x8E", # German - formal address ("Sie") + 'de-formal' => "Deutsch (Sie-Form)\u{200E}", # German - formal 
address ("Sie") 'din' => 'Thuɔŋjäŋ', # Dinka 'diq' => 'Zazaki', # Zazaki 'dsb' => 'dolnoserbski', # Lower Sorbian @@ -150,7 +150,7 @@ class Names { 'en-gb' => 'British English', # British English 'eo' => 'Esperanto', # Esperanto 'es' => 'español', # Spanish - 'es-formal' => "español (formal)\xE2\x80\x8E", # Spanish formal address + 'es-formal' => "español (formal)\u{200E}", # Spanish formal address 'et' => 'eesti', # Estonian 'eu' => 'euskara', # Basque 'ext' => 'estremeñu', # Extremaduran @@ -170,8 +170,8 @@ class Names { 'ga' => 'Gaeilge', # Irish 'gag' => 'Gagauz', # Gagauz 'gan' => '贛語', # Gan (multiple scripts - defaults to Traditional) - 'gan-hans' => "赣语(简体)\xE2\x80\x8E", # Gan (Simplified Han) - 'gan-hant' => "贛語(繁體)\xE2\x80\x8E", # Gan (Traditional Han) + 'gan-hans' => "赣语(简体)\u{200E}", # Gan (Simplified Han) + 'gan-hant' => "贛語(繁體)\u{200E}", # Gan (Traditional Han) 'gcr' => 'kréyòl gwiyanè', # Guianan Creole 'gd' => 'Gàidhlig', # Scots Gaelic 'gl' => 'galego', # Galician @@ -200,7 +200,7 @@ class Names { 'hsb' => 'hornjoserbsce', # Upper Sorbian 'ht' => 'Kreyòl ayisyen', # Haitian Creole French 'hu' => 'magyar', # Hungarian - 'hu-formal' => "magyar (formal)\xE2\x80\x8E", # Hungarian formal address + 'hu-formal' => "magyar (formal)\u{200E}", # Hungarian formal address 'hy' => 'Հայերեն', # Armenian 'hz' => 'Otsiherero', # Herero 'ia' => 'interlingua', # Interlingua (IALA) @@ -234,12 +234,12 @@ class Names { 'kiu' => 'Kırmancki', # Kirmanjki 'kj' => 'Kwanyama', # Kwanyama 'kk' => 'қазақша', # Kazakh (multiple scripts - defaults to Cyrillic) - 'kk-arab' => "قازاقشا (تٴوتە)\xE2\x80\x8F", # Kazakh Arabic - 'kk-cyrl' => "қазақша (кирил)\xE2\x80\x8E", # Kazakh Cyrillic - 'kk-latn' => "qazaqşa (latın)\xE2\x80\x8E", # Kazakh Latin - 'kk-cn' => "قازاقشا (جۇنگو)\xE2\x80\x8F", # Kazakh (China) - 'kk-kz' => "қазақша (Қазақстан)\xE2\x80\x8E", # Kazakh (Kazakhstan) - 'kk-tr' => "qazaqşa (Türkïya)\xE2\x80\x8E", # Kazakh (Turkey) + 'kk-arab' => "قازاقشا (تٴوتە)\u{200F}", # 
Kazakh Arabic + 'kk-cyrl' => "қазақша (кирил)\u{200E}", # Kazakh Cyrillic + 'kk-latn' => "qazaqşa (latın)\u{200E}", # Kazakh Latin + 'kk-cn' => "قازاقشا (جۇنگو)\u{200F}", # Kazakh (China) + 'kk-kz' => "қазақша (Қазақстан)\u{200E}", # Kazakh (Kazakhstan) + 'kk-tr' => "qazaqşa (Türkïya)\u{200E}", # Kazakh (Turkey) 'kl' => 'kalaallisut', # Inuktitut, Greenlandic/Greenlandic/Kalaallisut (kal) 'km' => 'ភាសាខ្មែរ', # Khmer, Central 'kn' => 'ಕನ್ನಡ', # Kannada @@ -256,8 +256,8 @@ class Names { 'ks-deva' => 'कॉशुर', # Kashmiri (Devanagari script) 'ksh' => 'Ripoarisch', # Ripuarian 'ku' => 'kurdî', # Kurdish (multiple scripts - defaults to Latin) - 'ku-latn' => "kurdî (latînî)\xE2\x80\x8E", # Northern Kurdish (Latin script) - 'ku-arab' => "كوردي (عەرەبی)\xE2\x80\x8F", # Northern Kurdish (Arabic script) (falls back to ckb) + 'ku-latn' => "kurdî (latînî)\u{200E}", # Northern Kurdish (Latin script) + 'ku-arab' => "كوردي (عەرەبی)\u{200F}", # Northern Kurdish (Arabic script) (falls back to ckb) 'kum' => 'къумукъ', # Kumyk (Cyrillic, 'kum-latn' for Latin script) 'kv' => 'коми', # Komi-Zyrian (Cyrillic is common script but also written in Latin script) 'kw' => 'kernowek', # Cornish @@ -318,7 +318,7 @@ class Names { 'ng' => 'Oshiwambo', # Ndonga 'niu' => 'Niuē', # Niuean 'nl' => 'Nederlands', # Dutch - 'nl-informal' => "Nederlands (informeel)\xE2\x80\x8E", # Dutch (informal address ("je")) + 'nl-informal' => "Nederlands (informeel)\u{200E}", # Dutch (informal address ("je")) 'nn' => 'norsk nynorsk', # Norwegian (Nynorsk) 'no' => 'norsk', # Norwegian macro language (falls back to nb). 
'nov' => 'Novial', # Novial @@ -401,8 +401,8 @@ class Names { 'so' => 'Soomaaliga', # Somali 'sq' => 'shqip', # Albanian 'sr' => 'српски / srpski', # Serbian (multiple scripts - defaults to Cyrillic) - 'sr-ec' => "српски (ћирилица)\xE2\x80\x8E", # Serbian Cyrillic ekavian - 'sr-el' => "srpski (latinica)\xE2\x80\x8E", # Serbian Latin ekavian + 'sr-ec' => "српски (ћирилица)\u{200E}", # Serbian Cyrillic ekavian + 'sr-el' => "srpski (latinica)\u{200E}", # Serbian Latin ekavian 'srn' => 'Sranantongo', # Sranan Tongo 'ss' => 'SiSwati', # Swati 'st' => 'Sesotho', # Southern Sotho @@ -471,15 +471,15 @@ class Names { 'zea' => 'Zeêuws', # Zeeuws/Zeaws 'zh' => '中文', # (Zhōng Wén) - Chinese 'zh-classical' => '文言', # Classical Chinese/Literary Chinese -- (see T10217) - 'zh-cn' => "中文(中国大陆)\xE2\x80\x8E", # Chinese (PRC) - 'zh-hans' => "中文(简体)\xE2\x80\x8E", # Mandarin Chinese (Simplified Chinese script) (cmn-hans) - 'zh-hant' => "中文(繁體)\xE2\x80\x8E", # Mandarin Chinese (Traditional Chinese script) (cmn-hant) - 'zh-hk' => "中文(香港)\xE2\x80\x8E", # Chinese (Hong Kong) + 'zh-cn' => "中文(中国大陆)\u{200E}", # Chinese (PRC) + 'zh-hans' => "中文(简体)\u{200E}", # Mandarin Chinese (Simplified Chinese script) (cmn-hans) + 'zh-hant' => "中文(繁體)\u{200E}", # Mandarin Chinese (Traditional Chinese script) (cmn-hant) + 'zh-hk' => "中文(香港)\u{200E}", # Chinese (Hong Kong) 'zh-min-nan' => 'Bân-lâm-gú', # Min-nan -- (see T10217) - 'zh-mo' => "中文(澳門)\xE2\x80\x8E", # Chinese (Macau) - 'zh-my' => "中文(马来西亚)\xE2\x80\x8E", # Chinese (Malaysia) - 'zh-sg' => "中文(新加坡)\xE2\x80\x8E", # Chinese (Singapore) - 'zh-tw' => "中文(台灣)\xE2\x80\x8E", # Chinese (Taiwan) + 'zh-mo' => "中文(澳門)\u{200E}", # Chinese (Macau) + 'zh-my' => "中文(马来西亚)\u{200E}", # Chinese (Malaysia) + 'zh-sg' => "中文(新加坡)\u{200E}", # Chinese (Singapore) + 'zh-tw' => "中文(台灣)\u{200E}", # Chinese (Taiwan) 'zh-yue' => '粵語', # Cantonese -- (see T10217) 'zu' => 'isiZulu' # Zulu ]; diff --git a/languages/messages/MessagesAf.php b/languages/messages/MessagesAf.php index 
d82cde7c82..5c69ac3c5e 100644 --- a/languages/messages/MessagesAf.php +++ b/languages/messages/MessagesAf.php @@ -194,5 +194,5 @@ $specialPageAliases = [ # South Africa uses space for thousands and comma for decimal # Reference: AWS Reël 7.4 p. 52, 2002 edition # glibc is wrong in this respect in some versions -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $linkTrail = "/^([a-z]+)(.*)$/sD"; diff --git a/languages/messages/MessagesBe.php b/languages/messages/MessagesBe.php index ea807fd3e6..c54c613b99 100644 --- a/languages/messages/MessagesBe.php +++ b/languages/messages/MessagesBe.php @@ -65,7 +65,7 @@ $dateFormats = [ # Per discussion on https://translatewiki.net/wiki/Thread:Support/Customization_of number format $separatorTransformTable = [ - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp '.' => ',' ]; diff --git a/languages/messages/MessagesBe_tarask.php b/languages/messages/MessagesBe_tarask.php index ee2bbb3a58..00295fef87 100644 --- a/languages/messages/MessagesBe_tarask.php +++ b/languages/messages/MessagesBe_tarask.php @@ -233,7 +233,7 @@ $dateFormats = [ ]; $separatorTransformTable = [ - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp '.' => ',' ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesBg.php b/languages/messages/MessagesBg.php index 383a3cd952..5c49964554 100644 --- a/languages/messages/MessagesBg.php +++ b/languages/messages/MessagesBg.php @@ -217,5 +217,5 @@ $bookstoreList = [ $linkTrail = '/^([a-zабвгдежзийклмнопрстуфхцчшщъыьэюя]+)(.*)$/sDu'; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' 
=> ',' ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesBr.php b/languages/messages/MessagesBr.php index d7614ccef4..2aef1ed1a3 100644 --- a/languages/messages/MessagesBr.php +++ b/languages/messages/MessagesBr.php @@ -164,5 +164,5 @@ $dateFormats = [ 'dmy both' => 'j M Y "da" H:i', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $linkTrail = "/^((?:c\'h|C\'H|C\'h|c’h|C’H|C’h|[a-zA-ZàâçéèêîôûäëïöüùñÇÉÂÊÎÔÛÄËÏÖÜÀÈÙÑ])+)(.*)$/sDu"; diff --git a/languages/messages/MessagesCs.php b/languages/messages/MessagesCs.php index affcc8358a..6a2e41d29f 100644 --- a/languages/messages/MessagesCs.php +++ b/languages/messages/MessagesCs.php @@ -383,4 +383,4 @@ $bookstoreList = [ # Písmena, která se mají objevit jako část odkazu ve formě '[[jazyk]]y' atd: $linkTrail = '/^([a-záčďéěíňóřšťúůýž]+)(.*)$/sDu'; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; diff --git a/languages/messages/MessagesEo.php b/languages/messages/MessagesEo.php index 0ce25e20e8..2e96cc887d 100644 --- a/languages/messages/MessagesEo.php +++ b/languages/messages/MessagesEo.php @@ -275,7 +275,7 @@ $magicWords = [ 'url_query' => [ '0', 'INFORMPETO', 'QUERY' ], ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $datePreferences = false; $defaultDateFormat = 'dmy'; diff --git a/languages/messages/MessagesEs.php b/languages/messages/MessagesEs.php index e0676a63e6..0d8258e0fe 100644 --- a/languages/messages/MessagesEs.php +++ b/languages/messages/MessagesEs.php @@ -302,7 +302,7 @@ $dateFormats = [ 'dmy both' => 'H:i j M Y', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' 
=> ',' ]; $minimumGroupingDigits = 2; $linkTrail = '/^([a-záéíóúñ]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesEt.php b/languages/messages/MessagesEt.php index d2f8f32972..63365fd9e0 100644 --- a/languages/messages/MessagesEt.php +++ b/languages/messages/MessagesEt.php @@ -258,7 +258,7 @@ $magicWords = [ 'formatdate' => [ '0', 'kuupäevavormindus', 'formatdate', 'dateformat' ], ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $minimumGroupingDigits = 2; $linkTrail = '/^([äöõšüža-z]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesFi.php b/languages/messages/MessagesFi.php index b019fd637d..5fbc47bd16 100644 --- a/languages/messages/MessagesFi.php +++ b/languages/messages/MessagesFi.php @@ -245,7 +245,7 @@ $magicWords = [ 'protectionlevel' => [ '1', 'SUOJAUSTASO', 'PROTECTIONLEVEL' ], ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $datePreferences = [ 'default', diff --git a/languages/messages/MessagesFr.php b/languages/messages/MessagesFr.php index 3e3cf80184..9844a5ea43 100644 --- a/languages/messages/MessagesFr.php +++ b/languages/messages/MessagesFr.php @@ -316,4 +316,4 @@ $dateFormats = [ 'ymd both' => 'Y F j à H:i', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; diff --git a/languages/messages/MessagesFrp.php b/languages/messages/MessagesFrp.php index 0ac1cafaa1..fc160b4a9b 100644 --- a/languages/messages/MessagesFrp.php +++ b/languages/messages/MessagesFrp.php @@ -301,4 +301,4 @@ $dateFormats = [ 'ymd both' => 'Y F j "a" H:i', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' 
=> ',' ]; diff --git a/languages/messages/MessagesFur.php b/languages/messages/MessagesFur.php index 9846686e93..717538b9b7 100644 --- a/languages/messages/MessagesFur.php +++ b/languages/messages/MessagesFur.php @@ -104,4 +104,4 @@ $dateFormats = [ 'dmy both' => 'j "di" M Y "a lis" H:i', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; diff --git a/languages/messages/MessagesHu.php b/languages/messages/MessagesHu.php index 7fcfad8863..6cab75ddec 100644 --- a/languages/messages/MessagesHu.php +++ b/languages/messages/MessagesHu.php @@ -40,7 +40,7 @@ $namespaceAliases = [ ]; $fallback8bitEncoding = "iso8859-2"; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $specialPageAliases = [ 'Activeusers' => [ 'Aktív_felhasználók', 'Aktív_szerkesztők' ], diff --git a/languages/messages/MessagesHy.php b/languages/messages/MessagesHy.php index 34306d3617..1eeaa609ae 100644 --- a/languages/messages/MessagesHy.php +++ b/languages/messages/MessagesHy.php @@ -9,7 +9,7 @@ */ $separatorTransformTable = [ - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp '.' => ',' ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesIa.php b/languages/messages/MessagesIa.php index c3a2905741..3192e2aeec 100644 --- a/languages/messages/MessagesIa.php +++ b/languages/messages/MessagesIa.php @@ -8,7 +8,7 @@ * */ -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $namespaceNames = [ NS_MEDIA => 'Multimedia', diff --git a/languages/messages/MessagesIt.php b/languages/messages/MessagesIt.php index e73a2fc946..fdbce22ee1 100644 --- a/languages/messages/MessagesIt.php +++ b/languages/messages/MessagesIt.php @@ -32,7 +32,7 @@ $namespaceAliases = [ 'Discussioni_immagine' => NS_FILE_TALK, ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' 
=> ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $dateFormats = [ 'mdy time' => 'H:i', diff --git a/languages/messages/MessagesKaa.php b/languages/messages/MessagesKaa.php index 2c7cb9e26b..397af46202 100644 --- a/languages/messages/MessagesKaa.php +++ b/languages/messages/MessagesKaa.php @@ -11,7 +11,7 @@ $fallback = 'kk-latn, kk-cyrl'; $separatorTransformTable = [ - ',' => "\xc2\xa0", + ',' => "\u{00A0}", '.' => ',', ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesKk_cyrl.php b/languages/messages/MessagesKk_cyrl.php index c559d02854..aeaa06a8d5 100644 --- a/languages/messages/MessagesKk_cyrl.php +++ b/languages/messages/MessagesKk_cyrl.php @@ -19,7 +19,7 @@ */ $separatorTransformTable = [ - ',' => "\xc2\xa0", + ',' => "\u{00A0}", '.' => ',', ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesKk_latn.php b/languages/messages/MessagesKk_latn.php index 36e8ecbdde..9c2b84e018 100644 --- a/languages/messages/MessagesKk_latn.php +++ b/languages/messages/MessagesKk_latn.php @@ -15,7 +15,7 @@ $fallback = 'kk-cyrl'; $separatorTransformTable = [ - ',' => "\xc2\xa0", + ',' => "\u{00A0}", '.' => ',', ]; diff --git a/languages/messages/MessagesKsh.php b/languages/messages/MessagesKsh.php index 291ed14d53..c96c94df57 100644 --- a/languages/messages/MessagesKsh.php +++ b/languages/messages/MessagesKsh.php @@ -94,7 +94,7 @@ $namespaceAliases = [ 'Katejorije_Klaaf' => NS_CATEGORY_TALK, ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' 
=> ',' ]; $minimumGroupingDigits = 2; $linkTrail = '/^([äöüėëijßəğåůæœça-z]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesLa.php b/languages/messages/MessagesLa.php index 1fa3e117ae..e6bedd0c60 100644 --- a/languages/messages/MessagesLa.php +++ b/languages/messages/MessagesLa.php @@ -51,7 +51,7 @@ $namespaceAliases = [ 'Disputatio_Imaginis' => NS_FILE_TALK, ]; -$separatorTransformTable = [ ',' => "\xc2\xa0" ]; +$separatorTransformTable = [ ',' => "\u{00A0}" ]; $dateFormats = [ 'mdy time' => 'H:i', diff --git a/languages/messages/MessagesLbe.php b/languages/messages/MessagesLbe.php index 9b4aa82990..ae72fac8a2 100644 --- a/languages/messages/MessagesLbe.php +++ b/languages/messages/MessagesLbe.php @@ -12,7 +12,7 @@ $fallback = 'ru'; $separatorTransformTable = [ - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp '.' => ',' ]; diff --git a/languages/messages/MessagesLn.php b/languages/messages/MessagesLn.php index 6a71ca78e8..a92dc02d35 100644 --- a/languages/messages/MessagesLn.php +++ b/languages/messages/MessagesLn.php @@ -19,4 +19,4 @@ $namespaceGenderAliases = []; $linkPrefixExtension = true; # Same as the French (T10485) -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; diff --git a/languages/messages/MessagesLt.php b/languages/messages/MessagesLt.php index b79f9d8c97..68d41c0a93 100644 --- a/languages/messages/MessagesLt.php +++ b/languages/messages/MessagesLt.php @@ -171,7 +171,7 @@ $magicWords = [ ]; $fallback8bitEncoding = 'windows-1257'; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' 
=> ',' ]; $dateFormats = [ 'ymd time' => 'H:i', diff --git a/languages/messages/MessagesLv.php b/languages/messages/MessagesLv.php index 94aac6018c..6d8e198642 100644 --- a/languages/messages/MessagesLv.php +++ b/languages/messages/MessagesLv.php @@ -63,7 +63,7 @@ $namespaceGenderAliases = [ NS_USER_TALK => [ 'male' => 'DalÄ«bnieka_diskusija', 'female' => 'DalÄ«bnieces_diskusija' ] ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; /** * A list of date format preference keys, which can be selected in user diff --git a/languages/messages/MessagesMr.php b/languages/messages/MessagesMr.php index b51839db1d..c172d087c1 100644 --- a/languages/messages/MessagesMr.php +++ b/languages/messages/MessagesMr.php @@ -323,6 +323,6 @@ $digitTransformTable = [ '9' => '९', # ९ ]; -$linkTrail = "/^([\xE0\xA4\x80-\xE0\xA5\xA3\xE0\xA5\xB1-\xE0\xA5\xBF\xEF\xBB\xBF\xE2\x80\x8D]+)(.*)$/sDu"; +$linkTrail = "/^([\u{0900}-\u{0963}\u{0971}-\u{097F}\u{FEFF}\u{200D}]+)(.*)$/sDu"; $digitGroupingPattern = "##,##,###"; diff --git a/languages/messages/MessagesNb.php b/languages/messages/MessagesNb.php index 66f05275ba..002dcac75b 100644 --- a/languages/messages/MessagesNb.php +++ b/languages/messages/MessagesNb.php @@ -82,7 +82,7 @@ $namespaceAliases = [ 'Bildediskusjon' => NS_FILE_TALK, ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $linkTrail = '/^([æøåa-z]+)(.*)$/sDu'; $datePreferenceMigrationMap = [ diff --git a/languages/messages/MessagesNn.php b/languages/messages/MessagesNn.php index 816cff046c..9107b9a8e8 100644 --- a/languages/messages/MessagesNn.php +++ b/languages/messages/MessagesNn.php @@ -317,7 +317,7 @@ $specialPageAliases = [ ]; $separatorTransformTable = [ - ',' => "\xc2\xa0", + ',' => "\u{00A0}", '.' 
=> ',' ]; $linkTrail = '/^([æøåa-z]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesOc.php b/languages/messages/MessagesOc.php index eb8ed882ce..16edc8e682 100644 --- a/languages/messages/MessagesOc.php +++ b/languages/messages/MessagesOc.php @@ -286,6 +286,6 @@ $dateFormats = [ 'oc normal both' => 'j F "de" Y "a" H.i', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $linkTrail = "/^([a-zàâçéèêîôû]+)(.*)$/sDu"; diff --git a/languages/messages/MessagesPl.php b/languages/messages/MessagesPl.php index 1af008f184..dcc9a44ed2 100644 --- a/languages/messages/MessagesPl.php +++ b/languages/messages/MessagesPl.php @@ -121,7 +121,7 @@ $dateFormats = [ $fallback8bitEncoding = 'iso-8859-2'; $separatorTransformTable = [ - ',' => "\xc2\xa0", // T4749 + ',' => "\u{00A0}", // T4749 '.' => ',' ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesPt.php b/languages/messages/MessagesPt.php index 78503ccb46..f57f3228f0 100644 --- a/languages/messages/MessagesPt.php +++ b/languages/messages/MessagesPt.php @@ -110,7 +110,7 @@ $dateFormats = [ 'dmy both' => 'H\hi\m\i\n \d\e j \d\e F \d\e Y', ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $linkTrail = '/^([áâãàéêẽçíòóôõq̃úüűũa-z]+)(.*)$/sDu'; # T23168, T29633 $specialPageAliases = [ diff --git a/languages/messages/MessagesPt_br.php b/languages/messages/MessagesPt_br.php index 61625e16d3..499e9360d0 100644 --- a/languages/messages/MessagesPt_br.php +++ b/languages/messages/MessagesPt_br.php @@ -120,7 +120,7 @@ $dateFormats = [ ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' 
=> ',' ]; $specialPageAliases = [ 'Activeusers' => [ 'Usuários_ativos' ], diff --git a/languages/messages/MessagesRu.php b/languages/messages/MessagesRu.php index 1abecad68f..b513648810 100644 --- a/languages/messages/MessagesRu.php +++ b/languages/messages/MessagesRu.php @@ -417,7 +417,7 @@ $bookstoreList = [ ]; $separatorTransformTable = [ - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp '.' => ',' ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesSe.php b/languages/messages/MessagesSe.php index 8e1d476fbd..4859719e6b 100644 --- a/languages/messages/MessagesSe.php +++ b/languages/messages/MessagesSe.php @@ -134,6 +134,6 @@ $magicWords = [ 'img_link' => [ '1', 'liŋka=$1', 'link=$1' ], ]; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; $linkTrail = '/^(:?[a-zàáâçčʒǯđðéèêëǧǥȟíìîïıǩŋñóòôõßšŧúùûýÿüžþæøåäö]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesSk.php b/languages/messages/MessagesSk.php index 0399bfab17..847a13d1f8 100644 --- a/languages/messages/MessagesSk.php +++ b/languages/messages/MessagesSk.php @@ -285,7 +285,7 @@ $namespaceGenderAliases = [ ]; $separatorTransformTable = [ - ',' => "\xc2\xa0", + ',' => "\u{00A0}", '.' => ',' ]; diff --git a/languages/messages/MessagesSv.php b/languages/messages/MessagesSv.php index d2ce3575f3..5511dab9a2 100644 --- a/languages/messages/MessagesSv.php +++ b/languages/messages/MessagesSv.php @@ -321,7 +321,7 @@ $magicWords = [ $linkTrail = '/^([a-zåäöéÅÄÖÉ]+)(.*)$/sDu'; $separatorTransformTable = [ - ',' => "\xc2\xa0", // T4749 + ',' => "\u{00A0}", // T4749 '.' 
=> ',' ]; diff --git a/languages/messages/MessagesTa.php b/languages/messages/MessagesTa.php index f089896fe9..48a96c4fa3 100644 --- a/languages/messages/MessagesTa.php +++ b/languages/messages/MessagesTa.php @@ -80,6 +80,6 @@ $magicWords = [ 'url_wiki' => [ '0', 'விக்கி', 'WIKI' ], ]; -$linkTrail = "/^([\xE0\xAE\x80-\xE0\xAF\xBF]+)(.*)$/sDu"; +$linkTrail = "/^([\u{0B80}-\u{0BFF}]+)(.*)$/sDu"; $digitGroupingPattern = "##,##,###"; diff --git a/languages/messages/MessagesTe.php b/languages/messages/MessagesTe.php index 4f385cf89a..47bc707b5c 100644 --- a/languages/messages/MessagesTe.php +++ b/languages/messages/MessagesTe.php @@ -144,6 +144,6 @@ $magicWords = [ 'special' => [ '0', 'ప్రత్యేక', 'special' ], ]; -$linkTrail = "/^([\xE0\xB0\x81-\xE0\xB1\xAF]+)(.*)$/sDu"; +$linkTrail = "/^([\u{0C01}-\u{0C6F}]+)(.*)$/sDu"; $digitGroupingPattern = "##,##,###"; diff --git a/languages/messages/MessagesUdm.php b/languages/messages/MessagesUdm.php index 46d5dcb053..cdfe308109 100644 --- a/languages/messages/MessagesUdm.php +++ b/languages/messages/MessagesUdm.php @@ -44,4 +44,4 @@ $namespaceGenderAliases = []; $linkTrail = '/^([a-zа-яёӝӟӥӧӵ]+)(.*)$/sDu'; $fallback8bitEncoding = 'windows-1251'; -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; diff --git a/languages/messages/MessagesUk.php b/languages/messages/MessagesUk.php index 68d37c9cec..2b6857296d 100644 --- a/languages/messages/MessagesUk.php +++ b/languages/messages/MessagesUk.php @@ -55,7 +55,7 @@ */ $separatorTransformTable = [ - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp '.' => ',' ]; $minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesUz.php b/languages/messages/MessagesUz.php index 53d8efac80..1720545c80 100644 --- a/languages/messages/MessagesUz.php +++ b/languages/messages/MessagesUz.php @@ -121,5 +121,5 @@ $dateFormats = [ */ $separatorTransformTable = [ '.' 
=> ',', - ',' => "\xc2\xa0", # nbsp + ',' => "\u{00A0}", # nbsp ]; diff --git a/languages/messages/MessagesWa.php b/languages/messages/MessagesWa.php index 68f3ae7da0..f42dd8c96a 100644 --- a/languages/messages/MessagesWa.php +++ b/languages/messages/MessagesWa.php @@ -79,7 +79,7 @@ $specialPageAliases = [ # definixha del cogne po les limeros # (number format definition) # en: 12,345.67 -> wa: 12 345,67 -$separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$separatorTransformTable = [ ',' => "\u{00A0}", '.' => ',' ]; # $linkTrail = '/^([a-zåâêîôûçéèA-ZÅÂÊÎÔÛÇÉÈ]+)(.*)$/sDu'; $linkTrail = '/^([a-zåâêîôûçéè]+)(.*)$/sDu'; diff --git a/maintenance/generateSitemap.php b/maintenance/generateSitemap.php index b3e7aecf2f..ff3e2fce1a 100644 --- a/maintenance/generateSitemap.php +++ b/maintenance/generateSitemap.php @@ -539,7 +539,7 @@ class GenerateSitemap extends Maintenance { */ function generateLimit( $namespace ) { // T19961: make a title with the longest possible URL in this namespace - $title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" ); + $title = Title::makeTitle( $namespace, str_repeat( "\u{28B81}", 63 ) . 
"\u{5583}" ); $this->limit = [ strlen( $this->openFile() ), diff --git a/maintenance/language/languages.inc b/maintenance/language/languages.inc index c8fb629e77..c6a5c68299 100644 --- a/maintenance/language/languages.inc +++ b/maintenance/language/languages.inc @@ -508,18 +508,18 @@ class Languages { $this->loadGeneralMessages(); $this->loadMessages( $code ); $wrongChars = [ - '[LRM]' => "\xE2\x80\x8E", - '[RLM]' => "\xE2\x80\x8F", - '[LRE]' => "\xE2\x80\xAA", - '[RLE]' => "\xE2\x80\xAB", - '[POP]' => "\xE2\x80\xAC", - '[LRO]' => "\xE2\x80\xAD", - '[RLO]' => "\xE2\x80\xAB", - '[ZWSP]' => "\xE2\x80\x8B", - '[NBSP]' => "\xC2\xA0", - '[WJ]' => "\xE2\x81\xA0", - '[BOM]' => "\xEF\xBB\xBF", - '[FFFD]' => "\xEF\xBF\xBD", + '[LRM]' => "\u{200E}", + '[RLM]' => "\u{200F}", + '[LRE]' => "\u{202A}", + '[RLE]' => "\u{202B}", + '[POP]' => "\u{202C}", + '[LRO]' => "\u{202D}", + '[RLO]' => "\u{202E}", /* U+202E RIGHT-TO-LEFT OVERRIDE (distinct from RLE, U+202B) */ + '[ZWSP]' => "\u{200B}", + '[NBSP]' => "\u{00A0}", + '[WJ]' => "\u{2060}", + '[BOM]' => "\u{FEFF}", + '[FFFD]' => "\u{FFFD}", ]; $wrongRegExp = '/(' . implode( '|', array_values( $wrongChars ) ) . 
')/sDu'; $wrongCharsMessages = []; diff --git a/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php b/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php index f9e0bc9bc8..417b468b1d 100644 --- a/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php +++ b/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php @@ -53,11 +53,11 @@ class CustomUppercaseCollationTest extends MediaWikiTestCase { [ 'do', 'D' ], [ 'Ao', 'A' ], [ 'afdsa', 'A' ], - [ "\xF3\xB3\x80\x80Foo", 'D' ], - [ "\xF3\xB3\x80\x81Foo", 'C' ], - [ "\xF3\xB3\x80\x82Foo", 'Cs' ], - [ "\xF3\xB3\x80\x83Foo", 'B' ], - [ "\xF3\xB3\x80\x84Foo", "\xF3\xB3\x80\x84" ], + [ "\u{F3000}Foo", 'D' ], + [ "\u{F3001}Foo", 'C' ], + [ "\u{F3002}Foo", 'Cs' ], + [ "\u{F3003}Foo", 'B' ], + [ "\u{F3004}Foo", "\u{F3004}" ], [ 'C', 'C' ], [ 'Cz', 'C' ], [ 'Cs', 'Cs' ], diff --git a/tests/phpunit/includes/libs/CSSMinTest.php b/tests/phpunit/includes/libs/CSSMinTest.php index 354dae203b..c711291483 100644 --- a/tests/phpunit/includes/libs/CSSMinTest.php +++ b/tests/phpunit/includes/libs/CSSMinTest.php @@ -35,7 +35,7 @@ class CSSMinTest extends MediaWikiTestCase { public static function provideSerializeStringValue() { return [ [ 'Hello World!', '"Hello World!"' ], - [ "Null\0Null", "\"Null\xEF\xBF\xBDNull\"" ], + [ "Null\0Null", "\"Null\u{FFFD}Null\"" ], [ '"', '"\\""' ], [ "'", '"\'"' ], [ "\\", '"\\\\"' ], @@ -43,9 +43,9 @@ class CSSMinTest extends MediaWikiTestCase { [ "Space tab \t space", '"Space tab \\9 space"' ], [ "Line\nfeed", '"Line\\a feed"' ], [ "Return\rreturn", '"Return\\d return"' ], - [ "Next\xc2\x85line", "\"Next\xc2\x85line\"" ], + [ "Next\u{0085}line", "\"Next\u{0085}line\"" ], [ "Del\x7fDel", '"Del\\7f Del"' ], - [ "nb\xc2\xa0sp", "\"nb\xc2\xa0sp\"" ], + [ "nb\u{00A0}sp", "\"nb\u{00A0}sp\"" ], [ "AMP&AMP", "\"AMP&AMP\"" ], [ '!"#$%&\'()*+,-./0123456789:;<=>?', '"!\\"#$%&\'()*+,-./0123456789:;<=>?"' ], [ '@[\\]^_`{|}~', '"@[\\\\]^_`{|}~"' ], diff --git 
a/tests/phpunit/languages/LanguageTest.php b/tests/phpunit/languages/LanguageTest.php index 8653bcd5b8..7e29c9292d 100644 --- a/tests/phpunit/languages/LanguageTest.php +++ b/tests/phpunit/languages/LanguageTest.php @@ -1593,9 +1593,9 @@ class LanguageTest extends LanguageClassesTestCase { * @covers Language::embedBidi() */ public function testEmbedBidi() { - $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING - $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING - $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING + $lre = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING + $rle = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING + $pdf = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING $lang = $this->getLang(); $this->assertEquals( '123', -- 2.20.1