From eb6bb6b7b9798a4f797232afda9571d4120ed03d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bartosz=20Dziewo=C5=84ski?= Date: Tue, 10 Oct 2017 20:51:50 +0200 Subject: [PATCH] Generalize non-digit-grouping of four-digit numbers In some languages it's conventional not to insert a thousands separator in numbers that are four digits long (1000-9999). Rather than copy-paste the custom code to do this between 13 files, introduce another option and have the base Language class handle it. This also fixes an issue in several languages where this logic previously would not work for negative or fractional numbers. To implement this, a new option is added to MessagesXx.php files, `$minimumGroupingDigits = 2;`, with the meaning as defined in the Unicode LDML specification (UTS #35, Part 3: Numbers, http://unicode.org/reports/tr35/tr35-numbers.html). It is a little roundabout, but it could allow us to migrate the number formatting (currently all custom code) to some generic library easily. Bug: T177846 Change-Id: Iedd8de5648cf2de1c94044918626de2f96365d48 --- autoload.php | 7 --- .../cache/localisation/LocalisationCache.php | 3 +- languages/Language.php | 22 +++++++- languages/classes/LanguageBe_tarask.php | 16 ------ languages/classes/LanguageBg.php | 45 ----------------- languages/classes/LanguageEs.php | 42 ---------------- languages/classes/LanguageEt.php | 42 ---------------- languages/classes/LanguageHy.php | 15 ------ languages/classes/LanguageKaa.php | 15 ------ languages/classes/LanguageKk_cyrl.php | 15 ------ languages/classes/LanguageKsh.php | 15 ------ languages/classes/LanguageKu.php | 2 +- languages/classes/LanguageKu_ku.php | 45 ----------------- languages/classes/LanguagePl.php | 41 --------------- languages/classes/LanguageRu.php | 50 ------------------- languages/classes/LanguageUk.php | 48 ------------------ languages/messages/MessagesBe_tarask.php | 1 + languages/messages/MessagesBg.php | 1 + languages/messages/MessagesEs.php | 2 + languages/messages/MessagesEt.php | 2 + languages/messages/MessagesHy.php | 1 + languages/messages/MessagesKaa.php | 1 + 
languages/messages/MessagesKk_cyrl.php | 1 + languages/messages/MessagesKsh.php | 2 + languages/messages/MessagesKu_latn.php | 1 + languages/messages/MessagesPl.php | 2 + languages/messages/MessagesRu.php | 1 + languages/messages/MessagesUk.php | 1 + .../languages/classes/LanguagePlTest.php | 2 +- .../languages/classes/LanguageRuTest.php | 3 -- .../languages/classes/LanguageUkTest.php | 3 -- 31 files changed, 41 insertions(+), 406 deletions(-) delete mode 100644 languages/classes/LanguageBg.php delete mode 100644 languages/classes/LanguageEs.php delete mode 100644 languages/classes/LanguageEt.php delete mode 100644 languages/classes/LanguageKu_ku.php delete mode 100644 languages/classes/LanguagePl.php delete mode 100644 languages/classes/LanguageRu.php delete mode 100644 languages/classes/LanguageUk.php diff --git a/autoload.php b/autoload.php index 884dbbef32..4a8e18dd9c 100644 --- a/autoload.php +++ b/autoload.php @@ -705,7 +705,6 @@ $wgAutoloadLocalClasses = [ 'LanguageAr' => __DIR__ . '/languages/classes/LanguageAr.php', 'LanguageAz' => __DIR__ . '/languages/classes/LanguageAz.php', 'LanguageBe_tarask' => __DIR__ . '/languages/classes/LanguageBe_tarask.php', - 'LanguageBg' => __DIR__ . '/languages/classes/LanguageBg.php', 'LanguageBs' => __DIR__ . '/languages/classes/LanguageBs.php', 'LanguageCode' => __DIR__ . '/languages/LanguageCode.php', 'LanguageConverter' => __DIR__ . '/languages/LanguageConverter.php', @@ -713,8 +712,6 @@ $wgAutoloadLocalClasses = [ 'LanguageCu' => __DIR__ . '/languages/classes/LanguageCu.php', 'LanguageDsb' => __DIR__ . '/languages/classes/LanguageDsb.php', 'LanguageEn' => __DIR__ . '/languages/classes/LanguageEn.php', - 'LanguageEs' => __DIR__ . '/languages/classes/LanguageEs.php', - 'LanguageEt' => __DIR__ . '/languages/classes/LanguageEt.php', 'LanguageFi' => __DIR__ . '/languages/classes/LanguageFi.php', 'LanguageGa' => __DIR__ . '/languages/classes/LanguageGa.php', 'LanguageGan' => __DIR__ . 
'/languages/classes/LanguageGan.php', @@ -729,21 +726,17 @@ $wgAutoloadLocalClasses = [ 'LanguageKm' => __DIR__ . '/languages/classes/LanguageKm.php', 'LanguageKsh' => __DIR__ . '/languages/classes/LanguageKsh.php', 'LanguageKu' => __DIR__ . '/languages/classes/LanguageKu.php', - 'LanguageKu_ku' => __DIR__ . '/languages/classes/LanguageKu_ku.php', 'LanguageLa' => __DIR__ . '/languages/classes/LanguageLa.php', 'LanguageMl' => __DIR__ . '/languages/classes/LanguageMl.php', 'LanguageMy' => __DIR__ . '/languages/classes/LanguageMy.php', 'LanguageOs' => __DIR__ . '/languages/classes/LanguageOs.php', - 'LanguagePl' => __DIR__ . '/languages/classes/LanguagePl.php', 'LanguageQqx' => __DIR__ . '/languages/classes/LanguageQqx.php', - 'LanguageRu' => __DIR__ . '/languages/classes/LanguageRu.php', 'LanguageShi' => __DIR__ . '/languages/classes/LanguageShi.php', 'LanguageSl' => __DIR__ . '/languages/classes/LanguageSl.php', 'LanguageSr' => __DIR__ . '/languages/classes/LanguageSr.php', 'LanguageTg' => __DIR__ . '/languages/classes/LanguageTg.php', 'LanguageTr' => __DIR__ . '/languages/classes/LanguageTr.php', 'LanguageTyv' => __DIR__ . '/languages/classes/LanguageTyv.php', - 'LanguageUk' => __DIR__ . '/languages/classes/LanguageUk.php', 'LanguageUz' => __DIR__ . '/languages/classes/LanguageUz.php', 'LanguageWa' => __DIR__ . '/languages/classes/LanguageWa.php', 'LanguageYue' => __DIR__ . 
'/languages/classes/LanguageYue.php', diff --git a/includes/cache/localisation/LocalisationCache.php b/includes/cache/localisation/LocalisationCache.php index a0ce95e47a..5e0a688e81 100644 --- a/includes/cache/localisation/LocalisationCache.php +++ b/includes/cache/localisation/LocalisationCache.php @@ -109,7 +109,8 @@ class LocalisationCache { static public $allKeys = [ 'fallback', 'namespaceNames', 'bookstoreList', 'magicWords', 'messages', 'rtl', 'capitalizeAllNouns', 'digitTransformTable', - 'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension', + 'separatorTransformTable', 'minimumGroupingDigits', + 'fallback8bitEncoding', 'linkPrefixExtension', 'linkTrail', 'linkPrefixCharset', 'namespaceAliases', 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap', 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases', diff --git a/languages/Language.php b/languages/Language.php index 7ef441b167..fdf2d05d9d 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -3313,12 +3313,25 @@ class Language { */ function commafy( $number ) { $digitGroupingPattern = $this->digitGroupingPattern(); + $minimumGroupingDigits = $this->minimumGroupingDigits(); if ( $number === null ) { return ''; } if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) { - // default grouping is at thousands, use the same for ###,###,### pattern too. + // Default grouping is at thousands, use the same for ###,###,### pattern too. + // In some languages it's conventional not to insert a thousands separator + // in numbers that are four digits long (1000-9999). + if ( $minimumGroupingDigits ) { + // Number of '#' characters after last comma in the grouping pattern. + // The pattern is hardcoded here, but this would vary for different patterns. + $primaryGroupingSize = 3; + // Maximum length of a number to suppress digit grouping for. + $maximumLength = $minimumGroupingDigits + $primaryGroupingSize - 1; + if ( preg_match( '/^\-?\d{1,' . 
$maximumLength . '}(\.\d+)?$/', $number ) ) { + return $number; + } + } return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) ); } else { // Ref: http://cldr.unicode.org/translation/number-patterns @@ -3381,6 +3394,13 @@ class Language { return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' ); } + /** + * @return int|null + */ + function minimumGroupingDigits() { + return self::$dataCache->getItem( $this->mCode, 'minimumGroupingDigits' ); + } + /** * Take a list of strings and build a locale-friendly comma-separated * list, using the local comma-separator message. diff --git a/languages/classes/LanguageBe_tarask.php b/languages/classes/LanguageBe_tarask.php index 96e040f917..1f9b767aab 100644 --- a/languages/classes/LanguageBe_tarask.php +++ b/languages/classes/LanguageBe_tarask.php @@ -53,20 +53,4 @@ class LanguageBe_tarask extends Language { return $s; } - - /** - * Four-digit number should be without group commas (spaces) - * So "1 234 567", "12 345" but "1234" - * - * @param string $_ - * - * @return string - */ - function commafy( $_ ) { - if ( preg_match( '/^-?\d{1,4}(\.\d*)?$/', $_ ) ) { - return $_; - } else { - return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) ); - } - } } diff --git a/languages/classes/LanguageBg.php b/languages/classes/LanguageBg.php deleted file mode 100644 index a592d025b4..0000000000 --- a/languages/classes/LanguageBg.php +++ /dev/null @@ -1,45 +0,0 @@ - "\xc2\xa0", # nbsp '.' 
=> ',' ]; +$minimumGroupingDigits = 2; $linkTrail = '/^([абвгґджзеёжзійклмнопрстуўфхцчшыьэюяćčłńśšŭźža-z]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesBg.php b/languages/messages/MessagesBg.php index 3a21e75dbf..383a3cd952 100644 --- a/languages/messages/MessagesBg.php +++ b/languages/messages/MessagesBg.php @@ -218,3 +218,4 @@ $bookstoreList = [ $linkTrail = '/^([a-zабвгдежзийклмнопрстуфхцчшщъыьэюя]+)(.*)$/sDu'; $separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$minimumGroupingDigits = 2; diff --git a/languages/messages/MessagesEs.php b/languages/messages/MessagesEs.php index d7a780ebe2..16698d124d 100644 --- a/languages/messages/MessagesEs.php +++ b/languages/messages/MessagesEs.php @@ -299,4 +299,6 @@ $dateFormats = [ ]; $separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$minimumGroupingDigits = 2; + $linkTrail = '/^([a-záéíóúñ]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesEt.php b/languages/messages/MessagesEt.php index fcc12c9544..d2f8f32972 100644 --- a/languages/messages/MessagesEt.php +++ b/languages/messages/MessagesEt.php @@ -259,6 +259,8 @@ $magicWords = [ ]; $separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$minimumGroupingDigits = 2; + $linkTrail = '/^([äöõšüža-z]+)(.*)$/sDu'; $datePreferences = [ diff --git a/languages/messages/MessagesHy.php b/languages/messages/MessagesHy.php index 9a5a8098ad..34306d3617 100644 --- a/languages/messages/MessagesHy.php +++ b/languages/messages/MessagesHy.php @@ -12,6 +12,7 @@ $separatorTransformTable = [ ',' => "\xc2\xa0", # nbsp '.' => ',' ]; +$minimumGroupingDigits = 2; $fallback8bitEncoding = 'UTF-8'; diff --git a/languages/messages/MessagesKaa.php b/languages/messages/MessagesKaa.php index 52e8e77eb6..2c7cb9e26b 100644 --- a/languages/messages/MessagesKaa.php +++ b/languages/messages/MessagesKaa.php @@ -14,6 +14,7 @@ $separatorTransformTable = [ ',' => "\xc2\xa0", '.' 
=> ',', ]; +$minimumGroupingDigits = 2; $fallback8bitEncoding = 'windows-1254'; diff --git a/languages/messages/MessagesKk_cyrl.php b/languages/messages/MessagesKk_cyrl.php index 2857721140..c559d02854 100644 --- a/languages/messages/MessagesKk_cyrl.php +++ b/languages/messages/MessagesKk_cyrl.php @@ -22,6 +22,7 @@ $separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',', ]; +$minimumGroupingDigits = 2; $fallback8bitEncoding = 'windows-1251'; diff --git a/languages/messages/MessagesKsh.php b/languages/messages/MessagesKsh.php index 1fef8385ae..291ed14d53 100644 --- a/languages/messages/MessagesKsh.php +++ b/languages/messages/MessagesKsh.php @@ -95,6 +95,8 @@ $namespaceAliases = [ ]; $separatorTransformTable = [ ',' => "\xc2\xa0", '.' => ',' ]; +$minimumGroupingDigits = 2; + $linkTrail = '/^([äöüėëijßəğåůæœça-z]+)(.*)$/sDu'; // Remove German aliases diff --git a/languages/messages/MessagesKu_latn.php b/languages/messages/MessagesKu_latn.php index 32844a29dd..c1bf76df96 100644 --- a/languages/messages/MessagesKu_latn.php +++ b/languages/messages/MessagesKu_latn.php @@ -51,6 +51,7 @@ $namespaceAliases = [ ]; $separatorTransformTable = [ ',' => '.', '.' => ',' ]; +$minimumGroupingDigits = 2; $specialPageAliases = [ 'Allmessages' => [ 'Hemû_Peyam' ], diff --git a/languages/messages/MessagesPl.php b/languages/messages/MessagesPl.php index 01a9d1a5a8..1af008f184 100644 --- a/languages/messages/MessagesPl.php +++ b/languages/messages/MessagesPl.php @@ -119,10 +119,12 @@ $dateFormats = [ ]; $fallback8bitEncoding = 'iso-8859-2'; + $separatorTransformTable = [ ',' => "\xc2\xa0", // T4749 '.' => ',' ]; +$minimumGroupingDigits = 2; $linkTrail = '/^([a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]+)(.*)$/sDu'; diff --git a/languages/messages/MessagesRu.php b/languages/messages/MessagesRu.php index b5f310dbe5..1abecad68f 100644 --- a/languages/messages/MessagesRu.php +++ b/languages/messages/MessagesRu.php @@ -420,6 +420,7 @@ $separatorTransformTable = [ ',' => "\xc2\xa0", # nbsp '.' 
=> ',' ]; +$minimumGroupingDigits = 2; $fallback8bitEncoding = 'windows-1251'; $linkPrefixExtension = false; diff --git a/languages/messages/MessagesUk.php b/languages/messages/MessagesUk.php index d851e1c2c1..55bc1fca98 100644 --- a/languages/messages/MessagesUk.php +++ b/languages/messages/MessagesUk.php @@ -58,6 +58,7 @@ $separatorTransformTable = [ ',' => "\xc2\xa0", # nbsp '.' => ',' ]; +$minimumGroupingDigits = 2; $fallback8bitEncoding = 'windows-1251'; $linkPrefixExtension = true; diff --git a/tests/phpunit/languages/classes/LanguagePlTest.php b/tests/phpunit/languages/classes/LanguagePlTest.php index d7a0074397..14877290e6 100644 --- a/tests/phpunit/languages/classes/LanguagePlTest.php +++ b/tests/phpunit/languages/classes/LanguagePlTest.php @@ -76,7 +76,7 @@ class LanguagePlTest extends LanguageClassesTestCase { } /** - * @covers LanguagePl::commafy() + * @covers Language::commafy() * @dataProvider provideCommafyData */ public function testCommafy( $number, $numbersWithCommas ) { diff --git a/tests/phpunit/languages/classes/LanguageRuTest.php b/tests/phpunit/languages/classes/LanguageRuTest.php index 9124040f61..3e120f4f28 100644 --- a/tests/phpunit/languages/classes/LanguageRuTest.php +++ b/tests/phpunit/languages/classes/LanguageRuTest.php @@ -6,9 +6,6 @@ * @file */ -/** - * @covers LanguageRu - */ class LanguageRuTest extends LanguageClassesTestCase { /** * @dataProvider providePlural diff --git a/tests/phpunit/languages/classes/LanguageUkTest.php b/tests/phpunit/languages/classes/LanguageUkTest.php index 379162230d..0ccebbe2be 100644 --- a/tests/phpunit/languages/classes/LanguageUkTest.php +++ b/tests/phpunit/languages/classes/LanguageUkTest.php @@ -6,9 +6,6 @@ * @file */ -/** - * @covers LanguageUk - */ class LanguageUkTest extends LanguageClassesTestCase { /** * @dataProvider providePlural -- 2.20.1