From b8522fac08e80e4d5f61e9f71a8aed2115604f63 Mon Sep 17 00:00:00 2001 From: Antoine Musso Date: Mon, 14 Mar 2011 22:14:39 +0000 Subject: [PATCH] =?utf8?q?bug=2028040=20Turkish:=20properly=20lower=20case?= =?utf8?q?=20'I'=20to=20'=C4=B1'=20(dotless=20i)?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Turkish has two different i, one with a dot and another without a dot. They are totally different letters in this language, so we have to override the ucfirst and lcfirst methods. See http://en.wikipedia.org/wiki/Dotted_and_dotless_I Credits to #wikipedia-tr users berm, []LuCkY[] and Emperyan --- RELEASE-NOTES | 1 + languages/classes/LanguageTr.php | 15 +++++ tests/phpunit/languages/LanguageTrTest.php | 67 ++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 tests/phpunit/languages/LanguageTrTest.php diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 0a38d43d9a..6540d16d52 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -275,6 +275,7 @@ changes to languages because of Bugzilla reports. * (bug 27681) Set $namespaceGenderAliases for Portuguese (pt and pt-br) * (bug 27785) Fallback language for Kabardian (kbd) is English now. * (bug 27825) Raw watchlist edit message now uses formatted numbers. +* (bug 28040) Turkish: properly lower case 'I' to 'ı' (dotless i) == Compatibility == diff --git a/languages/classes/LanguageTr.php b/languages/classes/LanguageTr.php index 245b5b0656..dec504588a 100644 --- a/languages/classes/LanguageTr.php +++ b/languages/classes/LanguageTr.php @@ -3,9 +3,15 @@ /** * Turkish (Türkçe) * + * Turkish has two different i, one with a dot and another without a dot. They + * are totally different letters in this language, so we have to override the + * ucfirst and lcfirst methods. + * See http://en.wikipedia.org/wiki/Dotted_and_dotless_I + * and @bug 28040 * @ingroup Language */ class LanguageTr extends Language { + function ucfirst ( $string ) { if ( !empty( $string ) && $string[0] == 'i' ) { return 'İ' . substr( $string, 1 ); @@ -13,4 +19,13 @@ class LanguageTr extends Language { return parent::ucfirst( $string ); } } + + function lcfirst ( $string ) { + if ( !empty( $string ) && $string[0] == 'I' ) { + return 'ı' . substr( $string, 1 ); + } else { + return parent::lcfirst( $string ); + } + } + } diff --git a/tests/phpunit/languages/LanguageTrTest.php b/tests/phpunit/languages/LanguageTrTest.php new file mode 100644 index 0000000000..ddc8ea6a99 --- /dev/null +++ b/tests/phpunit/languages/LanguageTrTest.php @@ -0,0 +1,67 @@ +lang = Language::factory( 'Tr' ); + } + function tearDown() { + unset( $this->lang ); + } + + /** + * See @bug 28040 + * Credits to #wikipedia-tr users berm, []LuCkY[] and Emperyan + * @see http://en.wikipedia.org/wiki/Dotted_and_dotless_I + * @dataProvider provideDottedAndDotlessI + */ + function testDottedAndDotlessI( $func, $input, $inputCase, $expected ) { + if( $func == 'ucfirst' ) { + $res = $this->lang->ucfirst( $input ); + } elseif( $func == 'lcfirst' ) { + $res = $this->lang->lcfirst( $input ); + } else { + throw new MWException( __METHOD__ . " given an invalid function name '$func'" ); + } + + $msg = "Converting $inputCase case '$input' with $func should give '$expected'"; + + $this->assertEquals( $expected, $res, $msg ); + } + + function provideDottedAndDotlessI() { + return array( + # function, input, input case, expected + # Case changed: + array( 'ucfirst', 'ı', 'lower', 'I' ), + array( 'ucfirst', 'i', 'lower', 'İ' ), + array( 'lcfirst', 'I', 'upper', 'ı' ), + array( 'lcfirst', 'İ', 'upper', 'i' ), + + # Already using the correct case + array( 'ucfirst', 'I', 'upper', 'I' ), + array( 'ucfirst', 'İ', 'upper', 'İ' ), + array( 'lcfirst', 'ı', 'lower', 'ı' ), + array( 'lcfirst', 'i', 'lower', 'i' ), + + # A real example taken from bug 28040 using + # http://tr.wikipedia.org/wiki/%C4%B0Phone + array( 'lcfirst', 'iPhone', 'lower', 'iPhone' ), + + # next case is valid in Turkish but are different words if we + # consider IPhone is English! + array( 'lcfirst', 'IPhone', 'upper', 'ıPhone' ), + + ); + } + +} -- 2.20.1