From b8522fac08e80e4d5f61e9f71a8aed2115604f63 Mon Sep 17 00:00:00 2001
From: Antoine Musso <hashar@users.mediawiki.org>
Date: Mon, 14 Mar 2011 22:14:39 +0000
Subject: [PATCH] =?utf8?q?bug=2028040=20Turkish:=20properly=20lower=20case?=
 =?utf8?q?=20'I'=20to=20'=C4=B1'=20(dotless=20i)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Turkish has two different i, one with a dot and another without a dot. They
are totally different letters in this language, so we have to override the
ucfirst and lcfirst methods.
See http://en.wikipedia.org/wiki/Dotted_and_dotless_I

Credits to #wikipedia-tr users berm, []LuCkY[] and Emperyan
---
 RELEASE-NOTES                              |  1 +
 languages/classes/LanguageTr.php           | 15 +++++
 tests/phpunit/languages/LanguageTrTest.php | 67 ++++++++++++++++++++++
 3 files changed, 83 insertions(+)
 create mode 100644 tests/phpunit/languages/LanguageTrTest.php

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 0a38d43d9a..6540d16d52 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -275,6 +275,7 @@ changes to languages because of Bugzilla reports.
 * (bug 27681) Set $namespaceGenderAliases for Portuguese (pt and pt-br)
 * (bug 27785) Fallback language for Kabardian (kbd) is English now.
 * (bug 27825) Raw watchlist edit message now uses formatted numbers.
+* (bug 28040) Turkish: properly lower case 'I' to 'ı' (dotless i)
 
 == Compatibility ==
 
diff --git a/languages/classes/LanguageTr.php b/languages/classes/LanguageTr.php
index 245b5b0656..dec504588a 100644
--- a/languages/classes/LanguageTr.php
+++ b/languages/classes/LanguageTr.php
@@ -3,9 +3,15 @@
 /**
  * Turkish (Türkçe)
  *
+ * Turkish has two different i, one with a dot and another without a dot. They
+ * are totally different letters in this language, so we have to override the
+ * ucfirst and lcfirst methods.
+ * See http://en.wikipedia.org/wiki/Dotted_and_dotless_I
+ * and @bug 28040
  * @ingroup Language
  */
 class LanguageTr extends Language {
+
 	function ucfirst ( $string ) {
 		if ( !empty( $string ) && $string[0] == 'i' ) {
 			return 'Ä°' . substr( $string, 1 );
@@ -13,4 +19,13 @@ class LanguageTr extends Language {
 			return parent::ucfirst( $string );
 		}
 	}
+
+	function lcfirst ( $string ) { // Turkish-aware lcfirst: a leading ASCII 'I' becomes dotless 'ı' (bug 28040)
+		if ( !empty( $string ) && $string[0] === 'I' ) { // 'I' is one byte in UTF-8, so a byte-offset check is safe
+			return 'ı' . substr( $string, 1 ); // swap only the first byte; the rest of the string is untouched
+		} else {
+			return parent::lcfirst( $string ); // every other first letter is delegated to the parent class
+		}
+	}
+
 }
diff --git a/tests/phpunit/languages/LanguageTrTest.php b/tests/phpunit/languages/LanguageTrTest.php
new file mode 100644
index 0000000000..ddc8ea6a99
--- /dev/null
+++ b/tests/phpunit/languages/LanguageTrTest.php
@@ -0,0 +1,67 @@
+<?php
+/**
+ * @author Ashar Voultoiz
+ * @copyright Copyright © 2011, Ashar Voultoiz
+ * @file
+ */
+
+require_once dirname(dirname(__FILE__)). '/bootstrap.php';
+
+/** Tests for MediaWiki languages/LanguageTr.php */
+class LanguageTrTest extends MediaWikiTestCase {
+	private $lang; // Language object under test; created in setUp(), dropped in tearDown()
+
+	function setUp() {
+		$this->lang = Language::factory( 'Tr' );
+	}
+	function tearDown() {
+		unset( $this->lang );
+	}
+
+	/**
+	 * Check first-letter case conversion of dotted and dotless i (@bug 28040).
+	 * Credits to #wikipedia-tr users berm, []LuCkY[] and Emperyan
+	 * @see http://en.wikipedia.org/wiki/Dotted_and_dotless_I
+	 * @dataProvider provideDottedAndDotlessI
+	 */
+	function testDottedAndDotlessI( $func, $input, $inputCase, $expected ) {
+		if( $func === 'ucfirst' ) {
+			$res = $this->lang->ucfirst( $input );
+		} elseif( $func === 'lcfirst' ) {
+			$res = $this->lang->lcfirst( $input );
+		} else {
+			throw new MWException( __METHOD__ . " given an invalid function name '$func'" );
+		}
+
+		$msg = "Converting $inputCase case '$input' with $func should give '$expected'";
+
+		$this->assertEquals( $expected, $res, $msg );
+	}
+
+	function provideDottedAndDotlessI() {
+		return array(
+			# function, input, input case, expected
+			# Case changed:
+			array( 'ucfirst', 'ı', 'lower', 'I' ),
+			array( 'ucfirst', 'i', 'lower', 'İ' ),
+			array( 'lcfirst', 'I', 'upper', 'ı' ),
+			array( 'lcfirst', 'İ', 'upper', 'i' ),
+
+			# Already using the correct case
+			array( 'ucfirst', 'I', 'upper', 'I' ),
+			array( 'ucfirst', 'İ', 'upper', 'İ' ),
+			array( 'lcfirst', 'ı', 'lower', 'ı' ),
+			array( 'lcfirst', 'i', 'lower', 'i' ),
+
+			# A real example taken from bug 28040 using
+			# http://tr.wikipedia.org/wiki/%C4%B0Phone
+			array( 'lcfirst', 'iPhone', 'lower', 'iPhone' ),
+
+			# The next case is valid in Turkish, but gives a different word if we
+			# consider IPhone to be English!
+			array( 'lcfirst', 'IPhone', 'upper', 'ıPhone' ),
+
+		);
+	}
+
+}
-- 
2.20.1