From c6156c4b06260a809d1e0d68c76e468199659780 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 15 Jul 2012 15:47:21 -0400 Subject: [PATCH] Add language converter for Uzbek. Also add tests per Hashar, based on the Serbian file but less since Uzbek does not have "variant guessing" or custom plural rules. Change-Id: I27da994b88fbe13cfb7df12851e704d881397666 --- RELEASE-NOTES-1.20 | 1 + languages/classes/LanguageUz.php | 136 +++++++++++++++++++++ tests/phpunit/languages/LanguageUzTest.php | 120 ++++++++++++++++++ 3 files changed, 257 insertions(+) create mode 100644 languages/classes/LanguageUz.php create mode 100644 tests/phpunit/languages/LanguageUzTest.php diff --git a/RELEASE-NOTES-1.20 b/RELEASE-NOTES-1.20 index 8473a275ec..29341a6bd6 100644 --- a/RELEASE-NOTES-1.20 +++ b/RELEASE-NOTES-1.20 @@ -218,6 +218,7 @@ changes to languages because of Bugzilla reports. * (bug 36012) Space in $separatorTransformTable should be non-breaking in Portuguese, Esperanto and Udmurt. * Turoyo (tru) added. +* Cyrillic-Latin language converter added for Uzbek (uz). 
=== Other changes in 1.20 === * The user_token field is now left empty until a user attempts to login and
diff --git a/languages/classes/LanguageUz.php b/languages/classes/LanguageUz.php
new file mode 100644
index 0000000000..6c0f1f62be
--- /dev/null
+++ b/languages/classes/LanguageUz.php
@@ -0,0 +1,136 @@
+ 'a', 'А' => 'A',
+		'б' => 'b', 'Б' => 'B',
+		'д' => 'd', 'Д' => 'D',
+		'е' => 'e', 'Е' => 'E',
+		'э' => 'e', 'Э' => 'E',
+		'в' => 'v', 'В' => 'V',
+		'х' => 'x', 'Х' => 'X',
+		'ғ' => 'gʻ', 'Ғ' => 'Gʻ',
+		'г' => 'g', 'Г' => 'G',
+		'ҳ' => 'h', 'Ҳ' => 'H',
+		'ж' => 'j', 'Ж' => 'J',
+		'з' => 'z', 'З' => 'Z',
+		'и' => 'i', 'И' => 'I',
+		'к' => 'k', 'К' => 'K',
+		'л' => 'l', 'Л' => 'L',
+		'м' => 'm', 'М' => 'M',
+		'н' => 'n', 'Н' => 'N',
+		'о' => 'o', 'О' => 'O',
+		'п' => 'p', 'П' => 'P',
+		'р' => 'r', 'Р' => 'R',
+		'с' => 's', 'С' => 'S',
+		'т' => 't', 'Т' => 'T',
+		'у' => 'u', 'У' => 'U',
+		'ф' => 'f', 'Ф' => 'F',
+		'ў' => 'oʻ', 'Ў' => 'Oʻ',
+		// note: at the beginning of a word and right after a consonant, only "s" is used; this table always emits "ts"
+		'ц' => 'ts', 'Ц' => 'Ts',
+		'қ' => 'q', 'Қ' => 'Q',
+		'ё' => 'yo', 'Ё' => 'Yo',
+		'ю' => 'yu', 'Ю' => 'Yu',
+		'ч' => 'ch', 'Ч' => 'Ch',
+		'ш' => 'sh', 'Ш' => 'Sh',
+		'й' => 'y', 'Й' => 'Y',
+		'я' => 'ya', 'Я' => 'Ya',
+		'ъ' => 'ʼ',
+	);
+	// Latin-to-Cyrillic pairs; loadDefaultTables() wraps them in the 'uz-cyrl' ReplacementArray.
+	var $toCyrillic = array(
+		'a' => 'а', 'A' => 'А',
+		'b' => 'б', 'B' => 'Б',
+		'd' => 'д', 'D' => 'Д',
+		'e' => 'е', 'E' => 'Е',
+		' e' => ' э', ' E' => ' Э', // "э" is used at the beginning of a word instead of "e"
+		'ye' => 'е', 'Ye' => 'Е',
+		'f' => 'ф', 'F' => 'Ф',
+		'g' => 'г', 'G' => 'Г',
+		'g‘' => 'ғ', 'G‘' => 'Ғ', 'gʻ' => 'ғ', 'Gʻ' => 'Ғ',
+		'h' => 'ҳ', 'H' => 'Ҳ',
+		'i' => 'и', 'I' => 'И',
+		'k' => 'к', 'K' => 'К',
+		'l' => 'л', 'L' => 'Л',
+		'm' => 'м', 'M' => 'М',
+		'n' => 'н', 'N' => 'Н',
+		'o' => 'о', 'O' => 'О',
+		'p' => 'п', 'P' => 'П',
+		'r' => 'р', 'R' => 'Р',
+		's' => 'с', 'S' => 'С',
+		't' => 'т', 'T' => 'Т',
+		'u' => 'у', 'U' => 'У',
+		'v' => 'в', 'V' => 'В',
+		'x' => 'х', 'X' => 'Х',
+		'z' => 'з', 'Z' => 'З',
+		'j' => 'ж', 'J' => 'Ж',
+		'o‘' => 'ў', 'O‘' => 'Ў', 'oʻ' => 'ў', 'Oʻ' => 'Ў',
+		'ts' => 'ц', 'Ts' => 'Ц',
+		'q' => 'қ', 'Q' => 'Қ',
+		'yo' => 'ё', 'Yo' => 'Ё',
+		'yu' => 'ю', 'Yu' => 'Ю',
+		'ch' => 'ч', 'Ch' => 'Ч',
+		'sh' => 'ш', 'Sh' => 'Ш',
+		'y' => 'й', 'Y' => 'Й',
+		'ya' => 'я', 'Ya' => 'Я',
+		'ʼ' => 'ъ',
+	);
+	/** Fill $this->mTables: 'uz-cyrl' from $toCyrillic, 'uz-latn' from $toLatin, and an empty table for 'uz'. */
+	function loadDefaultTables() {
+		$this->mTables = array(
+			'uz-cyrl' => new ReplacementArray( $this->toCyrillic ),
+			'uz-latn' => new ReplacementArray( $this->toLatin ),
+			'uz' => new ReplacementArray()
+		);
+	}
+
+}
+
+/**
+ * Uzbek language class; installs a UzConverter for the variants uz, uz-latn and uz-cyrl.
+ *
+ * @ingroup Language
+ */
+class LanguageUz extends Language {
+	function __construct() {
+		global $wgHooks;
+		parent::__construct();
+
+		$variants = array( 'uz', 'uz-latn', 'uz-cyrl' );
+		$variantfallbacks = array(
+			'uz' => 'uz-latn',
+			'uz-cyrl' => 'uz',
+			'uz-latn' => 'uz',
+		);
+		// Create the converter and append it to the ArticleSaveComplete hook handlers.
+		$this->mConverter = new UzConverter( $this, 'uz', $variants, $variantfallbacks );
+		$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
+	}
+}
diff --git a/tests/phpunit/languages/LanguageUzTest.php b/tests/phpunit/languages/LanguageUzTest.php
new file mode 100644
index 0000000000..86f483d0fc
--- /dev/null
+++ b/tests/phpunit/languages/LanguageUzTest.php
@@ -0,0 +1,120 @@
+
+ * @copyright Copyright © 2012, Robin Pepermans
+ * @copyright Copyright © 2011, Antoine Musso
+ * @file
+ */
+
+require_once dirname( dirname( __FILE__ ) ) . '/bootstrap.php';
+
+/** Tests for MediaWiki languages/classes/LanguageUz.php */
+class LanguageUzTest extends MediaWikiTestCase {
+	/* Language object. 
Initialized before each test */
+	private $lang;
+
+	function setUp() {
+		parent::setUp(); // keep the parent class's fixture chain intact
+		$this->lang = Language::factory( 'uz' );
+	}
+	function tearDown() {
+		unset( $this->lang );
+		parent::tearDown(); // parent cleanup runs after our own
+	}
+
+	/**
+	 * @author Nikola Smolenski
+	 */
+	function testConversionToCyrillic() {
+		// A conversion of Latin to Cyrillic
+		$this->assertEquals( 'абвгғ',
+			$this->convertToCyrillic( 'abvggʻ' )
+		);
+		// Same as above, but assert that -{}-s must be removed and not converted
+		$this->assertEquals( 'ljабnjвгўоdb',
+			$this->convertToCyrillic( '-{lj}-ab-{nj}-vgoʻo-{db}-' )
+		);
+		// A simple conversion of Cyrillic to Cyrillic
+		$this->assertEquals( 'абвг',
+			$this->convertToCyrillic( 'абвг' )
+		);
+		// Same as above, but assert that -{}-s must be removed and not converted
+		$this->assertEquals( 'ljабnjвгdaž',
+			$this->convertToCyrillic( '-{lj}-аб-{nj}-вг-{da}-ž' )
+		);
+	}
+
+	function testConversionToLatin() {
+		// A simple conversion of Latin to Latin
+		$this->assertEquals( 'abdef',
+			$this->convertToLatin( 'abdef' )
+		);
+		// A conversion of Cyrillic to Latin
+		$this->assertEquals( 'gʻabtsdOʻQyo',
+			$this->convertToLatin( 'ғабцдЎҚё' )
+		);
+	}
+
+	##### HELPERS #####################################################
+	/**
+	 * Wrapper to verify that text stays the same after applying conversion
+	 * @param $text string Text to convert
+	 * @param $variant string Language variant 'uz-cyrl' or 'uz-latn'
+	 * @param $msg string Optional message
+	 */
+	function assertUnConverted( $text, $variant, $msg = '' ) {
+		$this->assertEquals(
+			$text,
+			$this->convertTo( $text, $variant ),
+			$msg
+		);
+	}
+	/**
+	 * Wrapper to verify that a text is different once converted to a variant.
+	 * @param $text string Text to convert
+	 * @param $variant string Language variant 'uz-cyrl' or 'uz-latn'
+	 * @param $msg string Optional message
+	 */
+	function assertConverted( $text, $variant, $msg = '' ) {
+		$this->assertNotEquals(
+			$text,
+			$this->convertTo( $text, $variant ),
+			$msg
+		);
+	}
+
+	/**
+	 * Verify the given Cyrillic text is not converted when using the Cyrillic
+	 * variant and is changed when converted to the Latin variant.
+	 */
+	function assertCyrillic( $text, $msg = '' ) {
+		$this->assertUnConverted( $text, 'uz-cyrl', $msg );
+		$this->assertConverted( $text, 'uz-latn', $msg );
+	}
+	/**
+	 * Verify the given Latin text is not converted when using the Latin
+	 * variant and is changed when converted to the Cyrillic variant.
+	 */
+	function assertLatin( $text, $msg = '' ) {
+		$this->assertUnConverted( $text, 'uz-latn', $msg );
+		$this->assertConverted( $text, 'uz-cyrl', $msg );
+	}
+
+
+	/** Wrapper for the converter's convertTo() method */
+	function convertTo( $text, $variant ) {
+		return $this->lang->mConverter->convertTo( $text, $variant );
+	}
+	function convertToCyrillic( $text ) {
+		return $this->convertTo( $text, 'uz-cyrl' );
+	}
+	function convertToLatin( $text ) {
+		return $this->convertTo( $text, 'uz-latn' );
+	}
+}
-- 
2.20.1