From 73f5937047440e8c82aa58e90c150aa9797f5a4b Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Fri, 28 Apr 2017 04:52:49 +0000 Subject: [PATCH] Add collation for Bashkir (ba) This is based on a numeric uppercase collation. Bashkir characters will be remapped to the private use area for the purpose of sorting. Bug: T162823 Change-Id: I65f1af0b57ff6ded7d464e39efd401f178a3519e --- autoload.php | 2 + .../collation/BashkirUppercaseCollation.php | 71 +++++++++++++++ includes/collation/Collation.php | 2 + .../collation/CustomUppercaseCollation.php | 87 +++++++++++++++++++ .../{ => collation}/CollationTest.php | 1 + .../CustomUppercaseCollationTest.php | 57 ++++++++++++ 6 files changed, 220 insertions(+) create mode 100644 includes/collation/BashkirUppercaseCollation.php create mode 100644 includes/collation/CustomUppercaseCollation.php rename tests/phpunit/includes/{ => collation}/CollationTest.php (98%) create mode 100644 tests/phpunit/includes/collation/CustomUppercaseCollationTest.php diff --git a/autoload.php b/autoload.php index e5161f1cb7..36773746b0 100644 --- a/autoload.php +++ b/autoload.php @@ -177,6 +177,7 @@ $wgAutoloadLocalClasses = [ 'BagOStuff' => __DIR__ . '/includes/libs/objectcache/BagOStuff.php', 'BaseDump' => __DIR__ . '/maintenance/backupPrefetch.inc', 'BaseTemplate' => __DIR__ . '/includes/skins/BaseTemplate.php', + 'BashkirUppercaseCollation' => __DIR__ . '/includes/collation/BashkirUppercaseCollation.php', 'BatchRowIterator' => __DIR__ . '/includes/utils/BatchRowIterator.php', 'BatchRowUpdate' => __DIR__ . '/includes/utils/BatchRowUpdate.php', 'BatchRowWriter' => __DIR__ . '/includes/utils/BatchRowWriter.php', @@ -315,6 +316,7 @@ $wgAutoloadLocalClasses = [ 'CssContentHandler' => __DIR__ . '/includes/content/CssContentHandler.php', 'CsvStatsOutput' => __DIR__ . '/maintenance/language/StatOutputs.php', 'CurlHttpRequest' => __DIR__ . '/includes/http/CurlHttpRequest.php', + 'CustomUppercaseCollation' => __DIR__ . '/includes/collation/CustomUppercaseCollation.php', 'DBAccessBase' => __DIR__ . '/includes/dao/DBAccessBase.php', 'DBAccessError' => __DIR__ . '/includes/libs/rdbms/exception/DBAccessError.php', 'DBAccessObjectUtils' => __DIR__ . '/includes/dao/DBAccessObjectUtils.php', diff --git a/includes/collation/BashkirUppercaseCollation.php b/includes/collation/BashkirUppercaseCollation.php new file mode 100644 index 0000000000..33ed9bc8d8 --- /dev/null +++ b/includes/collation/BashkirUppercaseCollation.php @@ -0,0 +1,71 @@ += 64 ) { + throw new UnexpectedValueException( "Alphabet must be < 64 items" ); + } + $this->alphabet = $alphabet; + + $this->puaSubset = []; + $len = count( $alphabet ); + for ( $i = 0; $i < $len; $i++ ) { + $this->puaSubset[] = "\xF3\xB3\x80" . chr( $i + 128 ); + } + parent::__construct( $lang ); + } + + private function convertToPua( $string ) { + return str_replace( $this->alphabet, $this->puaSubset, $string ); + } + + public function getSortKey( $string ) { + return $this->convertToPua( parent::getSortKey( $string ) ); + } + + public function getFirstLetter( $string ) { + // In case a title has a PUA code in it, make it sort + // under the header for the character it would replace + // to avoid inconsistent behaviour. This class mostly + // assumes that people will not use PUA codes. + return parent::getFirstLetter( + str_replace( $this->puaSubset, $this->alphabet, $string ) + ); + } +} diff --git a/tests/phpunit/includes/CollationTest.php b/tests/phpunit/includes/collation/CollationTest.php similarity index 98% rename from tests/phpunit/includes/CollationTest.php rename to tests/phpunit/includes/collation/CollationTest.php index bf283aae73..25911a79c1 100644 --- a/tests/phpunit/includes/CollationTest.php +++ b/tests/phpunit/includes/collation/CollationTest.php @@ -112,6 +112,7 @@ class CollationTest extends MediaWikiLangTestCase { [ 'uca-fi', 'Ŧ', 'T' ], [ 'uca-fi', 'Ʒ', 'Z' ], [ 'uca-fi', 'Ŋ', 'N' ], + [ 'uppercase-ba', 'в', 'В' ], ]; } } diff --git a/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php b/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php new file mode 100644 index 0000000000..5d5317be7b --- /dev/null +++ b/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php @@ -0,0 +1,57 @@ +collation = new CustomUppercaseCollation( [ + 'D', + 'C', + 'B' + ], Language::factory( 'en' ) ); + + parent::setUp(); + } + + /** + * @dataProvider providerOrder + */ + public function testOrder( $first, $second, $msg ) { + $sortkey1 = $this->collation->getSortKey( $first ); + $sortkey2 = $this->collation->getSortKey( $second ); + + $this->assertTrue( strcmp( $sortkey1, $sortkey2 ) < 0, $msg ); + } + + public function providerOrder() { + return [ + [ 'X', 'Z', 'Maintain order of unrearranged' ], + [ 'D', 'C', 'Actually resorts' ], + [ 'D', 'B', 'resort test 2' ], + [ 'Adobe', 'Abode', 'not first letter' ], + [ '💩 ', 'C', 'Test relocated to end' ], + [ 'c', 'b', 'lowercase' ], + [ 'x', 'z', 'lowercase original' ], + [ 'C50D', 'C100', 'Numbers' ] + ]; + } + + /** + * @dataProvider provideGetFirstLetter + */ + public function testGetFirstLetter( $string, $first ) { + $this->assertSame( $this->collation->getFirstLetter( $string ), $first ); + } + + public function provideGetFirstLetter() { + return [ + [ 'Do', 'D' ], + [ 'do', 'D' ], + [ 'Ao', 'A' ], + [ 'afdsa', 'A' ], + [ "\xF3\xB3\x80\x80Foo", 'D' ], + [ "\xF3\xB3\x80\x81Foo", 'C' ], + [ "\xF3\xB3\x80\x82Foo", 'B' ], + [ "\xF3\xB3\x80\x83Foo", "\xF3\xB3\x80\x83" ], + ]; + } +} -- 2.20.1