From e8c15f0e9437dfabf6d92156f8f1abdd8015665c Mon Sep 17 00:00:00 2001 From: addshore Date: Sun, 3 Apr 2016 11:23:20 +0300 Subject: [PATCH] Split Collation.php Change-Id: I6abfecf91cdce83dd34b1e8aa8e0b35315f62742 --- autoload.php | 12 +- includes/collation/Collation.php | 111 ++++++++++ includes/collation/CollationCkb.php | 33 +++ includes/collation/CollationEt.php | 58 +++++ .../IcuCollation.php} | 204 +----------------- includes/collation/IdentityCollation.php | 42 ++++ includes/collation/UppercaseCollation.php | 42 ++++ 7 files changed, 300 insertions(+), 202 deletions(-) create mode 100644 includes/collation/Collation.php create mode 100644 includes/collation/CollationCkb.php create mode 100644 includes/collation/CollationEt.php rename includes/{Collation.php => collation/IcuCollation.php} (73%) create mode 100644 includes/collation/IdentityCollation.php create mode 100644 includes/collation/UppercaseCollation.php diff --git a/autoload.php b/autoload.php index fd4f873d41..54a0ba0972 100644 --- a/autoload.php +++ b/autoload.php @@ -240,9 +240,9 @@ $wgAutoloadLocalClasses = [ 'CliInstaller' => __DIR__ . '/includes/installer/CliInstaller.php', 'CloneDatabase' => __DIR__ . '/includes/db/CloneDatabase.php', 'CodeContentHandler' => __DIR__ . '/includes/content/CodeContentHandler.php', - 'Collation' => __DIR__ . '/includes/Collation.php', - 'CollationCkb' => __DIR__ . '/includes/Collation.php', - 'CollationEt' => __DIR__ . '/includes/Collation.php', + 'Collation' => __DIR__ . '/includes/collation/Collation.php', + 'CollationCkb' => __DIR__ . '/includes/collation/CollationCkb.php', + 'CollationEt' => __DIR__ . '/includes/collation/CollationEt.php', 'CommandLineInc' => __DIR__ . '/maintenance/commandLine.inc', 'CommandLineInstaller' => __DIR__ . '/maintenance/install.php', 'CompareParserCache' => __DIR__ . '/maintenance/compareParserCache.php', @@ -564,8 +564,8 @@ $wgAutoloadLocalClasses = [ 'IPSet' => __DIR__ . '/includes/compat/IPSetCompat.php', 'IPTC' => __DIR__ . '/includes/media/IPTC.php', 'IRCColourfulRCFeedFormatter' => __DIR__ . '/includes/rcfeed/IRCColourfulRCFeedFormatter.php', - 'IcuCollation' => __DIR__ . '/includes/Collation.php', - 'IdentityCollation' => __DIR__ . '/includes/Collation.php', + 'IcuCollation' => __DIR__ . '/includes/collation/IcuCollation.php', + 'IdentityCollation' => __DIR__ . '/includes/collation/IdentityCollation.php', 'ImageBuilder' => __DIR__ . '/maintenance/rebuildImages.php', 'ImageCleanup' => __DIR__ . '/maintenance/cleanupImages.php', 'ImageGallery' => __DIR__ . '/includes/gallery/TraditionalImageGallery.php', @@ -1375,7 +1375,7 @@ $wgAutoloadLocalClasses = [ 'UploadStashNotLoggedInException' => __DIR__ . '/includes/upload/UploadStash.php', 'UploadStashWrongOwnerException' => __DIR__ . '/includes/upload/UploadStash.php', 'UploadStashZeroLengthFileException' => __DIR__ . '/includes/upload/UploadStash.php', - 'UppercaseCollation' => __DIR__ . '/includes/Collation.php', + 'UppercaseCollation' => __DIR__ . '/includes/collation/UppercaseCollation.php', 'UsageException' => __DIR__ . '/includes/api/ApiMain.php', 'User' => __DIR__ . '/includes/user/User.php', 'UserArray' => __DIR__ . '/includes/user/UserArray.php', diff --git a/includes/collation/Collation.php b/includes/collation/Collation.php new file mode 100644 index 0000000000..84d1f249cc --- /dev/null +++ b/includes/collation/Collation.php @@ -0,0 +1,111 @@ +digitTransformLanguage = Language::factory( 'ckb' ); + } +} diff --git a/includes/collation/CollationEt.php b/includes/collation/CollationEt.php new file mode 100644 index 0000000000..d80bce30e1 --- /dev/null +++ b/includes/collation/CollationEt.php @@ -0,0 +1,58 @@ +lang = Language::factory( 'en' ); - } - - function getSortKey( $string ) { - return $this->lang->uc( $string ); - } - - function getFirstLetter( $string ) { - if ( $string[0] == "\0" ) { - $string = substr( $string, 1 ); - } - return $this->lang->ucfirst( $this->lang->firstChar( $string ) ); - } -} - -/** - * Collation class that's essentially a no-op. - * - * Does sorting based on binary value of the string. - * Like how things were pre 1.17. - */ -class IdentityCollation extends Collation { - - function getSortKey( $string ) { - return $string; - } - - function getFirstLetter( $string ) { - global $wgContLang; - // Copied from UppercaseCollation. - // I'm kind of unclear on when this could happen... - if ( $string[0] == "\0" ) { - $string = substr( $string, 1 ); - } - return $wgContLang->firstChar( $string ); - } -} - class IcuCollation extends Collation { const FIRST_LETTER_VERSION = 2; @@ -296,7 +161,7 @@ class IcuCollation extends Collation { const RECORD_LENGTH = 14; - function __construct( $locale ) { + public function __construct( $locale ) { if ( !extension_loaded( 'intl' ) ) { throw new MWException( 'An ICU collation was requested, ' . 'but the intl extension is not available.' ); @@ -316,7 +181,7 @@ class IcuCollation extends Collation { $this->primaryCollator->setStrength( Collator::PRIMARY ); } - function getSortKey( $string ) { + public function getSortKey( $string ) { // intl extension produces non null-terminated // strings. Appending '' fixes it so that it doesn't generate // a warning on each access in debug php. @@ -326,14 +191,14 @@ class IcuCollation extends Collation { return $key; } - function getPrimarySortKey( $string ) { + public function getPrimarySortKey( $string ) { MediaWiki\suppressWarnings(); $key = $this->primaryCollator->getSortKey( $string ) . ''; MediaWiki\restoreWarnings(); return $key; } - function getFirstLetter( $string ) { + public function getFirstLetter( $string ) { $string = strval( $string ); if ( $string === '' ) { return ''; @@ -361,7 +226,7 @@ class IcuCollation extends Collation { return $this->getLetterByIndex( $min ); } - function getFirstLetterData() { + public function getFirstLetterData() { if ( $this->firstLetterData !== null ) { return $this->firstLetterData; } @@ -512,21 +377,21 @@ class IcuCollation extends Collation { return $data; } - function getLetterByIndex( $index ) { + public function getLetterByIndex( $index ) { if ( $this->firstLetterData === null ) { $this->getFirstLetterData(); } return $this->firstLetterData['chars'][$index]; } - function getSortKeyByLetterIndex( $index ) { + public function getSortKeyByLetterIndex( $index ) { if ( $this->firstLetterData === null ) { $this->getFirstLetterData(); } return $this->firstLetterData['keys'][$index]; } - function getFirstLetterCount() { + public function getFirstLetterCount() { if ( $this->firstLetterData === null ) { $this->getFirstLetterData(); } @@ -593,56 +458,3 @@ class IcuCollation extends Collation { } } } - -/** - * Workaround for the lack of support of Sorani Kurdish / Central Kurdish language ('ckb') in ICU. - * - * Uses the same collation rules as Persian / Farsi ('fa'), but different characters for digits. - */ -class CollationCkb extends IcuCollation { - function __construct() { - // This will set $locale and collators, which affect the actual sorting order - parent::__construct( 'fa' ); - // Override the 'fa' language set by parent constructor, which affects #getFirstLetterData() - $this->digitTransformLanguage = Language::factory( 'ckb' ); - } -} - -/** - * Workaround for incorrect collation of Estonian language ('et') in ICU (bug 54168). - * - * 'W' and 'V' should not be considered the same letter for the purposes of collation in modern - * Estonian. We work around this by replacing 'W' and 'w' with 'ᴡ' U+1D21 'LATIN LETTER SMALL - * CAPITAL W' for sortkey generation, which is collated like 'W' and is not tailored to have the - * same primary weight as 'V' in Estonian. - */ -class CollationEt extends IcuCollation { - function __construct() { - parent::__construct( 'et' ); - } - - private static function mangle( $string ) { - return str_replace( - [ 'w', 'W' ], - 'ᴡ', // U+1D21 'LATIN LETTER SMALL CAPITAL W' - $string - ); - } - - private static function unmangle( $string ) { - // Casing data is lost… - return str_replace( - 'ᴡ', // U+1D21 'LATIN LETTER SMALL CAPITAL W' - 'W', - $string - ); - } - - function getSortKey( $string ) { - return parent::getSortKey( self::mangle( $string ) ); - } - - function getFirstLetter( $string ) { - return self::unmangle( parent::getFirstLetter( self::mangle( $string ) ) ); - } -} diff --git a/includes/collation/IdentityCollation.php b/includes/collation/IdentityCollation.php new file mode 100644 index 0000000000..9a99f1a4e8 --- /dev/null +++ b/includes/collation/IdentityCollation.php @@ -0,0 +1,42 @@ +firstChar( $string ); + } +} diff --git a/includes/collation/UppercaseCollation.php b/includes/collation/UppercaseCollation.php new file mode 100644 index 0000000000..c589a760b9 --- /dev/null +++ b/includes/collation/UppercaseCollation.php @@ -0,0 +1,42 @@ +lang = Language::factory( 'en' ); + } + + public function getSortKey( $string ) { + return $this->lang->uc( $string ); + } + + public function getFirstLetter( $string ) { + if ( $string[0] == "\0" ) { + $string = substr( $string, 1 ); + } + return $this->lang->ucfirst( $this->lang->firstChar( $string ) ); + } + +} -- 2.20.1