Change-Id: I16c660e54191b63cd6eb3407cb00504665930c4e
14 files changed:
In the "maintenance" directory, run the updateDoubleWidthSearch.php
script. This will update the searchindex table for those pages that
In the "maintenance" directory, run the updateDoubleWidthSearch.php
script. This will update the searchindex table for those pages that
-contain double-byte latin characters.
+contain double-byte Latin characters.
== Upgrading from 1.10 or earlier ==
== Upgrading from 1.10 or earlier ==
$wgDefaultLanguageVariant = false;
/**
$wgDefaultLanguageVariant = false;
/**
- * Whether to enable the pig latin variant of English (en-x-piglatin),
+ * Whether to enable the pig Latin variant of English (en-x-piglatin),
* used to ease variant development work.
*/
$wgUsePigLatinVariant = false;
* used to ease variant development work.
*/
$wgUsePigLatinVariant = false;
* letters (denoted by keys starting with '-').
*
* These are additions to (or subtractions from) the data stored in the
* letters (denoted by keys starting with '-').
*
* These are additions to (or subtractions from) the data stored in the
- * first-letters-root.php data file (which among others includes full basic latin,
- * cyrillic and greek alphabets).
+ * first-letters-root.php data file (which among others includes full basic Latin,
+ * Cyrillic and Greek alphabets).
*
* "Separate letter" is a letter that would have a separate heading/section
* for it in a dictionary or a phone book in this language. This data isn't
*
* "Separate letter" is a letter that would have a separate heading/section
* for it in a dictionary or a phone book in this language. This data isn't
} else {
// For Chinese, words may legitimately abut other words in the text literal.
// Don't add \b boundary checks... note this could cause false positives
} else {
// For Chinese, words may legitimately abut other words in the text literal.
// Don't add \b boundary checks... note this could cause false positives
} else {
// For Chinese, words may legitimately abut other words in the text literal.
// Don't add \b boundary checks... note this could cause false positives
} else {
// For Chinese, words may legitimately abut other words in the text literal.
// Don't add \b boundary checks... note this could cause false positives
* Invoked with {{grammar:case|word}}
*
* Depending on word there are four different ways of converting to other cases.
* Invoked with {{grammar:case|word}}
*
* Depending on word there are four different ways of converting to other cases.
- * 1) Word consist of not cyrillic letters or is an abbreviation.
+ * 1) Word consists of non-Cyrillic letters or is an abbreviation.
* Then result word is: word + hyphen + case ending.
*
* Then result word is: word + hyphen + case ending.
*
- * 2) Word consist of cyrillic letters.
+ * 2) Word consists of Cyrillic letters.
* 2.1) Word is in plural.
* Then result word is: word - last letter + case ending. Ending of allative case here is 'æм'.
*
* 2.1) Word is in plural.
* Then result word is: word - last letter + case ending. Ending of allative case here is 'æм'.
*
$jot = 'й';
} elseif ( preg_match( "/у$/u", $word ) ) {
# Checking if $word ends on 'у'. 'У'
$jot = 'й';
} elseif ( preg_match( "/у$/u", $word ) ) {
# Checking if $word ends on 'у'. 'У'
- # can be either consonant 'W' or vowel 'U' in cyrillic Ossetic.
+ # can be either consonant 'W' or vowel 'U' in Cyrillic Ossetic.
# Examples: {{grammar:genitive|аунеу}} = аунеуы, {{grammar:genitive|лæппу}} = лæппуйы.
if ( !preg_match( "/[аæеёиоыэюя]$/u", mb_substr( $word, -2, 1 ) ) ) {
$jot = 'й';
# Examples: {{grammar:genitive|аунеу}} = аунеуы, {{grammar:genitive|лæппу}} = лæппуйы.
if ( !preg_match( "/[аæеёиоыэюя]$/u", mb_substr( $word, -2, 1 ) ) ) {
$jot = 'й';
- * Converts Tajiki to latin orthography
+ * Converts Tajiki to Latin orthography
'he' => 'עברית', # Hebrew
'hi' => 'हिन्दी', # Hindi
'hif' => 'Fiji Hindi', # Fijian Hindi (multiple scripts - defaults to Latin)
'he' => 'עברית', # Hebrew
'hi' => 'हिन्दी', # Hindi
'hif' => 'Fiji Hindi', # Fijian Hindi (multiple scripts - defaults to Latin)
- 'hif-latn' => 'Fiji Hindi', # Fiji Hindi (latin)
+ 'hif-latn' => 'Fiji Hindi', # Fiji Hindi (Latin script)
'hil' => 'Ilonggo', # Hiligaynon
'ho' => 'Hiri Motu', # Hiri Motu
'hr' => 'hrvatski', # Croatian
'hil' => 'Ilonggo', # Hiligaynon
'ho' => 'Hiri Motu', # Hiri Motu
'hr' => 'hrvatski', # Croatian
// character has a longer primary weight sequence with an initial
// portion equal to the first character, then remove the second
// character. This avoids having characters like U+A732 (double A)
// character has a longer primary weight sequence with an initial
// portion equal to the first character, then remove the second
// character. This avoids having characters like U+A732 (double A)
- // polluting the basic latin sort area.
+ // polluting the basic Latin sort area.
foreach ( $this->groups as $weight => $group ) {
if ( preg_match( '/(\.[0-9A-F]*)\./', $weight, $m ) ) {
foreach ( $this->groups as $weight => $group ) {
if ( preg_match( '/(\.[0-9A-F]*)\./', $weight, $m ) ) {
- * Normalize double-byte latin UTF-8 characters
+ * Normalize double-byte Latin UTF-8 characters
*
* Usage: php updateDoubleWidthSearch.php
*
*
* Usage: php updateDoubleWidthSearch.php
*
require_once __DIR__ . '/Maintenance.php';
/**
require_once __DIR__ . '/Maintenance.php';
/**
- * Maintenance script to normalize double-byte latin UTF-8 characters.
+ * Maintenance script to normalize double-byte Latin UTF-8 characters.
*
* @ingroup Maintenance
*/
*
* @ingroup Maintenance
*/
public function __construct() {
parent::__construct();
public function __construct() {
parent::__construct();
- $this->addDescription( 'Script to normalize double-byte latin UTF-8 characters' );
+ $this->addDescription( 'Script to normalize double-byte Latin UTF-8 characters' );
$this->addOption( 'q', 'quiet', false, true );
$this->addOption(
'l',
$this->addOption( 'q', 'quiet', false, true );
$this->addOption(
'l',
var i, match, pos, spannode, middlebit, middleclone;
if ( node.nodeType === Node.TEXT_NODE ) {
// TODO - need to be smarter about the character matching here.
var i, match, pos, spannode, middlebit, middleclone;
if ( node.nodeType === Node.TEXT_NODE ) {
// TODO - need to be smarter about the character matching here.
- // non latin characters can make regex think a new word has begun: do not use \b
+ // non-Latin characters can make regex think a new word has begun: do not use \b
// http://stackoverflow.com/questions/3787072/regex-wordwrap-with-utf8-characters-in-js
// look for an occurrence of our pattern and store the starting position
match = node.data.match( pat );
// http://stackoverflow.com/questions/3787072/regex-wordwrap-with-utf8-characters-in-js
// look for an occurrence of our pattern and store the starting position
match = node.data.match( pat );
// Checking if word ends on one of the vowels: е, ё, и, о, ы, э, ю, я.
jot = 'й';
} else if ( word.match( /у$/i ) ) {
// Checking if word ends on one of the vowels: е, ё, и, о, ы, э, ю, я.
jot = 'й';
} else if ( word.match( /у$/i ) ) {
- // Checking if word ends on 'у'. 'У' can be either consonant 'W' or vowel 'U' in cyrillic Ossetic.
+ // Checking if word ends on 'у'. 'У' can be either consonant 'W' or vowel 'U' in Cyrillic Ossetic.
// Examples: {{grammar:genitive|аунеу}} = аунеуы, {{grammar:genitive|лæппу}} = лæппуйы.
if ( !word.slice( -2, -1 ).match( /[аæеёиоыэюя]$/i ) ) {
// Examples: {{grammar:genitive|аунеу}} = аунеуы, {{grammar:genitive|лæппу}} = лæппуйы.
if ( !word.slice( -2, -1 ).match( /[аæеёиоыэюя]$/i ) ) {
public function testMixedConversions() {
$this->assertCyrillic(
'шђчћжШЂЧЋЖ - šđčćž',
public function testMixedConversions() {
$this->assertCyrillic(
'шђчћжШЂЧЋЖ - šđčćž',
- 'Mostly cyrillic characters'
+ 'Mostly Cyrillic characters'
);
$this->assertLatin(
'šđč枊ĐČĆŽ - шђчћж',
);
$this->assertLatin(
'šđč枊ĐČĆŽ - шђчћж',
- 'Mostly latin characters'
+ 'Mostly Latin characters'
*/
public function testSameAmountOfLatinAndCyrillicGetConverted() {
$this->assertConverted(
*/
public function testSameAmountOfLatinAndCyrillicGetConverted() {
$this->assertConverted(
- '4 latin: šđčć | 4 cyrillic: шђчћ',
+ '4 Latin: šđčć | 4 Cyrillic: шђчћ',
'sr-ec'
);
$this->assertConverted(
'sr-ec'
);
$this->assertConverted(
- '4 latin: šđčć | 4 cyrillic: шђчћ',
+ '4 Latin: šđčć | 4 Cyrillic: шђчћ',
/**
* Verify the given Cyrillic text is not converted when using
/**
* Verify the given Cyrillic text is not converted when using
- * using the cyrillic variant and converted to Latin when using
+ * the Cyrillic variant and converted to Latin when using
* the Latin variant.
* @param string $text Text to convert
* @param string $msg Optional message
* the Latin variant.
* @param string $text Text to convert
* @param string $msg Optional message
/**
* Verify the given Cyrillic text is not converted when using
/**
* Verify the given Cyrillic text is not converted when using
- * using the cyrillic variant and converted to Latin when using
+ * the Cyrillic variant and converted to Latin when using
* the Latin variant.
* @param string $text Text to convert
* @param string $msg Optional message
* the Latin variant.
* @param string $text Text to convert
* @param string $msg Optional message