'GenerateNormalizerDataAr' => __DIR__ . '/maintenance/language/generateNormalizerDataAr.php',
'GenerateNormalizerDataMl' => __DIR__ . '/maintenance/language/generateNormalizerDataMl.php',
'GenerateSitemap' => __DIR__ . '/maintenance/generateSitemap.php',
+ 'GenerateUcfirstOverrides' => __DIR__ . '/maintenance/language/generateUcfirstOverrides.php',
+ 'GenerateUpperCharTable' => __DIR__ . '/maintenance/language/generateUpperCharTable.php',
'GenericArrayObject' => __DIR__ . '/includes/libs/GenericArrayObject.php',
'GenericParameterJob' => __DIR__ . '/includes/jobqueue/GenericParameterJob.php',
'GetConfiguration' => __DIR__ . '/maintenance/getConfiguration.php',
*/
$wgLocalTZoffset = null;
+/**
+ * List of Unicode characters for which capitalization is overridden in
+ * Language::ucfirst. The characters should be
+ * represented as char_to_convert => conversion_override. See T219279 for details
+ * on why this is useful during PHP version transitions.
+ *
+ * The table is looked up with the first character of a multibyte string; when
+ * that character is present as a key, the mapped value is used in place of the
+ * result of mb_strtoupper(). Plain ASCII strings are upper-cased without
+ * consulting this table. An empty array (the default) leaves the standard
+ * behaviour untouched.
+ *
+ * @warning: EXPERIMENTAL!
+ *
+ * @since 1.34
+ * @var array Map of single character => replacement string
+ */
+$wgOverrideUcfirstCharacters = [];
+
/** @} */ # End of language/charset settings
/*************************************************************************//**
public function uc( $str, $first = false ) {
if ( $first ) {
if ( $this->isMultibyte( $str ) ) {
- return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+ return $this->mbUpperChar( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
} else {
return ucfirst( $str );
}
}
}
+ /**
+ * Upper-case a single character, honouring the configured overrides.
+ *
+ * The character is first looked up in $wgOverrideUcfirstCharacters; when an
+ * entry exists its value is returned as-is, otherwise the character is passed
+ * to mb_strtoupper(). The override table is useful during transitions between
+ * PHP versions whose Unicode case mappings differ, a situation which can make
+ * some resources unreachable on-wiki (see discussion at T219279).
+ *
+ * @since 1.34
+ *
+ * @param string $char Character to convert
+ *
+ * @return string The configured override if there is one, else mb_strtoupper( $char )
+ */
+ protected function mbUpperChar( $char ) {
+ global $wgOverrideUcfirstCharacters;
+
+ // array_key_exists() rather than isset(): an entry counts whatever its value is.
+ $hasOverride = array_key_exists( $char, $wgOverrideUcfirstCharacters );
+
+ return $hasOverride
+ ? $wgOverrideUcfirstCharacters[$char]
+ : mb_strtoupper( $char );
+ }
+
/**
* @param string $str
* @return mixed|string
--- /dev/null
+<?php
+/**
+ * Generate a php file containing an array of
+ * utf8_lowercase => utf8_uppercase
+ * overrides. Takes two json files generated with generateUpperCharTable.php
+ * as input.
+ *
+ * Example run:
+ * # this will prepare a file to use to make hhvm's Language::ucfirst work like php7's
+ *
+ * $ php7.2 maintenance/language/generateUpperCharTable.php --outfile php7.2.json
+ * $ hhvm --php maintenance/language/generateUpperCharTable.php --outfile hhvm.json
+ * $ hhvm maintenance/language/generateUcfirstOverrides.php \
+ * --override hhvm.json --with php7.2.json --outfile test.php
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup MaintenanceLanguage
+ */
+
+require_once __DIR__ . '/../Maintenance.php';
+
+/**
+ * Maintenance script that compares two lowercase => uppercase tables and
+ * writes, as a PHP static array file, the entries of the first table that
+ * must be overridden in order to obtain the behaviour of the second one.
+ */
+class GenerateUcfirstOverrides extends Maintenance {
+
+ /**
+ * Declare the command line options accepted by the script.
+ */
+ public function __construct() {
+ parent::__construct();
+ $this->addDescription(
+ 'Generates a php source file containing a definition for mb_strtoupper overrides' );
+ $this->addOption( 'outfile', 'Output file', true, true, 'o' );
+ $this->addOption( 'override', 'Char table we want to override', true, true );
+ $this->addOption( 'with', 'Char table we want to obtain', true, true );
+ }
+
+ /**
+ * Load both tables, collect every character whose uppercase form differs
+ * between them and write the resulting override map to the output file.
+ */
+ public function execute() {
+ $outfile = $this->getOption( 'outfile' );
+ $from = $this->loadJson( $this->getOption( 'override' ) );
+ $to = $this->loadJson( $this->getOption( 'with' ) );
+ $overrides = [];
+
+ foreach ( $from as $lc => $uc ) {
+ // Characters missing from the target table are left alone.
+ $ref = $to[$lc] ?? null;
+ if ( $ref !== null && $ref !== $uc ) {
+ // The override must carry the value of the table we want to obtain
+ // (--with); storing $uc would merely restate the current behaviour.
+ $overrides[$lc] = $ref;
+ }
+ }
+ // NOTE(review): confirm StaticArrayWriter resolves here without a
+ // namespace import; no use statement is visible in this file.
+ $writer = new StaticArrayWriter();
+ file_put_contents(
+ $outfile,
+ $writer->create( $overrides, 'File created by generateUcfirstOverrides.php' )
+ );
+ }
+
+ /**
+ * Read a JSON file and return its content as an associative array.
+ *
+ * Reports a fatal error when the file cannot be read or when it does not
+ * contain a JSON object/array.
+ *
+ * @param string $filename Path of the JSON file to load
+ * @return array Decoded table, character => uppercase form
+ */
+ private function loadJson( $filename ) {
+ $data = file_get_contents( $filename );
+ if ( $data === false ) {
+ $msg = sprintf( "Could not load data from file '%s'\n", $filename );
+ $this->fatalError( $msg );
+ }
+ // Decode to an associative array: execute() accesses the tables with
+ // array syntax, which is not possible on the default stdClass result.
+ $json = json_decode( $data, true );
+ // json_decode() yields null on malformed input; anything that is not an
+ // array (e.g. a bare scalar) is equally unusable as a table.
+ if ( !is_array( $json ) ) {
+ $msg = sprintf( "Invalid json in the data file %s\n", $filename );
+ $this->fatalError( $msg, 2 );
+ }
+ return $json;
+ }
+}
+
+$maintClass = GenerateUcfirstOverrides::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
--- /dev/null
+<?php
+/**
+ * Generate a json file containing an array of
+ * utf8_lowercase => utf8_uppercase
+ * for all of the utf-8 range. This provides the input for generateUcfirstOverrides.php
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup MaintenanceLanguage
+ */
+
+require_once __DIR__ . '/../Maintenance.php';
+
+/**
+ * Maintenance script that dumps, as JSON, the result of mb_strtoupper() for
+ * every Unicode code point under the PHP runtime executing the script.
+ */
+class GenerateUpperCharTable extends Maintenance {
+
+ /**
+ * Declare the command line options accepted by the script.
+ */
+ public function __construct() {
+ parent::__construct();
+ $this->addDescription( 'Generates the lowercase => uppercase json table' );
+ $this->addOption( 'outfile', 'Output file', true, true, 'o' );
+ }
+
+ /**
+ * Build the character => uppercase table and write it to the output file
+ * as a JSON object.
+ */
+ public function execute() {
+ $outfile = $this->getOption( 'outfile', 'upperchar.json' );
+ $toUpperTable = [];
+ for ( $i = 0; $i <= 0x10ffff; $i++ ) {
+ // Surrogate code points are not valid in UTF-8; including them would
+ // make json_encode() fail for the whole table.
+ if ( $i >= 0xd800 && $i <= 0xdfff ) {
+ continue;
+ }
+ $char = UtfNormal\Utils::codepointToUtf8( $i );
+ $upper = mb_strtoupper( $char );
+ $toUpperTable[$char] = $upper;
+ }
+ $json = json_encode( $toUpperTable );
+ if ( $json === false ) {
+ // Do not silently write an empty file when encoding fails.
+ $this->fatalError(
+ 'Could not encode the character table as json: ' . json_last_error_msg() . "\n" );
+ }
+ file_put_contents( $outfile, $json );
+ }
+}
+
+$maintClass = GenerateUpperCharTable::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
$ar2 = new LanguageAr();
$this->assertTrue( $ar1->equals( $ar2 ), 'ar equals ar' );
}
+
+ /**
+ * Check Language::ucfirst() against the provided cases, optionally with
+ * $wgOverrideUcfirstCharacters set to a custom override table.
+ *
+ * @dataProvider provideUcfirst
+ * @covers Language::ucfirst
+ *
+ * @param string $orig Input string
+ * @param string $expected Expected result of ucfirst()
+ * @param string $desc Assertion message
+ * @param array|bool $overrides Override table to install, or false for none
+ */
+ public function testUcfirst( $orig, $expected, $desc, $overrides = false ) {
+ $lang = new Language();
+ if ( is_array( $overrides ) ) {
+ $this->setMwGlobals( [ 'wgOverrideUcfirstCharacters' => $overrides ] );
+ }
+ // assertSame() takes the expected value first; with the arguments swapped
+ // a failure report would label the values the wrong way round.
+ $this->assertSame( $expected, $lang->ucfirst( $orig ), $desc );
+ }
+
+ /**
+ * Data sets for testUcfirst.
+ *
+ * @return array[] Rows of [ input, expected output, message, override table or false ]
+ */
+ public static function provideUcfirst() {
+ $cases = [
+ // No override table installed
+ [ 'alice', 'Alice', 'simple ASCII string', false ],
+ [ 'århus', 'Århus', 'unicode string', false ],
+ // An override for an ASCII character is expected to be ignored...
+ [ 'foo', 'Foo', 'ASCII is not overriden', [ 'f' => 'b' ] ],
+ // ...whereas one for a non-ASCII character is expected to apply
+ [ 'èl', 'Ll', 'Non-ASCII is overridden', [ 'è' => 'L' ] ],
+ ];
+
+ return $cases;
+ }
}