From 5b3106f12b95e7509e66301a62ae8c6198c5fda0 Mon Sep 17 00:00:00 2001 From: Ed Sanders Date: Wed, 10 Apr 2019 14:46:32 +0100 Subject: [PATCH] Make generatePhpCharToUpperMappings.php a proper maintenance script This allows us to use Title for converting to upper case which will respect any compatibility fixes added later. Bug: T219279 Change-Id: I746487df12e4628f1e37b33b7cc3cce597853596 --- autoload.php | 1 + .../generateJsToUpperCaseList.js | 5 +- .../generatePhpCharToUpperMappings.php | 91 ++++++++++++++----- 3 files changed, 72 insertions(+), 25 deletions(-) diff --git a/autoload.php b/autoload.php index 4f41c8af91..42fed59e7e 100644 --- a/autoload.php +++ b/autoload.php @@ -563,6 +563,7 @@ $wgAutoloadLocalClasses = [ 'GenerateJsonI18n' => __DIR__ . '/maintenance/generateJsonI18n.php', 'GenerateNormalizerDataAr' => __DIR__ . '/maintenance/language/generateNormalizerDataAr.php', 'GenerateNormalizerDataMl' => __DIR__ . '/maintenance/language/generateNormalizerDataMl.php', + 'GeneratePhpCharToUpperMappings' => __DIR__ . '/maintenance/mediawiki.Title/generatePhpCharToUpperMappings.php', 'GenerateSitemap' => __DIR__ . '/maintenance/generateSitemap.php', 'GenerateUcfirstOverrides' => __DIR__ . '/maintenance/language/generateUcfirstOverrides.php', 'GenerateUpperCharTable' => __DIR__ . '/maintenance/language/generateUpperCharTable.php', diff --git a/maintenance/mediawiki.Title/generateJsToUpperCaseList.js b/maintenance/mediawiki.Title/generateJsToUpperCaseList.js index fd742f6502..0104ec2205 100644 --- a/maintenance/mediawiki.Title/generateJsToUpperCaseList.js +++ b/maintenance/mediawiki.Title/generateJsToUpperCaseList.js @@ -1,8 +1,9 @@ /* eslint-env node, es6 */ var i, chars = []; -for ( i = 0; i < 65536; i++ ) { - chars.push( String.fromCharCode( i ).toUpperCase() ); +for ( i = 0; i <= 0x10ffff; i++ ) { + // eslint-disable-next-line no-restricted-properties + chars.push( String.fromCodePoint( i ).toUpperCase() ); } // eslint-disable-next-line no-console console.log( JSON.stringify( chars ) ); diff --git a/maintenance/mediawiki.Title/generatePhpCharToUpperMappings.php b/maintenance/mediawiki.Title/generatePhpCharToUpperMappings.php index a04958c642..8073c7c129 100755 --- a/maintenance/mediawiki.Title/generatePhpCharToUpperMappings.php +++ b/maintenance/mediawiki.Title/generatePhpCharToUpperMappings.php @@ -1,34 +1,79 @@ -#!/usr/bin/env php = 0xd800 && $i <= 0xdfff ) { - // Skip surrogate pairs - continue; + public function __construct() { + parent::__construct(); + $this->addDescription( 'Update list of upper case differences between JS and PHP.' ); } - $char = mb_convert_encoding( '&#' . $i . ';', 'UTF-8', 'HTML-ENTITIES' ); - $phpUpper = mb_strtoupper( $char ); - $jsUpper = $jsUpperChars[$i]; - if ( $jsUpper !== $phpUpper ) { - $data[$char] = $phpUpper; + + public function execute() { + global $wgContLang; + + $data = []; + + $result = Shell::command( [ 'node', __DIR__ . '/generateJsToUpperCaseList.js' ] ) + // Node allocates lots of memory + ->limits( [ 'memory' => 1024 * 1024 ] ) + ->execute(); + + if ( $result->getExitcode() !== 0 ) { + $this->output( $result->getStderr() ); + return; + } + + $jsUpperChars = json_decode( $result->getStdout() ); + + for ( $i = 0; $i <= 0x10ffff; $i++ ) { + if ( $i >= 0xd800 && $i <= 0xdfff ) { + // Skip surrogate pairs + continue; + } + $char = \UtfNormal\Utils::codepointToUtf8( $i ); + $phpUpper = $wgContLang->ucfirst( $char ); + $jsUpper = $jsUpperChars[$i]; + if ( $jsUpper !== $phpUpper ) { + $data[$char] = $phpUpper; + } + } + + $this->output( str_replace( ' ', "\t", + json_encode( $data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE ) + ) . "\n" ); } } -echo str_replace( ' ', "\t", - json_encode( $data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE ) -) . "\n"; +$maintClass = GeneratePhpCharToUpperMappings::class; +require_once RUN_MAINTENANCE_IF_MAIN; -- 2.20.1