From e8c0c2ad46f4b1c36dcc920f3a280c14d277a2fe Mon Sep 17 00:00:00 2001
From: MatmaRex
Date: Fri, 18 Jan 2013 22:36:32 +0100
Subject: [PATCH] (bug 43801) add a getter for ICU version to ICUCollation

It will be necessary to be able to use correct version of Unicode data
files.

The constant INTL_ICU_VERSION this getter returns isn't really
documented. It is available since PHP 5.3.7 (see PHP bug 54561), the
getter will fail gracefully on older PHPs.

It should be possible to determine the ICU version on these by grepping
the output of phpinfo(), but I don't think such a minor improvement is
worth such a huge hack.

Change-Id: I85353559439bfddee7c5ba90894d30dd8ef0e0e8
---
Notes on this revision of the patch (text here is ignored by git am):
 * execute(): the ucd.all.grouped.xml message is now appended with .=
   rather than assigned with =, so the allkeys.txt message is no longer
   discarded when both data files are missing.
 * execute(): the download URL templates carry an explicit
   <Unicode version> placeholder and str_replace() searches for it;
   with an empty search string str_replace() leaves the subject
   untouched, so the version was never substituted into the URLs.
 * NOTE(review): the "--data-dir=" messages (including the removed
   lines) look like they lost a trailing placeholder as well; confirm
   against the target file before applying.

 includes/Collation.php                        | 35 ++++++++
 .../language/generateCollationData.php        | 82 ++++++++++++++++---
 2 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/includes/Collation.php b/includes/Collation.php
index 7879ebecdc..301904e353 100644
--- a/includes/Collation.php
+++ b/includes/Collation.php
@@ -408,4 +408,39 @@ class IcuCollation extends Collation {
 	static function getICUVersion() {
 		return defined( 'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION : false;
 	}
+
+	/**
+	 * Return the version of Unicode appropriate for the version of ICU library
+	 * currently in use, or false when it can't be determined.
+	 *
+	 * @since 1.21
+	 * @return string|false
+	 */
+	static function getUnicodeVersionForICU() {
+		$icuVersion = IcuCollation::getICUVersion();
+		if ( !$icuVersion ) {
+			return false;
+		}
+
+		$versionPrefix = substr( $icuVersion, 0, 3 );
+		// Source: http://site.icu-project.org/download
+		$map = array(
+			'50.' => '6.2',
+			'49.' => '6.1',
+			'4.8' => '6.0',
+			'4.6' => '6.0',
+			'4.4' => '5.2',
+			'4.2' => '5.1',
+			'4.0' => '5.1',
+			'3.8' => '5.0',
+			'3.6' => '5.0',
+			'3.4' => '4.1',
+		);
+
+		if ( isset( $map[$versionPrefix] ) ) {
+			return $map[$versionPrefix];
+		} else {
+			return false;
+		}
+	}
 }
diff --git a/maintenance/language/generateCollationData.php b/maintenance/language/generateCollationData.php
index e2ad6a7112..aba346f0fc 100644
--- a/maintenance/language/generateCollationData.php
+++ b/maintenance/language/generateCollationData.php
@@ -61,18 +61,80 @@ class GenerateCollationData extends Maintenance {
 
 	public function execute() {
 		$this->dataDir = $this->getOption( 'data-dir', '.' );
-		if ( !file_exists( "{$this->dataDir}/allkeys.txt" ) ) {
-			$this->error( "Unable to find allkeys.txt. Please download it from " .
-				"http://www.unicode.org/Public/UCA/latest/allkeys.txt and specify " .
-				"its location with --data-dir=" );
-			exit( 1 );
-		}
-		if ( !file_exists( "{$this->dataDir}/ucd.all.grouped.xml" ) ) {
-			$this->error( "Unable to find ucd.all.grouped.xml. Please download it " .
-				"from http://www.unicode.org/Public/6.0.0/ucdxml/ucd.all.grouped.zip " .
-				"and specify its location with --data-dir=" );
+
+		$allkeysPresent = file_exists( "{$this->dataDir}/allkeys.txt" );
+		$ucdallPresent = file_exists( "{$this->dataDir}/ucd.all.grouped.xml" );
+
+		// As of January 2013, these links work for all versions of Unicode
+		// between 5.1 and 6.2, inclusive.
+		$allkeysURL = "http://www.unicode.org/Public/UCA/<Unicode version>/allkeys.txt";
+		$ucdallURL = "http://www.unicode.org/Public/<Unicode version>/ucdxml/ucd.all.grouped.zip";
+
+		if ( !$allkeysPresent || !$ucdallPresent ) {
+			$icuVersion = ICUCollation::getICUVersion();
+			$unicodeVersion = ICUCollation::getUnicodeVersionForICU();
+
+			$error = "";
+
+			if ( !$allkeysPresent ) {
+				$error .= "Unable to find allkeys.txt. "
+					. "Download it and specify its location with --data-dir=. "
+					. "\n\n";
+			}
+			if ( !$ucdallPresent ) {
+				$error .= "Unable to find ucd.all.grouped.xml. "
+					. "Download it, unzip, and specify its location with --data-dir=. "
+					. "\n\n";
+			}
+
+			$versionKnown = false;
+			if ( !$icuVersion ) {
+				// Unknown version - either very old intl,
+				// or PHP < 5.3.7 which does not expose this information
+				$error .= "As MediaWiki could not determine the version of ICU library used by your PHP's "
+					. "intl extension it can't suggest which file version to download. "
+					. "This can be caused by running a very old version of intl or PHP < 5.3.7. "
+					. "If you are sure everything is all right, find out the ICU version "
+					. "by running phpinfo(), check what is the Unicode version it is using "
+					. "at http://site.icu-project.org/download, then try finding appropriate data file(s) at:";
+			} elseif ( version_compare( $icuVersion, "4.0", "<" ) ) {
+				// Extra old version
+				$error .= "You are using outdated version of ICU ($icuVersion), intended for "
+					. ( $unicodeVersion ? "Unicode $unicodeVersion" : "an unknown version of Unicode" )
+					. "; this file might not be avalaible for it, and it's not supported by MediaWiki. "
+					." You are on your own; consider upgrading PHP's intl extension or try "
+					. "one of the files available at:";
+			} elseif ( version_compare( $icuVersion, "51.0", ">=" ) ) {
+				// Extra recent version
+				$error .= "You are using ICU $icuVersion, released after this script was last updated. "
+					. "Check what is the Unicode version it is using at http://site.icu-project.org/download . "
+					. "It can't be guaranteed everything will work, but appropriate file(s) should "
+					. "be available at:";
+			} else {
+				// ICU 4.0 to 50.x
+				$versionKnown = true;
+				$error .= "You are using ICU $icuVersion, intended for "
+					. ( $unicodeVersion ? "Unicode $unicodeVersion" : "an unknown version of Unicode" )
+					. ". Appropriate file(s) should be available at:";
+			}
+			$error .= "\n";
+
+			if ( $versionKnown && $unicodeVersion ) {
+				$allkeysURL = str_replace( "<Unicode version>", "$unicodeVersion.0", $allkeysURL );
+				$ucdallURL = str_replace( "<Unicode version>", "$unicodeVersion.0", $ucdallURL );
+			}
+
+			if ( !$allkeysPresent ) {
+				$error .= "* $allkeysURL\n";
+			}
+			if ( !$ucdallPresent ) {
+				$error .= "* $ucdallURL\n";
+			}
+
+			$this->error( $error );
 			exit( 1 );
 		}
+
 		$debugOutFileName = $this->getOption( 'debug-output' );
 		if ( $debugOutFileName ) {
 			$this->debugOutFile = fopen( $debugOutFileName, 'w' );
-- 
2.20.1