From 3b84e7f31cf740308869fece49792748e93ab3f2 Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Tue, 28 Jul 2015 12:17:40 -0400 Subject: [PATCH] Change default $wgShellLocale to C.UTF-8, and use it to set LC_ALL It's less likely to cause surprises than language-specific defaults. Bug: T107128 Change-Id: Ife7673255798f3a3d72028a26607c56b9b7fb224 --- RELEASE-NOTES-1.30 | 7 +++- includes/DefaultSettings.php | 40 ++++++++++++++++--- includes/GlobalFunctions.php | 15 +++++-- includes/installer/Installer.php | 2 +- includes/installer/LocalSettingsGenerator.php | 2 +- 5 files changed, 54 insertions(+), 12 deletions(-) diff --git a/RELEASE-NOTES-1.30 b/RELEASE-NOTES-1.30 index 8b6a932a8f..cdf8ba4421 100644 --- a/RELEASE-NOTES-1.30 +++ b/RELEASE-NOTES-1.30 @@ -6,7 +6,12 @@ MediaWiki 1.30 is an alpha-quality branch and is not recommended for use in production. === Configuration changes in 1.30 === -* … +* The C.UTF-8 locale should be used for $wgShellLocale, if available, to avoid + unexpected behavior when things use locale-sensitive string comparisons. For + example, Scribunto considers "bar" < "Foo" in most locales since it ignores + case. +* $wgShellLocale now affects LC_ALL rather than only LC_CTYPE. See + documentation of $wgShellLocale for details. === New features in 1.30 === * … diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index ac2261c541..7c18fcc594 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -8162,11 +8162,41 @@ $wgShellCgroup = false; $wgPhpCli = '/usr/bin/php'; /** - * Locale for LC_CTYPE, to work around https://bugs.php.net/bug.php?id=45132 - * For Unix-like operating systems, set this to to a locale that has a UTF-8 - * character set. Only the character set is relevant. 
- */ -$wgShellLocale = 'en_US.utf8'; + * Locale for LC_ALL, to provide a known environment for locale-sensitive operations + * + * For Unix-like operating systems, this should be set to C.UTF-8 or an + * equivalent to provide the most consistent behavior for locale-sensitive + * C library operations across different-language wikis. If that locale is not + * available, use another locale that has a UTF-8 character set. + * + * This setting mainly affects the behavior of C library functions, including: + * - String collation (order when sorting using locale-sensitive comparison) + * - For example, whether "Å" and "A" are considered to be the same letter or + * different letters and, if different, whether "Å" sorts after "A" or after + * "Z", and whether sorting is case-sensitive. + * - String character set (how characters beyond basic ASCII are represented) + * - We need this to be a UTF-8 character set to work around + * https://bugs.php.net/bug.php?id=45132 + * - Language used for low-level error messages. + * - Formatting of date/time and numeric values (e.g. '.' versus ',' as the + * decimal separator) + * + * MediaWiki provides its own methods and classes to perform many + * locale-sensitive operations, which are designed to be able to vary locale + * based on wiki language or user preference: + * - MediaWiki's Collation class should generally be used instead of the C + * library collation functions when locale-sensitive sorting is needed. + * - MediaWiki's Message class should be used for localization of messages + * displayed to the user. + * - MediaWiki's Language class should be used for formatting numeric and + * date/time values. + * + * @note If multiple wikis are being served from the same process (e.g. the + * same FastCGI or Apache server), this setting must be the same on all those + * wikis. 
+ * @see wfInitShellLocale() + */ +$wgShellLocale = 'C.UTF-8'; /** @} */ # End shell } diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 4325328e22..c7f132a09b 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -2535,8 +2535,15 @@ function wfShellExecWithStderr( $cmd, &$retval = null, $environ = [], $limits = } /** - * Workaround for https://bugs.php.net/bug.php?id=45132 - * escapeshellarg() destroys non-ASCII characters if LANG is not a UTF-8 locale + * Set the locale for locale-sensitive operations + * + * Sets LC_ALL to a known value to work around issues like the following: + * - https://bugs.php.net/bug.php?id=45132 escapeshellarg() destroys non-ASCII + * characters if LANG is not a UTF-8 locale + * - T107128 Scribunto string comparison is case-insensitive, while the + * standard Lua comparison is case-sensitive + * + * @see $wgShellLocale */ function wfInitShellLocale() { static $done = false; @@ -2545,8 +2552,8 @@ function wfInitShellLocale() { } $done = true; global $wgShellLocale; - putenv( "LC_CTYPE=$wgShellLocale" ); - setlocale( LC_CTYPE, $wgShellLocale ); + putenv( "LC_ALL=$wgShellLocale" ); + setlocale( LC_ALL, $wgShellLocale ); } /** diff --git a/includes/installer/Installer.php b/includes/installer/Installer.php index 12e8dd1c10..70282248fb 100644 --- a/includes/installer/Installer.php +++ b/includes/installer/Installer.php @@ -1016,7 +1016,7 @@ abstract class Installer { } # Try the most common ones. 
- $commonLocales = [ 'en_US.UTF-8', 'en_US.utf8', 'de_DE.UTF-8', 'de_DE.utf8' ]; + $commonLocales = [ 'C.UTF-8', 'en_US.UTF-8', 'en_US.utf8', 'de_DE.UTF-8', 'de_DE.utf8' ]; foreach ( $commonLocales as $commonLocale ) { if ( isset( $candidatesByLocale[$commonLocale] ) ) { $this->setVar( 'wgShellLocale', $commonLocale ); diff --git a/includes/installer/LocalSettingsGenerator.php b/includes/installer/LocalSettingsGenerator.php index 697188ef08..7df1009722 100644 --- a/includes/installer/LocalSettingsGenerator.php +++ b/includes/installer/LocalSettingsGenerator.php @@ -241,7 +241,7 @@ class LocalSettingsGenerator { } if ( !$this->values['wgShellLocale'] ) { - $this->values['wgShellLocale'] = 'en_US.UTF-8'; + $this->values['wgShellLocale'] = 'C.UTF-8'; $locale = '#'; } else { $locale = ''; -- 2.20.1