=== PHP version requirement ===
As of 1.27, MediaWiki now requires PHP 5.5.9 or higher. This corresponds with
-HHVM 3.1.
+HHVM 3.1. Additionally, the following PHP extensions are required:
+* ctype
+* iconv
+* json
+* mbstring
+* xml
=== Configuration changes in 1.27 ===
* $wgAllowMicrodataAttributes and $wgAllowRdfaAttributes were removed,
'FakeConverter' => __DIR__ . '/languages/FakeConverter.php',
'FakeMaintenance' => __DIR__ . '/maintenance/Maintenance.php',
'FakeResultWrapper' => __DIR__ . '/includes/db/DatabaseUtility.php',
- 'Fallback' => __DIR__ . '/includes/Fallback.php',
'FatalError' => __DIR__ . '/includes/exception/FatalError.php',
'FauxRequest' => __DIR__ . '/includes/FauxRequest.php',
'FauxResponse' => __DIR__ . '/includes/WebResponse.php',
"composer/semver": "1.4.0",
"cssjanus/cssjanus": "1.1.2",
"ext-iconv": "*",
+ "ext-mbstring": "*",
"liuggio/statsd-php-client": "1.0.18",
"mediawiki/at-ease": "1.1.0",
"oojs/oojs-ui": "0.16.4",
"ext-apc": "Local data and opcode cache",
"ext-fileinfo": "Improved mime magic detection",
"ext-intl": "ICU integration",
- "ext-mbstring": "Multibyte string support",
"ext-wikidiff2": "Diff accelerator",
"monolog/monolog": "Flexible debug logging system",
"nmred/kafka-php": "Send debug log events to kafka",
* Requires PHP's Exif extension: http://www.php.net/manual/en/ref.exif.php
*
* @note FOR WINDOWS USERS:
- * To enable Exif functions, add the following lines to the "Windows
+ * To enable Exif functions, add the following line to the "Windows
* extensions" section of php.ini:
* @code{.ini}
- * extension=extensions/php_mbstring.dll
* extension=extensions/php_exif.dll
* @endcode
*/
+++ /dev/null
-<?php
-/**
- * Fallback functions for PHP installed without mbstring support.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Fallback functions for PHP installed without mbstring support
- */
-class Fallback {
-
- /**
- * Fallback implementation for mb_substr, hardcoded to UTF-8.
- * Attempts to be at least _moderately_ efficient; best optimized
- * for relatively small offset and count values -- about 5x slower
- * than native mb_string in my testing.
- *
- * Larger offsets are still fairly efficient for Latin text, but
- * can be up to 100x slower than native if the text is heavily
- * multibyte and we have to slog through a few hundred kb.
- *
- * @param string $str
- * @param int $start
- * @param string $count
- *
- * @return string
- */
- public static function mb_substr( $str, $start, $count = 'end' ) {
- if ( $start != 0 ) {
- $split = self::mb_substr_split_unicode( $str, intval( $start ) );
- $str = substr( $str, $split );
- }
-
- if ( $count !== 'end' ) {
- $split = self::mb_substr_split_unicode( $str, intval( $count ) );
- $str = substr( $str, 0, $split );
- }
-
- return $str;
- }
-
- /**
- * @param string $str
- * @param int $splitPos
- * @return int
- */
- public static function mb_substr_split_unicode( $str, $splitPos ) {
- if ( $splitPos == 0 ) {
- return 0;
- }
-
- $byteLen = strlen( $str );
-
- if ( $splitPos > 0 ) {
- if ( $splitPos > 256 ) {
- // Optimize large string offsets by skipping ahead N bytes.
- // This will cut out most of our slow time on Latin-based text,
- // and 1/2 to 1/3 on East European and Asian scripts.
- $bytePos = $splitPos;
- while ( $bytePos < $byteLen && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
- ++$bytePos;
- }
- $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
- } else {
- $charPos = 0;
- $bytePos = 0;
- }
-
- while ( $charPos++ < $splitPos ) {
- ++$bytePos;
- // Move past any tail bytes
- while ( $bytePos < $byteLen && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
- ++$bytePos;
- }
- }
- } else {
- $splitPosX = $splitPos + 1;
- $charPos = 0; // relative to end of string; we don't care about the actual char position here
- $bytePos = $byteLen;
- while ( $bytePos > 0 && $charPos-- >= $splitPosX ) {
- --$bytePos;
- // Move past any tail bytes
- while ( $bytePos > 0 && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
- --$bytePos;
- }
- }
- }
-
- return $bytePos;
- }
-
- /**
- * Fallback implementation of mb_strlen, hardcoded to UTF-8.
- * @param string $str
- * @param string $enc Optional encoding; ignored
- * @return int
- */
- public static function mb_strlen( $str, $enc = '' ) {
- $counts = count_chars( $str );
- $total = 0;
-
- // Count ASCII bytes
- for ( $i = 0; $i < 0x80; $i++ ) {
- $total += $counts[$i];
- }
-
- // Count multibyte sequence heads
- for ( $i = 0xc0; $i < 0xff; $i++ ) {
- $total += $counts[$i];
- }
- return $total;
- }
-
- /**
- * Fallback implementation of mb_strpos, hardcoded to UTF-8.
- * @param string $haystack
- * @param string $needle
- * @param string $offset Optional start position
- * @param string $encoding Optional encoding; ignored
- * @return int
- */
- public static function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
- $needle = preg_quote( $needle, '/' );
-
- $ar = [];
- preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
-
- if ( isset( $ar[0][1] ) ) {
- return $ar[0][1];
- } else {
- return false;
- }
- }
-
- /**
- * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
- * @param string $haystack
- * @param string $needle
- * @param string $offset Optional start position
- * @param string $encoding Optional encoding; ignored
- * @return int
- */
- public static function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
- $needle = preg_quote( $needle, '/' );
-
- $ar = [];
- preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
-
- if ( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
- isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
- return $ar[0][count( $ar[0] ) - 1][1];
- } else {
- return false;
- }
- }
-}
* PHP extensions may be included here.
*/
-if ( !function_exists( 'mb_substr' ) ) {
- /**
- * @codeCoverageIgnore
- * @see Fallback::mb_substr
- * @return string
- */
- function mb_substr( $str, $start, $count = 'end' ) {
- return Fallback::mb_substr( $str, $start, $count );
- }
-
- /**
- * @codeCoverageIgnore
- * @see Fallback::mb_substr_split_unicode
- * @return int
- */
- function mb_substr_split_unicode( $str, $splitPos ) {
- return Fallback::mb_substr_split_unicode( $str, $splitPos );
- }
-}
-
-if ( !function_exists( 'mb_strlen' ) ) {
- /**
- * @codeCoverageIgnore
- * @see Fallback::mb_strlen
- * @return int
- */
- function mb_strlen( $str, $enc = '' ) {
- return Fallback::mb_strlen( $str, $enc );
- }
-}
-
-if ( !function_exists( 'mb_strpos' ) ) {
- /**
- * @codeCoverageIgnore
- * @see Fallback::mb_strpos
- * @return int
- */
- function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
- return Fallback::mb_strpos( $haystack, $needle, $offset, $encoding );
- }
-}
-
-if ( !function_exists( 'mb_strrpos' ) ) {
- /**
- * @codeCoverageIgnore
- * @see Fallback::mb_strrpos
- * @return int
- */
- function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
- return Fallback::mb_strrpos( $haystack, $needle, $offset, $encoding );
- }
-}
-
// hash_equals function only exists in PHP >= 5.6.0
// http://php.net/hash_equals
if ( !function_exists( 'hash_equals' ) ) {
*/
public function getDoc() {
if ( !$this->doc ) {
- // DOMDocument::loadHTML apparently isn't very good with encodings, so
+ // DOMDocument::loadHTML isn't very good with encodings, so
// convert input to ASCII by encoding everything above 128 as entities.
- if ( function_exists( 'mb_convert_encoding' ) ) {
- $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
- } else {
- $html = preg_replace_callback( '/[\x{80}-\x{10ffff}]/u', function ( $m ) {
- return '&#' . UtfNormal\Utils::utf8ToCodepoint( $m[0] ) . ';';
- }, $this->html );
- }
+ $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
// Workaround for bug that caused spaces before references
// to disappear during processing: https://phabricator.wikimedia.org/T55086
}
$html = $replacements->replace( $html );
- if ( function_exists( 'mb_convert_encoding' ) ) {
- // Just in case the conversion in getDoc() above used named
- // entities that aren't known to html_entity_decode().
- $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
- } else {
- $html = html_entity_decode( $html, ENT_COMPAT, 'utf-8' );
- }
+ // Just in case the conversion in getDoc() above used named
+ // entities that aren't known to html_entity_decode().
+ $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
+
return $html;
}
}
public function doQuery( $sql ) {
- if ( function_exists( 'mb_convert_encoding' ) ) {
- $sql = mb_convert_encoding( $sql, 'UTF-8' );
- }
+ $sql = mb_convert_encoding( $sql, 'UTF-8' );
// Clear previously left over PQresult
while ( $res = pg_get_result( $this->mConn ) ) {
pg_free_result( $res );
}
protected function resolveContainerPath( $container, $relStoragePath ) {
- if ( !mb_check_encoding( $relStoragePath, 'UTF-8' ) ) { // mb_string required by CF
+ if ( !mb_check_encoding( $relStoragePath, 'UTF-8' ) ) {
return null; // not UTF-8, makes it hard to use CF and the swift HTTP API
} elseif ( strlen( urlencode( $relStoragePath ) ) > 1024 ) {
return null; // too long for Swift
return false;
}
+ if ( !function_exists( 'mb_substr' ) ) {
+ $this->showError( 'config-mbstring-absent' );
+
+ return false;
+ }
+
return true;
}
"config-ctype": "<strong>Fatal:</strong> PHP must be compiled with support for the [http://www.php.net/manual/en/ctype.installation.php Ctype extension].",
"config-iconv": "<strong>Fatal:</strong> PHP must be compiled with support for the [http://www.php.net/manual/en/iconv.installation.php iconv extension].",
"config-json": "<strong>Fatal:</strong> PHP was compiled without JSON support.\nYou must install either the PHP JSON extension or the [http://pecl.php.net/package/jsonc PECL jsonc] extension before installing MediaWiki.\n* The PHP extension is included in Red Hat Enterprise Linux (CentOS) 5 and 6, though must be enabled in <code>/etc/php.ini</code> or <code>/etc/php.d/json.ini</code>.\n* Some Linux distributions released after May 2013 omit the PHP extension, instead packaging the PECL extension as <code>php5-json</code> or <code>php-pecl-jsonc</code>.",
+ "config-mbstring-absent": "<strong>Fatal:</strong> PHP must be compiled with support for the [http://www.php.net/manual/en/mbstring.installation.php mbstring extension].",
"config-xcache": "[http://xcache.lighttpd.net/ XCache] is installed",
"config-apc": "[http://www.php.net/apc APC] is installed",
"config-wincache": "[http://www.iis.net/download/WinCacheForPhp WinCache] is installed",
"config-ctype": "Message if support for [http://www.php.net/manual/en/ctype.installation.php Ctype] is missing from PHP.\n{{Related|Config-fatal}}",
"config-iconv": "Message if support for [http://www.php.net/manual/en/iconv.installation.php iconv] is missing from PHP.\n{{Related|Config-fatal}}",
"config-json": "Message if support for [[wikipedia:JSON|JSON]] is missing from PHP.\n* \"[[wikipedia:Red Hat Enterprise Linux|Red Hat Enterprise Linux]]\" (RHEL) and \"[[wikipedia:CentOS|CentOS]]\" refer to two almost-identical Linux distributions. \"5 and 6\" refers to version 5 or 6 of either distribution. Because RHEL 7 likely will not include the PHP extension, do not translate as \"5 or newer\".\n* \"The [http://www.php.net/json PHP extension]\" is the JSON extension included with PHP 5.2 and newer.\n* \"The [http://pecl.php.net/package/jsonc PECL extension]\" is based on the PHP extension, though excludes code some distributions have found unacceptable (see [[phab:T49431]]).\n{{Related|Config-fatal}}",
+ "config-mbstring-absent": "Message if support for [http://www.php.net/manual/en/mbstring.installation.php mbstring] is missing from PHP.\n{{Related|Config-fatal}}",
"config-xcache": "Message indicates if this program is available",
"config-apc": "Message indicates if this program is available",
"config-wincache": "Message indicates if this program is available",
* The function checks for invalid byte sequences and overlong encodings, but
* not for different normalisations.
*
- * This relies internally on the mbstring function mb_check_encoding()
- * hardcoded to check against UTF-8. Whenever the function is not available
- * we fallback to a pure PHP implementation. Setting $disableMbstring to
- * true will skip the use of mb_check_encoding, this is mostly intended for
- * unit testing our internal implementation.
- *
* @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation.
* In particular, the pure PHP code path did not in fact check for overlong forms.
* Beware of this when backporting code to that version of MediaWiki.
*
* @since 1.21
* @param string $value String to check
- * @param bool $disableMbstring Whether to use the pure PHP
- * implementation instead of trying mb_check_encoding. Intended for unit
- * testing. Default: false
* @return bool Whether the given $value is a valid UTF-8 encoded string
*/
- static function isUtf8( $value, $disableMbstring = false ) {
+ static function isUtf8( $value ) {
$value = (string)$value;
- // If the mbstring extension is loaded, use it. However, before PHP 5.4, values above
- // U+10FFFF are incorrectly allowed, so we have to check for them separately.
- if ( !$disableMbstring && function_exists( 'mb_check_encoding' ) ) {
- static $newPHP;
- if ( $newPHP === null ) {
- $newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
- }
-
- return mb_check_encoding( $value, 'UTF-8' ) &&
- ( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 );
- }
-
- if ( preg_match( "/[\x80-\xff]/S", $value ) === 0 ) {
- // String contains only ASCII characters, has to be valid
- return true;
- }
-
- // PCRE implements repetition using recursion; to avoid a stack overflow (and segfault)
- // for large input, we check for invalid sequences (<= 5 bytes) rather than valid
- // sequences, which can be as long as the input string is. Multiple short regexes are
- // used rather than a single long regex for performance.
- static $regexes;
- if ( $regexes === null ) {
- $cont = "[\x80-\xbf]";
- $after = "(?!$cont)"; // "(?:[^\x80-\xbf]|$)" would work here
- $regexes = [
- // Continuation byte at the start
- "/^$cont/",
-
- // ASCII byte followed by a continuation byte
- "/[\\x00-\x7f]$cont/S",
-
- // Illegal byte
- "/[\xc0\xc1\xf5-\xff]/S",
-
- // Invalid 2-byte sequence, or valid one then an extra continuation byte
- "/[\xc2-\xdf](?!$cont$after)/S",
-
- // Invalid 3-byte sequence, or valid one then an extra continuation byte
- "/\xe0(?![\xa0-\xbf]$cont$after)/",
- "/[\xe1-\xec\xee\xef](?!$cont{2}$after)/S",
- "/\xed(?![\x80-\x9f]$cont$after)/",
-
- // Invalid 4-byte sequence, or valid one then an extra continuation byte
- "/\xf0(?![\x90-\xbf]$cont{2}$after)/",
- "/[\xf1-\xf3](?!$cont{3}$after)/S",
- "/\xf4(?![\x80-\x8f]$cont{2}$after)/",
- ];
- }
-
- foreach ( $regexes as $regex ) {
- if ( preg_match( $regex, $value ) !== 0 ) {
- return false;
- }
+ // Before PHP 5.4, values above U+10FFFF are incorrectly allowed, so we have to
+ // check for them separately.
+ static $newPHP;
+ if ( $newPHP === null ) {
+ $newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
}
- return true;
+ return mb_check_encoding( $value, 'UTF-8' ) &&
+ ( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 );
}
/**
"ABORTING (see https://bugs.php.net/bug.php?id=45996).\n",
true );
}
+
+ if ( !function_exists( 'mb_strlen' ) ) {
+ // mbstring is a hard requirement. Pass the same `true` second argument
+ // to error() that the preceding check uses, so the script really stops
+ // here instead of printing "ABORTING." and then carrying on.
+ $this->error(
+ "MediaWiki now requires the mbstring PHP extension, your system doesn't have it.\n"
+ . "ABORTING.\n",
+ true );
+ }
}
function execute() {
+++ /dev/null
-<?php
-
-/**
- * @covers Fallback
- */
-class FallbackTest extends MediaWikiTestCase {
- public function testFallbackMbstringFunctions() {
- if ( !extension_loaded( 'mbstring' ) ) {
- $this->markTestSkipped(
- "The mb_string functions must be installed to test the fallback functions"
- );
- }
-
- $sampleUTF = "Östergötland_coat_of_arms.png";
-
- // mb_substr
- $substr_params = [
- [ 0, 0 ],
- [ 5, -4 ],
- [ 33 ],
- [ 100, -5 ],
- [ -8, 10 ],
- [ 1, 1 ],
- [ 2, -1 ]
- ];
-
- foreach ( $substr_params as $param_set ) {
- $old_param_set = $param_set;
- array_unshift( $param_set, $sampleUTF );
-
- $this->assertEquals(
- call_user_func_array( 'mb_substr', $param_set ),
- call_user_func_array( 'Fallback::mb_substr', $param_set ),
- 'Fallback mb_substr with params ' . implode( ', ', $old_param_set )
- );
- }
-
- // mb_strlen
- $this->assertEquals(
- mb_strlen( $sampleUTF ),
- Fallback::mb_strlen( $sampleUTF ),
- 'Fallback mb_strlen'
- );
-
- // mb_str(r?)pos
- $strpos_params = [
- // array( 'ter' ),
- // array( 'Ö' ),
- // array( 'Ö', 3 ),
- // array( 'oat_', 100 ),
- // array( 'c', -10 ),
- // Broken for now
- ];
-
- foreach ( $strpos_params as $param_set ) {
- $old_param_set = $param_set;
- array_unshift( $param_set, $sampleUTF );
-
- $this->assertEquals(
- call_user_func_array( 'mb_strpos', $param_set ),
- call_user_func_array( 'Fallback::mb_strpos', $param_set ),
- 'Fallback mb_strpos with params ' . implode( ', ', $old_param_set )
- );
-
- $this->assertEquals(
- call_user_func_array( 'mb_strrpos', $param_set ),
- call_user_func_array( 'Fallback::mb_strrpos', $param_set ),
- 'Fallback mb_strrpos with params ' . implode( ', ', $old_param_set )
- );
- }
- }
-}