From: X! Date: Sun, 2 Jan 2011 15:54:18 +0000 (+0000) Subject: Followup to r79463: Move fallback functions to new Fallback class X-Git-Tag: 1.31.0-rc.0~32878 X-Git-Url: http://git.cyclocoop.org/%40spipnet%40?a=commitdiff_plain;h=cbff3fe980bf5c7f5349d0748272def34c19b1b0;p=lhc%2Fweb%2Fwiklou.git Followup to r79463: Move fallback functions to new Fallback class --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 2c7bfcabb0..73c64255b1 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -86,6 +86,7 @@ $wgAutoloadLocalClasses = array( 'FatalError' => 'includes/Exception.php', 'FakeTitle' => 'includes/FakeTitle.php', 'FakeMemCachedClient' => 'includes/ObjectCache.php', + 'Fallback' => 'includes/Fallback.php', 'FauxRequest' => 'includes/WebRequest.php', 'FauxResponse' => 'includes/WebResponse.php', 'FeedItem' => 'includes/Feed.php', diff --git a/includes/Fallback.php b/includes/Fallback.php new file mode 100644 index 0000000000..422ea8a27e --- /dev/null +++ b/includes/Fallback.php @@ -0,0 +1,177 @@ + 0 ) { + if( $splitPos > 256 ) { + // Optimize large string offsets by skipping ahead N bytes. + // This will cut out most of our slow time on Latin-based text, + // and 1/2 to 1/3 on East European and Asian scripts. + $bytePos = $splitPos; + while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { + ++$bytePos; + } + $charPos = mb_strlen( substr( $str, 0, $bytePos ) ); + } else { + $charPos = 0; + $bytePos = 0; + } + + while( $charPos++ < $splitPos ) { + ++$bytePos; + // Move past any tail bytes + while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { + ++$bytePos; + } + } + } else { + $splitPosX = $splitPos + 1; + $charPos = 0; // relative to end of string; we don't care about the actual char position here + $bytePos = $byteLen; + while( $bytePos > 0 && $charPos-- >= $splitPosX ) { + --$bytePos; + // Move past any tail bytes + while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { + --$bytePos; + } + } + } + + return $bytePos; + } + + /** + * Fallback implementation of mb_strlen, hardcoded to UTF-8. + * @param string $str + * @param string $enc optional encoding; ignored + * @return int + */ + public static function fallback_mb_strlen( $str, $enc = '' ) { + $counts = count_chars( $str ); + $total = 0; + + // Count ASCII bytes + for( $i = 0; $i < 0x80; $i++ ) { + $total += $counts[$i]; + } + + // Count multibyte sequence heads + for( $i = 0xc0; $i < 0xff; $i++ ) { + $total += $counts[$i]; + } + return $total; + } + + + /** + * Fallback implementation of mb_strpos, hardcoded to UTF-8. + * @param $haystack String + * @param $needle String + * @param $offset String: optional start position + * @param $encoding String: optional encoding; ignored + * @return int + */ + public static function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) { + $needle = preg_quote( $needle, '/' ); + + $ar = array(); + preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); + + if( isset( $ar[0][1] ) ) { + return $ar[0][1]; + } else { + return false; + } + } + + /** + * Fallback implementation of mb_strrpos, hardcoded to UTF-8. + * @param $haystack String + * @param $needle String + * @param $offset String: optional start position + * @param $encoding String: optional encoding; ignored + * @return int + */ + public static function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) { + $needle = preg_quote( $needle, '/' ); + + $ar = array(); + preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); + + if( isset( $ar[0] ) && count( $ar[0] ) > 0 && + isset( $ar[0][count( $ar[0] ) - 1][1] ) ) { + return $ar[0][count( $ar[0] ) - 1][1]; + } else { + return false; + } + } + +} \ No newline at end of file diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index d9f5585ea2..d4a816b2f8 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -21,201 +21,39 @@ require_once dirname( __FILE__ ) . '/normal/UtfNormalUtil.php'; * PHP extensions may be included here. */ -# iconv support is not in the default configuration and so may not be present. -# Assume will only ever use utf-8 and iso-8859-1. -# This will *not* work in all circumstances. -function fallback_iconv( $from, $to, $string ) { - if ( substr( $to, -8 ) == '//IGNORE' ) { - $to = substr( $to, 0, strlen( $to ) - 8 ); - } - if( strcasecmp( $from, $to ) == 0 ) { - return $string; - } - if( strcasecmp( $from, 'utf-8' ) == 0 ) { - return utf8_decode( $string ); - } - if( strcasecmp( $to, 'utf-8' ) == 0 ) { - return utf8_encode( $string ); - } - return $string; -} - if( !function_exists( 'iconv' ) ) { function iconv( $from, $to, $string ) { - return fallback_iconv( $from, $to, $string ); - } -} - - - - -/** - * Fallback implementation for mb_substr, hardcoded to UTF-8. - * Attempts to be at least _moderately_ efficient; best optimized - * for relatively small offset and count values -- about 5x slower - * than native mb_string in my testing. - * - * Larger offsets are still fairly efficient for Latin text, but - * can be up to 100x slower than native if the text is heavily - * multibyte and we have to slog through a few hundred kb. - */ -function fallback_mb_substr( $str, $start, $count='end' ) { - if( $start != 0 ) { - $split = fallback_mb_substr_split_unicode( $str, intval( $start ) ); - $str = substr( $str, $split ); - } - - if( $count !== 'end' ) { - $split = fallback_mb_substr_split_unicode( $str, intval( $count ) ); - $str = substr( $str, 0, $split ); - } - - return $str; -} - -function fallback_mb_substr_split_unicode( $str, $splitPos ) { - if( $splitPos == 0 ) { - return 0; - } - - $byteLen = strlen( $str ); - - if( $splitPos > 0 ) { - if( $splitPos > 256 ) { - // Optimize large string offsets by skipping ahead N bytes. - // This will cut out most of our slow time on Latin-based text, - // and 1/2 to 1/3 on East European and Asian scripts. - $bytePos = $splitPos; - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { - ++$bytePos; - } - $charPos = mb_strlen( substr( $str, 0, $bytePos ) ); - } else { - $charPos = 0; - $bytePos = 0; - } - - while( $charPos++ < $splitPos ) { - ++$bytePos; - // Move past any tail bytes - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { - ++$bytePos; - } - } - } else { - $splitPosX = $splitPos + 1; - $charPos = 0; // relative to end of string; we don't care about the actual char position here - $bytePos = $byteLen; - while( $bytePos > 0 && $charPos-- >= $splitPosX ) { - --$bytePos; - // Move past any tail bytes - while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { - --$bytePos; - } - } + return Fallback::fallback_iconv( $from, $to, $string ); } - - return $bytePos; } if ( !function_exists( 'mb_substr' ) ) { function mb_substr( $str, $start, $count='end' ) { - return fallback_mb_substr( $str, $start, $count ); + return Fallback::fallback_mb_substr( $str, $start, $count ); } function mb_substr_split_unicode( $str, $splitPos ) { - return fallback_mb_substr_split_unicode( $str, $splitPos ); - } -} - - - -/** - * Fallback implementation of mb_strlen, hardcoded to UTF-8. - * @param string $str - * @param string $enc optional encoding; ignored - * @return int - */ -function fallback_mb_strlen( $str, $enc = '' ) { - $counts = count_chars( $str ); - $total = 0; - - // Count ASCII bytes - for( $i = 0; $i < 0x80; $i++ ) { - $total += $counts[$i]; + return Fallback::fallback_mb_substr_split_unicode( $str, $splitPos ); } - - // Count multibyte sequence heads - for( $i = 0xc0; $i < 0xff; $i++ ) { - $total += $counts[$i]; - } - return $total; } if ( !function_exists( 'mb_strlen' ) ) { function mb_strlen( $str, $enc = '' ) { - return fallback_mb_strlen( $str, $enc ); - } -} - - - -/** - * Fallback implementation of mb_strpos, hardcoded to UTF-8. - * @param $haystack String - * @param $needle String - * @param $offset String: optional start position - * @param $encoding String: optional encoding; ignored - * @return int - */ -function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) { - $needle = preg_quote( $needle, '/' ); - - $ar = array(); - preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); - - if( isset( $ar[0][1] ) ) { - return $ar[0][1]; - } else { - return false; + return Fallback::fallback_mb_strlen( $str, $enc ); } } if( !function_exists( 'mb_strpos' ) ) { function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) { - return fallback_mb_strpos( $haystack, $needle, $offset, $encoding ); + return Fallback::fallback_mb_strpos( $haystack, $needle, $offset, $encoding ); } } - - -/** - * Fallback implementation of mb_strrpos, hardcoded to UTF-8. - * @param $haystack String - * @param $needle String - * @param $offset String: optional start position - * @param $encoding String: optional encoding; ignored - * @return int - */ -function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) { - $needle = preg_quote( $needle, '/' ); - - $ar = array(); - preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); - - if( isset( $ar[0] ) && count( $ar[0] ) > 0 && - isset( $ar[0][count( $ar[0] ) - 1][1] ) ) { - return $ar[0][count( $ar[0] ) - 1][1]; - } else { - return false; - } -} - if( !function_exists( 'mb_strrpos' ) ) { function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) { - return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding ); + return Fallback::fallback_mb_strrpos( $haystack, $needle, $offset, $encoding ); } } diff --git a/tests/phpunit/includes/GlobalTest.php b/tests/phpunit/includes/GlobalTest.php index cdea1fe071..ecaa8d3e8a 100644 --- a/tests/phpunit/includes/GlobalTest.php +++ b/tests/phpunit/includes/GlobalTest.php @@ -422,7 +422,7 @@ class GlobalTest extends MediaWikiTestCase { $this->assertEquals( call_user_func_array( 'mb_substr', $param_set ), - call_user_func_array( 'fallback_mb_substr', $param_set ), + call_user_func_array( array( 'Fallback', 'fallback_mb_substr' ), $param_set ), 'Fallback mb_substr with params ' . implode( ', ', $old_param_set ) ); } @@ -431,7 +431,7 @@ class GlobalTest extends MediaWikiTestCase { //mb_strlen $this->assertEquals( mb_strlen( $sampleUTF ), - fallback_mb_strlen( $sampleUTF ), + Fallback::fallback_mb_strlen( $sampleUTF ), 'Fallback mb_strlen' ); @@ -452,13 +452,13 @@ class GlobalTest extends MediaWikiTestCase { $this->assertEquals( call_user_func_array( 'mb_strpos', $param_set ), - call_user_func_array( 'fallback_mb_strpos', $param_set ), + call_user_func_array( array( 'Fallback', 'fallback_mb_strpos' ), $param_set ), 'Fallback mb_strpos with params ' . implode( ', ', $old_param_set ) ); $this->assertEquals( call_user_func_array( 'mb_strrpos', $param_set ), - call_user_func_array( 'fallback_mb_strrpos', $param_set ), + call_user_func_array( array( 'Fallback', 'fallback_mb_strrpos' ), $param_set ), 'Fallback mb_strrpos with params ' . implode( ', ', $old_param_set ) ); }