From 373389b98f2f3adcb48595679ec841987ad44eca Mon Sep 17 00:00:00 2001
From: X!
Date: Sun, 2 Jan 2011 01:29:00 +0000
Subject: [PATCH] Move fallback function creation out of function_exists()
 conditionals.

This allows for unit testing of the fallback functions to ensure that
they work like the real functions do.

The iconv/mb_* shims are still only defined when the native function is
missing; each shim now simply delegates to its fallback_* counterpart.
---
 includes/GlobalFunctions.php | 283 ++++++++++++++++++++---------------
 1 file changed, 160 insertions(+), 123 deletions(-)

diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index d636b44397..64fc333db8 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -20,170 +20,206 @@ require_once dirname( __FILE__ ) . '/normal/UtfNormalUtil.php';
  * Re-implementations of newer functions or functions in non-standard
  * PHP extensions may be included here.
  */
+
+# iconv support is not in the default configuration and so may not be present.
+# Assume will only ever use utf-8 and iso-8859-1.
+# This will *not* work in all circumstances.
+function fallback_iconv( $from, $to, $string ) {
+	if ( substr( $to, -8 ) == '//IGNORE' ) {
+		$to = substr( $to, 0, strlen( $to ) - 8 );
+	}
+	if( strcasecmp( $from, $to ) == 0 ) {
+		return $string;
+	}
+	if( strcasecmp( $from, 'utf-8' ) == 0 ) {
+		return utf8_decode( $string );
+	}
+	if( strcasecmp( $to, 'utf-8' ) == 0 ) {
+		return utf8_encode( $string );
+	}
+	return $string;
+}
+
 if( !function_exists( 'iconv' ) ) {
-	# iconv support is not in the default configuration and so may not be present.
-	# Assume will only ever use utf-8 and iso-8859-1.
-	# This will *not* work in all circumstances.
 	function iconv( $from, $to, $string ) {
-		if ( substr( $to, -8 ) == '//IGNORE' ) {
-			$to = substr( $to, 0, strlen( $to ) - 8 );
-		}
-		if( strcasecmp( $from, $to ) == 0 ) {
-			return $string;
-		}
-		if( strcasecmp( $from, 'utf-8' ) == 0 ) {
-			return utf8_decode( $string );
-		}
-		if( strcasecmp( $to, 'utf-8' ) == 0 ) {
-			return utf8_encode( $string );
-		}
-		return $string;
+		return fallback_iconv( $from, $to, $string );
 	}
 }
 
-
-if ( !function_exists( 'mb_substr' ) ) {
-	/**
-	 * Fallback implementation for mb_substr, hardcoded to UTF-8.
-	 * Attempts to be at least _moderately_ efficient; best optimized
-	 * for relatively small offset and count values -- about 5x slower
-	 * than native mb_string in my testing.
-	 *
-	 * Larger offsets are still fairly efficient for Latin text, but
-	 * can be up to 100x slower than native if the text is heavily
-	 * multibyte and we have to slog through a few hundred kb.
-	 */
-	function mb_substr( $str, $start, $count='end' ) {
-		if( $start != 0 ) {
-			$split = mb_substr_split_unicode( $str, intval( $start ) );
-			$str = substr( $str, $split );
-		}
 
-		if( $count !== 'end' ) {
-			$split = mb_substr_split_unicode( $str, intval( $count ) );
-			$str = substr( $str, 0, $split );
-		}
 
-		return $str;
+
+/**
+ * Fallback implementation for mb_substr, hardcoded to UTF-8.
+ * Attempts to be at least _moderately_ efficient; best optimized
+ * for relatively small offset and count values -- about 5x slower
+ * than native mb_string in my testing.
+ *
+ * Larger offsets are still fairly efficient for Latin text, but
+ * can be up to 100x slower than native if the text is heavily
+ * multibyte and we have to slog through a few hundred kb.
+ */
+function fallback_mb_substr( $str, $start, $count='end' ) {
+	if( $start != 0 ) {
+		$split = fallback_mb_substr_split_unicode( $str, intval( $start ) );
+		$str = substr( $str, $split );
 	}
 
-	function mb_substr_split_unicode( $str, $splitPos ) {
-		if( $splitPos == 0 ) {
-			return 0;
-		}
+	if( $count !== 'end' ) {
+		$split = fallback_mb_substr_split_unicode( $str, intval( $count ) );
+		$str = substr( $str, 0, $split );
+	}
 
-		$byteLen = strlen( $str );
+	return $str;
+}
 
-		if( $splitPos > 0 ) {
-			if( $splitPos > 256 ) {
-				// Optimize large string offsets by skipping ahead N bytes.
-				// This will cut out most of our slow time on Latin-based text,
-				// and 1/2 to 1/3 on East European and Asian scripts.
-				$bytePos = $splitPos;
-				while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
-					++$bytePos;
-				}
-				$charPos = mb_strlen( substr( $str, 0, $bytePos ) );
-			} else {
-				$charPos = 0;
-				$bytePos = 0;
-			}
+function fallback_mb_substr_split_unicode( $str, $splitPos ) {
+	if( $splitPos == 0 ) {
+		return 0;
+	}
+
+	$byteLen = strlen( $str );
 
-			while( $charPos++ < $splitPos ) {
+	if( $splitPos > 0 ) {
+		if( $splitPos > 256 ) {
+			// Optimize large string offsets by skipping ahead N bytes.
+			// This will cut out most of our slow time on Latin-based text,
+			// and 1/2 to 1/3 on East European and Asian scripts.
+			$bytePos = $splitPos;
+			while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 				++$bytePos;
-				// Move past any tail bytes
-				while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
-					++$bytePos;
-				}
 			}
+			$charPos = mb_strlen( substr( $str, 0, $bytePos ) );
 		} else {
-			$splitPosX = $splitPos + 1;
-			$charPos = 0; // relative to end of string; we don't care about the actual char position here
-			$bytePos = $byteLen;
-			while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
+			$charPos = 0;
+			$bytePos = 0;
+		}
+
+		while( $charPos++ < $splitPos ) {
+			++$bytePos;
+			// Move past any tail bytes
+			while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
+				++$bytePos;
+			}
+		}
+	} else {
+		$splitPosX = $splitPos + 1;
+		$charPos = 0; // relative to end of string; we don't care about the actual char position here
+		$bytePos = $byteLen;
+		while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
+			--$bytePos;
+			// Move past any tail bytes
+			while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 				--$bytePos;
-				// Move past any tail bytes
-				while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
-					--$bytePos;
-				}
 			}
 		}
+	}
 
-		return $bytePos;
+	return $bytePos;
+}
+
+if ( !function_exists( 'mb_substr' ) ) {
+	function mb_substr( $str, $start, $count='end' ) {
+		return fallback_mb_substr( $str, $start, $count );
 	}
+
+	function mb_substr_split_unicode( $str, $splitPos ) {
+		return fallback_mb_substr_split_unicode( $str, $splitPos );
+	}
+}
+
+
+
+/**
+ * Fallback implementation of mb_strlen, hardcoded to UTF-8.
+ * @param string $str
+ * @param string $enc optional encoding; ignored
+ * @return int
+ */
+function fallback_mb_strlen( $str, $enc = '' ) {
+	$counts = count_chars( $str );
+	$total = 0;
+
+	// Count ASCII bytes
+	for( $i = 0; $i < 0x80; $i++ ) {
+		$total += $counts[$i];
+	}
+
+	// Count multibyte sequence heads
+	for( $i = 0xc0; $i < 0xff; $i++ ) {
+		$total += $counts[$i];
+	}
+	return $total;
 }
 
 if ( !function_exists( 'mb_strlen' ) ) {
-	/**
-	 * Fallback implementation of mb_strlen, hardcoded to UTF-8.
-	 * @param string $str
-	 * @param string $enc optional encoding; ignored
-	 * @return int
-	 */
 	function mb_strlen( $str, $enc = '' ) {
-		$counts = count_chars( $str );
-		$total = 0;
+		return fallback_mb_strlen( $str, $enc );
+	}
+}
 
-		// Count ASCII bytes
-		for( $i = 0; $i < 0x80; $i++ ) {
-			$total += $counts[$i];
-		}
 
-		// Count multibyte sequence heads
-		for( $i = 0xc0; $i < 0xff; $i++ ) {
-			$total += $counts[$i];
-		}
-		return $total;
+
+/**
+ * Fallback implementation of mb_strpos, hardcoded to UTF-8.
+ * @param $haystack String
+ * @param $needle String
+ * @param $offset String: optional start position
+ * @param $encoding String: optional encoding; ignored
+ * @return int
+ */
+function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
+	$needle = preg_quote( $needle, '/' );
+
+	$ar = array();
+	preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
+
+	if( isset( $ar[0][1] ) ) {
+		return $ar[0][1];
+	} else {
+		return false;
 	}
 }
 
 if( !function_exists( 'mb_strpos' ) ) {
-	/**
-	 * Fallback implementation of mb_strpos, hardcoded to UTF-8.
-	 * @param $haystack String
-	 * @param $needle String
-	 * @param $offset String: optional start position
-	 * @param $encoding String: optional encoding; ignored
-	 * @return int
-	 */
+
 	function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
-		$needle = preg_quote( $needle, '/' );
+		return fallback_mb_strpos( $haystack, $needle, $offset, $encoding );
+	}
+
+}
 
-		$ar = array();
-		preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 
-		if( isset( $ar[0][1] ) ) {
-			return $ar[0][1];
-		} else {
-			return false;
-		}
+
+/**
+ * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
+ * @param $haystack String
+ * @param $needle String
+ * @param $offset String: optional start position
+ * @param $encoding String: optional encoding; ignored
+ * @return int
+ */
+function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
+	$needle = preg_quote( $needle, '/' );
+
+	$ar = array();
+	preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
+
+	if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
+		isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
+		return $ar[0][count( $ar[0] ) - 1][1];
+	} else {
+		return false;
 	}
 }
 
 if( !function_exists( 'mb_strrpos' ) ) {
-	/**
-	 * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
-	 * @param $haystack String
-	 * @param $needle String
-	 * @param $offset String: optional start position
-	 * @param $encoding String: optional encoding; ignored
-	 * @return int
-	 */
 	function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
-		$needle = preg_quote( $needle, '/' );
-
-		$ar = array();
-		preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
-
-		if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
-			isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
-			return $ar[0][count( $ar[0] ) - 1][1];
-		} else {
-			return false;
-		}
+		return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );
 	}
 }
 
+
 // Support for Wietse Venema's taint feature
 if ( !function_exists( 'istainted' ) ) {
 	function istainted( $var ) {
@@ -200,6 +236,7 @@ if ( !function_exists( 'istainted' ) ) {
 /// @endcond
 
 
+
 /**
  * Like array_diff( $a, $b ) except that it works with two-dimensional arrays.
  */
-- 
2.20.1