From: Tyler Anthony Romeo Date: Tue, 25 Dec 2012 20:32:04 +0000 (-0500) Subject: Cleaned up and optimized wfBaseConvert(); X-Git-Tag: 1.31.0-rc.0~21175^2 X-Git-Url: http://git.cyclocoop.org/%24action?a=commitdiff_plain;h=9b9daadc46b3456dd9b9120e4aeb98e2ae38f26a;p=lhc%2Fweb%2Fwiklou.git Cleaned up and optimized wfBaseConvert(); * Added code to use BCMath or GMP if they are loaded, since they are both significantly faster. A parameter was added that can be used to force usage of a specific engine (mainly for testing). * Made an array of base digits rather than using strpos() every time. * Used casting instead of intval(), since the latter is six times slower. Also added unit tests for wfBaseConvert as well as a benchmarking class that measures the difference between using GMP, BCMath, and pure PHP. Change-Id: I841717be2b29a0b7fc57a13fde5cc0642cda82df --- diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 208befe8fa..e667d8791d 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -3179,88 +3179,97 @@ function wfDoUpdates( $commit = '' ) { * Supports base 2 through 36; digit values 10-36 are represented * as lowercase letters a-z. Input is case-insensitive. * - * @param $input String: of digits - * @param $sourceBase Integer: 2-36 - * @param $destBase Integer: 2-36 - * @param $pad Integer: 1 or greater - * @param $lowercase Boolean - * @return String or false on invalid input - */ -function wfBaseConvert( $input, $sourceBase, $destBase, $pad = 1, $lowercase = true ) { - $input = strval( $input ); - if( $sourceBase < 2 || + * @param string $input Input number + * @param int $sourceBase Base of the input number + * @param int $destBase Desired base of the output + * @param int $pad Minimum number of digits in the output (pad with zeroes) + * @param bool $lowercase Whether to output in lowercase or uppercase + * @param string $engine Either "gmp", "bcmath", or "php" + * @return string|bool The output number as a string, or false on error + */ +function wfBaseConvert( $input, $sourceBase, $destBase, $pad = 1, $lowercase = true, $engine = 'auto' ) { + if( + $sourceBase < 2 || $sourceBase > 36 || $destBase < 2 || $destBase > 36 || - $pad < 1 || - $sourceBase != intval( $sourceBase ) || - $destBase != intval( $destBase ) || - $pad != intval( $pad ) || - !is_string( $input ) || - $input == '' ) { + $sourceBase != (int) $sourceBase || + $destBase != (int) $destBase || + $pad != (int) $pad || + !preg_match( "/^[" . substr( '0123456789abcdefghijklmnopqrstuvwxyz', 0, $sourceBase ) . "]+$/i", $input ) + ) { return false; } - $digitChars = '0123456789abcdefghijklmnopqrstuvwxyz'; - $inDigits = array(); - $outChars = ''; - // Decode and validate input string - $input = strtolower( $input ); - for( $i = 0; $i < strlen( $input ); $i++ ) { - $n = strpos( $digitChars, $input[$i] ); - if( $n === false || $n > $sourceBase ) { - return false; + static $baseChars = array ( + 10 => 'a', 11 => 'b', 12 => 'c', 13 => 'd', 14 => 'e', 15 => 'f', + 16 => 'g', 17 => 'h', 18 => 'i', 19 => 'j', 20 => 'k', 21 => 'l', + 22 => 'm', 23 => 'n', 24 => 'o', 25 => 'p', 26 => 'q', 27 => 'r', + 28 => 's', 29 => 't', 30 => 'u', 31 => 'v', 32 => 'w', 33 => 'x', + 34 => 'y', 35 => 'z', + + '0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4, '5' => 5, + '6' => 6, '7' => 7, '8' => 8, '9' => 9, 'a' => 10, 'b' => 11, + 'c' => 12, 'd' => 13, 'e' => 14, 'f' => 15, 'g' => 16, 'h' => 17, + 'i' => 18, 'j' => 19, 'k' => 20, 'l' => 21, 'm' => 22, 'n' => 23, + 'o' => 24, 'p' => 25, 'q' => 26, 'r' => 27, 's' => 28, 't' => 29, + 'u' => 30, 'v' => 31, 'w' => 32, 'x' => 33, 'y' => 34, 'z' => 35 + ); + + if( extension_loaded( 'gmp' ) && ( $engine == 'auto' || $engine == 'gmp' ) ) { + $result = gmp_strval( gmp_init( $input, $sourceBase ), $destBase ); + } elseif( extension_loaded( 'bcmath' ) && ( $engine == 'auto' || $engine == 'bcmath' ) ) { + $decimal = '0'; + foreach( str_split( strtolower( $input ) ) as $char ) { + $decimal = bcmul( $decimal, $sourceBase ); + $decimal = bcadd( $decimal, $baseChars[$char] ); } - $inDigits[] = $n; - } - // Iterate over the input, modulo-ing out an output digit - // at a time until input is gone. - while( count( $inDigits ) ) { - $work = 0; - $workDigits = array(); + for( $result = ''; bccomp( $decimal, 0 ); $decimal = bcdiv( $decimal, $destBase, 0 ) ) { + $result .= $baseChars[bcmod( $decimal, $destBase )]; + } - // Long division... - foreach( $inDigits as $digit ) { - $work *= $sourceBase; - $work += $digit; + $result = strrev( $result ); + } else { + $inDigits = array(); + foreach( str_split( strtolower( $input ) ) as $char ) { + $inDigits[] = $baseChars[$char]; + } - if( $work < $destBase ) { - // Gonna need to pull another digit. - if( count( $workDigits ) ) { - // Avoid zero-padding; this lets us find - // the end of the input very easily when - // length drops to zero. - $workDigits[] = 0; - } - } else { - // Finally! Actual division! - $workDigits[] = intval( $work / $destBase ); + // Iterate over the input, modulo-ing out an output digit + // at a time until input is gone. + $result = ''; + while( $inDigits ) { + $work = 0; + $workDigits = array(); - // Isn't it annoying that most programming languages - // don't have a single divide-and-remainder operator, - // even though the CPU implements it that way? - $work = $work % $destBase; + // Long division... + foreach( $inDigits as $digit ) { + $work *= $sourceBase; + $work += $digit; + + if( $workDigits || $work >= $destBase ) { + $workDigits[] = (int) ( $work / $destBase ); + } + $work %= $destBase; } - } - // All that division leaves us with a remainder, - // which is conveniently our next output digit. - $outChars .= $digitChars[$work]; + // All that division leaves us with a remainder, + // which is conveniently our next output digit. + $result .= $baseChars[$work]; - // And we continue! - $inDigits = $workDigits; - } + // And we continue! + $inDigits = $workDigits; + } - while( strlen( $outChars ) < $pad ) { - $outChars .= '0'; + $result = strrev( $result ); } - if ( !$lowercase ) { - $outChars = strtoupper( $outChars ); + if( !$lowercase ) { + $result = strtoupper( $result ); } - return strrev( $outChars ); + return str_pad( $result, $pad, '0', STR_PAD_LEFT ); } /** diff --git a/maintenance/benchmarks/bench_wfBaseConvert.php b/maintenance/benchmarks/bench_wfBaseConvert.php new file mode 100644 index 0000000000..a1e5c6a4e8 --- /dev/null +++ b/maintenance/benchmarks/bench_wfBaseConvert.php @@ -0,0 +1,77 @@ +mDescription = "Benchmark for wfBaseConvert."; + $this->addOption( "inbase", "Input base", false, true ); + $this->addOption( "outbase", "Output base", false, true ); + $this->addOption( "length", "Size in digits to generate for input", false, true ); + } + + public function execute() { + $inbase = $this->getOption( "inbase", 36 ); + $outbase = $this->getOption( "outbase", 16 ); + $length = $this->getOption( "length", 128 ); + $number = self::makeRandomNumber( $inbase, $length ); + + $this->bench( array( + array( + 'function' => 'wfBaseConvert', + 'args' => array( $number, $inbase, $outbase, 0, true, 'php' ) + ), + array( + 'function' => 'wfBaseConvert', + 'args' => array( $number, $inbase, $outbase, 0, true, 'bcmath' ) + ), + array( + 'function' => 'wfBaseConvert', + 'args' => array( $number, $inbase, $outbase, 0, true, 'gmp' ) + ), + )); + + $this->output( $this->getFormattedResults() ); + } + + protected static function makeRandomNumber( $base, $length ) { + $baseChars = "0123456789abcdefghijklmnopqrstuvwxyz"; + $res = ""; + for( $i = 0; $i < $length; $i++ ) { + $res .= $baseChars[mt_rand(0, $base - 1)]; + } + return $res; + } +} + +$maintClass = 'bench_wfBaseConvert'; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/tests/phpunit/includes/GlobalFunctions/wfBaseConvertTest.php b/tests/phpunit/includes/GlobalFunctions/wfBaseConvertTest.php new file mode 100644 index 0000000000..21e50328b8 --- /dev/null +++ b/tests/phpunit/includes/GlobalFunctions/wfBaseConvertTest.php @@ -0,0 +1,182 @@ +assertSame( $base2, wfBaseConvert( $base3, '3', '2' ) ); + $this->assertSame( $base2, wfBaseConvert( $base5, '5', '2' ) ); + $this->assertSame( $base2, wfBaseConvert( $base8, '8', '2' ) ); + $this->assertSame( $base2, wfBaseConvert( $base10, '10', '2' ) ); + $this->assertSame( $base2, wfBaseConvert( $base16, '16', '2' ) ); + $this->assertSame( $base2, wfBaseConvert( $base36, '36', '2' ) ); + } + + /** + * @dataProvider provideSingleDigitConversions + */ + public function testDigitToBase3( $base2, $base3, $base5, $base8, $base10, $base16, $base36 ) { + $this->assertSame( $base3, wfBaseConvert( $base2, '2', '3' ) ); + $this->assertSame( $base3, wfBaseConvert( $base5, '5', '3' ) ); + $this->assertSame( $base3, wfBaseConvert( $base8, '8', '3' ) ); + $this->assertSame( $base3, wfBaseConvert( $base10, '10', '3' ) ); + $this->assertSame( $base3, wfBaseConvert( $base16, '16', '3' ) ); + $this->assertSame( $base3, wfBaseConvert( $base36, '36', '3' ) ); + } + + /** + * @dataProvider provideSingleDigitConversions + */ + public function testDigitToBase5( $base2, $base3, $base5, $base8, $base10, $base16, $base36 ) { + $this->assertSame( $base5, wfBaseConvert( $base2, '2', '5' ) ); + $this->assertSame( $base5, wfBaseConvert( $base3, '3', '5' ) ); + $this->assertSame( $base5, wfBaseConvert( $base8, '8', '5' ) ); + $this->assertSame( $base5, wfBaseConvert( $base10, '10', '5' ) ); + $this->assertSame( $base5, wfBaseConvert( $base16, '16', '5' ) ); + $this->assertSame( $base5, wfBaseConvert( $base36, '36', '5' ) ); + } + + /** + * @dataProvider provideSingleDigitConversions + */ + public function testDigitToBase8( $base2, $base3, $base5, $base8, $base10, $base16, $base36 ) { + $this->assertSame( $base8, wfBaseConvert( $base2, '2', '8' ) ); + $this->assertSame( $base8, wfBaseConvert( $base3, '3', '8' ) ); + $this->assertSame( $base8, wfBaseConvert( $base5, '5', '8' ) ); + $this->assertSame( $base8, wfBaseConvert( $base10, '10', '8' ) ); + $this->assertSame( $base8, wfBaseConvert( $base16, '16', '8' ) ); + $this->assertSame( $base8, wfBaseConvert( $base36, '36', '8' ) ); + } + + /** + * @dataProvider provideSingleDigitConversions + */ + public function testDigitToBase10( $base2, $base3, $base5, $base8, $base10, $base16, $base36 ) { + $this->assertSame( $base10, wfBaseConvert( $base2, '2', '10' ) ); + $this->assertSame( $base10, wfBaseConvert( $base3, '3', '10' ) ); + $this->assertSame( $base10, wfBaseConvert( $base5, '5', '10' ) ); + $this->assertSame( $base10, wfBaseConvert( $base8, '8', '10' ) ); + $this->assertSame( $base10, wfBaseConvert( $base16, '16', '10' ) ); + $this->assertSame( $base10, wfBaseConvert( $base36, '36', '10' ) ); + } + + /** + * @dataProvider provideSingleDigitConversions + */ + public function testDigitToBase16( $base2, $base3, $base5, $base8, $base10, $base16, $base36 ) { + $this->assertSame( $base16, wfBaseConvert( $base2, '2', '16' ) ); + $this->assertSame( $base16, wfBaseConvert( $base3, '3', '16' ) ); + $this->assertSame( $base16, wfBaseConvert( $base5, '5', '16' ) ); + $this->assertSame( $base16, wfBaseConvert( $base8, '8', '16' ) ); + $this->assertSame( $base16, wfBaseConvert( $base10, '10', '16' ) ); + $this->assertSame( $base16, wfBaseConvert( $base36, '36', '16' ) ); + } + + /** + * @dataProvider provideSingleDigitConversions + */ + public function testDigitToBase36( $base2, $base3, $base5, $base8, $base10, $base16, $base36 ) { + $this->assertSame( $base36, wfBaseConvert( $base2, '2', '36' ) ); + $this->assertSame( $base36, wfBaseConvert( $base3, '3', '36' ) ); + $this->assertSame( $base36, wfBaseConvert( $base5, '5', '36' ) ); + $this->assertSame( $base36, wfBaseConvert( $base8, '8', '36' ) ); + $this->assertSame( $base36, wfBaseConvert( $base10, '10', '36' ) ); + $this->assertSame( $base36, wfBaseConvert( $base16, '16', '36' ) ); + } + + public function testLargeNumber() { + $this->assertSame( '1100110001111010000000101110100', wfBaseConvert( 'sd89ys', 36, 2 ) ); + $this->assertSame( '11102112120221201101', wfBaseConvert( 'sd89ys', 36, 3 ) ); + $this->assertSame( '12003102232400', wfBaseConvert( 'sd89ys', 36, 5 ) ); + $this->assertSame( '14617200564', wfBaseConvert( 'sd89ys', 36, 8 ) ); + $this->assertSame( '1715274100', wfBaseConvert( 'sd89ys', 36, 10 ) ); + $this->assertSame( '663d0174', wfBaseConvert( 'sd89ys', 36, 16 ) ); + } + + public static function provideNumbers() { + $x = array(); + $chars = '0123456789abcdefghijklmnopqrstuvwxyz'; + for( $i = 0; $i < 50; $i++ ) { + $base = mt_rand( 2, 36 ); + $len = mt_rand( 10, 100 ); + + $str = ''; + for( $j = 0; $j < $len; $j++ ) { + $str .= $chars[mt_rand(0, $base - 1)]; + } + + $x[] = array( $base, $str ); + } + return $x; + } + + /** + * @dataProvider provideNumbers + */ + public function testIdentity( $base, $number ) { + $this->assertSame( $number, wfBaseConvert( $number, $base, $base, strlen( $number ) ) ); + } + + public function testInvalid() { + $this->assertFalse( wfBaseConvert( '101', 1, 15 ) ); + $this->assertFalse( wfBaseConvert( '101', 15, 1 ) ); + $this->assertFalse( wfBaseConvert( '101', 37, 15 ) ); + $this->assertFalse( wfBaseConvert( '101', 15, 37 ) ); + $this->assertFalse( wfBaseConvert( 'abcde', 10, 11 ) ); + $this->assertFalse( wfBaseConvert( '12930', 2, 10 ) ); + $this->assertFalse( wfBaseConvert( '101', 'abc', 15 ) ); + $this->assertFalse( wfBaseConvert( '101', 15, 'abc' ) ); + } + + public function testPadding() { + $number = "10101010101"; + $this->assertSame( strlen( $number ) + 5, strlen( wfBaseConvert( $number, 2, 2, strlen( $number ) + 5 ) ) ); + $this->assertSame( strlen( $number ), strlen( wfBaseConvert( $number, 2, 2, strlen( $number ) - 5 ) ) ); + } +}