From: Ori Livneh Date: Fri, 3 Jan 2014 01:07:10 +0000 (-0800) Subject: Add class implementing MessagePack serialization X-Git-Tag: 1.31.0-rc.0~17409 X-Git-Url: http://git.cyclocoop.org/%24href?a=commitdiff_plain;h=681607e55b44860cf5dc985128daca8b7bda8ae0;p=lhc%2Fweb%2Fwiklou.git Add class implementing MessagePack serialization MessagePack is a space-efficient binary data interchange format. I am going to use it to encode profiling data in ProfilerSimpleUDP. The official PHP implementation is provided as a C extension, so using it would further encumber migration to HHVM. This patch adds MWMessagePack, a class implementing a pack() method for encoding native PHP values as MessagePack byte strings. The implementation is based on OnlineCity's PHP implementation (see the copyright notice in MWMessagePack.php), but revised for clarity and conformity with MediaWiki coding conventions. Change-Id: Id2833c5a9da659cb13ec1330de9dd57138ada9c8 --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 1f81249fe9..5b99c8d25b 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -185,7 +185,6 @@ $wgAutoloadLocalClasses = array( 'Revision' => 'includes/Revision.php', 'RevisionList' => 'includes/RevisionList.php', 'RSSFeed' => 'includes/Feed.php', - 'RunningStat' => 'includes/profiler/RunningStat.php', 'Sanitizer' => 'includes/Sanitizer.php', 'SiteConfiguration' => 'includes/SiteConfiguration.php', 'SiteStats' => 'includes/SiteStats.php', @@ -688,6 +687,7 @@ $wgAutoloadLocalClasses = array( 'JSParser' => 'includes/libs/jsminplus.php', 'JSToken' => 'includes/libs/jsminplus.php', 'JSTokenizer' => 'includes/libs/jsminplus.php', + 'MWMessagePack' => 'includes/libs/MWMessagePack.php', 'ScopedCallback' => 'includes/libs/ScopedCallback.php', 'ScopedPHPTimeout' => 'includes/libs/ScopedPHPTimeout.php', 'XmlTypeCheck' => 'includes/libs/XmlTypeCheck.php', @@ -831,6 +831,7 @@ $wgAutoloadLocalClasses = array( 'ProfilerSimpleUDP' => 'includes/profiler/ProfilerSimpleUDP.php', 'ProfilerStub' => 'includes/profiler/ProfilerStub.php', 
/**
 * MessagePack serializer (includes/libs/MWMessagePack.php).
 *
 * Copyright (c) 2011 OnlineCity.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @since 1.23
 * @file
 */

/**
 * Pure-PHP encoder that turns native PHP values into MessagePack byte strings.
 *
 * Only encoding is provided; there is no unpack counterpart in this class.
 */
class MWMessagePack {

	/** @var boolean|null Whether current system is bigendian. **/
	public static $bigendian;

	/**
	 * Encode a value using MessagePack
	 *
	 * This method supports null, boolean, integer, float, string and array
	 * (both indexed and associative) types. Object serialization is not
	 * supported.
	 *
	 * @param mixed $value
	 * @return string Binary MessagePack representation of $value
	 * @throws LengthException If $value (or a nested element) has an
	 *  unsupported type, or is a string/array too long to be represented.
	 */
	public static function pack( $value ) {
		// Detected lazily, once. Only floats need it: pack( 'd' ) uses the
		// machine byte order while MessagePack requires big-endian.
		if ( self::$bigendian === null ) {
			self::$bigendian = pack( 'S', 1 ) === pack( 'n', 1 );
		}

		switch ( gettype( $value ) ) {
			case 'NULL':
				return "\xC0";

			case 'boolean':
				return $value ? "\xC3" : "\xC2";

			case 'double':
				// gettype() reports floats as 'double'; it never returns 'float'.
				return self::packDouble( $value );

			case 'string':
				return self::packString( $value );

			case 'integer':
				return self::packInteger( $value );

			case 'array':
				return self::packArray( $value );

			default:
				throw new LengthException( 'Unsupported type: ' . gettype( $value ) );
		}
	}

	/**
	 * Encode a float as a MessagePack 64-bit double (big-endian IEEE 754).
	 *
	 * @param float $value
	 * @return string
	 */
	private static function packDouble( $value ) {
		$bytes = pack( 'd', $value );
		// pack( 'd' ) emits machine byte order, so flip it on little-endian hosts.
		return "\xCB" . ( self::$bigendian ? $bytes : strrev( $bytes ) );
	}

	/**
	 * Encode a byte string as fixraw, raw16 or raw32 depending on its length.
	 *
	 * @param string $value
	 * @return string
	 * @throws LengthException If the string is longer than 4294967295 bytes.
	 */
	private static function packString( $value ) {
		$length = strlen( $value );
		if ( $length < 32 ) {
			return pack( 'Ca*', 0xA0 | $length, $value );
		}
		if ( $length <= 0xFFFF ) {
			return pack( 'Cna*', 0xDA, $length, $value );
		}
		if ( $length <= 0xFFFFFFFF ) {
			return pack( 'CNa*', 0xDB, $length, $value );
		}
		throw new LengthException( "String too long: $length (max: 4294967295)." );
	}

	/**
	 * Encode an integer using the smallest MessagePack integer format that fits.
	 *
	 * Every multi-byte payload is written with the 'n' / 'N' pack codes, which
	 * are always big-endian and simply take the low 16 / 32 bits of their
	 * argument (two's complement for negatives). The output therefore does not
	 * depend on the byte order of the host.
	 *
	 * @param int $value
	 * @return string
	 */
	private static function packInteger( $value ) {
		if ( $value >= 0 ) {
			if ( $value <= 0x7F ) {
				// positive fixnum
				return chr( $value );
			}
			if ( $value <= 0xFF ) {
				// uint8
				return pack( 'CC', 0xCC, $value );
			}
			if ( $value <= 0xFFFF ) {
				// uint16
				return pack( 'Cn', 0xCD, $value );
			}
			if ( $value <= 0xFFFFFFFF ) {
				// uint32
				return pack( 'CN', 0xCE, $value );
			}
			// uint64: only reachable on 64-bit builds. pack() has no portable
			// 64-bit code, so emit the high word followed by the low word.
			// $value is non-negative here, so a plain shift yields the high word
			// without needing a float-sized bit mask.
			return pack( 'CNN', 0xCF, $value >> 32, $value & 0xFFFFFFFF );
		}

		if ( $value >= -32 ) {
			// negative fixnum
			return pack( 'c', $value );
		}
		if ( $value >= -0x80 ) {
			// int8
			return pack( 'Cc', 0xD0, $value );
		}
		if ( $value >= -0x8000 ) {
			// int16
			return pack( 'Cn', 0xD1, $value );
		}
		if ( $value >= -0x80000000 ) {
			// int32
			return pack( 'CN', 0xD2, $value );
		}
		// int64: only reachable on 64-bit builds; high word first, then low word.
		return pack( 'CNN', 0xD3, ( $value >> 32 ) & 0xFFFFFFFF, $value & 0xFFFFFFFF );
	}

	/**
	 * Encode an array as a MessagePack array (list) or map (associative).
	 *
	 * An array is treated as a list only when its keys are exactly 0..n-1 in
	 * order; anything else, including sparse or reordered integer keys, is
	 * encoded as a map. An empty array is encoded as an empty list.
	 *
	 * @param array $value
	 * @return string
	 * @throws LengthException If the array has more than 4294967295 elements,
	 *  or if any key or element cannot be encoded.
	 */
	private static function packArray( array $value ) {
		$length = count( $value );
		if ( $length > 0xFFFFFFFF ) {
			throw new LengthException( "Array too long: $length (max: 4294967295)." );
		}

		$associative = array_values( $value ) !== $value;

		if ( $associative ) {
			if ( $length < 16 ) {
				$buffer = pack( 'C', 0x80 | $length );
			} elseif ( $length <= 0xFFFF ) {
				$buffer = pack( 'Cn', 0xDE, $length );
			} else {
				$buffer = pack( 'CN', 0xDF, $length );
			}
			foreach ( $value as $k => $v ) {
				$buffer .= self::pack( $k );
				$buffer .= self::pack( $v );
			}
			return $buffer;
		}

		if ( $length < 16 ) {
			$buffer = pack( 'C', 0x90 | $length );
		} elseif ( $length <= 0xFFFF ) {
			$buffer = pack( 'Cn', 0xDC, $length );
		} else {
			$buffer = pack( 'CN', 0xDD, $length );
		}
		foreach ( $value as $v ) {
			$buffer .= self::pack( $v );
		}
		return $buffer;
	}
}
+ */ + public $data = array( + 'integer' => array( + array( 0, '00' ), + array( 1, '01' ), + array( 5, '05' ), + array( -1, 'ff' ), + array( -2, 'fe' ), + array( 35, '23' ), + array( -35, 'd0dd' ), + array( 128, 'cc80' ), + array( -128, 'd080' ), + array( 1000, 'cd03e8' ), + array( -1000, 'd1fc18' ), + array( 100000, 'ce000186a0' ), + array( -100000, 'd2fffe7960' ), + array( 10000000000, 'cf00000002540be400' ), + array( -10000000000, 'd3fffffffdabf41c00' ), + array( -223372036854775807, 'd3fce66c50e2840001' ), + array( -9223372036854775807, 'd38000000000000001' ), + ), + 'NULL' => array( + array( null, 'c0' ), + ), + 'boolean' => array( + array( true, 'c3' ), + array( false, 'c2' ), + ), + 'double' => array( + array( 0.1, 'cb3fb999999999999a' ), + array( 1.1, 'cb3ff199999999999a' ), + array( 123.456, 'cb405edd2f1a9fbe77' ), + ), + 'string' => array( + array( '', 'a0' ), + array( 'foobar', 'a6666f6f626172' ), + array( + 'Lorem ipsum dolor sit amet amet.', + 'da00204c6f72656d20697073756d20646f6c6f722073697420616d657420616d65742e' + ), + ), + 'array' => array( + array( array( 'abc', 'def', 'ghi' ), '93a3616263a3646566a3676869' ), + array( array( 'one' => 1, 'two' => 2 ), '82a36f6e6501a374776f02' ), + ), + ); + + /** + * Verify that values are serialized correctly. + * @covers MWMessagePack::pack + */ + public function testMessagePack() { + foreach( $this->data as $type => $cases ) { + foreach( $cases as $case ) { + list( $value, $expected ) = $case; + $actual = bin2hex( MWMessagePack::pack( $value ) ); + $this->assertEquals( $actual, $expected, $type ); + } + } + } +}