From e8c17972dd80b68a3f59f31f8640e92e4b39c8ec Mon Sep 17 00:00:00 2001 From: Ori Livneh Date: Fri, 26 Jun 2015 12:26:46 -0700 Subject: [PATCH] Replace bundled IPSet library with composer dependency Complete the 'librarization' of IPSet by replacing the code in core with a dependency on the external library. Change-Id: I789b4fb42ee1da44ea3d8e1db551b047e11a439e --- autoload.php | 2 +- composer.json | 3 +- includes/compat/IPSetCompat.php | 28 +++ includes/libs/IPSet.php | 276 ---------------------- includes/utils/IP.php | 2 + tests/phpunit/includes/libs/IPSetTest.php | 252 -------------------- 6 files changed, 33 insertions(+), 530 deletions(-) create mode 100644 includes/compat/IPSetCompat.php delete mode 100644 includes/libs/IPSet.php delete mode 100644 tests/phpunit/includes/libs/IPSetTest.php diff --git a/autoload.php b/autoload.php index 6bc7238ce0..82a45b49d5 100644 --- a/autoload.php +++ b/autoload.php @@ -543,7 +543,7 @@ $wgAutoloadLocalClasses = array( 'IORMRow' => __DIR__ . '/includes/db/IORMRow.php', 'IORMTable' => __DIR__ . '/includes/db/IORMTable.php', 'IP' => __DIR__ . '/includes/utils/IP.php', - 'IPSet' => __DIR__ . '/includes/libs/IPSet.php', + 'IPSet' => __DIR__ . '/includes/compat/IPSetCompat.php', 'IPTC' => __DIR__ . '/includes/media/IPTC.php', 'IRCColourfulRCFeedFormatter' => __DIR__ . '/includes/rcfeed/IRCColourfulRCFeedFormatter.php', 'IcuCollation' => __DIR__ . '/includes/Collation.php', diff --git a/composer.json b/composer.json index 1eb85fd81b..852f2d2247 100644 --- a/composer.json +++ b/composer.json @@ -24,9 +24,10 @@ "oojs/oojs-ui": "0.12.6", "php": ">=5.3.3", "psr/log": "1.0.0", - "wikimedia/cdb": "1.0.1", "wikimedia/assert": "0.2.2", + "wikimedia/cdb": "1.0.1", "wikimedia/composer-merge-plugin": "1.2.1", + "wikimedia/ip-set": "1.0.1", "wikimedia/utfnormal": "1.0.2", "wikimedia/wrappedstring": "2.0.0", "zordius/lightncandy": "0.21" diff --git a/includes/compat/IPSetCompat.php b/includes/compat/IPSetCompat.php new file mode 100644 index 0000000000..79c600046f --- /dev/null +++ b/includes/compat/IPSetCompat.php @@ -0,0 +1,28 @@ + - */ - -/** - * Matches IP addresses against a set of CIDR specifications - * - * Usage: - * // At startup, calculate the optimized data structure for the set: - * $ipset = new IPSet( $wgSquidServersNoPurge ); - * // runtime check against cached set (returns bool): - * $allowme = $ipset->match( $ip ); - * - * In rough benchmarking, this takes about 80% more time than - * in_array() checks on a short (a couple hundred at most) array - * of addresses. It's fast either way at those levels, though, - * and IPSet would scale better than in_array if the array were - * much larger. - * - * For mixed-family CIDR sets, however, this code gives well over - * 100x speedup vs iterating IP::isInRange() over an array - * of CIDR specs. - * - * The basic implementation is two separate binary trees - * (IPv4 and IPv6) as nested php arrays with keys named 0 and 1. - * The values false and true are terminal match-fail and match-success, - * otherwise the value is a deeper node in the tree. - * - * A simple depth-compression scheme is also implemented: whole-byte - * tree compression at whole-byte boundaries only, where no branching - * occurs during that whole byte of depth. A compressed node has - * keys 'comp' (the byte to compare) and 'next' (the next node to - * recurse into if 'comp' matched successfully). - * - * For example, given these inputs: - * 25.0.0.0/9 - * 25.192.0.0/10 - * - * The v4 tree would look like: - * root4 => array( - * 'comp' => 25, - * 'next' => array( - * 0 => true, - * 1 => array( - * 0 => false, - * 1 => true, - * ), - * ), - * ); - * - * (multi-byte compression nodes were attempted as well, but were - * a net loss in my test scenarios due to additional match complexity) - * - * @since 1.24 - */ -class IPSet { - /** @var array $root4: the root of the IPv4 matching tree */ - private $root4 = array( false, false ); - - /** @var array $root6: the root of the IPv6 matching tree */ - private $root6 = array( false, false ); - - /** - * __construct() instantiate the object from an array of CIDR specs - * - * @param array $cfg array of IPv[46] CIDR specs as strings - * @return IPSet new IPSet object - * - * Invalid input network/mask values in $cfg will result in issuing - * E_WARNING and/or E_USER_WARNING and the bad values being ignored. - */ - public function __construct( array $cfg ) { - foreach ( $cfg as $cidr ) { - $this->addCidr( $cidr ); - } - - self::recOptimize( $this->root4 ); - self::recCompress( $this->root4, 0, 24 ); - self::recOptimize( $this->root6 ); - self::recCompress( $this->root6, 0, 120 ); - } - - /** - * Add a single CIDR spec to the internal matching trees - * - * @param string $cidr string CIDR spec, IPv[46], optional /mask (def all-1's) - */ - private function addCidr( $cidr ) { - // v4 or v6 check - if ( strpos( $cidr, ':' ) === false ) { - $node =& $this->root4; - $defMask = '32'; - } else { - $node =& $this->root6; - $defMask = '128'; - } - - // Default to all-1's mask if no netmask in the input - if ( strpos( $cidr, '/' ) === false ) { - $net = $cidr; - $mask = $defMask; - } else { - list( $net, $mask ) = explode( '/', $cidr, 2 ); - if ( !ctype_digit( $mask ) || intval( $mask ) > $defMask ) { - trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING ); - return; - } - } - $mask = intval( $mask ); // explicit integer convert, checked above - - // convert $net to an array of integer bytes, length 4 or 16: - $raw = inet_pton( $net ); - if ( $raw === false ) { - return; // inet_pton() sends an E_WARNING for us - } - $rawOrd = array_map( 'ord', str_split( $raw ) ); - - // special-case: zero mask overwrites the whole tree with a pair of terminal successes - if ( $mask == 0 ) { - $node = array( true, true ); - return; - } - - // iterate the bits of the address while walking the tree structure for inserts - $curBit = 0; - while ( 1 ) { - $maskShift = 7 - ( $curBit & 7 ); - $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift]; - ++$curBit; - if ( $node === true ) { - // already added a larger supernet, no need to go deeper - return; - } elseif ( $curBit == $mask ) { - // this may wipe out deeper subnets from earlier - $node = true; - return; - } elseif ( $node === false ) { - // create new subarray to go deeper - $node = array( false, false ); - } - } - } - - /** - * Match an IP address against the set - * - * @param string $ip string IPv[46] address - * @return bool true is match success, false is match failure - * - * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect - */ - public function match( $ip ) { - $raw = inet_pton( $ip ); - if ( $raw === false ) { - return false; // inet_pton() sends an E_WARNING for us - } - - $rawOrd = array_map( 'ord', str_split( $raw ) ); - if ( count( $rawOrd ) == 4 ) { - $node =& $this->root4; - } else { - $node =& $this->root6; - } - - $curBit = 0; - while ( 1 ) { - if ( isset( $node['comp'] ) ) { - // compressed node, matches 1 whole byte on a byte boundary - if ( $rawOrd[$curBit >> 3] != $node['comp'] ) { - return false; - } - $curBit += 8; - $node =& $node['next']; - } else { - // uncompressed node, walk in the correct direction for the current bit-value - $maskShift = 7 - ( $curBit & 7 ); - $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift]; - ++$curBit; - } - - if ( $node === true || $node === false ) { - return $node; - } - } - } - - /** - * Recursively merges adjacent nets into larger supernets - * - * @param array &$node Tree node to optimize, by-reference - * - * e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7 - */ - private static function recOptimize( &$node ) { - if ( $node[0] !== false && $node[0] !== true && self::recOptimize( $node[0] ) ) { - $node[0] = true; - } - if ( $node[1] !== false && $node[1] !== true && self::recOptimize( $node[1] ) ) { - $node[1] = true; - } - if ( $node[0] === true && $node[1] === true ) { - return true; - } - return false; - } - - /** - * Recursively compresses a tree - * - * @param array &$node Tree node to compress, by-reference - * @param integer $curBit current depth in the tree - * @param integer $maxCompStart maximum depth at which compression can start, family-specific - * - * This is a very simplistic compression scheme: if we go through a whole - * byte of address starting at a byte boundary with no real branching - * other than immediate false-vs-(node|true), compress that subtree down to a single - * byte-matching node. - * The $maxCompStart check elides recursing the final 7 levels of depth (family-dependent) - */ - private static function recCompress( &$node, $curBit, $maxCompStart ) { - if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8 single path(s) - $byte = 0; - $cnode =& $node; - $i = 8; - while ( $i-- ) { - if ( $cnode[0] === false ) { - $byte |= 1 << $i; - $cnode =& $cnode[1]; - } elseif ( $cnode[1] === false ) { - $cnode =& $cnode[0]; - } else { - // partial-byte branching, give up - break; - } - } - if ( $i == -1 ) { // means we did not exit the while() via break - $node = array( - 'comp' => $byte, - 'next' => &$cnode, - ); - $curBit += 8; - if ( $cnode !== true ) { - self::recCompress( $cnode, $curBit, $maxCompStart ); - } - return; - } - } - - ++$curBit; - if ( $curBit <= $maxCompStart ) { - if ( $node[0] !== false && $node[0] !== true ) { - self::recCompress( $node[0], $curBit, $maxCompStart ); - } - if ( $node[1] !== false && $node[1] !== true ) { - self::recCompress( $node[1], $curBit, $maxCompStart ); - } - } - } -} diff --git a/includes/utils/IP.php b/includes/utils/IP.php index ae3736aeec..666660aa30 100644 --- a/includes/utils/IP.php +++ b/includes/utils/IP.php @@ -21,6 +21,8 @@ * @author Antoine Musso "", Aaron Schulz */ +use IPSet\IPSet; + // Some regex definition to "play" with IP address and IP address blocks // An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255 diff --git a/tests/phpunit/includes/libs/IPSetTest.php b/tests/phpunit/includes/libs/IPSetTest.php deleted file mode 100644 index 5bbacef42d..0000000000 --- a/tests/phpunit/includes/libs/IPSetTest.php +++ /dev/null @@ -1,252 +0,0 @@ - expected (boolean) result against the config dataset. - */ - public static function provideIPSets() { - return array( - array( - 'old_list_subset', - array( - '208.80.152.162', - '10.64.0.123', - '10.64.0.124', - '10.64.0.125', - '10.64.0.126', - '10.64.0.127', - '10.64.0.128', - '10.64.0.129', - '10.64.32.104', - '10.64.32.105', - '10.64.32.106', - '10.64.32.107', - '91.198.174.45', - '91.198.174.46', - '91.198.174.47', - '91.198.174.57', - '2620:0:862:1:A6BA:DBFF:FE30:CFB3', - '91.198.174.58', - '2620:0:862:1:A6BA:DBFF:FE38:FFDA', - '208.80.152.16', - '208.80.152.17', - '208.80.152.18', - '208.80.152.19', - '91.198.174.102', - '91.198.174.103', - '91.198.174.104', - '91.198.174.105', - '91.198.174.106', - '91.198.174.107', - '91.198.174.81', - '2620:0:862:1:26B6:FDFF:FEF5:B2D4', - '91.198.174.82', - '2620:0:862:1:26B6:FDFF:FEF5:ABB4', - '10.20.0.113', - '2620:0:862:102:26B6:FDFF:FEF5:AD9C', - '10.20.0.114', - '2620:0:862:102:26B6:FDFF:FEF5:7C38', - ), - array( - '0.0.0.0' => false, - '255.255.255.255' => false, - '10.64.0.122' => false, - '10.64.0.123' => true, - '10.64.0.124' => true, - '10.64.0.129' => true, - '10.64.0.130' => false, - '91.198.174.81' => true, - '91.198.174.80' => false, - '0::0' => false, - 'ffff:ffff:ffff:ffff:FFFF:FFFF:FFFF:FFFF' => false, - '2001:db8::1234' => false, - '2620:0:862:1:26b6:fdff:fef5:abb3' => false, - '2620:0:862:1:26b6:fdff:fef5:abb4' => true, - '2620:0:862:1:26b6:fdff:fef5:abb5' => false, - ), - ), - array( - 'new_cidr_set', - array( - '208.80.154.0/26', - '2620:0:861:1::/64', - '208.80.154.128/26', - '2620:0:861:2::/64', - '208.80.154.64/26', - '2620:0:861:3::/64', - '208.80.155.96/27', - '2620:0:861:4::/64', - '10.64.0.0/22', - '2620:0:861:101::/64', - '10.64.16.0/22', - '2620:0:861:102::/64', - '10.64.32.0/22', - '2620:0:861:103::/64', - '10.64.48.0/22', - '2620:0:861:107::/64', - '91.198.174.0/25', - '2620:0:862:1::/64', - '10.20.0.0/24', - '2620:0:862:102::/64', - '10.128.0.0/24', - '2620:0:863:101::/64', - '10.2.4.26', - ), - array( - '0.0.0.0' => false, - '255.255.255.255' => false, - '10.2.4.25' => false, - '10.2.4.26' => true, - '10.2.4.27' => false, - '10.20.0.255' => true, - '10.128.0.0' => true, - '10.64.17.55' => true, - '10.64.20.0' => false, - '10.64.27.207' => false, - '10.64.31.255' => false, - '0::0' => false, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => false, - '2001:DB8::1' => false, - '2620:0:861:106::45' => false, - '2620:0:862:103::' => false, - '2620:0:862:102:10:20:0:113' => true, - ), - ), - array( - 'empty_set', - array(), - array( - '0.0.0.0' => false, - '255.255.255.255' => false, - '10.2.4.25' => false, - '10.2.4.26' => false, - '10.2.4.27' => false, - '10.20.0.255' => false, - '10.128.0.0' => false, - '10.64.17.55' => false, - '10.64.20.0' => false, - '10.64.27.207' => false, - '10.64.31.255' => false, - '0::0' => false, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => false, - '2001:DB8::1' => false, - '2620:0:861:106::45' => false, - '2620:0:862:103::' => false, - '2620:0:862:102:10:20:0:113' => false, - ), - ), - array( - 'edge_cases', - array( - '0.0.0.0', - '255.255.255.255', - '::', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', - '10.10.10.10/25', // host bits intentional - ), - array( - '0.0.0.0' => true, - '255.255.255.255' => true, - '10.2.4.25' => false, - '10.2.4.26' => false, - '10.2.4.27' => false, - '10.20.0.255' => false, - '10.128.0.0' => false, - '10.64.17.55' => false, - '10.64.20.0' => false, - '10.64.27.207' => false, - '10.64.31.255' => false, - '0::0' => true, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => true, - '2001:DB8::1' => false, - '2620:0:861:106::45' => false, - '2620:0:862:103::' => false, - '2620:0:862:102:10:20:0:113' => false, - '10.10.9.255' => false, - '10.10.10.0' => true, - '10.10.10.1' => true, - '10.10.10.10' => true, - '10.10.10.126' => true, - '10.10.10.127' => true, - '10.10.10.128' => false, - '10.10.10.177' => false, - '10.10.10.255' => false, - '10.10.11.0' => false, - ), - ), - array( - 'exercise_optimizer', - array( - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fffe:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fffd:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fffc:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fffb:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fffa:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff9:8000/113', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff9:0/113', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff8:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff7:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff6:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff5:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff4:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff3:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff2:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff1:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:fff0:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffef:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffee:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffec:0/111', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffeb:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffea:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe9:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe8:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe7:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe6:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe5:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe4:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe3:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe2:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe1:0/112', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0/110', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0/107', - 'ffff:ffff:ffff:ffff:ffff:ffff:ffa0:0/107', - ), - array( - '0.0.0.0' => false, - '255.255.255.255' => false, - '::' => false, - 'ffff:ffff:ffff:ffff:ffff:ffff:ff9f:ffff' => false, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffa0:0' => true, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffc0:1234' => true, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffed:ffff' => true, - 'ffff:ffff:ffff:ffff:ffff:ffff:fff4:4444' => true, - 'ffff:ffff:ffff:ffff:ffff:ffff:fff9:8080' => true, - 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => true, - ), - ), - ); - } - - /** - * Validates IPSet loading and matching code - * - * @covers IPSet - * @dataProvider provideIPSets - */ - public function testIPSet( $desc, array $cfg, array $tests ) { - $ipset = new IPSet( $cfg ); - foreach ( $tests as $ip => $expected ) { - $result = $ipset->match( $ip ); - $this->assertEquals( $expected, $result, "Incorrect match() result for $ip in dataset $desc" ); - } - } -} -- 2.20.1