Removed weird unused "IPv4toIPv6" function
[lhc/web/wiklou.git] / includes / IP.php
1 <?php
2 /**
3 * Functions and constants to play with IP addresses and ranges
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @author Ashar Voultoiz <hashar at free dot fr>, Aaron Schulz
22 */
23
// Regular-expression fragments for matching IP addresses and address blocks.
// None of them carries delimiters or anchors; callers add "/^...$/" as needed.

// An IPv4 address is made of 4 dot-separated bytes, each written as decimal 0 to 255
define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' );
define( 'RE_IP_ADD', implode( '\.', array_fill( 0, 4, RE_IP_BYTE ) ) );
// An IPv4 block is an IP address, a slash and a prefix length (0 to 32)
define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' );
define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX );

// An IPv6 address is made up of 8 words (each x0000 to xFFFF).
// However, the "::" abbreviation can be used on consecutive x0000 words.
define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' );
define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' );
define( 'RE_IPV6_ADD',
	'(' . implode( '|', array(
		// starts with "::" (includes the address "::")
		'(::|:(:' . RE_IPV6_WORD . '){1,7})',
		// ends with "::" (not including the address "::")
		RE_IPV6_WORD . '(:' . RE_IPV6_WORD . '){0,6}::',
		// has no "::"
		RE_IPV6_WORD . '(:' . RE_IPV6_WORD . '){7}',
		// contains one "::" in the middle; the named group "abbr" can consume the
		// extra ":" only once, and the trailing "^" check always fails if no "::" was found
		RE_IPV6_WORD . '(:(?P<abbr>(?(abbr)|:))?' . RE_IPV6_WORD . '){1,6}(?(abbr)|^)',
	) ) . ')'
);
// An IPv6 block is an IP address, a slash and a prefix length (0 to 128)
define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX );
// For IPv6 canonicalization (NOT for strict validation; these are quite lax!)
define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' );
define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' );

// This might be useful for regexps used elsewhere; it matches any IPv4 or IPv6
// address or network (the CIDR suffix is optional in both alternatives)
define( 'IP_ADDRESS_STRING',
	'(?:' . implode( '|', array(
		RE_IP_ADD . '(\/' . RE_IP_PREFIX . '|)', // IPv4
		RE_IPV6_ADD . '(\/' . RE_IPV6_PREFIX . '|)', // IPv6
	) ) . ')'
);
62
63 /**
64 * A collection of public static functions to play with IP address
65 * and IP blocks.
66 */
67 class IP {
/**
 * Determine if a string is a valid IP address or network (CIDR prefix).
 * The whole string has to match; a trailing newline is not tolerated.
 * SIIT IPv4-translated addresses are rejected.
 * Note: canonicalize() tries to convert translated addresses to IPv4.
 * @param string $ip possible IP address
 * @return bool
 */
public static function isIPAddress( $ip ) {
	// Without the D modifier, "$" also matches just before a final "\n",
	// which would let "1.2.3.4\n" pass as a valid address.
	return (bool)preg_match( '/^' . IP_ADDRESS_STRING . '$/D', $ip );
}
78
/**
 * Given a string, determine if it is a valid IP in IPv6 only.
 * The whole string has to match; a trailing newline is not tolerated.
 * Note: Unlike isValid(), this looks for networks too.
 * @param string $ip possible IP address
 * @return bool
 */
public static function isIPv6( $ip ) {
	// The CIDR suffix is optional. The D modifier stops "$" from matching
	// before a final "\n", so "::1\n" is rejected.
	return (bool)preg_match( '/^' . RE_IPV6_ADD . '(\/' . RE_IPV6_PREFIX . '|)$/D', $ip );
}
88
/**
 * Given a string, determine if it is a valid IP in IPv4 only.
 * The whole string has to match; a trailing newline is not tolerated.
 * Note: Unlike isValid(), this looks for networks too.
 * @param string $ip possible IP address
 * @return bool
 */
public static function isIPv4( $ip ) {
	// The CIDR suffix is optional. The D modifier stops "$" from matching
	// before a final "\n", so "1.2.3.4\n" is rejected.
	return (bool)preg_match( '/^' . RE_IP_ADD . '(\/' . RE_IP_PREFIX . '|)$/D', $ip );
}
98
/**
 * Validate an IP address. Ranges are NOT considered valid.
 * The whole string has to match; a trailing newline is not tolerated.
 * SIIT IPv4-translated addresses are rejected.
 * Note: canonicalize() tries to convert translated addresses to IPv4.
 * @param string $ip
 * @return boolean True if it is valid.
 */
public static function isValid( $ip ) {
	// D modifier: "$" must match the very end of the subject, not the
	// position before a final "\n".
	return ( preg_match( '/^' . RE_IP_ADD . '$/D', $ip )
		|| preg_match( '/^' . RE_IPV6_ADD . '$/D', $ip ) );
}
110
/**
 * Validate an IP Block (valid address WITH a valid prefix).
 * The whole string has to match; a trailing newline is not tolerated.
 * SIIT IPv4-translated addresses are rejected.
 * Note: canonicalize() tries to convert translated addresses to IPv4.
 * @param string $ipblock
 * @return boolean True if it is valid.
 */
public static function isValidBlock( $ipblock ) {
	// D modifier: "$" must match the very end of the subject, not the
	// position before a final "\n".
	return ( preg_match( '/^' . RE_IPV6_BLOCK . '$/D', $ipblock )
		|| preg_match( '/^' . RE_IP_BLOCK . '$/D', $ipblock ) );
}
122
/**
 * Convert an IP into a nice standard form.
 * IPv6 addresses in octet notation are upper-cased, expanded to 8 words and
 * have leading zeros removed from each word; a CIDR suffix is kept as is.
 * IPv4 addresses, and strings that are not valid IPv6, are just trimmed.
 * @param string $ip IP address in quad or octet form (CIDR or not).
 * @return string|null sanitized string, or null when $ip is empty after trimming
 */
public static function sanitizeIP( $ip ) {
	$ip = trim( $ip );
	if ( $ip === '' ) {
		return null;
	}
	if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) {
		return $ip; // nothing else to do for IPv4 addresses or invalid ones
	}
	// Convert to upper case (whitespace was already trimmed above)
	$ip = strtoupper( $ip );
	// Expand zero abbreviations
	$abbrevPos = strpos( $ip, '::' );
	if ( $abbrevPos !== false ) {
		// We know this is valid IPv6. Isolate the address part that
		// precedes any CIDR number (e.g. "a:b:c::/24").
		$CIDRStart = strpos( $ip, '/' );
		$address = ( $CIDRStart !== false )
			? substr( $ip, 0, $CIDRStart )
			: $ip;
		$addressEnd = strlen( $address ) - 1;
		// If the '::' is at the beginning...
		if ( $abbrevPos == 0 ) {
			$repeat = '0:';
			// The address '::' has no word after the abbreviation, so one more
			// '0' is needed. Test the address part only, otherwise "::/8"
			// would expand to "0:0:0:0:0:0:0:/8".
			$extra = ( $address == '::' ) ? '0' : '';
			$pad = 9; // 7+2 (due to '::')
		// If the '::' is at the end...
		} elseif ( $abbrevPos == ( $addressEnd - 1 ) ) {
			$repeat = ':0';
			$extra = '';
			$pad = 9; // 7+2 (due to '::')
		// If the '::' is in the middle...
		} else {
			$repeat = ':0';
			$extra = ':';
			$pad = 8; // 6+2 (due to '::')
		}
		// The CIDR suffix holds no colons, so counting on the full string is safe
		$ip = str_replace( '::',
			str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
			$ip
		);
	}
	// Remove leading zeros from each word as needed
	$ip = preg_replace( '/(^|:)0+' . RE_IPV6_WORD . '/', '$1$2', $ip );
	return $ip;
}
174
/**
 * Turn the decimal number of an IPv6 address into octet notation.
 * The number is first rewritten through wfBaseConvert() (defined elsewhere;
 * called here for base 10 to base 16 with the arguments 32 and false)
 * and the result is then formatted by hexToOctet().
 * @param string $ip_int IP address as an unsigned integer.
 * @return string
 */
public static function toOctet( $ip_int ) {
	$hex = wfBaseConvert( $ip_int, 10, 16, 32, false );
	return self::hexToOctet( $hex );
}
183
/**
 * Turn the hexadecimal representation of an IPv4 or IPv6 address back
 * into a readable address.
 * @param string $hex number, with "v6-" prefix if it is IPv6
 * @return string quad-dotted (IPv4) or octet notation (IPv6)
 */
public static function formatHex( $hex ) {
	// The "v6-" marker decides which formatter receives the digits
	$isIPv6 = ( substr( $hex, 0, 3 ) == 'v6-' );
	return $isIPv6
		? self::hexToOctet( substr( $hex, 3 ) )
		: self::hexToQuad( $hex );
}
196
/**
 * Converts a hexadecimal number to an IPv6 address in octet notation
 * @param string $ip_hex pure hex (no v6- prefix)
 * @return string (of format a:b:c:d:e:f:g:h)
 */
public static function hexToOctet( $ip_hex ) {
	// Upper-case and left-pad with zeros up to 32 hex digits (128 bits)
	$padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
	// Cut the first 32 digits into 8 words of 4 digits each
	$words = str_split( substr( $padded, 0, 32 ), 4 );
	$octets = implode( ':', $words );
	// Strip the leading zeros of every word, keeping at least one digit
	return preg_replace( '/(^|:)0+' . RE_IPV6_WORD . '/', '$1$2', $octets );
}
214
/**
 * Converts a hexadecimal number to an IPv4 address in quad-dotted notation
 * @param string $ip_hex pure hex
 * @return string (of format a.b.c.d)
 */
public static function hexToQuad( $ip_hex ) {
	// Upper-case and left-pad with zeros up to 8 hex digits (32 bits)
	$padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
	// Each pair of hex digits becomes one decimal quad
	$quads = array();
	for ( $offset = 0; $offset < 8; $offset += 2 ) {
		$quads[] = base_convert( substr( $padded, $offset, 2 ), 16, 10 );
	}
	return implode( '.', $quads );
}
233
/**
 * Determine if an IP address really is an IP address, and if it is public,
 * i.e. not RFC 1918 or similar.
 * IPv6 input (as recognized by isIPv6()) is delegated to isPublic6().
 * Comes from ProxyTools.php
 * @param string $ip
 * @return bool
 */
public static function isPublic( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		return self::isPublic6( $ip );
	}

	$n = self::toUnsigned( $ip );
	// A falsy number means the address could not be parsed (or is 0.0.0.0).
	// ip2long also accepts incomplete addresses, as well as some addresses
	// followed by garbage characters, so check that the number converts
	// back to exactly the string that was given.
	if ( !$n || $ip != long2ip( $n ) ) {
		return false;
	}

	// Inclusive (first, last) pairs of the non-public IPv4 ranges
	$privateRanges = array(
		array( '10.0.0.0', '10.255.255.255' ), # RFC 1918 (private)
		array( '172.16.0.0', '172.31.255.255' ), # "
		array( '192.168.0.0', '192.168.255.255' ), # "
		array( '0.0.0.0', '0.255.255.255' ), # this network
		array( '127.0.0.0', '127.255.255.255' ), # loopback
	);

	foreach ( $privateRanges as $r ) {
		list( $first, $last ) = $r;
		$start = self::toUnsigned( $first );
		$end = self::toUnsigned( $last );
		if ( $n < $start || $n > $end ) {
			continue; // outside this range, try the next one
		}
		return false;
	}
	return true;
}
276
/**
 * Determine if an IPv6 address is public, i.e. neither inside the
 * RFC 4193 unique local range (fc00::/7) nor the loopback address.
 * The comparison is done on the "v6-" prefixed hex strings from toHex().
 * @param string $ip
 * @return bool
 */
private static function isPublic6( $ip ) {
	static $privateRanges = false;
	if ( !$privateRanges ) {
		$privateRanges = array(
			// The range must start at 'fc00::'. The abbreviated 'fc::' expands
			// to 00FC:0:0:0:0:0:0:0, which would make the range swallow
			// almost every global address (e.g. 2001:db8::1).
			array( 'fc00::', 'fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' ), # RFC 4193 (local)
			array( '0:0:0:0:0:0:0:1', '0:0:0:0:0:0:0:1' ), # loopback
		);
	}
	$n = self::toHex( $ip );
	foreach ( $privateRanges as $r ) {
		$start = self::toHex( $r[0] );
		$end = self::toHex( $r[1] );
		if ( $n >= $start && $n <= $end ) {
			return false;
		}
	}
	return true;
}
301
/**
 * Return a zero-padded upper case hexadecimal representation of an IP address.
 *
 * Hexadecimal addresses are used because they can easily be extended to
 * IPv6 support. To separate the ranges, the return value from this
 * function for an IPv6 address will be prefixed with "v6-", a non-
 * hexadecimal string which sorts after the IPv4 addresses.
 *
 * @param string $ip Quad dotted/octet IP address.
 * @return string|bool hex string, or false when toUnsigned() fails on
 *  a non-IPv6 input
 */
public static function toHex( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		return 'v6-' . self::IPv6ToRawHex( $ip );
	}
	$n = self::toUnsigned( $ip );
	if ( $n === false ) {
		return $n; // pass the failure value through unchanged
	}
	return wfBaseConvert( $n, 10, 16, 8, false );
}
324
/**
 * Given an IPv6 address in octet notation, returns a pure hex string.
 * The address goes through sanitizeIP() first, then every colon-separated
 * word is left-padded with zeros to 4 characters and the words are joined.
 * @param string $ip octet ipv6 IP address.
 * @return string|null pure hex (uppercase), or null if sanitizeIP() gave
 *  an empty result
 */
private static function IPv6ToRawHex( $ip ) {
	$ip = self::sanitizeIP( $ip );
	if ( !$ip ) {
		return null;
	}
	$padded = array();
	foreach ( explode( ':', $ip ) as $word ) {
		$padded[] = str_pad( $word, 4, '0', STR_PAD_LEFT );
	}
	return implode( '', $padded );
}
341
/**
 * Given an IP address in dotted-quad/octet notation, returns an unsigned integer.
 * Like ip2long() except that it actually works and has a consistent error return value.
 * IPv6 input (as recognized by isIPv6()) is handed to toUnsigned6().
 * Comes from ProxyTools.php
 * @param string $ip Quad dotted IP address.
 * @return mixed (string/int/false)
 */
public static function toUnsigned( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		return self::toUnsigned6( $ip );
	}
	$n = ip2long( $ip );
	// A negative result is shifted into the unsigned range by adding 2^32.
	// A false from ip2long() is not "< 0", so it is returned untouched.
	return ( $n < 0 ) ? $n + pow( 2, 32 ) : $n;
}
360
/**
 * Convert an IPv6 address to a number: the raw hex from IPv6ToRawHex()
 * is passed to wfBaseConvert() (defined elsewhere) from base 16 to base 10.
 * @param string $ip octet ipv6 IP address.
 * @return mixed whatever wfBaseConvert() returns for the hex string
 */
private static function toUnsigned6( $ip ) {
	$rawHex = self::IPv6ToRawHex( $ip );
	return wfBaseConvert( $rawHex, 16, 10 );
}
364
/**
 * Convert a network specification in CIDR notation
 * to an integer network and a number of bits.
 * IPv6 input (as recognized by isIPv6()) is handed to parseCIDR6().
 * @param string $range IP with CIDR prefix
 * @return array(int or string, int) or array(false, false) on failure;
 *  for IPv4 the bit count is returned as it appeared in $range
 */
public static function parseCIDR( $range ) {
	if ( self::isIPv6( $range ) ) {
		return self::parseCIDR6( $range );
	}
	$failure = array( false, false );
	$parts = explode( '/', $range, 2 );
	if ( count( $parts ) != 2 ) {
		return $failure;
	}
	list( $network, $bits ) = $parts;
	$network = ip2long( $network );
	$usable = $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32;
	if ( !$usable ) {
		return $failure;
	}
	// Clear the host bits; a /0 network is special-cased to zero
	$network = ( $bits == 0 )
		? 0
		: ( $network & ~( ( 1 << ( 32 - $bits ) ) - 1 ) );
	# Convert to unsigned
	if ( $network < 0 ) {
		$network += pow( 2, 32 );
	}
	return array( $network, $bits );
}
397
/**
 * Given a string range in a number of formats,
 * return the start and end of the range in hexadecimal.
 *
 * Formats are:
 * 1.2.3.4/24 CIDR
 * 1.2.3.4 - 1.2.3.5 Explicit range
 * 1.2.3.4 Single IP
 *
 * 2001:0db8:85a3::7344/96 CIDR
 * 2001:0db8:85a3::7344 - 2001:0db8:85a3::7344 Explicit range
 * 2001:0db8:85a3::7344 Single IP
 *
 * IPv6 CIDR and explicit ranges are delegated to parseRange6().
 * @param string $range IP range
 * @return array(string, string) or array(false, false) if the range is invalid
 */
public static function parseRange( $range ) {
	$invalid = array( false, false );

	// CIDR notation
	if ( strpos( $range, '/' ) !== false ) {
		if ( self::isIPv6( $range ) ) {
			return self::parseRange6( $range );
		}
		list( $network, $bits ) = self::parseCIDR( $range );
		if ( $network === false ) {
			return $invalid;
		}
		// The last address has all host bits set
		$last = $network + pow( 2, ( 32 - $bits ) ) - 1;
		return array( sprintf( '%08X', $network ), sprintf( '%08X', $last ) );
	}

	// Explicit range
	if ( strpos( $range, '-' ) !== false ) {
		list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
		if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) {
			return self::parseRange6( $range );
		}
		if ( !( self::isIPv4( $start ) && self::isIPv4( $end ) ) ) {
			return $invalid;
		}
		$first = self::toUnsigned( $start );
		$last = self::toUnsigned( $end );
		if ( $first > $last ) {
			return $invalid; // reversed range
		}
		return array( sprintf( '%08X', $first ), sprintf( '%08X', $last ) );
	}

	# Single IP
	$hex = self::toHex( $range );
	if ( $hex === false ) {
		return $invalid;
	}
	return array( $hex, $hex );
}
454
/**
 * Convert a network specification in IPv6 CIDR notation to an
 * integer network and a number of bits
 * @param string $range IPv6 address with CIDR prefix
 * @return array(string, int) network as a decimal string plus the bit
 *  count; array(false, false) when there is no "/" part, array(false, 0)
 *  when the parts do not validate
 */
private static function parseCIDR6( $range ) {
	# Explode into <expanded IP,range>
	$parts = explode( '/', self::sanitizeIP( $range ), 2 );
	if ( count( $parts ) != 2 ) {
		return array( false, false );
	}
	list( $network, $bits ) = $parts;
	$network = self::IPv6ToRawHex( $network );
	if ( $network === false || !is_numeric( $bits ) || $bits < 0 || $bits > 128 ) {
		// The bit count is reported as an integer on this path too
		return array( false, 0 );
	}
	if ( $bits == 0 ) {
		return array( "0", (int)$bits );
	}
	# Native 32 bit functions WONT work here!!!
	# Convert to a padded binary number
	$binary = wfBaseConvert( $network, 16, 2, 128 );
	# Keep the first $bits digits and fill the remainder with zeros
	$masked = str_pad( substr( $binary, 0, $bits ), 128, '0', STR_PAD_RIGHT );
	# Convert back to an integer
	return array( wfBaseConvert( $masked, 2, 10 ), (int)$bits );
}
486
/**
 * Given a string range in a number of formats, return the
 * start and end of the range in hexadecimal. For IPv6.
 *
 * Formats are:
 * 2001:0db8:85a3::7344/96 CIDR
 * 2001:0db8:85a3::7344 - 2001:0db8:85a3::7344 Explicit range
 * 2001:0db8:85a3::7344 Single IP
 * @param string $range IPv6 range
 * @return array(string, string) both values carry the "v6-" prefix (see
 *  toHex()); array(false, false) if the range is invalid or reversed
 */
private static function parseRange6( $range ) {
	# Expand any IPv6 IP
	$range = IP::sanitizeIP( $range );
	// CIDR notation...
	if ( strpos( $range, '/' ) !== false ) {
		list( $network, $bits ) = self::parseCIDR6( $range );
		if ( $network === false ) {
			$start = $end = false;
		} else {
			$start = wfBaseConvert( $network, 10, 16, 32, false );
			# Turn network to binary (again)
			$end = wfBaseConvert( $network, 10, 2, 128 );
			# Truncate the last (128-$bits) bits and replace them with ones
			$end = str_pad( substr( $end, 0, $bits ), 128, '1', STR_PAD_RIGHT );
			# Convert to hex
			$end = wfBaseConvert( $end, 2, 16, 32, false );
			# see toHex() comment
			$start = "v6-$start";
			$end = "v6-$end";
		}
	// Explicit range notation...
	} elseif ( strpos( $range, '-' ) !== false ) {
		list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
		// Compare the fixed-width raw hex forms as strings. A numeric
		// comparison of 128-bit decimal strings is done in floating point
		// and cannot tell apart addresses that differ only in low bits.
		$start = self::IPv6ToRawHex( $start );
		$end = self::IPv6ToRawHex( $end );
		if ( !$start || !$end || strcmp( $start, $end ) > 0 ) {
			// Leave both as false so the check below reports failure;
			// prefixing them would turn false into the string "v6-".
			$start = $end = false;
		} else {
			# see toHex() comment
			$start = "v6-$start";
			$end = "v6-$end";
		}
	} else {
		# Single IP
		$start = $end = self::toHex( $range );
	}
	if ( $start === false || $end === false ) {
		return array( false, false );
	} else {
		return array( $start, $end );
	}
}
541
/**
 * Determine if a given IPv4/IPv6 address is in a given range.
 * The range may be anything parseRange() understands; the check compares
 * the hex form of the address against the hex bounds of the range.
 * @param $addr The address to check against the given range.
 * @param $range The range to check the given address against.
 * @return bool Whether or not the given address is in the given range;
 *  always false when the address or the range cannot be parsed.
 */
public static function isInRange( $addr, $range ) {
	$hexIP = self::toHex( $addr );
	list( $start, $end ) = self::parseRange( $range );
	// Parse failures come back as false. strcmp() would treat false as an
	// empty string, so an invalid address would match an invalid range.
	if ( !is_string( $hexIP ) || !is_string( $start ) || !is_string( $end ) ) {
		return false;
	}
	return ( strcmp( $hexIP, $start ) >= 0 &&
		strcmp( $hexIP, $end ) <= 0 );
}
554
/**
 * Convert some unusual representations of IPv4 addresses to their
 * canonical dotted quad representation.
 *
 * This currently only checks a few IPV4-to-IPv6 related cases. More
 * unusual representations may be added later.
 *
 * @param $addr something that might be an IP address
 * @return string|null $addr itself if isValid() already accepts it (IPv4
 *  or IPv6), a dotted quad for the recognized special forms, else null
 */
public static function canonicalize( $addr ) {
	if ( self::isValid( $addr ) ) {
		return $addr;
	}

	// Turn mapped addresses from ::ce:ffff:1.2.3.4 to 1.2.3.4: when both
	// ":" and "." occur, only the text after the last ":" is looked at
	// from here on.
	$candidate = $addr;
	$hasColon = strpos( $addr, ':' ) !== false;
	if ( $hasColon && strpos( $addr, '.' ) !== false ) {
		$candidate = substr( $addr, strrpos( $addr, ':' ) + 1 );
		if ( self::isIPv4( $candidate ) ) {
			return $candidate;
		}
	}

	// IPv6 loopback address
	if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $candidate ) ) {
		return '127.0.0.1';
	}

	// IPv4-mapped and IPv4-compatible IPv6 addresses, dotted quad tail
	$dottedTail = '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i';
	if ( preg_match( $dottedTail, $candidate, $m ) ) {
		return $m[1];
	}

	// Same, with the IPv4 part written as two hexadecimal words
	$hexTail = '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD . ':' . RE_IPV6_WORD . '$/i';
	if ( preg_match( $hexTail, $candidate, $m ) ) {
		$high = hexdec( $m[1] );
		$low = hexdec( $m[2] );
		return long2ip( ( $high << 16 ) + $low );
	}

	return null; // give up
}
593 }