Load form C data on demand; if we are dealing in all-ASCII text we can save some...
[lhc/web/wiklou.git] / includes / normal / UtfNormalUtil.php
1 <?php
2 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21 * Some of these functions are adapted from places in MediaWiki.
22 * Should probably merge them for consistency.
23 *
24 * @package MediaWiki
25 */
26
/**
 * Encode a single Unicode code point number as a UTF-8 byte string.
 *
 * Values below 0x80 produce one byte, below 0x800 two bytes, below 0x10000
 * three bytes and below 0x110000 four bytes. Any larger value terminates the
 * script through die(). No other validation is performed: values below 0x80
 * (including negative ones) are handed straight to chr().
 *
 * @param int $codepoint Code point number to encode
 * @return string The UTF-8 byte sequence
 */
function codepointToUtf8( $codepoint ) {
	if ( $codepoint < 0x80 ) {
		# Single-byte form: the byte value is the code point itself.
		return chr( $codepoint );
	}

	# Pick the number of continuation bytes together with the marker bits
	# and payload mask of the lead byte.
	if ( $codepoint < 0x800 ) {
		$trailing = 1;
		$leadMarker = 0xc0;
		$leadMask = 0x3f;
	} elseif ( $codepoint < 0x10000 ) {
		$trailing = 2;
		$leadMarker = 0xe0;
		$leadMask = 0x0f;
	} elseif ( $codepoint < 0x110000 ) {
		$trailing = 3;
		$leadMarker = 0xf0;
		$leadMask = 0x07;
	} else {
		die("Asked for code outside of range ($codepoint)\n");
	}

	# Lead byte: the highest payload bits under the length marker.
	$bytes = chr( ( ( $codepoint >> ( 6 * $trailing ) ) & $leadMask ) | $leadMarker );

	# Continuation bytes: six payload bits each, high to low, marked 10xxxxxx.
	for ( $shift = 6 * ( $trailing - 1 ); $shift >= 0; $shift -= 6 ) {
		$bytes .= chr( ( ( $codepoint >> $shift ) & 0x3f ) | 0x80 );
	}

	return $bytes;
}
42
/**
 * Convert a space-separated sequence of hexadecimal code point numbers
 * into the concatenation of their UTF-8 encodings.
 *
 * The input is split on single space characters; each piece is passed
 * through hexdec() and then codepointToUtf8(), in order.
 *
 * @param string $sequence Hex numbers separated by single spaces (for example "0041 0301")
 * @return string UTF-8 encoding of all listed code points, in input order
 */
function hexSequenceToUtf8( $sequence ) {
	$encoded = array();
	foreach ( explode( ' ', $sequence ) as $token ) {
		$encoded[] = codepointToUtf8( hexdec( $token ) );
	}
	return implode( '', $encoded );
}
51
/**
 * Decode a string holding exactly one UTF-8 encoded character into its
 * code point number.
 *
 * The expected byte length is taken from the number of leading 1 bits in
 * the first byte. Continuation bytes are not checked for the 10xxxxxx
 * marker; only their low six bits are used.
 *
 * @param string $char Byte string containing a single UTF-8 sequence
 * @return int|false The code point number, or false if the string is empty
 *  or its length differs from the length announced by the first byte
 */
function utf8ToCodepoint( $char ) {
	# An empty string has no lead byte to inspect.
	if ( $char === '' ) {
		return false;
	}

	# Find the length: count the leading 1 bits of the first byte by
	# shifting it left until bit 7 is clear.
	$z = ord( $char[0] );
	if ( $z & 0x80 ) {
		$length = 0;
		while ( $z & 0x80 ) {
			$length++;
			$z <<= 1;
		}
	} else {
		$length = 1;
	}

	if ( $length != strlen( $char ) ) {
		return false;
	}
	if ( $length == 1 ) {
		return ord( $char );
	}

	# Mask off the length-determining bits and shift back to the original location
	$z &= 0xff;
	$z >>= $length;

	# Add in the free bits from subsequent bytes
	for ( $i = 1; $i < $length; $i++ ) {
		$z <<= 6;
		$z |= ord( $char[$i] ) & 0x3f;
	}

	# The accumulated value is the code point number
	return $z;
}
85
/**
 * Put a backslash in front of every backslash and every single quote
 * in the given string.
 *
 * NOTE(review): presumably meant for embedding the result inside a
 * single-quoted PHP string literal; confirm against callers.
 *
 * @param string $string Text to escape
 * @return string The text with \ replaced by \\ and ' replaced by \'
 */
function escapeSingleString( $string ) {
	# strtr() with an array makes one pass over the input, so backslashes
	# inserted by one replacement are never escaped a second time.
	$replacements = array(
		'\\' => '\\\\',
		'\'' => '\\\'',
	);
	return strtr( $string, $replacements );
}
93
94 ?>