don't just assume we get a valid title object
[lhc/web/wiklou.git] / includes / zhtable / printutf8.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 /*
5 Unicode UTF8
6 0x00000000 - 0x0000007F: 0xxxxxxx
7 0x00000080 - 0x000007FF: 110xxx xx 10xx xxxx
8 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxx xx 10xx xxxx
9 0x00010000 - 0x001FFFFF: 11110x xx 10xx xxxx 10xxxx xx 10xx xxxx
10 0x00200000 - 0x03FFFFFF: 111110xx 10xxxx xx 10xx xxxx 10xxxx xx 10xx xxxx
11 0x04000000 - 0x7FFFFFFF: 1111110x 10xx xxxx 10xxxx xx 10xx xxxx 10xxxx xx 10xx xxxx
12
13 0000 0 1001 9
14 0001 1 1010 A
15 0010 2 1011 B
16 0011 3 1100 C
17 0100 4 1101 D
18 0101 5 1110 E
19 0110 6 1111 F
20 0111 7
21 1000 8
22 */
/*
 * Write the UTF-8 encoding of code point u to stdout.
 *
 * Follows the 1- to 6-byte scheme tabulated at the top of this file, so
 * values up to 0x7FFFFFFF are encoded (not just the range up to U+10FFFF).
 * No validation is performed: surrogates and other non-characters are
 * encoded like any other value.
 *
 * Values below 0x80 -- including negative ones -- are written as the single
 * byte (unsigned char)u.  Values above 0x7FFFFFFF fall into the 6-byte form
 * with their high bits discarded.
 */
void printUTF8(long long u) {
    static const unsigned char lead_prefix[] = {
        0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
    };
    int ncont;  /* number of 10xxxxxx continuation bytes that follow the lead */
    int shift;

    if (u < 0x80) {
        putchar((unsigned char)u);
        return;
    }

    if (u < 0x800) {
        ncont = 1;
    } else if (u < 0x10000) {
        ncont = 2;
    } else if (u < 0x200000) {
        ncont = 3;
    } else if (u < 0x4000000) {
        ncont = 4;
    } else {
        ncont = 5;
    }

    /*
     * Lead byte: the length prefix plus the top payload bits.  Each extra
     * continuation byte costs the lead byte one payload bit, hence the
     * mask 0x3f >> ncont (0x1f, 0x0f, 0x07, 0x03, 0x01).
     */
    putchar((int)(lead_prefix[ncont] | ((u >> (6 * ncont)) & (0x3f >> ncont))));

    /*
     * Continuation bytes, most significant first, 6 payload bits each.
     * Deriving every byte from the same shift/mask keeps all lengths
     * consistent (the hand-written 5-byte case used to mask with 0xfc00
     * instead of 0xfc0 and emitted a corrupt fourth byte).
     */
    for (shift = 6 * (ncont - 1); shift >= 0; shift -= 6) {
        putchar((int)(0x80 | ((u >> shift) & 0x3f)));
    }
}
79
/*
 * Filter: read lines from stdin and, for every "U+" marker found in a line,
 * print "U+<hex>" followed by the UTF-8 encoding of that value (via
 * printUTF8) and a '|' separator.  One output line is written per input
 * line, even when the input line contains no markers.
 *
 * The hex digits after "U+" are parsed with strtoll(..., 16); if none are
 * present the value is 0.
 */
int main() {
    char buf[1024];

    while (fgets(buf, sizeof buf, stdin)) {
        size_t len = strlen(buf);   /* computed once per line, not per character */
        size_t i;

        for (i = 0; i < len; i++) {
            /* buf[i + 1] is at worst the terminating NUL, so this stays in bounds. */
            if (buf[i] == 'U' && buf[i + 1] == '+') {
                long long n1 = strtoll(buf + i + 2, NULL, 16);

                /* %llx matches the 64-bit argument; plain %x with a long long is undefined. */
                printf("U+%05llx", (unsigned long long)n1);
                printUTF8(n1);
                printf("|");
            }
        }
        printf("\n");
    }
    return 0;
}
99