# Creates the file ZhConversion.php used for Simplified/Traditional
# Chinese conversion. It gets the basic conversion table from the Unihan
# database, and constructs the phrase tables using phrase libraries in
# the SCIM packages. There are also special tables used for adjustment.
# Some data in the file simp2trad.manual was taken from the following
#
# Requirement: you need to set your locale to zh_CN.UTF-8 (or any
# other UTF-8 locale, I suppose)
# Fetch the Unihan database archive and unpack it so that Unihan.txt, which
# the unihan.t2s.t and unihan.s2t.t rules list as a prerequisite, exists in
# the working directory.
Unihan.txt:
	wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
	unzip -q Unihan.zip
# Download the scim-tables 0.4.3 release, copy zh/EZ.txt.in out of it into
# the working directory, then remove the unpacked tree and the tarball.
# tar's verbose listing is discarded to keep the build output short.
EZ.txt.in:
	wget http://freedesktop.org/~suzhe/sources/scim-tables-0.4.3.tar.gz
	tar zxvf scim-tables-0.4.3.tar.gz > /dev/null
	cp scim-tables-0.4.3/zh/EZ.txt.in .
	rm -rf scim-tables-0.4.3*
# Download the scim-chinese 0.4.2 release, copy data/phrase_lib.txt out of
# it into the working directory, then remove the unpacked tree and the
# tarball. tar's verbose listing is discarded to keep the build output short.
phrase_lib.txt:
	wget http://freedesktop.org/~suzhe/scim-chinese/scim-chinese-0.4.2.tar.gz
	tar zxvf scim-chinese-0.4.2.tar.gz > /dev/null
	cp scim-chinese-0.4.2/data/phrase_lib.txt .
	rm -rf scim-chinese-0.4.2*
# Compile the printutf8 helper, through which the unihan.*.t rules pipe
# their data, from its single C source file.
printutf8: printutf8.c
	gcc -o $@ $<
# Select the kSimplifiedVariant records of Unihan.txt, drop every line that
# contains a '#', strip the field name itself and pass what is left through
# ./printutf8.
unihan.t2s.t: Unihan.txt printutf8
	grep kSimplifiedVariant $< \
	  | sed -e '/#/d' -e 's/kSimplifiedVariant//' \
	  | ./printutf8 > $@
# Merge the hand-maintained table with the Unihan-derived one; manual
# entries win. tmp1 starts as a copy of unihan.t2s.t; every line of it that
# begins with a key taken from the first 10 columns of a trad2simp.manual
# line is deleted, and the manual table is then placed in front of the rest.
trad2simp.t: trad2simp.manual unihan.t2s.t
	cp unihan.t2s.t tmp1
	for I in `colrm 11 < trad2simp.manual` ; do \
	    sed "/^$$I/d" tmp1 > tmp2; \
	    mv tmp2 tmp1; \
	done
	cat trad2simp.manual tmp1 > $@
# Select the kTraditionalVariant records of Unihan.txt, drop every line that
# contains a '#', strip the field name itself and pass what is left through
# ./printutf8.
unihan.s2t.t: Unihan.txt printutf8
	grep kTraditionalVariant $< \
	  | sed -e '/#/d' -e 's/kTraditionalVariant//' \
	  | ./printutf8 > $@
# Merge the hand-maintained table with the Unihan-derived one; manual
# entries win. tmp1 starts as a copy of unihan.s2t.t; every line of it that
# begins with a key taken from the first 10 columns of a simp2trad.manual
# line is deleted, and the manual table is then placed in front of the rest.
simp2trad.t: unihan.s2t.t simp2trad.manual
	cp unihan.s2t.t tmp1
	for I in `colrm 11 < simp2trad.manual` ; do \
	    sed "/^$$I/d" tmp1 > tmp2; \
	    mv tmp2 tmp1; \
	done
	cat simp2trad.manual tmp1 > $@
# Keep the trad2simp.t lines that are at least 19 characters long and
# rewrite each into the form  "key"=>"values",  by replacing the first
# U+xxxxx tag with an opening quote, the second with "=>", deleting the
# remaining tags and turning the next '|' into the closing quote and comma.
t2s_1tomany.t: trad2simp.t
	grep -s ".\{19,\}" $< \
	  | sed -e 's/U+...../"/' \
	        -e 's/|U+...../"=>"/' \
	        -e 's/|U+.....//g' \
	        -e 's/|/",/' > $@
# Build the one-to-one traditional->simplified table.
#  1. Take the trad2simp.t lines with fewer than three '|' separators and
#     rewrite them as  "key"=>"value",  entries.
#  2. Add reversed entries from s2t_1tomany.t: for keys with two, three or
#     four values, every value except the first is emitted as
#     "value"=>"key",  (one grep/sed pair per value position).
#  3. Sort and de-duplicate, then move the result back over the target so
#     the de-duplicated list is what later rules read.
t2s_1to1.t: trad2simp.t s2t_1tomany.t
	sed "/.*|.*|.*|.*/d" trad2simp.t \
	  | sed -e 's/U+[0-9a-z][0-9a-z]*/"/' \
	        -e 's/|U+[0-9a-z][0-9a-z]*/"=>"/' \
	        -e 's/|/",/' > $@
	grep '"."=>"..",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' s2t_1tomany.t | sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> $@
	sort $@ | uniq > t
	mv t $@
# Keep the simp2trad.t lines that are at least 19 characters long and
# rewrite each into the form  "key"=>"values",  by replacing the first
# U+xxxxx tag with an opening quote, the second with "=>", deleting the
# remaining tags and turning the next '|' into the closing quote and comma.
s2t_1tomany.t: simp2trad.t
	grep -s ".\{19,\}" $< \
	  | sed -e 's/U+...../"/' \
	        -e 's/|U+...../"=>"/' \
	        -e 's/|U+.....//g' \
	        -e 's/|/",/' > $@
# Build the one-to-one simplified->traditional table.
#  1. Take the simp2trad.t lines with fewer than three '|' separators and
#     rewrite them as  "key"=>"value",  entries.
#  2. Add reversed entries from t2s_1tomany.t: for keys with two, three or
#     four values, every value except the first is emitted as
#     "value"=>"key",  (one grep/sed pair per value position).
#  3. Sort and de-duplicate, then move the result back over the target so
#     the de-duplicated list is what later rules read.
s2t_1to1.t: simp2trad.t t2s_1tomany.t
	sed "/.*|.*|.*|.*/d" simp2trad.t \
	  | sed -e 's/U+[0-9a-z][0-9a-z]*/"/' \
	        -e 's/|U+[0-9a-z][0-9a-z]*/"=>"/' \
	        -e 's/|/",/' > $@
	grep '"."=>"..",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"...",' t2s_1tomany.t | sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> $@
	grep '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> $@
	sort $@ | uniq > t
	mv t $@
# Reduce EZ.txt.in to a plain phrase list: drop the first 8 columns and all
# tabs, keep lines that start with 2 to 4 characters followed by a digit,
# then remove every digit.
ez.t: EZ.txt.in
	colrm 1 8 < EZ.txt.in \
	  | sed 's/\t//g' \
	  | grep "^.\{2,4\}[0-9]" \
	  | sed 's/[0-9]//g' > $@
# Gather every ez.t line that contains one of the characters found in the
# value lists of s2t_1tomany.t, skipping the first value of each entry.
# The backquoted pipeline turns the value lists into one character per
# line. '|| true' keeps make from stopping when the last grep of the loop
# matches nothing.
alltradphrases.t: ez.t s2t_1tomany.t
	for i in `sed 's/.*=>".//' s2t_1tomany.t | sed 's/"//g' | sed 's/,/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq`; do \
	    grep -s $$i ez.t; \
	done > $@ || true
# The alltradphrases.t lines that match ^..$ (two characters in a UTF-8
# locale), sorted with duplicates removed.
tradphrases_2.t: alltradphrases.t
	grep "^..$$" $< | sort | uniq > $@
# The alltradphrases.t lines that match ^...$ (three characters in a UTF-8
# locale), minus those that contain any entry of tradphrases_2.t.
# t3 collects the lines to discard; diff then yields the lines present only
# in the target, and that filtered list replaces the target.
# tradphrases_2.t is a prerequisite because the recipe reads it.
tradphrases_3.t: alltradphrases.t tradphrases_2.t
	grep "^...$$" alltradphrases.t | sort | uniq > $@
	for i in `cat tradphrases_2.t`; do grep $$i $@; done | sort | uniq > t3 || true
	diff t3 $@ | grep ">" | sed 's/> //' > t
	mv t $@
# The alltradphrases.t lines that match ^....$ (four characters in a UTF-8
# locale), minus those that contain any entry of tradphrases_2.t or
# tradphrases_3.t. Each pass collects the lines to discard in t3, takes the
# lines present only in the target via diff, and replaces the target with
# that filtered list before the next pass runs.
# Both shorter lists are prerequisites because the recipe reads them.
tradphrases_4.t: alltradphrases.t tradphrases_2.t tradphrases_3.t
	grep "^....$$" alltradphrases.t | sort | uniq > $@
	for i in `cat tradphrases_2.t`; do grep $$i $@; done | sort | uniq > t3 || true
	diff t3 $@ | grep ">" | sed 's/> //' > t
	mv t $@
	for i in `cat tradphrases_3.t`; do grep $$i $@; done | sort | uniq > t3 || true
	diff t3 $@ | grep ">" | sed 's/> //' > t
	mv t $@
# Combine the manual phrase list with the 2-, 3- and 4-character lists
# (sorted, de-duplicated), then keep only the lines that diff reports as
# absent from the grep output for the key characters of t2s_1tomany.t
# (the character after the opening quote of each entry). The filtered list
# replaces the target.
tradphrases.t: tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	cat tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t | sort | uniq > $@
	for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do grep $$i $@; done \
	  | diff $@ - | grep '<' | sed 's/< //' > t
	mv t $@
# Reduce phrase_lib.txt to a plain phrase list: delete tabs, digits and
# ASCII letters, then keep the lines that are 2 to 4 characters long.
ph.t: phrase_lib.txt
	sed 's/[\t0-9a-zA-Z]//g' phrase_lib.txt | grep "^.\{2,4\}$$" > $@
# Gather every ph.t line that contains one of the characters found in the
# value lists of t2s_1tomany.t, skipping the first value of each entry.
# The backquoted pipeline turns the value lists into one character per line.
# t2s_1tomany.t is a prerequisite because the recipe reads it. '|| true'
# keeps make from stopping when the last grep of the loop matches nothing,
# as the alltradphrases.t rule already does.
allsimpphrases.t: ph.t t2s_1tomany.t
	for i in `sed 's/.*=>".//' t2s_1tomany.t | sed 's/"//g' | sed 's/,/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq`; do \
	    grep $$i ph.t; \
	done > $@ || true
# The allsimpphrases.t lines that match ^..$ (two characters in a UTF-8
# locale), sorted with duplicates removed.
simpphrases_2.t: allsimpphrases.t
	grep "^..$$" $< | sort | uniq > $@
# The allsimpphrases.t lines that match ^...$ (three characters in a UTF-8
# locale), minus those that contain any entry of simpphrases_2.t.
# t3 collects the lines to discard; diff then yields the lines present only
# in the target, and that filtered list replaces the target.
# simpphrases_2.t is a prerequisite because the recipe reads it.
simpphrases_3.t: allsimpphrases.t simpphrases_2.t
	grep "^...$$" allsimpphrases.t | sort | uniq > $@
	for i in `cat simpphrases_2.t`; do grep $$i $@; done | sort | uniq > t3 || true
	diff t3 $@ | grep ">" | sed 's/> //' > t
	mv t $@
# The allsimpphrases.t lines that match ^....$ (four characters in a UTF-8
# locale), minus those that contain any entry of simpphrases_2.t or
# simpphrases_3.t. Each pass rebuilds t3 from scratch with the lines to
# discard, so nothing left over from an earlier rule leaks in. diff then
# yields the lines present only in the target, which replace the target
# before the next pass runs.
# Both shorter lists are prerequisites because the recipe reads them.
simpphrases_4.t: allsimpphrases.t simpphrases_2.t simpphrases_3.t
	grep "^....$$" allsimpphrases.t | sort | uniq > $@
	for i in `cat simpphrases_2.t`; do grep $$i $@; done | sort | uniq > t3 || true
	diff t3 $@ | grep ">" | sed 's/> //' > t
	mv t $@
	for i in `cat simpphrases_3.t`; do grep $$i $@; done | sort | uniq > t3 || true
	diff t3 $@ | grep ">" | sed 's/> //' > t
	mv t $@
# Concatenate the 2-, 3- and 4-character lists, then keep only the lines
# that diff reports as absent from the grep output for the key characters
# of t2s_1tomany.t (the character after the opening quote of each entry).
# The filtered list replaces the target.
simpphrases.t: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat simpphrases_2.t simpphrases_3.t simpphrases_4.t > $@
	for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do grep $$i $@; done \
	  | diff $@ - | grep '<' | sed 's/< //' > t
	mv t $@
# Final traditional->simplified character table: each t2s_1tomany.t entry
# cut down to its first seven characters plus a closing  ",  followed by
# the whole of t2s_1to1.t.
trad2simp1to1.t: t2s_1tomany.t t2s_1to1.t
	sed 's/\(.......\).*/\1",/' $< > $@
	cat t2s_1to1.t >> $@
# Final simplified->traditional character table: each s2t_1tomany.t entry
# cut down to its first seven characters plus a closing  ",  followed by
# the whole of s2t_1to1.t.
simp2trad1to1.t: s2t_1tomany.t s2t_1to1.t
	sed 's/\(.......\).*/\1",/' $< > $@
	cat s2t_1to1.t >> $@
# Generate a PHP script that holds the character table as $trad2simp and
# the phrase list as one string $str, runs strtr() over the string and
# echoes the result. '$$' is make's escape for a literal '$'.
trad2simp.php: trad2simp1to1.t tradphrases.t
	printf '<?php\n$$trad2simp=array(' > $@
	cat trad2simp1to1.t >> $@
	printf ');\n$$str=\n"' >> $@
	cat tradphrases.t >> $@
	printf '";\n$$t=strtr($$str, $$trad2simp);\necho $$t;\n?>' >> $@
# Generate a PHP script that holds the character table as $simp2trad and
# the phrase list as one string $str, runs strtr() over the string and
# echoes the result. '$$' is make's escape for a literal '$'.
simp2trad.php: simp2trad1to1.t simpphrases.t
	printf '<?php\n$$simp2trad=array(' > $@
	cat simp2trad1to1.t >> $@
	printf ');\n$$str=\n"' >> $@
	cat simpphrases.t >> $@
	printf '";\n$$t=strtr($$str, $$simp2trad);\necho $$t;\n?>' >> $@
# Pair the output of trad2simp.php with the phrases of tradphrases.t.
# tmp1 holds the script output wrapped as  "..." =>  and tmp2 the original
# phrases wrapped as  "...",  ; paste joins them line by line. Entries from
# simp2tradPhrases.manual (tab-separated pairs) are appended in the same
# "a"=>"b",  form.
simp2trad.phrases.t: trad2simp.php tradphrases.t simp2tradPhrases.manual
	php -f trad2simp.php | sed 's/\(.*\)/"\1" => /' > tmp1
	sed 's/\(.*\)/"\1",/' tradphrases.t > tmp2
	paste tmp1 tmp2 > $@
	sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' simp2tradPhrases.manual >> $@
# Pair the output of simp2trad.php with the phrases of simpphrases.t.
# tmp1 holds the script output wrapped as  "..." =>  and tmp2 the original
# phrases wrapped as  "...",  ; paste joins them line by line. Entries from
# trad2simpPhrases.manual (tab-separated pairs) are appended in the same
# "a"=>"b",  form.
trad2simp.phrases.t: simp2trad.php simpphrases.t trad2simpPhrases.manual
	php -f simp2trad.php | sed 's/\(.*\)/"\1" => /' > tmp1
	sed 's/\(.*\)/"\1",/' simpphrases.t > tmp2
	paste tmp1 tmp2 > $@
	sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' trad2simpPhrases.manual >> $@
# Assemble the final PHP file: $zhSimp2Trad holds the simplified->traditional
# character table followed by its phrase table, $zhTrad2Simp the reverse
# pair. The bare 'echo' lines insert a newline between sections. '$$' is
# make's escape for a literal '$'.
ZhConversion.php: simp2trad1to1.t simp2trad.phrases.t trad2simp1to1.t trad2simp.phrases.t
	printf '<?php\n$$zhSimp2Trad=array(\n' > $@
	cat simp2trad1to1.t >> $@
	echo >> $@
	cat simp2trad.phrases.t >> $@
	echo ');' >> $@
	echo >> $@
	printf '$$zhTrad2Simp=array(\n' >> $@
	cat trad2simp1to1.t >> $@
	echo >> $@
	cat trad2simp.phrases.t >> $@
	printf ');\n?>' >> $@
# Remove the generated PHP files and all intermediate files. 'clean' names
# no file, so it is declared phony. The scratch file 't' written by several
# recipes is removed as well, since '*.t' does not match it.
.PHONY: clean
clean:
	rm -f ZhConversion.php tmp1 tmp2 tmp3 t t3 *.t trad2simp.php simp2trad.php