# Creating the file ZhConversion.php used for Simplified/Traditional
# Chinese conversion. It gets the basic conversion table from the Unihan
# database, and constructs the phrase tables using phrase libraries in
# the SCIM packages. There are also special tables used for adjustment.
# Some data in the file simp2trad.manual was taken from the following
# Requirement: you need to set your locale to zh_CN.UTF-8 (or any
# other UTF-8 locale, I suppose)
# Default goal: build ZhConversion.php and the two *.notsure phrase lists.
# Declared phony so that a stray file named "all" cannot suppress the build.
.PHONY: all
all: ZhConversion.php tradphrases.notsure simpphrases.notsure
# Download the Unihan database archive from unicode.org.
# NOTE(review): neither the target line nor an extraction step is visible in
# this copy of the file. `Unihan.txt:` and the `unzip` command below are
# reconstructed from the Unihan.txt prerequisite of unihan.t2s.t and
# unihan.s2t.t -- confirm against the upstream Makefile.
Unihan.txt:
	wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
	unzip -q Unihan.zip
# Download the scim-tables 0.4.3 tarball, copy zh/EZ.txt.in out of it into
# the current directory, then delete the tarball and the unpacked tree.
# NOTE(review): the target line is not visible in this copy of the file;
# `EZ.txt.in:` is inferred from the file copied here and from the
# prerequisites of tphrase.t -- confirm.
EZ.txt.in:
	wget http://freedesktop.org/~suzhe/sources/scim-tables-0.4.3.tar.gz
	tar zxvf scim-tables-0.4.3.tar.gz > /dev/null
	cp scim-tables-0.4.3/zh/EZ.txt.in .
	rm -rf scim-tables-0.4.3*
# Download the scim-chinese 0.4.2 tarball, copy data/phrase_lib.txt out of it
# into the current directory, then delete the tarball and the unpacked tree.
# NOTE(review): the target line is not visible in this copy of the file;
# `phrase_lib.txt:` is inferred from the file copied here and from the
# recipe that builds ph.t -- confirm.
phrase_lib.txt:
	wget http://freedesktop.org/~suzhe/scim-chinese/scim-chinese-0.4.2.tar.gz
	tar zxvf scim-chinese-0.4.2.tar.gz > /dev/null
	cp scim-chinese-0.4.2/data/phrase_lib.txt .
	rm -rf scim-chinese-0.4.2*
# Download the libtabe 0.2.3 tarball and copy tsi-src/tsi.src out of it into
# the current directory.
# NOTE(review): the target line is not visible in this copy of the file;
# `tsi.src:` is inferred from the file copied here and from the prerequisites
# of tphrase.t -- confirm. No cleanup command is visible for this download.
tsi.src:
	wget http://unc.dl.sourceforge.net/sourceforge/libtabe/libtabe-0.2.3.tgz
	tar zxvf libtabe-0.2.3.tgz > /dev/null
	cp libtabe/tsi-src/tsi.src .
# Compile the printutf8 helper from its single C source file.
# $@ is the target (printutf8), $< the first prerequisite (printutf8.c).
printutf8: printutf8.c
	gcc -o $@ $<
# Extract the kSimplifiedVariant records from Unihan.txt, drop lines that
# contain '#', strip the field name, and pass the rest through ./printutf8.
unihan.t2s.t: Unihan.txt printutf8
	grep kSimplifiedVariant $< \
	  | sed '/#/d' \
	  | sed 's/kSimplifiedVariant//' \
	  | ./printutf8 > $@
# Merge the manual traditional->simplified table with the Unihan-derived one.
# tmp1 starts as a copy of unihan.t2s.t; every tmp1 line that begins with the
# first 10 columns of a trad2simp.manual line is deleted, and the manual
# table is then placed in front of what is left.
# The initial copy is required: without it tmp1 is read before anything
# creates it and the unihan.t2s.t prerequisite would never be used.
trad2simp.t: trad2simp.manual unihan.t2s.t
	cp unihan.t2s.t tmp1
	for I in `colrm 11 < trad2simp.manual` ; do sed "/^$$I/d" tmp1 > tmp2; mv tmp2 tmp1; done
	cat trad2simp.manual tmp1 > trad2simp.t
# Extract the kTraditionalVariant records from Unihan.txt, drop lines that
# contain '#', strip the field name, and pass the rest through ./printutf8.
unihan.s2t.t: Unihan.txt printutf8
	grep kTraditionalVariant $< \
	  | sed '/#/d' \
	  | sed 's/kTraditionalVariant//' \
	  | ./printutf8 > $@
# Merge the manual simplified->traditional table with the Unihan-derived one.
# tmp1 starts as a copy of unihan.s2t.t; every tmp1 line that begins with the
# first 10 columns of a simp2trad.manual line is deleted, and the manual
# table is then placed in front of what is left.
# The initial copy is required: without it tmp1 is read before anything
# creates it and the unihan.s2t.t prerequisite would never be used.
simp2trad.t: unihan.s2t.t simp2trad.manual
	cp unihan.s2t.t tmp1
	for I in `colrm 11 < simp2trad.manual` ; do sed "/^$$I/d" tmp1 > tmp2; mv tmp2 tmp1; done
	cat simp2trad.manual tmp1 > simp2trad.t
# Select the trad2simp.t lines that are at least 19 characters long and
# rewrite them from the U+xxxxx|... form into PHP `"x"=>"yz...",` entries.
t2s_1tomany.t: trad2simp.t
	grep -s ".\{19,\}" $< \
	  | sed 's/U+...../"/' \
	  | sed 's/|U+...../"=>"/' \
	  | sed 's/|U+.....//g' \
	  | sed 's/|/",/' > $@
# Build the one-to-one traditional->simplified entries.
#  1. Take the trad2simp.t lines that do not contain three '|' separators and
#     rewrite them as PHP `"x"=>"y",` entries.
#  2. For every s2t_1tomany.t entry whose right-hand side has 2, 3 or 4
#     characters, append a reversed entry mapping each character after the
#     first back to the left-hand character.
#  3. Sort and de-duplicate. The result goes through scratch file t and is
#     moved back over the target; otherwise the target would keep the
#     unsorted list with duplicates.
t2s_1to1.t: trad2simp.t s2t_1tomany.t
	sed "/.*|.*|.*|.*/d" trad2simp.t | sed 's/U+[0-9a-z][0-9a-z]*/"/' | sed 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | sed 's/|/",/' > t2s_1to1.t
	grep '"."=>"..",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
	grep '"."=>"...",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> t2s_1to1.t
	grep '"."=>"...",' s2t_1tomany.t | sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
	grep '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> t2s_1to1.t
	grep '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> t2s_1to1.t
	grep '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
	sort t2s_1to1.t | uniq > t
	mv t t2s_1to1.t
# Select the simp2trad.t lines that are at least 19 characters long and
# rewrite them from the U+xxxxx|... form into PHP `"x"=>"yz...",` entries.
s2t_1tomany.t: simp2trad.t
	grep -s ".\{19,\}" $< \
	  | sed 's/U+...../"/' \
	  | sed 's/|U+...../"=>"/' \
	  | sed 's/|U+.....//g' \
	  | sed 's/|/",/' > $@
# Build the one-to-one simplified->traditional entries.
#  1. Take the simp2trad.t lines that do not contain three '|' separators and
#     rewrite them as PHP `"x"=>"y",` entries.
#  2. For every t2s_1tomany.t entry whose right-hand side has 2, 3 or 4
#     characters, append a reversed entry mapping each character after the
#     first back to the left-hand character.
#  3. Sort and de-duplicate. The result goes through scratch file t and is
#     moved back over the target; otherwise the target would keep the
#     unsorted list with duplicates.
s2t_1to1.t: simp2trad.t t2s_1tomany.t
	sed "/.*|.*|.*|.*/d" simp2trad.t | sed 's/U+[0-9a-z][0-9a-z]*/"/' | sed 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | sed 's/|/",/' > s2t_1to1.t
	grep '"."=>"..",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
	grep '"."=>"...",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> s2t_1to1.t
	grep '"."=>"...",' t2s_1tomany.t | sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
	grep '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> s2t_1to1.t
	grep '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> s2t_1to1.t
	grep '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
	sort s2t_1to1.t | uniq > t
	mv t s2t_1to1.t
# Collect candidate traditional phrases from two sources into scratch file t,
# then sort and de-duplicate them into tphrase.t.
#  - EZ.txt.in: drop columns 1-8 and all tabs, keep lines where 2-4
#    characters are followed by a digit, then strip the digits.
#  - tsi.src: convert from Big5 to UTF-8 (iconv -c discards unconvertible
#    characters), cut everything from " <digit>" onward, remove '#' and
#    spaces, and keep lines of 2-4 characters.
# The second grep is anchored with $$ so its upper bound of 4 takes effect;
# without the anchor any line of two or more characters matched.
tphrase.t: EZ.txt.in tsi.src
	colrm 1 8 < EZ.txt.in | sed 's/\t//g' | grep "^.\{2,4\}[0-9]" | sed 's/[0-9]//g' > t
	iconv -c -f big5 -t utf8 tsi.src | sed 's/ [0-9].*//g' | sed 's/[# ]//g' | grep "^.\{2,4\}$$" >> t
	sort t | uniq > tphrase.t
# For each distinct character taken from the right-hand sides of
# s2t_1tomany.t (the first character after `=>"` is skipped), collect the
# tphrase.t lines that contain it. `|| true` keeps the rule from failing
# when the last grep finds nothing.
alltradphrases.t: tphrase.t s2t_1tomany.t
	for i in `sed 's/.*=>".//' s2t_1tomany.t | sed 's/"//g' | sed 's/,/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq`; do \
	  grep -s $$i tphrase.t; \
	done > $@ || true
# Two-character phrases from alltradphrases.t, sorted and de-duplicated.
tradphrases_2.t: alltradphrases.t
	grep "^..$$" $< | sort | uniq > $@
# Three-character phrases from alltradphrases.t, minus those that contain any
# two-character phrase from tradphrases_2.t.
# t3 holds the phrases that contain a two-character phrase; diff keeps the
# lines found only in tradphrases_3.t. The filtered list is moved back over
# the target, which would otherwise keep the unfiltered list.
# tradphrases_2.t is a prerequisite because the recipe reads it.
tradphrases_3.t: alltradphrases.t tradphrases_2.t
	cat alltradphrases.t | grep "^...$$" | sort | uniq > tradphrases_3.t
	for i in `cat tradphrases_2.t`; do grep $$i tradphrases_3.t; done | sort | uniq > t3 || true
	diff t3 tradphrases_3.t | grep ">" | sed 's/> //' > t
	mv t tradphrases_3.t
# Four-character phrases from alltradphrases.t, minus those that contain any
# phrase from tradphrases_2.t or tradphrases_3.t.
# Each pass collects the containing phrases in t3, uses diff to keep the
# lines found only in tradphrases_4.t, and moves the filtered list back over
# the target so the next pass (and the final result) sees it.
# tradphrases_2.t and tradphrases_3.t are prerequisites because the recipe
# reads them.
tradphrases_4.t: alltradphrases.t tradphrases_2.t tradphrases_3.t
	cat alltradphrases.t | grep "^....$$" | sort | uniq > tradphrases_4.t
	for i in `cat tradphrases_2.t`; do grep $$i tradphrases_4.t; done | sort | uniq > t3 || true
	diff t3 tradphrases_4.t | grep ">" | sed 's/> //' > t
	mv t tradphrases_4.t
	for i in `cat tradphrases_3.t`; do grep $$i tradphrases_4.t; done | sort | uniq > t3 || true
	diff t3 tradphrases_4.t | grep ">" | sed 's/> //' > t
	mv t tradphrases_4.t
# Combine the manual phrase list with the 2-, 3- and 4-character lists, then
# keep only the lines that diff reports as present in tradphrases.t but
# absent from the grep output for the left-hand characters of t2s_1tomany.t.
# The filtered list is moved back over the target, which would otherwise
# keep the unfiltered list.
tradphrases.t: tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	cat tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t | sort | uniq > tradphrases.t
	for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do grep $$i tradphrases.t; done | diff tradphrases.t - | grep '<' | sed 's/< //' > t
	mv t tradphrases.t
# Merge the 2-, 3- and 4-character phrase lists into scratch file t, grep it
# for each left-hand character of t2s_1tomany.t, and write to the target the
# lines that diff marks with '>' (present in the grep output side).
tradphrases.notsure: tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
	sort tradphrases_2.t tradphrases_3.t tradphrases_4.t | uniq > t
	for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do \
	  grep $$i t; \
	done | diff t - | grep '>' | sed 's/> //' > $@
# Strip tabs, digits and ASCII letters from phrase_lib.txt and keep the lines
# that are 2-4 characters long.
# NOTE(review): the target line is not visible in this copy of the file;
# `ph.t: phrase_lib.txt` is inferred from the recipe's input and output
# files -- confirm.
ph.t: phrase_lib.txt
	sed 's/[\t0-9a-zA-Z]//g' phrase_lib.txt | grep "^.\{2,4\}$$" > ph.t
# For each distinct character taken from the right-hand sides of
# t2s_1tomany.t (the first character after `=>"` is skipped), append the
# ph.t lines that contain it to a freshly emptied allsimpphrases.t.
# `|| true` is needed because the loop's exit status is that of the last
# grep, which is non-zero when the last character matches nothing; the
# matching rule for alltradphrases.t already guards against this.
# t2s_1tomany.t is a prerequisite because the recipe reads it.
allsimpphrases.t: ph.t t2s_1tomany.t
	rm -f allsimpphrases.t
	for i in `cat t2s_1tomany.t | sed 's/.*=>".//' | sed 's/"//g' | sed 's/,/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq `; do grep $$i ph.t >> allsimpphrases.t; done || true
# Two-character phrases from allsimpphrases.t, sorted and de-duplicated.
simpphrases_2.t: allsimpphrases.t
	grep "^..$$" $< | sort | uniq > $@
# Three-character phrases from allsimpphrases.t, minus those that contain any
# two-character phrase from simpphrases_2.t.
# t3 holds the phrases that contain a two-character phrase; diff keeps the
# lines found only in simpphrases_3.t. The filtered list is moved back over
# the target, which would otherwise keep the unfiltered list.
# simpphrases_2.t is a prerequisite because the recipe reads it.
simpphrases_3.t: allsimpphrases.t simpphrases_2.t
	cat allsimpphrases.t | grep "^...$$" | sort | uniq > simpphrases_3.t
	for i in `cat simpphrases_2.t`; do grep $$i simpphrases_3.t; done | sort | uniq > t3 || true
	diff t3 simpphrases_3.t | grep ">" | sed 's/> //' > t
	mv t simpphrases_3.t
# Four-character phrases from allsimpphrases.t, minus those that contain any
# phrase from simpphrases_2.t or simpphrases_3.t.
# Each pass collects the containing phrases in t3, uses diff to keep the
# lines found only in simpphrases_4.t, and moves the filtered list back over
# the target so the next pass (and the final result) sees it.
# The first pass writes its matches straight to t3, as the tradphrases_4.t
# rule does; appending them to t left t3 holding whatever an earlier rule
# had put there.
# simpphrases_2.t and simpphrases_3.t are prerequisites because the recipe
# reads them.
simpphrases_4.t: allsimpphrases.t simpphrases_2.t simpphrases_3.t
	cat allsimpphrases.t | grep "^....$$" | sort | uniq > simpphrases_4.t
	for i in `cat simpphrases_2.t`; do grep $$i simpphrases_4.t; done | sort | uniq > t3 || true
	diff t3 simpphrases_4.t | grep ">" | sed 's/> //' > t
	mv t simpphrases_4.t
	for i in `cat simpphrases_3.t`; do grep $$i simpphrases_4.t; done | sort | uniq > t3 || true
	diff t3 simpphrases_4.t | grep ">" | sed 's/> //' > t
	mv t simpphrases_4.t
# Concatenate the 2-, 3- and 4-character simplified phrase lists, then keep
# only the lines that diff reports as present in simpphrases.t but absent
# from the grep output for the left-hand characters of t2s_1tomany.t.
# The filtered list is moved back over the target, which would otherwise
# keep the unfiltered list.
simpphrases.t: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat simpphrases_2.t simpphrases_3.t simpphrases_4.t > simpphrases.t
	for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do grep $$i simpphrases.t; done | diff simpphrases.t - | grep '<' | sed 's/< //' > t
	mv t simpphrases.t
# Concatenate the 2-, 3- and 4-character simplified phrase lists into scratch
# file t, grep it for each left-hand character of t2s_1tomany.t, and write
# to the target the lines that diff marks with '>'.
simpphrases.notsure: simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
	cat simpphrases_2.t simpphrases_3.t simpphrases_4.t > t
	for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do \
	  grep $$i t; \
	done | diff t - | grep '>' | sed 's/> //' > $@
# Cut every t2s_1tomany.t entry down to its first 7 characters plus `",`,
# then append the genuine one-to-one entries from t2s_1to1.t.
trad2simp1to1.t: t2s_1tomany.t t2s_1to1.t
	sed 's/\(.......\).*/\1",/' $< > $@
	cat t2s_1to1.t >> $@
# Cut every s2t_1tomany.t entry down to its first 7 characters plus `",`,
# then append the genuine one-to-one entries from s2t_1to1.t.
simp2trad1to1.t: s2t_1tomany.t s2t_1to1.t
	sed 's/\(.......\).*/\1",/' $< > $@
	cat s2t_1to1.t >> $@
# Generate a PHP script that loads trad2simp1to1.t as the $trad2simp array,
# holds the contents of tradphrases.t in $str, and echoes
# strtr($str, $trad2simp).
trad2simp.php: trad2simp1to1.t tradphrases.t
	printf '<?php\n$$trad2simp=array(' > $@
	cat trad2simp1to1.t >> $@
	printf ');\n$$str=\n"' >> $@
	cat tradphrases.t >> $@
	printf '";\n$$t=strtr($$str, $$trad2simp);\necho $$t;\n?>' >> $@
# Generate a PHP script that loads simp2trad1to1.t as the $simp2trad array,
# holds the contents of simpphrases.t in $str, and echoes
# strtr($str, $simp2trad).
simp2trad.php: simp2trad1to1.t simpphrases.t
	printf '<?php\n$$simp2trad=array(' > $@
	cat simp2trad1to1.t >> $@
	printf ');\n$$str=\n"' >> $@
	cat simpphrases.t >> $@
	printf '";\n$$t=strtr($$str, $$simp2trad);\necho $$t;\n?>' >> $@
# Build the simplified->traditional phrase entries.
# tmp1 holds each line printed by trad2simp.php as `"..." => `, tmp2 holds
# each tradphrases.t line as `"...",`; paste joins them line by line.
# The tab-separated pairs in toTW.manual are appended as `"a"=>"b",`.
simp2trad.phrases.t: trad2simp.php tradphrases.t toTW.manual
	php -f trad2simp.php | sed 's/\(.*\)/"\1" => /' > tmp1
	sed 's/\(.*\)/"\1",/' tradphrases.t > tmp2
	paste tmp1 tmp2 > $@
	sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' toTW.manual >> $@
# Build the traditional->simplified phrase entries.
# tmp1 holds each line printed by simp2trad.php as `"..." => `, tmp2 holds
# each simpphrases.t line as `"...",`; paste joins them line by line.
# The tab-separated pairs in toCN.manual are appended as `"a"=>"b",`.
trad2simp.phrases.t: simp2trad.php simpphrases.t toCN.manual
	php -f simp2trad.php | sed 's/\(.*\)/"\1" => /' > tmp1
	sed 's/\(.*\)/"\1",/' simpphrases.t > tmp2
	paste tmp1 tmp2 > $@
	sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' toCN.manual >> $@
# Assemble the final ZhConversion.php: a PHP doc-comment header followed by
# four arrays.
#   $zh2TW - simp2trad1to1.t followed by simp2trad.phrases.t
#   $zh2CN - trad2simp1to1.t followed by trad2simp.phrases.t
#   $zh2HK - tab-separated pairs from toHK.manual
#   $zh2SG - tab-separated pairs from toSG.manual
# The bare `echo` commands only add newlines between the sections.
ZhConversion.php: simp2trad1to1.t simp2trad.phrases.t trad2simp1to1.t trad2simp.phrases.t toHK.manual toSG.manual
	printf '<?php\n/**\n * Simplified/Traditional Chinese conversion tables\n' > $@
	printf ' *\n * Automatically generated using code and data in includes/zhtable/\n' >> $@
	printf ' * Do not modify directly! \n *\n * @package MediaWiki\n*/\n\n' >> $@
	printf '$$zh2TW=array(\n' >> $@
	cat simp2trad1to1.t >> $@
	echo >> $@
	cat simp2trad.phrases.t >> $@
	echo >> $@
	echo ');' >> $@
	echo >> $@
	echo >> $@
	printf '$$zh2CN=array(\n' >> $@
	cat trad2simp1to1.t >> $@
	echo >> $@
	cat trad2simp.phrases.t >> $@
	echo >> $@
	printf ');' >> $@
	echo >> $@
	echo >> $@
	printf '$$zh2HK=array(\n' >> $@
	sed 's/\(.*\)\t\(.*\)/"\1" => "\2",/' toHK.manual >> $@
	echo >> $@
	printf ');' >> $@
	echo >> $@
	echo >> $@
	printf '$$zh2SG=array(\n' >> $@
	sed 's/\(.*\)\t\(.*\)/"\1" => "\2",/' toSG.manual >> $@
	echo >> $@
	printf ');' >> $@
	echo >> $@
	printf '?>' >> $@
# Remove the generated conversion table, the scratch files and every *.t
# intermediate.
# NOTE(review): the target line is not visible in this copy of the file;
# `clean:` is inferred from the nature of the command -- confirm.
# The scratch file `t` is added to the list because several recipes write it
# and leave it behind. Declared phony so that a file named "clean" cannot
# disable the rule.
.PHONY: clean
clean:
	rm -f ZhConversion.php t tmp1 tmp2 tmp3 t3 *.t trad2simp.php simp2trad.php