From 52f656840e2d445bba4e0adfe4a987cce3861c01 Mon Sep 17 00:00:00 2001 From: Shinjiman Date: Sat, 24 May 2008 12:55:06 +0000 Subject: [PATCH] Update the Chinese conversion tables --- includes/ZhConversion.php | 115 +++++++++++++++++++---------------- includes/zhtable/Makefile | 51 ++++++++++++---- includes/zhtable/toCN.manual | 1 - includes/zhtable/toTW.manual | 1 - 4 files changed, 101 insertions(+), 67 deletions(-) diff --git a/includes/ZhConversion.php b/includes/ZhConversion.php index 62bebb4eae..ca2af286ab 100644 --- a/includes/ZhConversion.php +++ b/includes/ZhConversion.php @@ -7298,48 +7298,94 @@ $zh2Hans = array( "龕"=>"龛", "龜"=>"龟", +"一畫" => "一画", +"上畫" => "上画", +"書畫" => "书画", +"作畫" => "作画", +"入畫" => "入画", "幾畫" => "几画", +"刻畫" => "刻画", +"動畫" => "动画", +"勾畫" => "勾画", "賣畫" => "卖画", "滷鹼" => "卤碱", "原畫" => "原画", "口鹼" => "口碱", "古畫" => "古画", "名畫" => "名画", +"國畫" => "国画", +"圖畫" => "图画", +"壁畫" => "壁画", "奇畫" => "奇画", "如畫" => "如画", +"字畫" => "字画", +"年畫" => "年画", "弱鹼" => "弱碱", "彩畫" => "彩画", "所畫" => "所画", "扉畫" => "扉画", +"指畫" => "指画", +"描畫" => "描画", +"插畫" => "插画", "教畫" => "教画", +"春畫" => "春画", "水鹼" => "水碱", +"油畫" => "油画", "洋鹼" => "洋碱", +"塗畫" => "涂画", +"漫畫" => "漫画", "炭畫" => "炭画", +"點畫" => "点画", +"煙鹼" => "烟碱", +"燒鹼" => "烧碱", +"版畫" => "版画", "畫一" => "画一", "畫上" => "画上", "畫下" => "画下", "畫中" => "画中", +"畫了" => "画了", "畫供" => "画供", +"畫像" => "画像", "畫兒" => "画儿", "畫具" => "画具", +"畫冊" => "画册", "畫出" => "画出", +"畫刊" => "画刊", +"畫匠" => "画匠", +"畫捲" => "画卷", "畫史" => "画史", "畫品" => "画品", "畫商" => "画商", +"畫圖" => "画图", "畫圈" => "画圈", +"畫壇" => "画坛", "畫境" => "画境", +"畫外" => "画外", +"畫室" => "画室", +"畫家" => "画家", +"畫展" => "画展", "畫工" => "画工", +"畫布" => "画布", "畫帖" => "画帖", "畫幅" => "画幅", +"畫廊" => "画廊", "畫意" => "画意", "畫成" => "画成", +"畫報" => "画报", +"畫押" => "画押", "畫景" => "画景", "畫本" => "画本", "畫架" => "画架", "畫框" => "画框", "畫法" => "画法", +"畫片" => "画片", "畫王" => "画王", +"畫畫" => "画画", "畫界" => "画界", +"畫皮" => "画皮", +"畫眉" => "画眉", +"畫稿" => "画稿", +"畫筆" => "画笔", "畫符" => "画符", "畫紙" => "画纸", "畫線" => "画线", @@ -7352,16 +7398,26 @@ 
$zh2Hans = array( "畫質" => "画质", "畫貼" => "画贴", "畫軸" => "画轴", +"畫院" => "画院", +"畫面" => "画面", "畫頁" => "画页", "鹽鹼" => "盐碱", +"硝鹼" => "硝碱", "鹼 " => "碱 ", +"鹼化" => "碱化", +"鹼場" => "碱场", "鹼基" => "碱基", "鹼度" => "碱度", +"鹼性" => "碱性", "鹼水" => "碱水", "鹼熔" => "碱熔", +"鹼類" => "碱类", "磁畫" => "磁画", +"筆畫" => "笔画", "策畫" => "策画", +"純鹼" => "纯碱", "組畫" => "组画", +"繪畫" => "绘画", "絹畫" => "绢画", "耐鹼" => "耐碱", "肉鹼" => "肉碱", @@ -7370,84 +7426,37 @@ $zh2Hans = array( "西畫" => "西画", "貼畫" => "贴画", "返鹼" => "返碱", +"酸鹼" => "酸碱", "鍾鍛" => "锺锻", "鍛鍾" => "锻锺", "雕畫" => "雕画", "鯰 " => "鲶 ", +"鯰魚" => "鲶鱼", "三聯畫" => "三联画", -"中國畫" => "中国画", -"書畫 " => "书画 ", -"書畫社" => "书画社", -"五筆畫" => "五笔画", -"作畫 " => "作画 ", -"入畫 " => "入画 ", "寫生畫" => "写生画", -"刻畫 " => "刻画 ", -"動畫 " => "动画 ", -"勾畫 " => "勾画 ", "單色畫" => "单色画", "卡通畫" => "卡通画", -"國畫 " => "国画 ", -"圖畫 " => "图画 ", -"壁畫 " => "壁画 ", -"字畫 " => "字画 ", "宣傳畫" => "宣传画", -"工筆畫" => "工笔画", -"年畫 " => "年画 ", "幽默畫" => "幽默画", -"指畫 " => "指画 ", -"描畫 " => "描画 ", -"插畫 " => "插画 ", "擘畫 " => "擘画 ", -"春畫 " => "春画 ", -"木刻畫" => "木刻画", "機械畫" => "机械画", "比畫 " => "比画 ", -"毛筆畫" => "毛笔画", "水粉畫" => "水粉画", -"油畫 " => "油画 ", "海景畫" => "海景画", -"漫畫 " => "漫画 ", -"點畫 " => "点画 ", -"版畫 " => "版画 ", "畫 " => "画 ", -"畫像 " => "画像 ", -"畫冊 " => "画册 ", -"畫刊 " => "画刊 ", -"畫匠 " => "画匠 ", -"畫捲 " => "画卷 ", -"畫圖 " => "画图 ", -"畫壇 " => "画坛 ", -"畫室 " => "画室 ", -"畫家 " => "画家 ", "畫屏 " => "画屏 ", -"畫展 " => "画展 ", -"畫布 " => "画布 ", "畫師 " => "画师 ", -"畫廊 " => "画廊 ", -"畫報 " => "画报 ", -"畫押 " => "画押 ", "畫板 " => "画板 ", -"畫片 " => "画片 ", -"畫畫 " => "画画 ", -"畫皮 " => "画皮 ", -"畫眉鳥" => "画眉鸟", -"畫稿 " => "画稿 ", -"畫筆 " => "画笔 ", -"畫院 " => "画院 ", "畫集 " => "画集 ", -"畫面 " => "画面 ", -"筆畫 " => "笔画 ", "細密畫" => "细密画", -"繪畫 " => "绘画 ", -"自畫像" => "自画像", -"蠟筆畫" => "蜡笔画", +"肖像畫" => "肖像画", "裸體畫" => "裸体画", "西洋畫" => "西洋画", +"連環畫" => "连环画", "透視畫" => "透视画", -"銅版畫" => "铜版画", "鍾 " => "锺 ", "靜物畫" => "静物画", +"風景畫" => "风景画", "餘 " => "馀 ", ); @@ -7741,7 +7750,6 @@ $zh2TW = array( "凡高" => "梵谷", "狄安娜" => "黛安娜", "戴安娜" => "黛安娜", -"赫拉" => "希拉", ); $zh2HK = array( @@ -8260,7 +8268,6 
@@ $zh2CN = array( "舒麥加" => "迈克尔·舒马赫", "希特拉" => "希特勒", "黛安娜" => "戴安娜", -"希拉" => "赫拉", ); $zh2SG = array( diff --git a/includes/zhtable/Makefile b/includes/zhtable/Makefile index c63e4db71f..29b012ff2d 100644 --- a/includes/zhtable/Makefile +++ b/includes/zhtable/Makefile @@ -12,7 +12,7 @@ DIFF = LANG=zh_CN.UTF8 diff CC ?= gcc SF_MIRROR = easynews -SCIM_TABLES_VER = 0.5.7 +SCIM_TABLES_VER = 0.5.8 SCIM_PINYIN_VER = 0.5.91 LIBTABE_VER = 0.2.3 @@ -21,22 +21,40 @@ INSTDIR = /usr/local/share/zhdaemons/ all: ZhConversion.php tradphrases.notsure simpphrases.notsure wordlist toHans.dict toHant.dict toCN.dict toTW.dict toHK.dict toSG.dict -Unihan.txt: +# Download Unihan database and Traditional Chinese / Simplified Chinese phrase files +Unihan.zip: wget -nc ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip - unzip -q Unihan.zip -EZ.txt.in: +scim-tables-$(SCIM_TABLES_VER).tar.gz: wget -nc http://$(SF_MIRROR).dl.sourceforge.net/sourceforge/scim/scim-tables-$(SCIM_TABLES_VER).tar.gz - tar -xzf scim-tables-$(SCIM_TABLES_VER).tar.gz -O scim-tables-$(SCIM_TABLES_VER)/tables/zh/EZ-Big.txt.in > EZ.txt.in -phrase_lib.txt: +scim-pinyin-$(SCIM_PINYIN_VER).tar.gz: wget -nc http://$(SF_MIRROR).dl.sourceforge.net/sourceforge/scim/scim-pinyin-$(SCIM_PINYIN_VER).tar.gz + +libtabe-$(LIBTABE_VER).tgz: + wget -nc http://$(SF_MIRROR).dl.sourceforge.net/sourceforge/libtabe/libtabe-$(LIBTABE_VER).tgz + +# Extract the files from the compressed archives +Unihan.txt: Unihan.zip + unzip -oq Unihan.zip + +EZ.txt.in: scim-tables-$(SCIM_TABLES_VER).tar.gz + tar -xzf scim-tables-$(SCIM_TABLES_VER).tar.gz -O scim-tables-$(SCIM_TABLES_VER)/tables/zh/EZ-Big.txt.in > EZ.txt.in + +Wubi.txt.in: scim-tables-$(SCIM_TABLES_VER).tar.gz + tar -xzf scim-tables-$(SCIM_TABLES_VER).tar.gz -O scim-tables-$(SCIM_TABLES_VER)/tables/zh/Wubi.txt.in > Wubi.txt.in + +Ziranma.txt.in: scim-tables-$(SCIM_TABLES_VER).tar.gz + tar -xzf scim-tables-$(SCIM_TABLES_VER).tar.gz -O scim-tables-$(SCIM_TABLES_VER)/tables/zh/Ziranma.txt.in > 
Ziranma.txt.in + + +phrase_lib.txt: scim-pinyin-$(SCIM_PINYIN_VER).tar.gz tar -xzf scim-pinyin-$(SCIM_PINYIN_VER).tar.gz -O scim-pinyin-$(SCIM_PINYIN_VER)/data/phrase_lib.txt > phrase_lib.txt -tsi.src: - wget -nc http://$(SF_MIRROR).dl.sourceforge.net/sourceforge/libtabe/libtabe-$(LIBTABE_VER).tgz +tsi.src: libtabe-$(LIBTABE_VER).tgz tar -xzf libtabe-$(LIBTABE_VER).tgz -O libtabe/tsi-src/tsi.src > tsi.src +# Make a word list wordlist: phrase_lib.txt EZ.txt.in tsi.src iconv -c -f big5 -t utf8 tsi.src | $(SED) 's/# //g' | $(SED) 's/[ ][0-9].*//' > wordlist $(SED) 's/\(.*\)\t[0-9][0-9]*.*/\1/' phrase_lib.txt | $(SED) '1,5d' >> wordlist @@ -64,7 +82,7 @@ simp2trad.t: unihan.s2t.t simp2trad.manual cat simp2trad.manual tmp1 > simp2trad.t t2s_1tomany.t: trad2simp.t - $(GREP) -s ".\{19,\}" trad2simp.t | $(SED) 's/U+...../"/' | $(SED) 's/|U+...../"=>"/' | $(SED) 's/|U+.....//g' | $(SED) 's/|/",/' > t2s_1tomany.t + $(GREP) -s ".\{19,\}" trad2simp.t | $(SED) 's/U+...../"/' | $(SED) 's/|U+...../"=>"/' | $(SED) 's/|U+.....//g' | $(SED) 's/|/",/' > t2s_1tomany.t t2s_1to1.t: trad2simp.t s2t_1tomany.t $(SED) "/.*|.*|.*|.*/d" trad2simp.t | $(SED) 's/U+[0-9a-z][0-9a-z]*/"/' | $(SED) 's/|U+[0-9a-z][0-9a-z]*/"=>"/' | $(SED) 's/|/",/' > t2s_1to1.t @@ -133,8 +151,17 @@ tradphrases.notsure: tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany ph.t: phrase_lib.txt $(SED) 's/[\t0-9a-zA-Z]//g' phrase_lib.txt | $(GREP) "^.\{2,4\}$$" > ph.t -allsimpphrases.t: ph.t +Wubi.t: Wubi.txt.in + $(SED) '1,/BEGIN_TABLE/d' Wubi.txt.in | colrm 1 8 | $(SED) 's/\t.*//' | $(GREP) "^...*" > Wubi.t + +Ziranma.t: Ziranma.txt.in + $(SED) '1,/BEGIN_TABLE/d' Ziranma.txt.in | colrm 1 8 | $(SED) 's/\t.*//' | $(GREP) "^...*" > Ziranma.t + + +allsimpphrases.t: t2s_1tomany.t ph.t Wubi.t Ziranma.t rm -f allsimpphrases.t + for i in `cat t2s_1tomany.t | $(SED) 's/.*=>".//' | $(SED) 's/"//g' | $(SED) 's/,/\n/' | $(SED) 's/\(.\)/\1\n/g' | sort | uniq `; do $(GREP) $$i Wubi.t >> allsimpphrases.t; done + for i in `cat 
t2s_1tomany.t | $(SED) 's/.*=>".//' | $(SED) 's/"//g' | $(SED) 's/,/\n/' | $(SED) 's/\(.\)/\1\n/g' | sort | uniq `; do $(GREP) $$i Ziranma.t >> allsimpphrases.t; done for i in `cat t2s_1tomany.t | $(SED) 's/.*=>".//' | $(SED) 's/"//g' | $(SED) 's/,/\n/' | $(SED) 's/\(.\)/\1\n/g' | sort | uniq `; do $(GREP) $$i ph.t >> allsimpphrases.t; done simpphrases_2.t: allsimpphrases.t @@ -153,7 +180,7 @@ simpphrases_4.t: allsimpphrases.t sort t | uniq > t3 $(DIFF) t3 simpphrases_4.t | $(GREP) ">" | $(SED) 's/> //' > t mv t simpphrases_4.t - for i in `cat simpphrases_3.t`; do $(GREP) $$i simpphrases_4.t; done | sort | uniq > t3 || true + for i in `cat simpphrases_3.t`; do $(GREP) $$i simpphrases_4.t; done | sort | uniq > t3 || true $(DIFF) t3 simpphrases_4.t | $(GREP) ">" | $(SED) 's/> //' > t mv t simpphrases_4.t @@ -259,6 +286,8 @@ cleantmp: rm -f \ Unihan.txt \ EZ.txt.in \ + Wubi.txt.in \ + Ziranma.txt.in \ phrase_lib.txt \ tsi.src # Temporary files and other trash diff --git a/includes/zhtable/toCN.manual b/includes/zhtable/toCN.manual index 427afad2a9..419e02617e 100644 --- a/includes/zhtable/toCN.manual +++ b/includes/zhtable/toCN.manual @@ -305,4 +305,3 @@ 舒麥加 迈克尔·舒马赫 希特拉 希特勒 黛安娜 戴安娜 -希拉 赫拉 diff --git a/includes/zhtable/toTW.manual b/includes/zhtable/toTW.manual index a1639f7f84..aaaa95a83e 100644 --- a/includes/zhtable/toTW.manual +++ b/includes/zhtable/toTW.manual @@ -287,4 +287,3 @@ 凡高 梵谷 狄安娜 黛安娜 戴安娜 黛安娜 -赫拉 希拉 -- 2.20.1