From 561fd67b96fd6896ea2b416215d0bf1ead75eb5d Mon Sep 17 00:00:00 2001 From: Philip Tzou Date: Fri, 6 Aug 2010 20:05:07 +0000 Subject: [PATCH] Refactor Makefile.py. Add new function to manualWordsTable() and fix bug for parsing tsi.src. --- includes/ZhConversion.php | 481 +++++++++++++++- includes/zhtable/Makefile.py | 592 +++++++++----------- includes/zhtable/tradphrases_exclude.manual | 7 +- 3 files changed, 731 insertions(+), 349 deletions(-) diff --git a/includes/ZhConversion.php b/includes/ZhConversion.php index 329b331c02..cdd55b2930 100644 --- a/includes/ZhConversion.php +++ b/includes/ZhConversion.php @@ -2818,6 +2818,7 @@ $zh2Hant = array( '一出逃' => '一出逃', '一划' => '一劃', '一半只' => '一半只', +'一吊錢' => '一吊錢', '一吊钱' => '一吊錢', '一地里' => '一地裡', '一伙' => '一夥', @@ -2939,6 +2940,7 @@ $zh2Hant = array( '不干' => '不幹', '不吊' => '不弔', '不采' => '不採', +'不斗膽' => '不斗膽', '不斗胆' => '不斗膽', '不断发' => '不斷發', '不每只' => '不每只', @@ -2951,6 +2953,7 @@ $zh2Hant = array( '不通吊庆' => '不通弔慶', '不丑' => '不醜', '不采声' => '不采聲', +'不采聲' => '不采聲', '不锈钢' => '不鏽鋼', '不食干腊' => '不食乾腊', '不斗' => '不鬥', @@ -3038,6 +3041,7 @@ $zh2Hant = array( '九只' => '九隻', '九余' => '九餘', '九龙表行' => '九龍表行', +'九龍表行' => '九龍表行', '也克制' => '也剋制', '也斗了胆' => '也斗了膽', '干干' => '乾乾', @@ -3857,6 +3861,7 @@ $zh2Hant = array( '佣金' => '傭金', '傲霜斗雪' => '傲霜鬥雪', '传位于四太子' => '傳位于四太子', +'傳位于四太子' => '傳位于四太子', '传于' => '傳於', '伤痕累累' => '傷痕纍纍', '傻里傻气' => '傻裡傻氣', @@ -4035,8 +4040,10 @@ $zh2Hant = array( '准不准我' => '准不准我', '准不准许' => '准不准許', '准不准谁' => '准不准誰', +'准保護' => '准保護', '准保护' => '准保護', '准保释' => '准保釋', +'准保釋' => '准保釋', '凌蒙初' => '凌濛初', '凝炼' => '凝鍊', '几上' => '几上', @@ -4223,10 +4230,12 @@ $zh2Hant = array( '十出' => '十齣', '千个' => '千個', '千只可' => '千只可', +'千只夠' => '千只夠', '千只够' => '千只夠', '千只怕' => '千只怕', '千只能' => '千只能', '千只足够' => '千只足夠', +'千只足夠' => '千只足夠', '千多只' => '千多隻', '千天后' => '千天後', '千扎' => '千紮', @@ -4317,9 +4326,12 @@ $zh2Hant = array( '只占算' => '只占算', '只采' => '只採', '只冲' => '只衝', +'只要功夫深,铁杵磨成锈花针' => '只要功夫深,鐵杵磨成鏽花針', '只身上已' => '只身上已', '只身上有' => '只身上有', +'只身上沒' => '只身上沒', '只身上没' => '只身上沒', +'只身上無' => '只身上無', '只身上无' => '只身上無', '只身上的' => '只身上的', '只身世' => '只身世', @@ -4330,14 +4342,18 @@ $zh2Hant = array( '只身形' => '只身形', '只身影' => '只身影', '只身后' => '只身後', +'只身後' => '只身後', '只身心' => '只身心', '只身旁' => '只身旁', '只身材' => '只身材', '只身段' => '只身段', '只身为' => '只身為', +'只身為' => '只身為', '只身边' => '只身邊', +'只身邊' => '只身邊', '只身首' => '只身首', '只身体' => '只身體', +'只身體' => '只身體', '只身高' => '只身高', '只采声' => '只采聲', '叮叮当当' => '叮叮噹噹', @@ -4436,6 +4452,7 @@ $zh2Hant = array( '呆呆傻傻' => '呆呆傻傻', '呆呆挣挣' => '呆呆掙掙', '呆呆兽' => '呆呆獸', +'呆呆獸' => '呆呆獸', '呆呆笨笨' => '呆呆笨笨', '呆致致' => '呆緻緻', '呆里呆气' => '呆裡呆氣', @@ -4640,6 +4657,7 @@ $zh2Hant = array( '多只可' => '多只可', '多只在' => '多只在', '多只是' => '多只是', +'多只會' => '多只會', '多只会' => '多只會', '多只有' => '多只有', '多只能' => '多只能', @@ -4655,6 +4673,7 @@ $zh2Hant = array( '夜里' => '夜裡', '夜游' => '夜遊', '够克制' => '夠剋制', +'夢有五不占' => '夢有五不占', '梦有五不占' => '夢有五不占', '梦里' => '夢裡', '梦游' => '夢遊', @@ -4759,6 +4778,7 @@ $zh2Hant = array( '好斗笠' => '好斗笠', '好斗篷' => '好斗篷', '好斗胆' => '好斗膽', +'好斗膽' => '好斗膽', '好斗蓬' => '好斗蓬', '好于' => '好於', '好呆' => '好獃', @@ -5087,7 +5107,6 @@ $zh2Hant = array( '府干' => '府幹', '座钟' => '座鐘', '康庄大道' => '康庄大道', -'康采恩' => '康採恩', '康庄' => '康莊', '厨余' => '廚餘', '厮斗' => '廝鬥', @@ -6146,9 +6165,11 @@ $zh2Hant = array( '有只用' => '有只用', '有够赞' => '有夠讚', '有征伐' => '有征伐', +'有征戰' => '有征戰', '有征战' => '有征戰', '有征服' => '有征服', '有征讨' => '有征討', +'有征討' => '有征討', '有征' => '有徵', '有恒街' => '有恒街', '有栖川' => '有栖川', @@ -6734,7 +6755,6 @@ $zh2Hant = array( '特效药' => '特效藥', '特制' => '特製', '牵一发' => '牽一髮', -'牵挂' => '牽挂', '牵系' => '牽繫', '荦确' => '犖确', '狂占' => '狂佔', @@ -6875,9 +6895,11 @@ $zh2Hant = array( '白霉' => '白黴', '百个' => '百個', '百只可' => '百只可', +'百只夠' => '百只夠', '百只够' => '百只夠', '百只怕' => '百只怕', '百只足够' => '百只足夠', +'百只足夠' => '百只足夠', '百多只' => '百多隻', '百天后' => '百天後', '百拙千丑' => '百拙千醜', @@ -7281,6 +7303,7 @@ $zh2Hant = array( '绝于' => '絕於', '绞干' => '絞乾', '络腮胡' => '絡腮鬍', +'給我干脆' => '給我干脆', '给我干脆' => '給我干脆', '给于' => '給於', '丝来线去' => '絲來線去', @@ -7629,6 +7652,7 @@ $zh2Hant = array( '菠萝干' => '菠蘿乾', '华严钟' => '華嚴鐘', '华发' => '華髮', +'萬一只' => '萬一只', '万一只' => '萬一只', '万个' => '萬個', '万多只' => '萬多隻', @@ -8920,6 +8944,7 @@ $zh2Hant = array( '颠干倒坤' => '顛乾倒坤', '颠覆' => '顛覆', '颠颠仆仆' => '顛顛仆仆', +'顛顛仆仆' => '顛顛仆仆', '颤栗' => '顫慄', '显示表' => '顯示錶', '显示钟' => '顯示鐘', @@ -12555,11 +12580,14 @@ $zh2Hans = array( '與著者' => '与著者', '與著述' => '与著述', '丑著' => '丑着', +'丑著书' => '丑著书', '丑著書' => '丑著书', '丑著作' => '丑著作', '丑著名' => '丑著名', +'丑著录' => '丑著录', '丑著錄' => '丑著录', '丑著稱' => '丑著称', +'丑著称' => '丑著称', '丑著者' => '丑著者', '丑著述' => '丑著述', '專著' => '专著', @@ -12588,11 +12616,14 @@ $zh2Hans = array( '樂著者' => '乐著者', '樂著述' => '乐著述', '乘著' => '乘着', +'乘著书' => '乘著书', '乘著書' => '乘著书', '乘著作' => '乘著作', '乘著名' => '乘著名', +'乘著录' => '乘著录', '乘著錄' => '乘著录', '乘著稱' => '乘著称', +'乘著称' => '乘著称', '乘著者' => '乘著者', '乘著述' => '乘著述', '乾一坛' => '乾一坛', @@ -12668,8 +12699,8 @@ $zh2Hans = array( '乾旦' => '乾旦', '乾明' => '乾明', '乾昧' => '乾昧', -'乾暉' => '乾晖', '乾晖' => '乾晖', +'乾暉' => '乾晖', '乾景' => '乾景', '乾晷' => '乾晷', '乾曜' => '乾曜', @@ -12733,8 +12764,8 @@ $zh2Hans = array( '乾贶' => '乾贶', '乾车' => '乾车', '乾車' => '乾车', -'乾轴' => '乾轴', '乾軸' => '乾轴', +'乾轴' => '乾轴', '乾通' => '乾通', '乾造' => '乾造', '乾道' => '乾道', @@ -12773,26 +12804,35 @@ $zh2Hans = array( '爭著述' => '争著述', '五箇山' => '五箇山', '亮著' => '亮着', +'亮著书' => '亮著书', '亮著書' => '亮著书', '亮著作' => '亮著作', '亮著名' => '亮著名', '亮著錄' => '亮著录', +'亮著录' => '亮著录', +'亮著称' => '亮著称', '亮著稱' => '亮著称', '亮著者' => '亮著者', '亮著述' => '亮著述', '仗著' => '仗着', +'仗著书' => '仗著书', '仗著書' => '仗著书', '仗著作' => '仗著作', '仗著名' => '仗著名', +'仗著录' => '仗著录', '仗著錄' => '仗著录', '仗著稱' => '仗著称', +'仗著称' => '仗著称', '仗著者' => '仗著者', '仗著述' => '仗著述', '代表著' => '代表着', '代表著書' => '代表著书', +'代表著书' => '代表著书', '代表著作' => '代表著作', '代表著名' => '代表著名', '代表著錄' => '代表著录', +'代表著录' => '代表著录', +'代表著称' => '代表著称', '代表著稱' => '代表著称', '代表著者' => '代表著者', '代表著述' => '代表著述', @@ -12809,26 +12849,35 @@ $zh2Hans = array( '傳著者' => '传著者', '傳著述' => '传著述', '伴著' => '伴着', +'伴著书' => '伴著书', '伴著書' => '伴著书', '伴著作' => '伴著作', '伴著名' => '伴著名', +'伴著录' => '伴著录', '伴著錄' => '伴著录', '伴著稱' => '伴著称', +'伴著称' => '伴著称', '伴著者' => '伴著者', '伴著述' => '伴著述', '低著' => '低着', '低著書' => '低著书', +'低著书' => '低著书', '低著作' => '低著作', '低著名' => '低著名', +'低著录' => '低著录', '低著錄' => '低著录', '低著稱' => '低著称', +'低著称' => '低著称', '低著者' => '低著者', '低著述' => '低著述', '住著' => '住着', '住著書' => '住著书', +'住著书' => '住著书', '住著作' => '住著作', '住著名' => '住著名', '住著錄' => '住著录', +'住著录' => '住著录', +'住著称' => '住著称', '住著稱' => '住著称', '住著者' => '住著者', '住著述' => '住著述', @@ -12844,28 +12893,37 @@ $zh2Hans = array( '側著述' => '侧著述', '保護著' => '保护着', '保障著' => '保障着', +'保障著书' => '保障著书', '保障著書' => '保障著书', '保障著作' => '保障著作', '保障著名' => '保障著名', '保障著錄' => '保障著录', +'保障著录' => '保障著录', '保障著稱' => '保障著称', +'保障著称' => '保障著称', '保障著者' => '保障著者', '保障著述' => '保障著述', '信著' => '信着', +'信著书' => '信著书', '信著書' => '信著书', '信著作' => '信著作', '信著名' => '信著名', +'信著录' => '信著录', '信著錄' => '信著录', +'信著称' => '信著称', '信著稱' => '信著称', '信著者' => '信著者', '信著述' => '信著述', '修鍊' => '修炼', '候著' => '候着', '候著書' => '候著书', +'候著书' => '候著书', '候著作' => '候著作', '候著名' => '候著名', +'候著录' => '候著录', '候著錄' => '候著录', '候著稱' => '候著称', +'候著称' => '候著称', '候著者' => '候著者', '候著述' => '候著述', '藉助' => '借助', @@ -12880,36 +12938,48 @@ $zh2Hans = array( '藉著' => '借着', '藉端' => '借端', '借著書' => '借著书', +'借著书' => '借著书', '借著作' => '借著作', '借著名' => '借著名', +'借著录' => '借著录', '借著錄' => '借著录', +'借著称' => '借著称', '借著稱' => '借著称', '借著者' => '借著者', '借著述' => '借著述', '藉詞' => '借词', '做著' => '做着', '做著書' => '做著书', +'做著书' => '做著书', '做著作' => '做著作', '做著名' => '做著名', '做著錄' => '做著录', +'做著录' => '做著录', '做著稱' => '做著称', +'做著称' => '做著称', '做著者' => '做著者', '做著述' => '做著述', '偷著' => '偷着', '偷著書' => '偷著书', +'偷著书' => '偷著书', '偷著作' => '偷著作', '偷著名' => '偷著名', '偷著錄' => '偷著录', +'偷著录' => '偷著录', '偷著稱' => '偷著称', +'偷著称' => '偷著称', '偷著者' => '偷著者', '偷著述' => '偷著述', '傢俬' => '傢俬', '光著' => '光着', '光著書' => '光著书', +'光著书' => '光著书', '光著作' => '光著作', '光著名' => '光著名', '光著錄' => '光著录', +'光著录' => '光著录', '光著稱' => '光著称', +'光著称' => '光著称', '光著者' => '光著者', '光著述' => '光著述', '關著' => '关着', @@ -12922,18 +12992,24 @@ $zh2Hans = array( '關著述' => '关著述', '冀著' => '冀着', '冀著書' => '冀著书', +'冀著书' => '冀著书', '冀著作' => '冀著作', '冀著名' => '冀著名', '冀著錄' => '冀著录', +'冀著录' => '冀著录', '冀著稱' => '冀著称', +'冀著称' => '冀著称', '冀著者' => '冀著者', '冀著述' => '冀著述', '冒著' => '冒着', +'冒著书' => '冒著书', '冒著書' => '冒著书', '冒著作' => '冒著作', '冒著名' => '冒著名', +'冒著录' => '冒著录', '冒著錄' => '冒著录', '冒著稱' => '冒著称', +'冒著称' => '冒著称', '冒著者' => '冒著者', '冒著述' => '冒著述', '寫著' => '写着', @@ -12954,19 +13030,25 @@ $zh2Hans = array( '涼著述' => '凉著述', '憑藉' => '凭借', '制著' => '制着', +'制著书' => '制著书', '制著書' => '制著书', '制著作' => '制著作', '制著名' => '制著名', '制著錄' => '制著录', +'制著录' => '制著录', +'制著称' => '制著称', '制著稱' => '制著称', '制著者' => '制著者', '制著述' => '制著述', '刻著' => '刻着', '刻著書' => '刻著书', +'刻著书' => '刻著书', '刻著作' => '刻著作', '刻著名' => '刻著名', +'刻著录' => '刻著录', '刻著錄' => '刻著录', '刻著稱' => '刻著称', +'刻著称' => '刻著称', '刻著者' => '刻著者', '刻著述' => '刻著述', '辦著' => '办着', @@ -12987,26 +13069,35 @@ $zh2Hans = array( '動著述' => '动著述', '努力著' => '努力着', '努力著書' => '努力著书', +'努力著书' => '努力著书', '努力著作' => '努力著作', '努力著名' => '努力著名', '努力著錄' => '努力著录', +'努力著录' => '努力著录', +'努力著称' => '努力著称', '努力著稱' => '努力著称', '努力著者' => '努力著者', '努力著述' => '努力著述', '努著' => '努着', '努著書' => '努著书', +'努著书' => '努著书', '努著作' => '努著作', '努著名' => '努著名', '努著錄' => '努著录', +'努著录' => '努著录', +'努著称' => '努著称', '努著稱' => '努著称', '努著者' => '努著者', '努著述' => '努著述', '卓著' => '卓著', '印著' => '印着', +'印著书' => '印著书', '印著書' => '印著书', '印著作' => '印著作', '印著名' => '印著名', +'印著录' => '印著录', '印著錄' => '印著录', +'印著称' => '印著称', '印著稱' => '印著称', '印著者' => '印著者', '印著述' => '印著述', @@ -13021,21 +13112,27 @@ $zh2Hans = array( '壓著述' => '压著述', '原著' => '原著', '去著' => '去着', +'去著书' => '去著书', '去著書' => '去著书', '去著作' => '去著作', '去著名' => '去著名', +'去著录' => '去著录', '去著錄' => '去著录', '去著稱' => '去著称', +'去著称' => '去著称', '去著者' => '去著者', '去著述' => '去著述', '反反覆覆' => '反反复复', '反覆' => '反复', '受著' => '受着', '受著書' => '受著书', +'受著书' => '受著书', '受著作' => '受著作', '受著名' => '受著名', '受著錄' => '受著录', +'受著录' => '受著录', '受著稱' => '受著称', +'受著称' => '受著称', '受著者' => '受著者', '受著述' => '受著述', '變著' => '变着', @@ -13047,10 +13144,13 @@ $zh2Hans = array( '變著者' => '变著者', '變著述' => '变著述', '叫著' => '叫着', +'叫著书' => '叫著书', '叫著書' => '叫著书', '叫著作' => '叫著作', '叫著名' => '叫著名', +'叫著录' => '叫著录', '叫著錄' => '叫著录', +'叫著称' => '叫著称', '叫著稱' => '叫著称', '叫著者' => '叫著者', '叫著述' => '叫著述', @@ -13064,17 +13164,23 @@ $zh2Hans = array( '名著' => '名著', '向著' => '向着', '向著書' => '向著书', +'向著书' => '向著书', '向著作' => '向著作', '向著名' => '向著名', '向著錄' => '向著录', +'向著录' => '向著录', +'向著称' => '向著称', '向著稱' => '向著称', '向著者' => '向著者', '向著述' => '向著述', '含著' => '含着', '含著書' => '含著书', +'含著书' => '含著书', '含著作' => '含著作', '含著名' => '含著名', '含著錄' => '含著录', +'含著录' => '含著录', +'含著称' => '含著称', '含著稱' => '含著称', '含著者' => '含著者', '含著述' => '含著述', @@ -13092,18 +13198,24 @@ $zh2Hans = array( '吳其濬' => '吴其濬', '吹著' => '吹着', '吹著書' => '吹著书', +'吹著书' => '吹著书', '吹著作' => '吹著作', '吹著名' => '吹著名', +'吹著录' => '吹著录', '吹著錄' => '吹著录', '吹著稱' => '吹著称', +'吹著称' => '吹著称', '吹著者' => '吹著者', '吹著述' => '吹著述', '周易乾' => '周易乾', '味著' => '味着', +'味著书' => '味著书', '味著書' => '味著书', '味著作' => '味著作', '味著名' => '味著名', +'味著录' => '味著录', '味著錄' => '味著录', +'味著称' => '味著称', '味著稱' => '味著称', '味著者' => '味著者', '味著述' => '味著述', @@ -13119,26 +13231,35 @@ $zh2Hans = array( '哪吒' => '哪吒', '哭著' => '哭着', '哭著書' => '哭著书', +'哭著书' => '哭著书', '哭著作' => '哭著作', '哭著名' => '哭著名', '哭著錄' => '哭著录', +'哭著录' => '哭著录', '哭著稱' => '哭著称', +'哭著称' => '哭著称', '哭著者' => '哭著者', '哭著述' => '哭著述', '唱著' => '唱着', +'唱著书' => '唱著书', '唱著書' => '唱著书', '唱著作' => '唱著作', '唱著名' => '唱著名', +'唱著录' => '唱著录', '唱著錄' => '唱著录', +'唱著称' => '唱著称', '唱著稱' => '唱著称', '唱著者' => '唱著者', '唱著述' => '唱著述', '喝著' => '喝着', +'喝著书' => '喝著书', '喝著書' => '喝著书', '喝著作' => '喝著作', '喝著名' => '喝著名', +'喝著录' => '喝著录', '喝著錄' => '喝著录', '喝著稱' => '喝著称', +'喝著称' => '喝著称', '喝著者' => '喝著者', '喝著述' => '喝著述', '嗅不著' => '嗅不着', @@ -13146,9 +13267,12 @@ $zh2Hans = array( '嗅著' => '嗅着', '嚷著' => '嚷着', '嚷著書' => '嚷著书', +'嚷著书' => '嚷著书', '嚷著作' => '嚷著作', '嚷著名' => '嚷著名', '嚷著錄' => '嚷著录', +'嚷著录' => '嚷著录', +'嚷著称' => '嚷著称', '嚷著稱' => '嚷著称', '嚷著者' => '嚷著者', '嚷著述' => '嚷著述', @@ -13157,17 +13281,23 @@ $zh2Hans = array( '因著〈' => '因著〈', '因著《' => '因著《', '因著書' => '因著书', +'因著书' => '因著书', '因著作' => '因著作', '因著名' => '因著名', '因著錄' => '因著录', +'因著录' => '因著录', '因著稱' => '因著称', +'因著称' => '因著称', '因著者' => '因著者', '因著述' => '因著述', '困著' => '困着', '困著書' => '困著书', +'困著书' => '困著书', '困著作' => '困著作', '困著名' => '困著名', '困著錄' => '困著录', +'困著录' => '困著录', +'困著称' => '困著称', '困著稱' => '困著称', '困著者' => '困著者', '困著述' => '困著述', @@ -13182,17 +13312,23 @@ $zh2Hans = array( '土著' => '土著', '在著' => '在着', '在著書' => '在著书', +'在著书' => '在著书', '在著作' => '在著作', '在著名' => '在著名', '在著錄' => '在著录', +'在著录' => '在著录', '在著稱' => '在著称', +'在著称' => '在著称', '在著者' => '在著者', '在著述' => '在著述', '坐著' => '坐着', +'坐著书' => '坐著书', '坐著書' => '坐著书', '坐著作' => '坐著作', '坐著名' => '坐著名', +'坐著录' => '坐著录', '坐著錄' => '坐著录', +'坐著称' => '坐著称', '坐著稱' => '坐著称', '坐著者' => '坐著者', '坐著述' => '坐著述', @@ -13222,10 +13358,13 @@ $zh2Hans = array( '字乾生' => '字乾生', '存摺' => '存摺', '孤著' => '孤着', +'孤著书' => '孤著书', '孤著書' => '孤著书', '孤著作' => '孤著作', '孤著名' => '孤著名', '孤著錄' => '孤著录', +'孤著录' => '孤著录', +'孤著称' => '孤著称', '孤著稱' => '孤著称', '孤著者' => '孤著者', '孤著述' => '孤著述', @@ -13239,17 +13378,23 @@ $zh2Hans = array( '學著述' => '学著述', '守著' => '守着', '守著書' => '守著书', +'守著书' => '守著书', '守著作' => '守著作', '守著名' => '守著名', +'守著录' => '守著录', '守著錄' => '守著录', +'守著称' => '守著称', '守著稱' => '守著称', '守著者' => '守著者', '守著述' => '守著述', '定著' => '定着', '定著書' => '定著书', +'定著书' => '定著书', '定著作' => '定著作', '定著名' => '定著名', '定著錄' => '定著录', +'定著录' => '定著录', +'定著称' => '定著称', '定著稱' => '定著称', '定著者' => '定著者', '定著述' => '定著述', @@ -13273,10 +13418,13 @@ $zh2Hans = array( '尼乾陀' => '尼乾陀', '展著' => '展着', '展著書' => '展著书', +'展著书' => '展著书', '展著作' => '展著作', '展著名' => '展著名', '展著錄' => '展著录', +'展著录' => '展著录', '展著稱' => '展著称', +'展著称' => '展著称', '展著者' => '展著者', '展著述' => '展著述', '峯岸南' => '峯岸南', @@ -13311,8 +13459,8 @@ $zh2Hans = array( '幺半群' => '幺半群', '幺廝' => '幺厮', '幺厮' => '幺厮', -'么叔' => '幺叔', '幺叔' => '幺叔', +'么叔' => '幺叔', '么媽' => '幺妈', '幺媽' => '幺妈', '么妹' => '幺妹', @@ -13356,10 +13504,13 @@ $zh2Hans = array( '應著述' => '应著述', '康乾' => '康乾', '康著' => '康着', +'康著书' => '康著书', '康著書' => '康著书', '康著作' => '康著作', '康著名' => '康著名', +'康著录' => '康著录', '康著錄' => '康著录', +'康著称' => '康著称', '康著稱' => '康著称', '康著者' => '康著者', '康著述' => '康著述', @@ -13383,58 +13534,79 @@ $zh2Hans = array( '當著述' => '当著述', '彰明較著' => '彰明较著', '待著' => '待着', +'待著书' => '待著书', '待著書' => '待著书', '待著作' => '待著作', '待著名' => '待著名', +'待著录' => '待著录', '待著錄' => '待著录', '待著稱' => '待著称', +'待著称' => '待著称', '待著者' => '待著者', '待著述' => '待著述', '得著' => '得着', '得著書' => '得著书', +'得著书' => '得著书', '得著作' => '得著作', '得著名' => '得著名', '得著錄' => '得著录', +'得著录' => '得著录', '得著稱' => '得著称', +'得著称' => '得著称', '得著者' => '得著者', '得著述' => '得著述', '循著' => '循着', +'循著书' => '循著书', '循著書' => '循著书', '循著作' => '循著作', '循著名' => '循著名', +'循著录' => '循著录', '循著錄' => '循著录', +'循著称' => '循著称', '循著稱' => '循著称', '循著者' => '循著者', '循著述' => '循著述', '心著' => '心着', +'心著书' => '心著书', '心著書' => '心著书', '心著作' => '心著作', '心著名' => '心著名', +'心著录' => '心著录', '心著錄' => '心著录', '心著稱' => '心著称', +'心著称' => '心著称', '心著者' => '心著者', '心著述' => '心著述', '忍著' => '忍着', +'忍著书' => '忍著书', '忍著書' => '忍著书', '忍著作' => '忍著作', '忍著名' => '忍著名', +'忍著录' => '忍著录', '忍著錄' => '忍著录', '忍著稱' => '忍著称', +'忍著称' => '忍著称', '忍著者' => '忍著者', '忍著述' => '忍著述', '志著' => '志着', '志著書' => '志著书', +'志著书' => '志著书', '志著作' => '志著作', '志著名' => '志著名', '志著錄' => '志著录', +'志著录' => '志著录', +'志著称' => '志著称', '志著稱' => '志著称', '志著者' => '志著者', '志著述' => '志著述', '忙著' => '忙着', +'忙著书' => '忙著书', '忙著書' => '忙著书', '忙著作' => '忙著作', '忙著名' => '忙著名', +'忙著录' => '忙著录', '忙著錄' => '忙著录', +'忙著称' => '忙著称', '忙著稱' => '忙著称', '忙著者' => '忙著者', '忙著述' => '忙著述', @@ -13447,18 +13619,24 @@ $zh2Hans = array( '懷著者' => '怀著者', '懷著述' => '怀著述', '急著' => '急着', +'急著书' => '急著书', '急著書' => '急著书', '急著作' => '急著作', '急著名' => '急著名', +'急著录' => '急著录', '急著錄' => '急著录', +'急著称' => '急著称', '急著稱' => '急著称', '急著者' => '急著者', '急著述' => '急著述', '性著' => '性着', +'性著书' => '性著书', '性著書' => '性著书', '性著作' => '性著作', '性著名' => '性著名', +'性著录' => '性著录', '性著錄' => '性著录', +'性著称' => '性著称', '性著稱' => '性著称', '性著者' => '性著者', '性著述' => '性著述', @@ -13473,9 +13651,12 @@ $zh2Hans = array( '恩威並著' => '恩威并著', '悠著' => '悠着', '悠著書' => '悠著书', +'悠著书' => '悠著书', '悠著作' => '悠著作', '悠著名' => '悠著名', '悠著錄' => '悠著录', +'悠著录' => '悠著录', +'悠著称' => '悠著称', '悠著稱' => '悠著称', '悠著者' => '悠著者', '悠著述' => '悠著述', @@ -13489,9 +13670,12 @@ $zh2Hans = array( '慣著述' => '惯著述', '想著' => '想着', '想著書' => '想著书', +'想著书' => '想著书', '想著作' => '想著作', '想著名' => '想著名', '想著錄' => '想著录', +'想著录' => '想著录', +'想著称' => '想著称', '想著稱' => '想著称', '想著者' => '想著者', '想著述' => '想著述', @@ -13505,33 +13689,45 @@ $zh2Hans = array( '戰著述' => '战著述', '戴著' => '戴着', '戴著書' => '戴著书', +'戴著书' => '戴著书', '戴著作' => '戴著作', '戴著名' => '戴著名', '戴著錄' => '戴著录', +'戴著录' => '戴著录', '戴著稱' => '戴著称', +'戴著称' => '戴著称', '戴著者' => '戴著者', '戴著述' => '戴著述', '扎著' => '扎着', '扎著書' => '扎著书', +'扎著书' => '扎著书', '扎著作' => '扎著作', '扎著名' => '扎著名', '扎著錄' => '扎著录', +'扎著录' => '扎著录', +'扎著称' => '扎著称', '扎著稱' => '扎著称', '扎著者' => '扎著者', '扎著述' => '扎著述', '打著' => '打着', '打著書' => '打著书', +'打著书' => '打著书', '打著作' => '打著作', '打著名' => '打著名', '打著錄' => '打著录', +'打著录' => '打著录', +'打著称' => '打著称', '打著稱' => '打著称', '打著者' => '打著者', '打著述' => '打著述', '扛著' => '扛着', +'扛著书' => '扛著书', '扛著書' => '扛著书', '扛著作' => '扛著作', '扛著名' => '扛著名', +'扛著录' => '扛著录', '扛著錄' => '扛著录', +'扛著称' => '扛著称', '扛著稱' => '扛著称', '扛著者' => '扛著者', '扛著述' => '扛著述', @@ -13541,7 +13737,9 @@ $zh2Hans = array( '抓著' => '抓着', '抓著作' => '抓著作', '抓著名' => '抓著名', +'抓著录' => '抓著录', '抓著錄' => '抓著录', +'抓著称' => '抓著称', '抓著稱' => '抓著称', '抓著者' => '抓著者', '抓著述' => '抓著述', @@ -13554,32 +13752,42 @@ $zh2Hans = array( '護著者' => '护著者', '護著述' => '护著述', '披著' => '披着', +'披著书' => '披著书', '披著書' => '披著书', '披著作' => '披著作', '披著名' => '披著名', +'披著录' => '披著录', '披著錄' => '披著录', '披著稱' => '披著称', +'披著称' => '披著称', '披著者' => '披著者', '披著述' => '披著述', '抬著' => '抬着', '抬著作' => '抬著作', '抬著名' => '抬著名', +'抬著录' => '抬著录', '抬著錄' => '抬著录', '抬著稱' => '抬著称', +'抬著称' => '抬著称', '抬著者' => '抬著者', '抬著述' => '抬著述', '抱著' => '抱着', '抱著作' => '抱著作', '抱著名' => '抱著名', +'抱著录' => '抱著录', '抱著錄' => '抱著录', '抱著稱' => '抱著称', +'抱著称' => '抱著称', '抱著者' => '抱著者', '抱著述' => '抱著述', '拉著' => '拉着', +'拉著书' => '拉著书', '拉著書' => '拉著书', '拉著作' => '拉著作', '拉著名' => '拉著名', +'拉著录' => '拉著录', '拉著錄' => '拉著录', +'拉著称' => '拉著称', '拉著稱' => '拉著称', '拉著者' => '拉著者', '拉著述' => '拉著述', @@ -13588,6 +13796,8 @@ $zh2Hans = array( '拎著作' => '拎著作', '拎著名' => '拎著名', '拎著錄' => '拎著录', +'拎著录' => '拎著录', +'拎著称' => '拎著称', '拎著稱' => '拎著称', '拎著者' => '拎著者', '拎著述' => '拎著述', @@ -13595,7 +13805,9 @@ $zh2Hans = array( '拖著作' => '拖著作', '拖著名' => '拖著名', '拖著錄' => '拖著录', +'拖著录' => '拖著录', '拖著稱' => '拖著称', +'拖著称' => '拖著称', '拖著者' => '拖著者', '拖著述' => '拖著述', '拙著' => '拙著', @@ -13605,14 +13817,18 @@ $zh2Hans = array( '拼著' => '拼着', '拼著作' => '拼著作', '拼著名' => '拼著名', +'拼著录' => '拼著录', '拼著錄' => '拼著录', '拼著稱' => '拼著称', +'拼著称' => '拼著称', '拼著者' => '拼著者', '拼著述' => '拼著述', '拿著' => '拿着', '拿著作' => '拿著作', '拿著名' => '拿著名', +'拿著录' => '拿著录', '拿著錄' => '拿著录', +'拿著称' => '拿著称', '拿著稱' => '拿著称', '拿著者' => '拿著者', '拿著述' => '拿著述', @@ -13620,6 +13836,8 @@ $zh2Hans = array( '持著作' => '持著作', '持著名' => '持著名', '持著錄' => '持著录', +'持著录' => '持著录', +'持著称' => '持著称', '持著稱' => '持著称', '持著者' => '持著者', '持著述' => '持著述', @@ -13627,6 +13845,8 @@ $zh2Hans = array( '挑著作' => '挑著作', '挑著名' => '挑著名', '挑著錄' => '挑著录', +'挑著录' => '挑著录', +'挑著称' => '挑著称', '挑著稱' => '挑著称', '挑著者' => '挑著者', '挑著述' => '挑著述', @@ -13656,13 +13876,17 @@ $zh2Hans = array( '挨著作' => '挨著作', '挨著名' => '挨著名', '挨著錄' => '挨著录', +'挨著录' => '挨著录', '挨著稱' => '挨著称', +'挨著称' => '挨著称', '挨著者' => '挨著者', '挨著述' => '挨著述', '捆著' => '捆着', '捆著作' => '捆著作', '捆著名' => '捆著名', '捆著錄' => '捆著录', +'捆著录' => '捆著录', +'捆著称' => '捆著称', '捆著稱' => '捆著称', '捆著者' => '捆著者', '捆著述' => '捆著述', @@ -13678,21 +13902,28 @@ $zh2Hans = array( '掖著作' => '掖著作', '掖著名' => '掖著名', '掖著錄' => '掖著录', +'掖著录' => '掖著录', '掖著稱' => '掖著称', +'掖著称' => '掖著称', '掖著者' => '掖著者', '掖著述' => '掖著述', '接著' => '接着', '接著作' => '接著作', '接著名' => '接著名', '接著錄' => '接著录', +'接著录' => '接著录', '接著稱' => '接著称', +'接著称' => '接著称', '接著者' => '接著者', '接著述' => '接著述', '揉著' => '揉着', +'揉著书' => '揉著书', '揉著書' => '揉著书', '揉著作' => '揉著作', '揉著名' => '揉著名', +'揉著录' => '揉著录', '揉著錄' => '揉著录', +'揉著称' => '揉著称', '揉著稱' => '揉著称', '揉著者' => '揉著者', '揉著述' => '揉著述', @@ -13700,7 +13931,9 @@ $zh2Hans = array( '提著作' => '提著作', '提著名' => '提著名', '提著錄' => '提著录', +'提著录' => '提著录', '提著稱' => '提著称', +'提著称' => '提著称', '提著者' => '提著者', '提著述' => '提著述', '摟著' => '搂着', @@ -13720,9 +13953,12 @@ $zh2Hans = array( '撰著' => '撰著', '撼著' => '撼着', '撼著書' => '撼著书', +'撼著书' => '撼著书', '撼著作' => '撼著作', '撼著名' => '撼著名', '撼著錄' => '撼著录', +'撼著录' => '撼著录', +'撼著称' => '撼著称', '撼著稱' => '撼著称', '撼著者' => '撼著者', '撼著述' => '撼著述', @@ -13730,7 +13966,9 @@ $zh2Hans = array( '敞著作' => '敞著作', '敞著名' => '敞著名', '敞著錄' => '敞著录', +'敞著录' => '敞著录', '敞著稱' => '敞著称', +'敞著称' => '敞著称', '敞著者' => '敞著者', '敞著述' => '敞著述', '數著' => '数着', @@ -13742,18 +13980,24 @@ $zh2Hans = array( '數著述' => '数著述', '斗著' => '斗着', '斗著書' => '斗著书', +'斗著书' => '斗著书', '斗著作' => '斗著作', '斗著名' => '斗著名', '斗著錄' => '斗著录', +'斗著录' => '斗著录', +'斗著称' => '斗著称', '斗著稱' => '斗著称', '斗著者' => '斗著者', '斗著述' => '斗著述', '斥著' => '斥着', '斥著書' => '斥著书', +'斥著书' => '斥著书', '斥著作' => '斥著作', '斥著名' => '斥著名', '斥著錄' => '斥著录', +'斥著录' => '斥著录', '斥著稱' => '斥著称', +'斥著称' => '斥著称', '斥著者' => '斥著者', '斥著述' => '斥著述', '新著' => '新著', @@ -13785,11 +14029,14 @@ $zh2Hans = array( '旋乾轉坤' => '旋乾转坤', '曠若發矇' => '旷若发矇', '昂著' => '昂着', +'昂著书' => '昂著书', '昂著書' => '昂著书', '昂著作' => '昂著作', '昂著名' => '昂著名', '昂著錄' => '昂著录', +'昂著录' => '昂著录', '昂著稱' => '昂著称', +'昂著称' => '昂著称', '昂著者' => '昂著者', '昂著述' => '昂著述', '易·乾' => '易·乾', @@ -13799,10 +14046,13 @@ $zh2Hans = array( '易经乾' => '易经乾', '映著' => '映着', '映著書' => '映著书', +'映著书' => '映著书', '映著作' => '映著作', '映著名' => '映著名', '映著錄' => '映著录', +'映著录' => '映著录', '映著稱' => '映著称', +'映著称' => '映著称', '映著者' => '映著者', '映著述' => '映著述', '昭著' => '昭著', @@ -13812,46 +14062,61 @@ $zh2Hans = array( '晃著作' => '晃著作', '晃著名' => '晃著名', '晃著錄' => '晃著录', +'晃著录' => '晃著录', +'晃著称' => '晃著称', '晃著稱' => '晃著称', '晃著者' => '晃著者', '晃著述' => '晃著述', '暗著' => '暗着', +'暗著书' => '暗著书', '暗著書' => '暗著书', '暗著作' => '暗著作', '暗著名' => '暗著名', '暗著錄' => '暗著录', +'暗著录' => '暗著录', +'暗著称' => '暗著称', '暗著稱' => '暗著称', '暗著者' => '暗著者', '暗著述' => '暗著述', '有著' => '有着', '有著書' => '有著书', +'有著书' => '有著书', '有著作' => '有著作', '有著名' => '有著名', '有著錄' => '有著录', +'有著录' => '有著录', +'有著称' => '有著称', '有著稱' => '有著称', '有著者' => '有著者', '有著述' => '有著述', '望著' => '望着', '望著作' => '望著作', '望著名' => '望著名', +'望著录' => '望著录', '望著錄' => '望著录', '望著稱' => '望著称', +'望著称' => '望著称', '望著者' => '望著者', '望著述' => '望著述', '朝乾夕惕' => '朝乾夕惕', '朝著' => '朝着', '朝著作' => '朝著作', '朝著名' => '朝著名', +'朝著录' => '朝著录', '朝著錄' => '朝著录', '朝著稱' => '朝著称', +'朝著称' => '朝著称', '朝著者' => '朝著者', '朝著述' => '朝著述', '本著' => '本着', +'本著书' => '本著书', '本著書' => '本著书', '本著作' => '本著作', '本著名' => '本著名', +'本著录' => '本著录', '本著錄' => '本著录', '本著稱' => '本著称', +'本著称' => '本著称', '本著者' => '本著者', '本著述' => '本著述', '朴於宇同' => '朴於宇同', @@ -13888,7 +14153,9 @@ $zh2Hans = array( '枕著作' => '枕著作', '枕著名' => '枕著名', '枕著錄' => '枕著录', +'枕著录' => '枕著录', '枕著稱' => '枕著称', +'枕著称' => '枕著称', '枕著者' => '枕著者', '枕著述' => '枕著述', '柳詒徵' => '柳诒徵', @@ -13907,16 +14174,21 @@ $zh2Hans = array( '梳著作' => '梳著作', '梳著名' => '梳著名', '梳著錄' => '梳著录', +'梳著录' => '梳著录', '梳著稱' => '梳著称', +'梳著称' => '梳著称', '梳著者' => '梳著者', '梳著述' => '梳著述', '樊於期' => '樊於期', '氆氌' => '氆氌', '求著' => '求着', +'求著书' => '求著书', '求著書' => '求著书', '求著作' => '求著作', '求著名' => '求著名', +'求著录' => '求著录', '求著錄' => '求著录', +'求著称' => '求著称', '求著稱' => '求著称', '求著者' => '求著者', '求著述' => '求著述', @@ -13925,46 +14197,61 @@ $zh2Hans = array( '沈積' => '沉积', '沈船' => '沉船', '沉著書' => '沉著书', +'沉著书' => '沉著书', '沉著作' => '沉著作', '沉著名' => '沉著名', '沉著錄' => '沉著录', +'沉著录' => '沉著录', +'沉著称' => '沉著称', '沉著稱' => '沉著称', '沉著者' => '沉著者', '沉著述' => '沉著述', '沈默' => '沉默', '沿著' => '沿着', +'沿著书' => '沿著书', '沿著書' => '沿著书', '沿著作' => '沿著作', '沿著名' => '沿著名', +'沿著录' => '沿著录', '沿著錄' => '沿著录', '沿著稱' => '沿著称', +'沿著称' => '沿著称', '沿著者' => '沿著者', '沿著述' => '沿著述', '氾濫' => '泛滥', '洗鍊' => '洗练', '活著' => '活着', +'活著书' => '活著书', '活著書' => '活著书', '活著作' => '活著作', '活著名' => '活著名', +'活著录' => '活著录', '活著錄' => '活著录', '活著稱' => '活著称', +'活著称' => '活著称', '活著者' => '活著者', '活著述' => '活著述', '流著' => '流着', +'流著书' => '流著书', '流著書' => '流著书', '流著作' => '流著作', '流著名' => '流著名', +'流著录' => '流著录', '流著錄' => '流著录', '流著稱' => '流著称', +'流著称' => '流著称', '流著者' => '流著者', '流著述' => '流著述', '流露著' => '流露着', '浮著' => '浮着', +'浮著书' => '浮著书', '浮著書' => '浮著书', '浮著作' => '浮著作', '浮著名' => '浮著名', +'浮著录' => '浮著录', '浮著錄' => '浮著录', '浮著稱' => '浮著称', +'浮著称' => '浮著称', '浮著者' => '浮著者', '浮著述' => '浮著述', '潤著' => '润着', @@ -13976,42 +14263,57 @@ $zh2Hans = array( '潤著者' => '润著者', '潤著述' => '润著述', '涵著' => '涵着', +'涵著书' => '涵著书', '涵著書' => '涵著书', '涵著作' => '涵著作', '涵著名' => '涵著名', +'涵著录' => '涵著录', '涵著錄' => '涵著录', '涵著稱' => '涵著称', +'涵著称' => '涵著称', '涵著者' => '涵著者', '涵著述' => '涵著述', '渴著' => '渴着', +'渴著书' => '渴著书', '渴著書' => '渴著书', '渴著作' => '渴著作', '渴著名' => '渴著名', +'渴著录' => '渴著录', '渴著錄' => '渴著录', +'渴著称' => '渴著称', '渴著稱' => '渴著称', '渴著者' => '渴著者', '渴著述' => '渴著述', '溢著' => '溢着', '溢著書' => '溢著书', +'溢著书' => '溢著书', '溢著作' => '溢著作', '溢著名' => '溢著名', '溢著錄' => '溢著录', +'溢著录' => '溢著录', +'溢著称' => '溢著称', '溢著稱' => '溢著称', '溢著者' => '溢著者', '溢著述' => '溢著述', '演著' => '演着', +'演著书' => '演著书', '演著書' => '演著书', '演著作' => '演著作', '演著名' => '演著名', +'演著录' => '演著录', '演著錄' => '演著录', '演著稱' => '演著称', +'演著称' => '演著称', '演著者' => '演著者', '演著述' => '演著述', '漫著' => '漫着', '漫著書' => '漫著书', +'漫著书' => '漫著书', '漫著作' => '漫著作', '漫著名' => '漫著名', +'漫著录' => '漫著录', '漫著錄' => '漫著录', +'漫著称' => '漫著称', '漫著稱' => '漫著称', '漫著者' => '漫著者', '漫著述' => '漫著述', @@ -14030,10 +14332,13 @@ $zh2Hans = array( '燒著者' => '烧著者', '燒著述' => '烧著述', '照著' => '照着', +'照著书' => '照著书', '照著書' => '照著书', '照著作' => '照著作', '照著名' => '照著名', +'照著录' => '照著录', '照著錄' => '照著录', +'照著称' => '照著称', '照著稱' => '照著称', '照著者' => '照著者', '照著述' => '照著述', @@ -14069,25 +14374,33 @@ $zh2Hans = array( '猜著作' => '猜著作', '猜著名' => '猜著名', '猜著錄' => '猜著录', +'猜著录' => '猜著录', +'猜著称' => '猜著称', '猜著稱' => '猜著称', '猜著者' => '猜著者', '猜著述' => '猜著述', '玩著' => '玩着', '甜著' => '甜着', '甜著書' => '甜著书', +'甜著书' => '甜著书', '甜著作' => '甜著作', '甜著名' => '甜著名', +'甜著录' => '甜著录', '甜著錄' => '甜著录', '甜著稱' => '甜著称', +'甜著称' => '甜著称', '甜著者' => '甜著者', '甜著述' => '甜著述', '用不著' => '用不着', '用得著' => '用得着', '用著' => '用着', +'用著书' => '用著书', '用著書' => '用著书', '用著作' => '用著作', '用著名' => '用著名', +'用著录' => '用著录', '用著錄' => '用著录', +'用著称' => '用著称', '用著稱' => '用著称', '用著者' => '用著者', '用著述' => '用著述', @@ -14102,14 +14415,19 @@ $zh2Hans = array( '留著作' => '留著作', '留著名' => '留著名', '留著錄' => '留著录', +'留著录' => '留著录', '留著稱' => '留著称', +'留著称' => '留著称', '留著者' => '留著者', '留著述' => '留著述', '疑著' => '疑着', +'疑著书' => '疑著书', '疑著書' => '疑著书', '疑著作' => '疑著作', '疑著名' => '疑著名', +'疑著录' => '疑著录', '疑著錄' => '疑著录', +'疑著称' => '疑著称', '疑著稱' => '疑著称', '疑著者' => '疑著者', '疑著述' => '疑著述', @@ -14123,11 +14441,14 @@ $zh2Hans = array( '皺著者' => '皱著者', '皺著述' => '皱著述', '盛著' => '盛着', +'盛著书' => '盛著书', '盛著書' => '盛著书', '盛著作' => '盛著作', '盛著名' => '盛著名', '盛著錄' => '盛著录', +'盛著录' => '盛著录', '盛著稱' => '盛著称', +'盛著称' => '盛著称', '盛著者' => '盛著者', '盛著述' => '盛著述', '盯著' => '盯着', @@ -14135,15 +14456,20 @@ $zh2Hans = array( '盯著作' => '盯著作', '盯著名' => '盯著名', '盯著錄' => '盯著录', +'盯著录' => '盯著录', '盯著稱' => '盯著称', +'盯著称' => '盯著称', '盯著者' => '盯著者', '盯著述' => '盯著述', '盾著' => '盾着', '盾著書' => '盾著书', +'盾著书' => '盾著书', '盾著作' => '盾著作', '盾著名' => '盾著名', '盾著錄' => '盾著录', +'盾著录' => '盾著录', '盾著稱' => '盾著称', +'盾著称' => '盾著称', '盾著者' => '盾著者', '盾著述' => '盾著述', '看不著' => '看不着', @@ -14152,8 +14478,10 @@ $zh2Hans = array( '看著書' => '看着书', '看著作' => '看著作', '看著名' => '看著名', +'看著录' => '看著录', '看著錄' => '看著录', '看著稱' => '看著称', +'看著称' => '看著称', '看著者' => '看著者', '看著述' => '看著述', '著業' => '着业', @@ -14262,9 +14590,12 @@ $zh2Hans = array( '睡得著' => '睡得着', '睡著' => '睡着', '睡著書' => '睡著书', +'睡著书' => '睡著书', '睡著作' => '睡著作', '睡著名' => '睡著名', '睡著錄' => '睡著录', +'睡著录' => '睡著录', +'睡著称' => '睡著称', '睡著稱' => '睡著称', '睡著者' => '睡著者', '睡著述' => '睡著述', @@ -14282,15 +14613,20 @@ $zh2Hans = array( '瞧著書' => '瞧着书', '瞧著作' => '瞧著作', '瞧著名' => '瞧著名', +'瞧著录' => '瞧著录', '瞧著錄' => '瞧著录', +'瞧著称' => '瞧著称', '瞧著稱' => '瞧著称', '瞧著者' => '瞧著者', '瞧著述' => '瞧著述', '瞪著' => '瞪着', '瞪著書' => '瞪著书', +'瞪著书' => '瞪著书', '瞪著作' => '瞪著作', '瞪著名' => '瞪著名', '瞪著錄' => '瞪著录', +'瞪著录' => '瞪著录', +'瞪著称' => '瞪著称', '瞪著稱' => '瞪著称', '瞪著者' => '瞪著者', '瞪著述' => '瞪著述', @@ -14298,27 +14634,36 @@ $zh2Hans = array( '石碁镇' => '石碁镇', '石碁鎮' => '石碁镇', '福著' => '福着', +'福著书' => '福著书', '福著書' => '福著书', '福著作' => '福著作', '福著名' => '福著名', '福著錄' => '福著录', +'福著录' => '福著录', '福著稱' => '福著称', +'福著称' => '福著称', '福著者' => '福著者', '福著述' => '福著述', '穀梁' => '穀梁', '空著' => '空着', +'空著书' => '空著书', '空著書' => '空著书', '空著作' => '空著作', '空著名' => '空著名', +'空著录' => '空著录', '空著錄' => '空著录', +'空著称' => '空著称', '空著稱' => '空著称', '空著者' => '空著者', '空著述' => '空著述', '穿著' => '穿着', +'穿著书' => '穿著书', '穿著書' => '穿著书', '穿著作' => '穿著作', '穿著名' => '穿著名', +'穿著录' => '穿著录', '穿著錄' => '穿著录', +'穿著称' => '穿著称', '穿著稱' => '穿著称', '穿著者' => '穿著者', '穿著述' => '穿著述', @@ -14331,28 +14676,37 @@ $zh2Hans = array( '豎著者' => '竖著者', '豎著述' => '竖著述', '站著' => '站着', +'站著书' => '站著书', '站著書' => '站著书', '站著作' => '站著作', '站著名' => '站著名', '站著錄' => '站著录', +'站著录' => '站著录', +'站著称' => '站著称', '站著稱' => '站著称', '站著者' => '站著者', '站著述' => '站著述', '笑著' => '笑着', +'笑著书' => '笑著书', '笑著書' => '笑著书', '笑著作' => '笑著作', '笑著名' => '笑著名', +'笑著录' => '笑著录', '笑著錄' => '笑著录', +'笑著称' => '笑著称', '笑著稱' => '笑著称', '笑著者' => '笑著者', '笑著述' => '笑著述', '答覆' => '答复', '管著' => '管着', +'管著书' => '管著书', '管著書' => '管著书', '管著作' => '管著作', '管著名' => '管著名', +'管著录' => '管著录', '管著錄' => '管著录', '管著稱' => '管著称', +'管著称' => '管著称', '管著者' => '管著者', '管著述' => '管著述', '米澤瑠美' => '米泽瑠美', @@ -14383,45 +14737,60 @@ $zh2Hans = array( '纏著者' => '缠著者', '纏著述' => '缠著述', '罩著' => '罩着', +'罩著书' => '罩著书', '罩著書' => '罩著书', '罩著作' => '罩著作', '罩著名' => '罩著名', '罩著錄' => '罩著录', +'罩著录' => '罩著录', +'罩著称' => '罩著称', '罩著稱' => '罩著称', '罩著者' => '罩著者', '罩著述' => '罩著述', '美著' => '美着', +'美著书' => '美著书', '美著書' => '美著书', '美著作' => '美著作', '美著名' => '美著名', +'美著录' => '美著录', '美著錄' => '美著录', '美著稱' => '美著称', +'美著称' => '美著称', '美著者' => '美著者', '美著述' => '美著述', '耀著' => '耀着', '耀著書' => '耀著书', +'耀著书' => '耀著书', '耀著作' => '耀著作', '耀著名' => '耀著名', '耀著錄' => '耀著录', +'耀著录' => '耀著录', +'耀著称' => '耀著称', '耀著稱' => '耀著称', '耀著者' => '耀著者', '耀著述' => '耀著述', '老幺' => '老幺', '考著' => '考着', '考著書' => '考著书', +'考著书' => '考著书', '考著作' => '考著作', '考著名' => '考著名', '考著錄' => '考著录', +'考著录' => '考著录', '考著稱' => '考著称', +'考著称' => '考著称', '考著者' => '考著者', '考著述' => '考著述', '肉乾乾' => '肉干干', '肘手鍊足' => '肘手链足', '背著' => '背着', +'背著书' => '背著书', '背著書' => '背著书', '背著作' => '背著作', '背著名' => '背著名', +'背著录' => '背著录', '背著錄' => '背著录', +'背著称' => '背著称', '背著稱' => '背著称', '背著者' => '背著者', '背著述' => '背著述', @@ -14442,11 +14811,14 @@ $zh2Hans = array( '藝著者' => '艺著者', '藝著述' => '艺著述', '苦著' => '苦着', +'苦著书' => '苦著书', '苦著書' => '苦著书', '苦著作' => '苦著作', '苦著名' => '苦著名', +'苦著录' => '苦著录', '苦著錄' => '苦著录', '苦著稱' => '苦著称', +'苦著称' => '苦著称', '苦著者' => '苦著者', '苦著述' => '苦著述', '苧烯' => '苧烯', @@ -14462,11 +14834,14 @@ $zh2Hans = array( '蕭乾' => '萧乾', '萧乾' => '萧乾', '落著' => '落着', +'落著书' => '落著书', '落著書' => '落著书', '落著作' => '落著作', '落著名' => '落著名', +'落著录' => '落著录', '落著錄' => '落著录', '落著稱' => '落著称', +'落著称' => '落著称', '落著者' => '落著者', '落著述' => '落著述', '著書' => '著书', @@ -14482,42 +14857,57 @@ $zh2Hans = array( '著述' => '著述', '蒙著' => '蒙着', '蒙著書' => '蒙著书', +'蒙著书' => '蒙著书', '蒙著作' => '蒙著作', '蒙著名' => '蒙著名', +'蒙著录' => '蒙著录', '蒙著錄' => '蒙著录', '蒙著稱' => '蒙著称', +'蒙著称' => '蒙著称', '蒙著者' => '蒙著者', '蒙著述' => '蒙著述', '藏著' => '藏着', '藏著書' => '藏著书', +'藏著书' => '藏著书', '藏著作' => '藏著作', '藏著名' => '藏著名', '藏著錄' => '藏著录', +'藏著录' => '藏著录', +'藏著称' => '藏著称', '藏著稱' => '藏著称', '藏著者' => '藏著者', '藏著述' => '藏著述', '蘸著' => '蘸着', '蘸著書' => '蘸著书', +'蘸著书' => '蘸著书', '蘸著作' => '蘸著作', '蘸著名' => '蘸著名', +'蘸著录' => '蘸著录', '蘸著錄' => '蘸著录', '蘸著稱' => '蘸著称', +'蘸著称' => '蘸著称', '蘸著者' => '蘸著者', '蘸著述' => '蘸著述', '行著' => '行着', +'行著书' => '行著书', '行著書' => '行著书', '行著作' => '行著作', '行著名' => '行著名', +'行著录' => '行著录', '行著錄' => '行著录', '行著稱' => '行著称', +'行著称' => '行著称', '行著者' => '行著者', '行著述' => '行著述', '衣著' => '衣着', +'衣著书' => '衣著书', '衣著書' => '衣著书', '衣著作' => '衣著作', '衣著名' => '衣著名', +'衣著录' => '衣著录', '衣著錄' => '衣著录', '衣著稱' => '衣著称', +'衣著称' => '衣著称', '衣著者' => '衣著者', '衣著述' => '衣著述', '裝著' => '装着', @@ -14530,9 +14920,12 @@ $zh2Hans = array( '裝著述' => '装著述', '裹著' => '裹着', '裹著書' => '裹著书', +'裹著书' => '裹著书', '裹著作' => '裹著作', '裹著名' => '裹著名', +'裹著录' => '裹著录', '裹著錄' => '裹著录', +'裹著称' => '裹著称', '裹著稱' => '裹著称', '裹著者' => '裹著者', '裹著述' => '裹著述', @@ -14575,10 +14968,13 @@ $zh2Hans = array( '語著者' => '语著者', '語著述' => '语著述', '豫著' => '豫着', +'豫著书' => '豫著书', '豫著書' => '豫著书', '豫著作' => '豫著作', '豫著名' => '豫著名', +'豫著录' => '豫著录', '豫著錄' => '豫著录', +'豫著称' => '豫著称', '豫著稱' => '豫著称', '豫著者' => '豫著者', '豫著述' => '豫著述', @@ -14592,10 +14988,13 @@ $zh2Hans = array( '貞著述' => '贞著述', '走著' => '走着', '走著書' => '走著书', +'走著书' => '走著书', '走著作' => '走著作', '走著名' => '走著名', '走著錄' => '走著录', +'走著录' => '走著录', '走著稱' => '走著称', +'走著称' => '走著称', '走著者' => '走著者', '走著述' => '走著述', '趕著' => '赶着', @@ -14608,9 +15007,12 @@ $zh2Hans = array( '趕著述' => '赶著述', '趴著' => '趴着', '趴著書' => '趴著书', +'趴著书' => '趴著书', '趴著作' => '趴著作', '趴著名' => '趴著名', +'趴著录' => '趴著录', '趴著錄' => '趴著录', +'趴著称' => '趴著称', '趴著稱' => '趴著称', '趴著者' => '趴著者', '趴著述' => '趴著述', @@ -14624,66 +15026,90 @@ $zh2Hans = array( '躍著述' => '跃著述', '跑著' => '跑着', '跑著書' => '跑著书', +'跑著书' => '跑著书', '跑著作' => '跑著作', '跑著名' => '跑著名', +'跑著录' => '跑著录', '跑著錄' => '跑著录', '跑著稱' => '跑著称', +'跑著称' => '跑著称', '跑著者' => '跑著者', '跑著述' => '跑著述', '跟著' => '跟着', +'跟著书' => '跟著书', '跟著書' => '跟著书', '跟著作' => '跟著作', '跟著名' => '跟著名', +'跟著录' => '跟著录', '跟著錄' => '跟著录', +'跟著称' => '跟著称', '跟著稱' => '跟著称', '跟著者' => '跟著者', '跟著述' => '跟著述', '跪著' => '跪着', '跪著書' => '跪著书', +'跪著书' => '跪著书', '跪著作' => '跪著作', '跪著名' => '跪著名', '跪著錄' => '跪著录', +'跪著录' => '跪著录', '跪著稱' => '跪著称', +'跪著称' => '跪著称', '跪著者' => '跪著者', '跪著述' => '跪著述', '跳著' => '跳着', +'跳著书' => '跳著书', '跳著書' => '跳著书', '跳著作' => '跳著作', '跳著名' => '跳著名', +'跳著录' => '跳著录', '跳著錄' => '跳著录', +'跳著称' => '跳著称', '跳著稱' => '跳著称', '跳著者' => '跳著者', '跳著述' => '跳著述', '躊躇滿志' => '踌躇滿志', '踏著' => '踏着', '踏著書' => '踏著书', +'踏著书' => '踏著书', '踏著作' => '踏著作', '踏著名' => '踏著名', '踏著錄' => '踏著录', +'踏著录' => '踏著录', +'踏著称' => '踏著称', '踏著稱' => '踏著称', '踏著者' => '踏著者', '踏著述' => '踏著述', '踩著' => '踩着', +'踩著书' => '踩著书', '踩著書' => '踩著书', '踩著作' => '踩著作', '踩著名' => '踩著名', +'踩著录' => '踩著录', '踩著錄' => '踩著录', '踩著稱' => '踩著称', +'踩著称' => '踩著称', '踩著者' => '踩著者', '踩著述' => '踩著述', '身著' => '身着', +'身著书' => '身著书', '身著書' => '身著书', '身著作' => '身著作', '身著名' => '身著名', +'身著录' => '身著录', '身著錄' => '身著录', '身著稱' => '身著称', +'身著称' => '身著称', '身著者' => '身著者', '身著述' => '身著述', '躺著' => '躺着', '躺著書' => '躺著书', +'躺著书' => '躺著书', '躺著作' => '躺著作', '躺著名' => '躺著名', '躺著錄' => '躺著录', +'躺著录' => '躺著录', +'躺著称' => '躺著称', '躺著稱' => '躺著称', '躺著者' => '躺著者', '躺著述' => '躺著述', @@ -14733,34 +15159,46 @@ $zh2Hans = array( '迫著' => '迫着', '追著' => '追着', '追著書' => '追著书', +'追著书' => '追著书', '追著作' => '追著作', '追著名' => '追著名', '追著錄' => '追著录', +'追著录' => '追著录', +'追著称' => '追著称', '追著稱' => '追著称', '追著者' => '追著者', '追著述' => '追著述', '逆著' => '逆着', '逆著書' => '逆著书', +'逆著书' => '逆著书', '逆著作' => '逆著作', '逆著名' => '逆著名', '逆著錄' => '逆著录', +'逆著录' => '逆著录', +'逆著称' => '逆著称', '逆著稱' => '逆著称', '逆著者' => '逆著者', '逆著述' => '逆著述', '逼著' => '逼着', '逼著書' => '逼著书', +'逼著书' => '逼著书', '逼著作' => '逼著作', '逼著名' => '逼著名', '逼著錄' => '逼著录', +'逼著录' => '逼著录', +'逼著称' => '逼著称', '逼著稱' => '逼著称', '逼著者' => '逼著者', '逼著述' => '逼著述', '遇著' => '遇着', '遇著書' => '遇著书', +'遇著书' => '遇著书', '遇著作' => '遇著作', '遇著名' => '遇著名', '遇著錄' => '遇著录', +'遇著录' => '遇著录', '遇著稱' => '遇著称', +'遇著称' => '遇著称', '遇著者' => '遇著者', '遇著述' => '遇著述', '遺著' => '遗著', @@ -14768,10 +15206,13 @@ $zh2Hans = array( '郭子乾' => '郭子乾', '配著' => '配着', '配著書' => '配著书', +'配著书' => '配著书', '配著作' => '配著作', '配著名' => '配著名', '配著錄' => '配著录', +'配著录' => '配著录', '配著稱' => '配著称', +'配著称' => '配著称', '配著者' => '配著者', '配著述' => '配著述', '釀著' => '酿着', @@ -14784,8 +15225,8 @@ $zh2Hans = array( '釀著述' => '酿著述', '醯壺' => '醯壶', '醯壶' => '醯壶', -'醯酱' => '醯酱', '醯醬' => '醯酱', +'醯酱' => '醯酱', '醯醋' => '醯醋', '醯醢' => '醯醢', '醯鸡' => '醯鸡', @@ -14836,30 +15277,39 @@ $zh2Hans = array( '阿部正瞭' => '阿部正瞭', '附著' => '附着', '附睪' => '附睾', +'附著书' => '附著书', '附著書' => '附著书', '附著作' => '附著作', '附著名' => '附著名', '附著錄' => '附著录', +'附著录' => '附著录', +'附著称' => '附著称', '附著稱' => '附著称', '附著者' => '附著者', '附著述' => '附著述', '陈乾生' => '陈乾生', '陳乾生' => '陈乾生', -'陳公乾生' => '陈公乾生', '陈公乾生' => '陈公乾生', +'陳公乾生' => '陈公乾生', '陋著' => '陋着', '陋著書' => '陋著书', +'陋著书' => '陋著书', '陋著作' => '陋著作', '陋著名' => '陋著名', '陋著錄' => '陋著录', +'陋著录' => '陋著录', +'陋著称' => '陋著称', '陋著稱' => '陋著称', '陋著者' => '陋著者', '陋著述' => '陋著述', '陪著' => '陪着', +'陪著书' => '陪著书', '陪著書' => '陪著书', '陪著作' => '陪著作', '陪著名' => '陪著名', +'陪著录' => '陪著录', '陪著錄' => '陪著录', +'陪著称' => '陪著称', '陪著稱' => '陪著称', '陪著者' => '陪著者', '陪著述' => '陪著述', @@ -14874,19 +15324,25 @@ $zh2Hans = array( '隨著者' => '随著者', '隨著述' => '随著述', '隔著' => '隔着', +'隔著书' => '隔著书', '隔著書' => '隔著书', '隔著作' => '隔著作', '隔著名' => '隔著名', +'隔著录' => '隔著录', '隔著錄' => '隔著录', +'隔著称' => '隔著称', '隔著稱' => '隔著称', '隔著者' => '隔著者', '隔著述' => '隔著述', '隱睪' => '隱睾', '雅著' => '雅着', +'雅著书' => '雅著书', '雅著書' => '雅著书', '雅著作' => '雅著作', '雅著名' => '雅著名', +'雅著录' => '雅著录', '雅著錄' => '雅著录', +'雅著称' => '雅著称', '雅著稱' => '雅著称', '雅著者' => '雅著者', '雅著述' => '雅著述', @@ -14895,7 +15351,9 @@ $zh2Hans = array( '靠著作' => '靠著作', '靠著名' => '靠著名', '靠著錄' => '靠著录', +'靠著录' => '靠著录', '靠著稱' => '靠著称', +'靠著称' => '靠著称', '靠著者' => '靠著者', '靠著述' => '靠著述', '頂著' => '顶着', @@ -14965,18 +15423,24 @@ $zh2Hans = array( '騙著者' => '骗著者', '騙著述' => '骗著述', '高著' => '高着', +'高著书' => '高著书', '高著書' => '高著书', '高著作' => '高著作', '高著名' => '高著名', +'高著录' => '高著录', '高著錄' => '高著录', '高著稱' => '高著称', +'高著称' => '高著称', '高著者' => '高著者', '高著述' => '高著述', '髭著' => '髭着', +'髭著书' => '髭著书', '髭著書' => '髭著书', '髭著作' => '髭著作', '髭著名' => '髭著名', '髭著錄' => '髭著录', +'髭著录' => '髭著录', +'髭著称' => '髭著称', '髭著稱' => '髭著称', '髭著者' => '髭著者', '髭著述' => '髭著述', @@ -14995,10 +15459,13 @@ $zh2Hans = array( '黄润乾' => '黄润乾', '黃潤乾' => '黄润乾', '黏著' => '黏着', +'黏著书' => '黏著书', '黏著書' => '黏著书', '黏著作' => '黏著作', '黏著名' => '黏著名', +'黏著录' => '黏著录', '黏著錄' => '黏著录', +'黏著称' => '黏著称', '黏著稱' => '黏著称', '黏著者' => '黏著者', '黏著述' => '黏著述', diff --git a/includes/zhtable/Makefile.py b/includes/zhtable/Makefile.py index 88c6a63529..9ad0d65f93 100644 --- a/includes/zhtable/Makefile.py +++ b/includes/zhtable/Makefile.py @@ -1,25 +1,33 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # @author Philip -import tarfile, zipfile +import tarfile as tf +import zipfile as zf import os, re, shutil, sys, platform pyversion = platform.python_version() islinux = platform.system().lower() == 'linux' -if pyversion[:3] in ['2.5', '2.6', '2.7']: +if pyversion[:3] in ['2.6', '2.7']: import urllib as urllib_request import codecs - uniopen = codecs.open - def unichr2(i): - if sys.maxunicode >= 0x10000 or i < 0x10000: - return unichr(i) - else: - return unichr(0xD7C0+(i>>10)) + unichr(0xDC00+(i&0x3FF)) + open = codecs.open + _unichr = unichr + if sys.maxunicode < 0x10000: + def unichr(i): + if i < 0x10000: + return _unichr(i) + else: + return _unichr( 0xD7C0 + ( i>>10 ) ) + _unichr( 0xDC00 + ( i & 0x3FF ) ) elif pyversion[:2] == '3.': import urllib.request as urllib_request - uniopen = open - unichr2 = chr + unichr = chr + +def unichr2( *args ): + return [unichr( int( i.split('<')[0][2:], 16 ) ) for i in args] + +def unichr3( *args ): + return [unichr( int( i[2:7], 16 ) ) for i in args if i[2:7]] # DEFINE SF_MIRROR = 'easynews' @@ -28,14 +36,14 @@ SCIM_PINYIN_VER = '0.5.91' LIBTABE_VER = '0.2.3' # END OF DEFINE -def GetFileFromURL( url, dest ): - if os.path.isfile(dest): +def download( url, dest ): + if os.path.isfile( dest ): print( 'File %s up to date.' % dest ) return global islinux if islinux: # we use wget instead urlretrieve under Linux, - # because wget will display details like download progress + # because wget could display details like download progress os.system('wget %s' % url) else: print( 'Downloading from [%s] ...' % url ) @@ -43,191 +51,200 @@ def GetFileFromURL( url, dest ): print( 'Download complete.\n' ) return -def GetFileFromUnihan( path ): - print( 'Extracting files from %s ...' % path ) - text = zipfile.ZipFile(path).read('Unihan_Variants.txt') - uhfile = uniopen('Unihan_Variants.txt', 'w') - uhfile.write(text) - uhfile.close() - return +def uncompress( fp, member, encoding = 'U8' ): + name = member.rsplit( '/', 1 )[-1] + print( 'Extracting %s ...' % name ) + fp.extract( member ) + shutil.move( member, name ) + if '/' in member: + shutil.rmtree( member.split( '/', 1 )[0] ) + return open( name, 'rb', encoding, 'ignore' ) -def GetFileFromTar( path, member, rename ): - print( 'Extracting %s from %s ...' % (rename, path) ) - tarfile.open(path, 'r:gz').extract(member) - shutil.move(member, rename) - tree_rmv = member.split('/')[0] - shutil.rmtree(tree_rmv) - return - -def ReadBIG5File( dest ): - print( 'Reading and decoding %s ...' % dest ) - f1 = uniopen( dest, 'r', encoding='big5hkscs', errors='replace' ) - text = f1.read() - text = text.replace( '\ufffd', '\n' ) - f1.close() - f2 = uniopen( dest, 'w', encoding='utf8' ) - f2.write(text) - f2.close() - return text +unzip = lambda path, member, encoding = 'U8': \ + uncompress( zf.ZipFile( path ), member, encoding ) -def ReadFile( dest ): - print( 'Reading and decoding %s ...' % dest ) - f = uniopen( dest, 'r', encoding='utf8' ) - ret = f.read() - f.close() - return ret +untargz = lambda path, member, encoding = 'U8': \ + uncompress( tf.open( path, 'r:gz' ), member, encoding ) -def ReadUnihanFile( dest ): - print( 'Reading and decoding %s ...' % dest ) - f = uniopen( dest, 'r', encoding='utf8' ) - t2s_code = [] - s2t_code = [] - while True: - line = f.readline() - if line: - if line.startswith('#'): - continue - elif not line.find('kSimplifiedVariant') == -1: - temp = line.split('kSimplifiedVariant') - t2s_code.append( ( temp[0].strip(), temp[1].strip() ) ) - elif not line.find('kTraditionalVariant') == -1: - temp = line.split('kTraditionalVariant') - s2t_code.append( ( temp[0].strip(), temp[1].strip() ) ) - else: +def parserCore( fp, pos, beginmark = None, endmark = None ): + if beginmark and endmark: + start = False + else: start = True + mlist = set() + for line in fp: + if beginmark and line.startswith( beginmark ): + start = True + continue + elif endmark and line.startswith( endmark ): break - f.close() - return ( t2s_code, s2t_code ) + if start and not line.startswith( '#' ): + elems = line.split() + if len( elems ) < 2: + continue + elif len( elems[0] ) > 1: + mlist.add( elems[pos] ) + return mlist -def RemoveRows( text, num ): - text = re.sub( '.*\s*', '', text, num) - return text +def tablesParser( path, name ): + """ Read file from scim-tables and parse it. """ + global SCIM_TABLES_VER + src = 'scim-tables-%s/tables/zh/%s' % ( SCIM_TABLES_VER, name ) + fp = untargz( path, src, 'U8' ) + return parserCore( fp, 1, 'BEGIN_TABLE', 'END_TABLE' ) -def RemoveOneCharConv( text ): - preg = re.compile('^.\s*$', re.MULTILINE) - text = preg.sub( '', text ) - return text +ezbigParser = lambda path: tablesParser( path, 'EZ-Big.txt.in' ) +wubiParser = lambda path: tablesParser( path, 'Wubi.txt.in' ) +zrmParser = lambda path: tablesParser( path, 'Ziranma.txt.in' ) -def ConvertToChar( code ): - code = code.split('<')[0] - return unichr2( int( code[2:], 16 ) ) - -def GetDefaultTable( code_table ): - char_table = {} - for ( f, t ) in code_table: - if f and t: - from_char = ConvertToChar( f ) - to_chars = [ConvertToChar( code ) for code in t.split()] - char_table[from_char] = to_chars - return char_table - -def GetManualTable( dest ): - text = ReadFile( dest ) - temp1 = text.split() - char_table = {} - for elem in temp1: - elem = elem.strip('|') - if elem: - temp2 = elem.split( '|', 1 ) - from_char = unichr2( int( temp2[0][2:7], 16 ) ) - to_chars = [unichr2( int( code[2:7], 16 ) ) for code in temp2[1].split('|')] - char_table[from_char] = to_chars - return char_table - -def GetValidTable( src_table ): - valid_table = {} - for f, t in src_table.items(): - valid_table[f] = t[0] - return valid_table - -def GetToManyRules( src_table ): - tomany_table = {} - for f, t in src_table.items(): - for i in range(1, len(t)): - tomany_table[t[i]] = True - return tomany_table - -def RemoveRules( dest, table ): - text = ReadFile( dest ) - temp1 = text.split() - for elem in temp1: - f = '' - t = '' - elem = elem.strip().replace( '"', '' ).replace( '\'', '' ) - if '=>' in elem: - if elem.startswith( '=>' ): - t = elem.replace( '=>', '' ).strip() - elif elem.endswith( '=>' ): - f = elem.replace( '=>', '' ).strip() - else: - temp2 = elem.split( '=>' ) - f = temp2[0].strip() - t = temp2[1].strip() - try: - table.pop(f, t) - continue - except: - continue +def phraseParser( path ): + """ Read phrase_lib.txt and parse it. """ + global SCIM_PINYIN_VER + src = 'scim-pinyin-%s/data/phrase_lib.txt' % SCIM_PINYIN_VER + dst = 'phrase_lib.txt' + fp = untargz( path, src, 'U8' ) + return parserCore( fp, 0 ) + +def tsiParser( path ): + """ Read tsi.src and parse it. """ + src = 'libtabe/tsi-src/tsi.src' + dst = 'tsi.src' + fp = untargz( path, src, 'big5hkscs' ) + return parserCore( fp, 0 ) + +def unihanParser( path ): + """ Read Unihan_Variants.txt and parse it. """ + fp = unzip( path, 'Unihan_Variants.txt', 'U8' ) + t2s = dict() + s2t = dict() + for line in fp: + if line.startswith( '#' ): + continue else: - f = t = elem + elems = line.split() + if len( elems ) < 3: + continue + type = elems.pop( 1 ) + elems = unichr2( *elems ) + if type == 'kTraditionalVariant': + s2t[elems[0]] = elems[1:] + elif type == 'kSimplifiedVariant': + t2s[elems[0]] = elems[1:] + fp.close() + return ( t2s, s2t ) + +def applyExcludes( mlist, path ): + """ Apply exclude rules from path to mlist. """ + excludes = open( path, 'rb', 'U8' ).read().split() + excludes = [word.split( '#' )[0].strip() for word in excludes] + excludes = '|'.join( excludes ) + excptn = re.compile( '.*(?:%s).*' % excludes ) + diff = [mword for mword in mlist if excptn.search( mword )] + mlist.difference_update( diff ) + return mlist + +def charManualTable( path ): + fp = open( path, 'rb', 'U8' ) + ret = {} + for line in fp: + elems = line.split( '#' )[0].split( '|' ) + elems = unichr3( *elems ) + if len( elems ) > 1: + ret[elems[0]] = elems[1:] + return ret + +def toManyRules( src_table ): + tomany = set() + for ( f, t ) in src_table.iteritems(): + for i in range( 1, len( t ) ): + tomany.add( t[i] ) + return tomany + +def removeRules( path, table ): + fp = open( path, 'rb', 'U8' ) + texc = list() + for line in fp: + elems = line.split( '=>' ) + f = t = elems[0].strip() + if len( elems ) == 2: + t = elems[1].strip() + f = f.strip('"').strip("'") + t = t.strip('"').strip("'") if f: try: - table.pop(f) + table.pop( f ) except: - x = 1 + pass if t: - for temp_f, temp_t in table.copy().items(): - if temp_t == t: - table.pop(temp_f) + texc.append( t ) + texcptn = re.compile( '^(?:%s)$' % '|'.join( texc ) ) + for (tmp_f, tmp_t) in table.copy().iteritems(): + if texcptn.match( tmp_t ): + table.pop( tmp_f ) return table -def DictToSortedList1( src_table ): - return sorted( src_table.items(), key = lambda m: m[0] ) #sorted( temp_table, key = lambda m: len( m[0] ) ) +def customRules( path ): + fp = open( path, 'rb', 'U8' ) + ret = dict() + for line in fp: + elems = line.split( '#' )[0].split() + if len( elems ) > 1: + ret[elems[0]] = elems[1] + return ret -def DictToSortedList2( src_table ): - return sorted( src_table.items(), key = lambda m: m[1] ) +def dictToSortedList( src_table, pos ): + return sorted( src_table.items(), key = lambda m: m[pos] ) -def Converter( string, conv_table ): +def translate( text, conv_table ): i = 0 - while i < len(string): - for j in range(len(string) - i, 0, -1): - f = string[i:][:j] + while i < len( text ): + for j in range( len( text ) - i, 0, -1 ): + f = text[i:][:j] t = conv_table.get( f ) if t: - string = string[:i] + t + string[i:][j:] + text = text[:i] + t + text[i:][j:] i += len(t) - 1 break i += 1 - return string + return text -def GetDefaultWordsTable( src_wordlist, src_tomany, char_conv_table, char_reconv_table ): - wordlist = list( set( src_wordlist ) ) +def manualWordsTable( path, conv_table, reconv_table ): + fp = open( path, 'rb', 'U8' ) + reconv_table = {} + wordlist = [line.split( '#' )[0].strip() for line in fp] + wordlist = list( set( wordlist ) ) + wordlist.sort( key = len, reverse = True ) + while wordlist: + word = wordlist.pop() + new_word = translate( word, conv_table ) + rcv_word = translate( word, reconv_table ) + if word != rcv_word: + reconv_table[word] = word + reconv_table[new_word] = word + return reconv_table + +def defaultWordsTable( src_wordlist, src_tomany, char_conv_table, char_reconv_table ): + wordlist = list( src_wordlist ) wordlist.sort( key = len, reverse = True ) word_conv_table = {} word_reconv_table = {} + conv_table = char_conv_table.copy() + reconv_table = char_reconv_table.copy() + tomanyptn = re.compile( '(?:%s)' % '|'.join( src_tomany ) ) while wordlist: - conv_table = {} - reconv_table = {} conv_table.update( word_conv_table ) - conv_table.update( char_conv_table ) reconv_table.update( word_reconv_table ) - reconv_table.update( char_reconv_table ) word = wordlist.pop() - new_word_len = word_len = len(word) + new_word_len = word_len = len( word ) while new_word_len == word_len: - rvt_test = False - for char in word: - rvt_test = rvt_test or src_tomany.get(char) - test_word = Converter( word, reconv_table ) - new_word = Converter( word, conv_table ) - if not reconv_table.get( new_word ): - if not test_word == word: - word_conv_table[word] = new_word - word_reconv_table[new_word] = word - elif rvt_test: - rvt_word = Converter( new_word, reconv_table ) - if not rvt_word == word: - word_conv_table[word] = new_word - word_reconv_table[new_word] = word + add = False + test_word = translate( word, reconv_table ) + new_word = translate( word, conv_table ) + if not reconv_table.get( new_word ) \ + and ( test_word != word \ + or ( tomanyptn.search( word ) \ + and word != translate( new_word, reconv_table ) ) ): + word_conv_table[word] = new_word + word_reconv_table[new_word] = word try: word = wordlist.pop() except IndexError: @@ -235,205 +252,98 @@ def GetDefaultWordsTable( src_wordlist, src_tomany, char_conv_table, char_reconv new_word_len = len(word) return word_reconv_table -def GetManualWordsTable( src_wordlist, conv_table ): - src_wordlist = [items.split('#')[0].strip() for items in src_wordlist] - wordlist = list( set( src_wordlist ) ) - wordlist.sort( key = len, reverse = True ) - reconv_table = {} - while wordlist: - word = wordlist.pop() - new_word = Converter( word, conv_table ) - reconv_table[new_word] = word - return reconv_table - -def CustomRules( dest ): - text = ReadFile( dest ) - temp = text.split() - ret = dict() - for i in range( 0, len( temp ), 2 ): - ret[temp[i]] = temp[i + 1] - return ret - -def GetPHPArray( table ): +def PHPArray( table ): lines = ['\'%s\' => \'%s\',' % (f, t) for (f, t) in table if f and t] - #lines = ['"%s"=>"%s",' % (f, t) for (f, t) in table] return '\n'.join(lines) -def RemoveSameChar( src_table ): - dst_table = {} - for f, t in src_table.items(): - if f != t: - dst_table[f] = t - return dst_table - def main(): #Get Unihan.zip: url = 'http://www.unicode.org/Public/UNIDATA/Unihan.zip' han_dest = 'Unihan.zip' - GetFileFromURL( url, han_dest ) + download( url, han_dest ) # Get scim-tables-$(SCIM_TABLES_VER).tar.gz: url = 'http://%s.dl.sourceforge.net/sourceforge/scim/scim-tables-%s.tar.gz' % ( SF_MIRROR, SCIM_TABLES_VER ) tbe_dest = 'scim-tables-%s.tar.gz' % SCIM_TABLES_VER - GetFileFromURL( url, tbe_dest ) + download( url, tbe_dest ) # Get scim-pinyin-$(SCIM_PINYIN_VER).tar.gz: url = 'http://%s.dl.sourceforge.net/sourceforge/scim/scim-pinyin-%s.tar.gz' % ( SF_MIRROR, SCIM_PINYIN_VER ) pyn_dest = 'scim-pinyin-%s.tar.gz' % SCIM_PINYIN_VER - GetFileFromURL( url, pyn_dest ) + download( url, pyn_dest ) # Get libtabe-$(LIBTABE_VER).tgz: url = 'http://%s.dl.sourceforge.net/sourceforge/libtabe/libtabe-%s.tgz' % ( SF_MIRROR, LIBTABE_VER ) lbt_dest = 'libtabe-%s.tgz' % LIBTABE_VER - GetFileFromURL( url, lbt_dest ) - - # Extract the file from a comressed files - - # Unihan.txt Simp. & Trad - GetFileFromUnihan( han_dest ) - - # Make word lists - t_wordlist = [] - s_wordlist = [] + download( url, lbt_dest ) - # EZ.txt.in Trad - src = 'scim-tables-%s/tables/zh/EZ-Big.txt.in' % SCIM_TABLES_VER - dst = 'EZ.txt.in' - GetFileFromTar( tbe_dest, src, dst ) - text = ReadFile( dst ) - text = text.split( 'BEGIN_TABLE' )[1].strip() - text = text.split( 'END_TABLE' )[0].strip() - text = re.sub( '.*\t', '', text ) - text = RemoveOneCharConv( text ) - t_wordlist.extend( text.split() ) - - # Wubi.txt.in Simp - src = 'scim-tables-%s/tables/zh/Wubi.txt.in' % SCIM_TABLES_VER - dst = 'Wubi.txt.in' - GetFileFromTar( tbe_dest, src, dst ) - text = ReadFile( dst ) - text = text.split( 'BEGIN_TABLE' )[1].strip() - text = text.split( 'END_TABLE' )[0].strip() - text = re.sub( '.*\t(.*?)\t\d*', '\g<1>', text ) - text = RemoveOneCharConv( text ) - s_wordlist.extend( text.split() ) - - # Ziranma.txt.in Simp - src = 'scim-tables-%s/tables/zh/Ziranma.txt.in' % SCIM_TABLES_VER - dst = 'Ziranma.txt.in' - GetFileFromTar( tbe_dest, src, dst ) - text = ReadFile( dst ) - text = text.split( 'BEGIN_TABLE' )[1].strip() - text = text.split( 'END_TABLE' )[0].strip() - text = re.sub( '.*\t(.*?)\t\d*', '\g<1>', text ) - text = RemoveOneCharConv( text ) - s_wordlist.extend( text.split() ) - - # phrase_lib.txt Simp - src = 'scim-pinyin-%s/data/phrase_lib.txt' % SCIM_PINYIN_VER - dst = 'phrase_lib.txt' - GetFileFromTar( pyn_dest, src, dst ) - text = ReadFile( 'phrase_lib.txt' ) - text = re.sub( '(.*)\t\d\d*.*', '\g<1>', text) - text = RemoveRows( text, 5 ) - text = RemoveOneCharConv( text ) - s_wordlist.extend( text.split() ) - - # tsi.src Trad - src = 'libtabe/tsi-src/tsi.src' - dst = 'tsi.src' - GetFileFromTar( lbt_dest, src, dst ) - text = ReadBIG5File( 'tsi.src' ) - text = re.sub( ' \d.*', '', text.replace('# ', '')) - text = RemoveOneCharConv( text ) - t_wordlist.extend( text.split() ) - - # remove duplicate elements - t_wordlist = list( set( t_wordlist ) ) - s_wordlist = list( set( s_wordlist ) ) - - # simpphrases_exclude.manual Simp - text = ReadFile( 'simpphrases_exclude.manual' ) - temp = text.split() - s_string = '\n'.join( s_wordlist ) - for elem in temp: - s_string = re.sub( '.*%s.*\n' % elem, '', s_string ) - s_wordlist = s_string.split('\n') + # Unihan.txt + ( t2s_1tomany, s2t_1tomany ) = unihanParser( han_dest ) + + t2s_1tomany.update( charManualTable( 'trad2simp.manual' ) ) + s2t_1tomany.update( charManualTable( 'simp2trad.manual' ) ) - # tradphrases_exclude.manual Trad - text = ReadFile( 'tradphrases_exclude.manual' ) - temp = text.split() - t_string = '\n'.join( t_wordlist ) - for elem in temp: - t_string = re.sub( '.*%s.*\n' % elem, '', t_string ) - t_wordlist = t_string.split('\n') + t2s_1to1 = dict( [( f, t[0] ) for ( f, t ) in t2s_1tomany.iteritems()] ) + s2t_1to1 = dict( [( f, t[0] ) for ( f, t ) in s2t_1tomany.iteritems()] ) - # Make char to char convertion table - # Unihan.txt, dict t2s_code, s2t_code = { 'U+XXXX': 'U+YYYY( U+ZZZZ) ... ', ... } - ( t2s_code, s2t_code ) = ReadUnihanFile( 'Unihan_Variants.txt' ) - # dict t2s_1tomany = { '\uXXXX': '\uYYYY\uZZZZ ... ', ... } - t2s_1tomany = {} - t2s_1tomany.update( GetDefaultTable( t2s_code ) ) - t2s_1tomany.update( GetManualTable( 'trad2simp.manual' ) ) - # dict s2t_1tomany - s2t_1tomany = {} - s2t_1tomany.update( GetDefaultTable( s2t_code ) ) - s2t_1tomany.update( GetManualTable( 'simp2trad.manual' ) ) - # dict t2s_1to1 = { '\uXXXX': '\uYYYY', ... }; t2s_trans = { 'ddddd': '', ... } - t2s_1to1 = GetValidTable( t2s_1tomany ) - s_tomany = GetToManyRules( t2s_1tomany ) - # dict s2t_1to1; s2t_trans - s2t_1to1 = GetValidTable( s2t_1tomany ) - t_tomany = GetToManyRules( s2t_1tomany ) - # remove noconvert rules - t2s_1to1 = RemoveRules( 'trad2simp_noconvert.manual', t2s_1to1 ) - s2t_1to1 = RemoveRules( 'simp2trad_noconvert.manual', s2t_1to1 ) + s_tomany = toManyRules( t2s_1tomany ) + t_tomany = toManyRules( s2t_1tomany ) + + # noconvert rules + t2s_1to1 = removeRules( 'trad2simp_noconvert.manual', t2s_1to1 ) + s2t_1to1 = removeRules( 'simp2trad_noconvert.manual', s2t_1to1 ) - # Make word to word convertion table + # the supper set for word to word conversion t2s_1to1_supp = t2s_1to1.copy() s2t_1to1_supp = s2t_1to1.copy() - # trad2simp_supp_set.manual - t2s_1to1_supp.update( CustomRules( 'trad2simp_supp_set.manual' ) ) - # simp2trad_supp_set.manual - s2t_1to1_supp.update( CustomRules( 'simp2trad_supp_set.manual' ) ) - # simpphrases.manual - text = ReadFile( 'simpphrases.manual' ) - s_wordlist_manual = text.split('\n') - t2s_word2word_manual = GetManualWordsTable(s_wordlist_manual, s2t_1to1_supp) - t2s_word2word_manual.update( CustomRules( 'toSimp.manual' ) ) - # tradphrases.manual - text = ReadFile( 'tradphrases.manual' ) - t_wordlist_manual = text.split('\n') - s2t_word2word_manual = GetManualWordsTable(t_wordlist_manual, t2s_1to1_supp) - s2t_word2word_manual.update( CustomRules( 'toTrad.manual' ) ) - # t2s_word2word + t2s_1to1_supp.update( customRules( 'trad2simp_supp_set.manual' ) ) + s2t_1to1_supp.update( customRules( 'simp2trad_supp_set.manual' ) ) + + # word to word manual rules + t2s_word2word_manual = manualWordsTable( 'simpphrases.manual', s2t_1to1_supp, t2s_1to1_supp ) + t2s_word2word_manual.update( customRules( 'toSimp.manual' ) ) + s2t_word2word_manual = manualWordsTable( 'tradphrases.manual', t2s_1to1_supp, s2t_1to1_supp ) + s2t_word2word_manual.update( customRules( 'toTrad.manual' ) ) + + # word to word rules from input methods + t_wordlist = set() + s_wordlist = set() + t_wordlist.update( ezbigParser( tbe_dest ), + tsiParser( lbt_dest ) ) + s_wordlist.update( wubiParser( tbe_dest ), + zrmParser( tbe_dest ), + phraseParser( pyn_dest ) ) + + # exclude + s_wordlist = applyExcludes( s_wordlist, 'simpphrases_exclude.manual' ) + t_wordlist = applyExcludes( t_wordlist, 'tradphrases_exclude.manual' ) + s2t_supp = s2t_1to1_supp.copy() s2t_supp.update( s2t_word2word_manual ) t2s_supp = t2s_1to1_supp.copy() t2s_supp.update( t2s_word2word_manual ) - t2s_word2word = GetDefaultWordsTable( s_wordlist, s_tomany, s2t_1to1_supp, t2s_supp ) - ## toSimp.manual + + # parse list to dict + t2s_word2word = defaultWordsTable( s_wordlist, s_tomany, s2t_1to1_supp, t2s_supp ) t2s_word2word.update( t2s_word2word_manual ) - # s2t_word2word - s2t_word2word = GetDefaultWordsTable( t_wordlist, t_tomany, t2s_1to1_supp, s2t_supp ) - ## toTrad.manual + s2t_word2word = defaultWordsTable( t_wordlist, t_tomany, t2s_1to1_supp, s2t_supp ) s2t_word2word.update( s2t_word2word_manual ) # Final tables # sorted list toHans - t2s_1to1 = RemoveSameChar( t2s_1to1 ) - s2t_1to1 = RemoveSameChar( s2t_1to1 ) - toHans = DictToSortedList1( t2s_1to1 ) + DictToSortedList2( t2s_word2word ) + t2s_1to1 = dict( [( f, t ) for ( f, t ) in t2s_1to1.iteritems() if f != t] ) + toHans = dictToSortedList( t2s_1to1, 0 ) + dictToSortedList( t2s_word2word, 1 ) # sorted list toHant - toHant = DictToSortedList1( s2t_1to1 ) + DictToSortedList2( s2t_word2word ) + s2t_1to1 = dict( [( f, t ) for ( f, t ) in s2t_1to1.iteritems() if f != t] ) + toHant = dictToSortedList( s2t_1to1, 0 ) + dictToSortedList( s2t_word2word, 1 ) # sorted list toCN - toCN = DictToSortedList2( CustomRules( 'toCN.manual' ) ) + toCN = dictToSortedList( customRules( 'toCN.manual' ), 1 ) # sorted list toHK - toHK = DictToSortedList2( CustomRules( 'toHK.manual' ) ) + toHK = dictToSortedList( customRules( 'toHK.manual' ), 1 ) # sorted list toSG - toSG = DictToSortedList2( CustomRules( 'toSG.manual' ) ) + toSG = dictToSortedList( customRules( 'toSG.manual' ), 1 ) # sorted list toTW - toTW = DictToSortedList2( CustomRules( 'toTW.manual' ) ) + toTW = dictToSortedList( customRules( 'toTW.manual' ), 1 ) # Get PHP Array php = '''