aboutsummaryrefslogtreecommitdiffstats
path: root/admin
diff options
context:
space:
mode:
author Kenichi Handa 2009-06-12 07:22:13 +0000
committer Kenichi Handa 2009-06-12 07:22:13 +0000
commit 2c36b57712866c03db5c51fea1c76472751130f7 (patch)
tree 5e3486b13d427f5a4740deaa671e8a2886bce7af /admin
parent 1a3cbf350a9b5f4ac9d89747e80032308f0b5a4c (diff)
download emacs-2c36b57712866c03db5c51fea1c76472751130f7.tar.gz
emacs-2c36b57712866c03db5c51fea1c76472751130f7.zip
Mostly re-written to handle glibc's EUC-JP-MS.
Diffstat (limited to 'admin')
-rw-r--r-- admin/charsets/eucjp-ms.awk 98
1 files changed, 64 insertions, 34 deletions
diff --git a/admin/charsets/eucjp-ms.awk b/admin/charsets/eucjp-ms.awk
index c4140f67224..7710e0c0e00 100644
--- a/admin/charsets/eucjp-ms.awk
+++ b/admin/charsets/eucjp-ms.awk
@@ -21,56 +21,84 @@
21# Commentary: 21# Commentary:
22 22
23# eucJP-ms is one of eucJP-open encoding defined at this page: 23# eucJP-ms is one of eucJP-open encoding defined at this page:
24# http://www.opengroup.or.jp/jvc/cde/appendix.html 24# http://home.m05.itscom.net/numa/cde/ucs-conv/appendix.html
25# This program reads the mapping file EUC-JP-MS (of glibc) and
26# generates the Elisp file eucjp-ms.el that defines two translation
27# tables `eucjp-ms-decode' and `eucjp-ms-encode'.
25 28
26BEGIN { 29BEGIN {
30 FS = "[ \t][ \t]*"
31
32 # STATE: 0/ignore, 1/JISX0208, 2/JISX0208 target range
33 # 3/JISX0212 4/JISX0212 target range
34 state = 0;
35
36 JISX0208_FROM1 = "/xad/xa1";
37 JISX0208_TO1 = "/xad/xfc";
38 JISX0208_FROM2 = "/xf5/xa1";
39 JISX0212_FROM = "/x8f/xf3/xf3";
40
27 print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"; 41 print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-";
28 print ";;; Automatically genrated from eucJP-13th.txt, eucJP-udc.txt, eucJP-ibmext.txt"; 42 print ";;; Automatically generated from /usr/share/i18n/charmaps/EUC-JP-MS.gz";
29 print "(let ((map"; 43 print "(let ((map";
30 printf " '(;JISEXT<->UNICODE"; 44 print " '(;JISEXT<->UNICODE";
31
32 tohex["A"] = 10;
33 tohex["B"] = 11;
34 tohex["C"] = 12;
35 tohex["D"] = 13;
36 tohex["E"] = 14;
37 tohex["F"] = 15;
38} 45}
39 46
40function decode_hex(str) { 47function write_entry (unicode) {
41 n = 0; 48 if (state == 1) {
42 len = length(str); 49 if ($2 == JISX0208_FROM1 || $2 == JISX0208_FROM2)
43 for (i = 1; i <= len; i++) 50 state = 2;
44 { 51 } else if (state == 3) {
45 c = substr(str, i, 1); 52 if ($2 == JISX0212_FROM)
46 if (c >= "0" && c <= "9") 53 state = 4;
47 n = n * 16 + (c - "0");
48 else
49 n = n * 16 + tohex[c];
50 } 54 }
51 return n; 55 if (state == 2) {
56 jis = $2
57 gsub("/x", "", jis);
58 printf "\n (#x%s . #x%s)", jis, unicode;
59 if ($2 == JISX0208_TO1)
60 state = 1;
61 } else if (state == 4) {
62 jis = substr($2, 5, 8);
63 gsub("/x", "", jis);
64 printf "\n (#x%s #x%s)", jis, unicode;
65 }
66}
67
68
69/^% JIS X 0208/ {
70 state = 1;
71 next;
72}
73
74/^% JIS X 0212/ {
75 state = 3;
76 next;
77}
78
79/^END CHARMAP/ {
80 state = 0;
81 next;
52} 82}
53 83
54/0x8F/ { 84/^<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ {
55 code = decode_hex(substr($1, 5, 4)); 85 if (state > 0)
56 code -= 32896; # code -= 0x8080 86 write_entry(substr($1, 3, 4));
57 printf "\n (#x%04x #x%s)", code, substr($2, 3, 4);
58 next;
59} 87}
60 88
61/0x[A-F]/ { 89/^%IRREVERSIBLE%<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ {
62 code = decode_hex(substr($1, 3, 4)); 90 if (state > 0)
63 code -= 32896; # code -= 0x8080 91 write_entry(substr($1, 17, 4));
64 printf "\n (#x%04x . #x%s)", code, substr($2, 3, 4);
65} 92}
66 93
67END { 94END {
68 print ")))"; 95 print ")))";
69 print " (mapc #'(lambda (x)"; 96 print " (mapc #'(lambda (x)";
70 print " (if (integerp (cdr x))"; 97 print " (let ((code (logand (car x) #x7F7F)))";
71 print " (setcar x (decode-char 'japanese-jisx0208 (car x)))"; 98 print " (if (integerp (cdr x))";
72 print " (setcar x (decode-char 'japanese-jisx0212 (car x)))"; 99 print " (setcar x (decode-char 'japanese-jisx0208 code))";
73 print " (setcdr x (cadr x))))"; 100 print " (setcar x (decode-char 'japanese-jisx0212 code))";
101 print " (setcdr x (cadr x)))))";
74 print " map)"; 102 print " map)";
75 print " (define-translation-table 'eucjp-ms-decode map)"; 103 print " (define-translation-table 'eucjp-ms-decode map)";
76 print " (mapc #'(lambda (x)"; 104 print " (mapc #'(lambda (x)";
@@ -78,6 +106,8 @@ END {
78 print " (setcar x (cdr x)) (setcdr x tmp)))"; 106 print " (setcar x (cdr x)) (setcdr x tmp)))";
79 print " map)"; 107 print " map)";
80 print " (define-translation-table 'eucjp-ms-encode map))"; 108 print " (define-translation-table 'eucjp-ms-encode map))";
109 print "";
110 print ";; arch-tag: c4191096-288a-4f13-9b2a-ee7a1f11eb4a";
81} 111}
82 112
83# arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c 113# arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c