diff options
| author | Kenichi Handa | 2009-06-12 07:22:13 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2009-06-12 07:22:13 +0000 |
| commit | 2c36b57712866c03db5c51fea1c76472751130f7 (patch) | |
| tree | 5e3486b13d427f5a4740deaa671e8a2886bce7af /admin | |
| parent | 1a3cbf350a9b5f4ac9d89747e80032308f0b5a4c (diff) | |
| download | emacs-2c36b57712866c03db5c51fea1c76472751130f7.tar.gz emacs-2c36b57712866c03db5c51fea1c76472751130f7.zip | |
Mostly re-written to handle glibc's EUC-JP-MS.
Diffstat (limited to 'admin')
| -rw-r--r-- | admin/charsets/eucjp-ms.awk | 98 |
1 file changed, 64 insertions, 34 deletions
diff --git a/admin/charsets/eucjp-ms.awk b/admin/charsets/eucjp-ms.awk index c4140f67224..7710e0c0e00 100644 --- a/admin/charsets/eucjp-ms.awk +++ b/admin/charsets/eucjp-ms.awk | |||
| @@ -21,56 +21,84 @@ | |||
| 21 | # Commentary: | 21 | # Commentary: |
| 22 | 22 | ||
| 23 | # eucJP-ms is one of eucJP-open encoding defined at this page: | 23 | # eucJP-ms is one of eucJP-open encoding defined at this page: |
| 24 | # http://www.opengroup.or.jp/jvc/cde/appendix.html | 24 | # http://home.m05.itscom.net/numa/cde/ucs-conv/appendix.html |
| 25 | # This program reads the mapping file EUC-JP-MS (of glibc) and | ||
| 26 | # generates the Elisp file eucjp-ms.el that defines two translation | ||
| 27 | # tables `eucjp-ms-decode' and `eucjp-ms-encode'. | ||
| 25 | 28 | ||
| 26 | BEGIN { | 29 | BEGIN { |
| 30 | FS = "[ \t][ \t]*" | ||
| 31 | |||
| 32 | # STATE: 0/ignore, 1/JISX0208, 2/JISX0208 target range | ||
| 33 | # 3/JISX0212 4/JISX0212 target range | ||
| 34 | state = 0; | ||
| 35 | |||
| 36 | JISX0208_FROM1 = "/xad/xa1"; | ||
| 37 | JISX0208_TO1 = "/xad/xfc"; | ||
| 38 | JISX0208_FROM2 = "/xf5/xa1"; | ||
| 39 | JISX0212_FROM = "/x8f/xf3/xf3"; | ||
| 40 | |||
| 27 | print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"; | 41 | print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"; |
| 28 | print ";;; Automatically genrated from eucJP-13th.txt, eucJP-udc.txt, eucJP-ibmext.txt"; | 42 | print ";;; Automatically generated from /usr/share/i18n/charmaps/EUC-JP-MS.gz"; |
| 29 | print "(let ((map"; | 43 | print "(let ((map"; |
| 30 | printf " '(;JISEXT<->UNICODE"; | 44 | print " '(;JISEXT<->UNICODE"; |
| 31 | |||
| 32 | tohex["A"] = 10; | ||
| 33 | tohex["B"] = 11; | ||
| 34 | tohex["C"] = 12; | ||
| 35 | tohex["D"] = 13; | ||
| 36 | tohex["E"] = 14; | ||
| 37 | tohex["F"] = 15; | ||
| 38 | } | 45 | } |
| 39 | 46 | ||
| 40 | function decode_hex(str) { | 47 | function write_entry (unicode) { |
| 41 | n = 0; | 48 | if (state == 1) { |
| 42 | len = length(str); | 49 | if ($2 == JISX0208_FROM1 || $2 == JISX0208_FROM2) |
| 43 | for (i = 1; i <= len; i++) | 50 | state = 2; |
| 44 | { | 51 | } else if (state == 3) { |
| 45 | c = substr(str, i, 1); | 52 | if ($2 == JISX0212_FROM) |
| 46 | if (c >= "0" && c <= "9") | 53 | state = 4; |
| 47 | n = n * 16 + (c - "0"); | ||
| 48 | else | ||
| 49 | n = n * 16 + tohex[c]; | ||
| 50 | } | 54 | } |
| 51 | return n; | 55 | if (state == 2) { |
| 56 | jis = $2 | ||
| 57 | gsub("/x", "", jis); | ||
| 58 | printf "\n (#x%s . #x%s)", jis, unicode; | ||
| 59 | if ($2 == JISX0208_TO1) | ||
| 60 | state = 1; | ||
| 61 | } else if (state == 4) { | ||
| 62 | jis = substr($2, 5, 8); | ||
| 63 | gsub("/x", "", jis); | ||
| 64 | printf "\n (#x%s #x%s)", jis, unicode; | ||
| 65 | } | ||
| 66 | } | ||
| 67 | |||
| 68 | |||
| 69 | /^% JIS X 0208/ { | ||
| 70 | state = 1; | ||
| 71 | next; | ||
| 72 | } | ||
| 73 | |||
| 74 | /^% JIS X 0212/ { | ||
| 75 | state = 3; | ||
| 76 | next; | ||
| 77 | } | ||
| 78 | |||
| 79 | /^END CHARMAP/ { | ||
| 80 | state = 0; | ||
| 81 | next; | ||
| 52 | } | 82 | } |
| 53 | 83 | ||
| 54 | /0x8F/ { | 84 | /^<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ { |
| 55 | code = decode_hex(substr($1, 5, 4)); | 85 | if (state > 0) |
| 56 | code -= 32896; # code -= 0x8080 | 86 | write_entry(substr($1, 3, 4)); |
| 57 | printf "\n (#x%04x #x%s)", code, substr($2, 3, 4); | ||
| 58 | next; | ||
| 59 | } | 87 | } |
| 60 | 88 | ||
| 61 | /0x[A-F]/ { | 89 | /^%IRREVERSIBLE%<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ { |
| 62 | code = decode_hex(substr($1, 3, 4)); | 90 | if (state > 0) |
| 63 | code -= 32896; # code -= 0x8080 | 91 | write_entry(substr($1, 17, 4)); |
| 64 | printf "\n (#x%04x . #x%s)", code, substr($2, 3, 4); | ||
| 65 | } | 92 | } |
| 66 | 93 | ||
| 67 | END { | 94 | END { |
| 68 | print ")))"; | 95 | print ")))"; |
| 69 | print " (mapc #'(lambda (x)"; | 96 | print " (mapc #'(lambda (x)"; |
| 70 | print " (if (integerp (cdr x))"; | 97 | print " (let ((code (logand (car x) #x7F7F)))"; |
| 71 | print " (setcar x (decode-char 'japanese-jisx0208 (car x)))"; | 98 | print " (if (integerp (cdr x))"; |
| 72 | print " (setcar x (decode-char 'japanese-jisx0212 (car x)))"; | 99 | print " (setcar x (decode-char 'japanese-jisx0208 code))"; |
| 73 | print " (setcdr x (cadr x))))"; | 100 | print " (setcar x (decode-char 'japanese-jisx0212 code))"; |
| 101 | print " (setcdr x (cadr x)))))"; | ||
| 74 | print " map)"; | 102 | print " map)"; |
| 75 | print " (define-translation-table 'eucjp-ms-decode map)"; | 103 | print " (define-translation-table 'eucjp-ms-decode map)"; |
| 76 | print " (mapc #'(lambda (x)"; | 104 | print " (mapc #'(lambda (x)"; |
| @@ -78,6 +106,8 @@ END { | |||
| 78 | print " (setcar x (cdr x)) (setcdr x tmp)))"; | 106 | print " (setcar x (cdr x)) (setcdr x tmp)))"; |
| 79 | print " map)"; | 107 | print " map)"; |
| 80 | print " (define-translation-table 'eucjp-ms-encode map))"; | 108 | print " (define-translation-table 'eucjp-ms-encode map))"; |
| 109 | print ""; | ||
| 110 | print ";; arch-tag: c4191096-288a-4f13-9b2a-ee7a1f11eb4a"; | ||
| 81 | } | 111 | } |
| 82 | 112 | ||
| 83 | # arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c | 113 | # arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c |