aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWerner LEMBERG2001-12-18 17:46:16 +0000
committerWerner LEMBERG2001-12-18 17:46:16 +0000
commit285aac852c2862fb9ea3fed0c8ca016b27fd4e40 (patch)
tree2de763b53834a17d2731fbb4677c1dfcfece6b04
parent231c4d1a90a76704edce95b6a96bbb5a8034d86a (diff)
downloademacs-285aac852c2862fb9ea3fed0c8ca016b27fd4e40.tar.gz
emacs-285aac852c2862fb9ea3fed0c8ca016b27fd4e40.zip
Implementing euc-tw encoding.
Improving doc strings.
-rw-r--r--lisp/language/chinese.el161
1 files changed, 153 insertions, 8 deletions
diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el
index 498b9c635ba..7d0f85ac902 100644
--- a/lisp/language/chinese.el
+++ b/lisp/language/chinese.el
@@ -35,7 +35,7 @@
35 35
36(make-coding-system 36(make-coding-system
37 'iso-2022-cn 2 ?C 37 'iso-2022-cn 2 ?C
38 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)" 38 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
39 '(ascii 39 '(ascii
40 (nil chinese-gb2312 chinese-cns11643-1) 40 (nil chinese-gb2312 chinese-cns11643-1)
41 (nil chinese-cns11643-2) 41 (nil chinese-cns11643-2)
@@ -49,7 +49,7 @@
49 49
50(make-coding-system 50(make-coding-system
51 'iso-2022-cn-ext 2 ?C 51 'iso-2022-cn-ext 2 ?C
52 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)" 52 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
53 '(ascii 53 '(ascii
54 (nil chinese-gb2312 chinese-cns11643-1) 54 (nil chinese-gb2312 chinese-cns11643-1)
55 (nil chinese-cns11643-2) 55 (nil chinese-cns11643-2)
@@ -69,7 +69,7 @@
69 69
70(make-coding-system 70(make-coding-system
71 'chinese-iso-8bit 2 ?c 71 'chinese-iso-8bit 2 ?c
72 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)" 72 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
73 '(ascii chinese-gb2312 nil nil 73 '(ascii chinese-gb2312 nil nil
74 nil ascii-eol ascii-cntl nil nil nil nil) 74 nil ascii-eol ascii-cntl nil nil nil nil)
75 '((safe-charsets ascii chinese-gb2312) 75 '((safe-charsets ascii chinese-gb2312)
@@ -83,7 +83,7 @@
83 83
84(make-coding-system 84(make-coding-system
85 'chinese-hz 0 ?z 85 'chinese-hz 0 ?z
86 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)" 86 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
87 nil 87 nil
88 '((safe-charsets ascii chinese-gb2312) 88 '((safe-charsets ascii chinese-gb2312)
89 (mime-charset . hz-gb-2312) 89 (mime-charset . hz-gb-2312)
@@ -126,7 +126,8 @@
126;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 126;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127 127
128(make-coding-system 128(make-coding-system
129 'chinese-big5 3 ?B "BIG5 8-bit encoding for Chinese (MIME:Big5)" 129 'chinese-big5 3 ?B
130 "BIG5 8-bit encoding for Chinese (MIME:Big5)."
130 nil 131 nil
131 '((safe-charsets ascii chinese-big5-1 chinese-big5-2) 132 '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
132 (mime-charset . big5) 133 (mime-charset . big5)
@@ -168,16 +169,160 @@
168;; Chinese CNS11643 (traditional) 169;; Chinese CNS11643 (traditional)
169;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 170;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
170 171
172(defvar big5-to-cns (make-translation-table)
173 "Translation table for encoding to `euc-tw'.")
;; The table created by the defvar above starts out empty; presumably
;; china-util supplies the real Big5 -> CNS mappings (confirm there).
174;; Could have been done by china-util loaded before.
;; Otherwise attach the table to the symbol `big5-to-cns' here, because
;; `ccl-encode-euc-tw' refers to it by that name in `translate-character'.
175(unless (get 'big5-to-cns 'translation-table)
176 (define-translation-table 'big5-to-cns big5-to-cns))
177
178(define-ccl-program ccl-decode-euc-tw
179 ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding;
180 ;; CNS planes 2 to 7 always need four bytes. In internal encoding of
181 ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need
182 ;; four bytes. Thus a buffer magnification value of 2 (for both
183 ;; encoding and decoding) is sufficient.
 ;;
 ;; Input sequences recognized below (b1, b2 are the two code bytes):
 ;;   byte < #x80                        -> written as is (ASCII)
 ;;   #x8E, plane (#xA1..#xA7), b1, b2   -> character of CNS plane 1..7
 ;;   b1, b2 (both >= #xA1)              -> character of CNS plane 1
 ;; Bytes that do not fit one of these patterns are written through
 ;; unchanged.  Note that only the lower bound #xA1 is tested for b1
 ;; and b2; no upper bound is checked.
184 `(2
185 ;; we don't have enough registers to hold all charset-ids
 ;; (only the ids of planes 1 to 3 are kept in r4..r6; the ids of
 ;; planes 4 to 7 are inserted as constants in the branch below)
186 ((r4 = ,(charset-id 'chinese-cns11643-1))
187 (r5 = ,(charset-id 'chinese-cns11643-2))
188 (r6 = ,(charset-id 'chinese-cns11643-3))
189 (loop
190 (read-if (r0 < #x80)
191 ;; ASCII
192 (write-repeat r0)
193 ;; not ASCII
194 (if (r0 == #x8E)
195 ;; single shift
 ;; (#x8E announces a four-byte sequence; the byte read next
 ;; into r1 selects the plane)
196 (read-if (r1 < #xA1)
197 ;; invalid byte
 ;; (both bytes read so far are passed through)
198 ((write r0)
199 (write-repeat r1))
200 (if (r1 > #xA7)
201 ;; invalid plane
202 ((write r0)
203 (write-repeat r1))
204 ;; OK, we have a plane
205 (read-if (r2 < #xA1)
206 ;; invalid first byte
207 ((write r0 r1)
208 (write-repeat r2))
209 (read-if (r3 < #xA1)
210 ;; invalid second byte
211 ((write r0 r1 r2)
212 (write-repeat r3))
213 ;; CNS 1-7, finally
 ;; Replace the plane byte in r1 (#xA1..#xA7, used as
 ;; branch index 0..6) by the corresponding charset id.
214 ((branch (r1 - #xA1)
215 (r1 = r4)
216 (r1 = r5)
217 (r1 = r6)
218 (r1 = ,(charset-id 'chinese-cns11643-4))
219 (r1 = ,(charset-id 'chinese-cns11643-5))
220 (r1 = ,(charset-id 'chinese-cns11643-6))
221 (r1 = ,(charset-id 'chinese-cns11643-7)))
 ;; r2 = (b1 - #x80) * 128 + (b2 - #x80), i.e. both code
 ;; bytes without their high bit, packed into one value.
222 (r2 = ((((r2 - #x80) << 7) + r3) - #x80))
223 (write-multibyte-character r1 r2)
224 (repeat))))))
225 ;; standard EUC
 ;; (two-byte sequence without single shift; always plane 1)
226 (if (r0 < #xA1)
227 ;; invalid first byte
228 (write-repeat r0)
229 (read-if (r1 < #xA1)
230 ;; invalid second byte
231 ((write r0)
232 (write-repeat r1))
233 ;; CNS 1, finally
 ;; Same packing as above; r4 holds the charset id of plane 1.
234 ((r1 = ((((r0 - #x80) << 7) + r1) - #x80))
235 (write-multibyte-character r4 r1)
236 (repeat)))))))))
237 "CCL program to decode EUC-TW encoding."
238)
239
240(define-ccl-program ccl-encode-euc-tw
 ;; Output produced for each character read:
 ;;   ASCII                     -> the character code itself
 ;;   CNS plane 1               -> two bytes (code bytes with #x80 added)
 ;;   CNS plane 2..7            -> #x8E, plane byte (#xA2..#xA7), two bytes
 ;;   Big5                      -> translated with `big5-to-cns' first,
 ;;                                then handled like the resulting charset
 ;;   anything else             -> the single byte #xFF
241 `(2
242 ;; we don't have enough registers to hold all charset-ids
 ;; (r2..r6 hold the ids compared first; the ids of CNS planes 3 to 7
 ;; are inserted as constants in the comparisons below)
243 ((r2 = ,(charset-id 'ascii))
244 (r3 = ,(charset-id 'chinese-big5-1))
245 (r4 = ,(charset-id 'chinese-big5-2))
246 (r5 = ,(charset-id 'chinese-cns11643-1))
247 (r6 = ,(charset-id 'chinese-cns11643-2))
248 (loop
 ;; r0 receives the charset id, r1 the code within that charset.
249 (read-multibyte-character r0 r1)
250 (if (r0 == r2)
251 (write-repeat r1)
252 (;; Big 5 encoded characters are first translated to CNS
 ;; (the translation rewrites r0 and r1 in place, so the chain
 ;; of tests below sees the translated charset id)
253 (if (r0 == r3)
254 (translate-character big5-to-cns r0 r1)
255 (if (r0 == r4)
256 (translate-character big5-to-cns r0 r1)))
 ;; Replace the charset id in r0 by the EUC-TW plane byte
 ;; (#xA1 for plane 1 up to #xA7 for plane 7).
257 (if (r0 == r5)
258 (r0 = #xA1)
259 (if (r0 == r6)
260 (r0 = #xA2)
261 (if (r0 == ,(charset-id 'chinese-cns11643-3))
262 (r0 = #xA3)
263 (if (r0 == ,(charset-id 'chinese-cns11643-4))
264 (r0 = #xA4)
265 (if (r0 == ,(charset-id 'chinese-cns11643-5))
266 (r0 = #xA5)
267 (if (r0 == ,(charset-id 'chinese-cns11643-6))
268 (r0 = #xA6)
269 (if (r0 == ,(charset-id 'chinese-cns11643-7))
270 (r0 = #xA7)
271 ;; not CNS. We use a dummy character which
272 ;; can't occur in EUC-TW encoding to indicate
273 ;; this.
 ;; (`write-repeat' restarts the loop, so nothing
 ;; else is written for this character; this is
 ;; also reached for Big5 characters that were
 ;; not changed into CNS by the translation)
274 (write-repeat #xFF))))))))))
 ;; Only non-ASCII characters with a plane byte in r0 get here.
 ;; Plane 1 uses the short two-byte form without a prefix.
275 (if (r0 != #xA1)
276 ;; single shift and CNS plane
277 ((write #x8E)
278 (write r0)))
 ;; Split r1 into its upper part (r1 >> 7) and its lower seven bits
 ;; and write each with #x80 added.
279 (write ((r1 >> 7) + #x80))
280 (write ((r1 % #x80) + #x80))
281 (repeat))))
282 "CCL program to encode EUC-TW encoding."
283)
284
285(defun euc-tw-pre-write-conversion (beg end)
286 "Semi-dummy pre-write function effectively to autoload china-util."
 ;; BEG and END are not used; the function is called only for its
 ;; side effects and always returns nil.
287 ;; Ensure translation table is loaded.
 ;; (Presumably china-util fills `big5-to-cns', which the euc-tw
 ;; encoder uses -- confirm in china-util.)
288 (require 'china-util)
289 ;; Don't do this again.
 ;; Clearing the property removes this function from `euc-tw', so it
 ;; runs at most once per session.
290 (coding-system-put 'euc-tw 'pre-write-conversion nil)
291 nil)
292
293(make-coding-system
 ;; Define the coding system `euc-tw' with mnemonic character `Z'.
 ;; NOTE(review): type 4 together with a (DECODER . ENCODER) pair of
 ;; CCL programs looks like the CCL-based coding-system type -- confirm
 ;; against the documentation of `make-coding-system'.
294 'euc-tw 4 ?Z
295 "ISO 2022 based EUC encoding for Chinese CNS11643.
296Big5 encoding is accepted for input also (which is then converted to CNS)."
 ;; Decoder and encoder defined earlier in this file.
297 '(ccl-decode-euc-tw . ccl-encode-euc-tw)
 ;; The Big5 charsets are listed as safe because the encoder translates
 ;; them with `big5-to-cns' before writing.
298 '((safe-charsets ascii
299 chinese-big5-1
300 chinese-big5-2
301 chinese-cns11643-1
302 chinese-cns11643-2
303 chinese-cns11643-3
304 chinese-cns11643-4
305 chinese-cns11643-5
306 chinese-cns11643-6
307 chinese-cns11643-7)
308 (valid-codes (0 . 255))
 ;; Runs `euc-tw-pre-write-conversion', which loads china-util and then
 ;; removes itself from this property.
309 (pre-write-conversion . euc-tw-pre-write-conversion)))
310
311(define-coding-system-alias 'euc-taiwan 'euc-tw) ; `euc-taiwan' is a second name for `euc-tw'
312
171(set-language-info-alist 313(set-language-info-alist
172 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 314 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2
173 chinese-cns11643-3 chinese-cns11643-4 315 chinese-cns11643-3 chinese-cns11643-4
174 chinese-cns11643-5 chinese-cns11643-6 316 chinese-cns11643-5 chinese-cns11643-6
175 chinese-cns11643-7) 317 chinese-cns11643-7)
176 (coding-system iso-2022-cn) 318 (coding-system iso-2022-cn euc-tw)
177 (coding-priority iso-2022-cn chinese-big5 chinese-iso-8bit) 319 (coding-priority iso-2022-cn euc-tw chinese-big5
320 chinese-iso-8bit)
178 (features china-util) 321 (features china-util)
179 (input-method . "chinese-cns-quick") 322 (input-method . "chinese-cns-quick")
180 (documentation . "Support for Chinese CNS character sets.")) 323 (documentation . "\
324Support for Chinese CNS character sets. Note that EUC-TW coding system
325accepts Big5 for input also (which is then converted to CNS)."))
181 '("Chinese")) 326 '("Chinese"))
182 327
183(provide 'chinese) 328(provide 'chinese)