diff options
| author | Werner LEMBERG | 2001-12-18 17:46:16 +0000 |
|---|---|---|
| committer | Werner LEMBERG | 2001-12-18 17:46:16 +0000 |
| commit | 285aac852c2862fb9ea3fed0c8ca016b27fd4e40 (patch) | |
| tree | 2de763b53834a17d2731fbb4677c1dfcfece6b04 | |
| parent | 231c4d1a90a76704edce95b6a96bbb5a8034d86a (diff) | |
| download | emacs-285aac852c2862fb9ea3fed0c8ca016b27fd4e40.tar.gz emacs-285aac852c2862fb9ea3fed0c8ca016b27fd4e40.zip | |
Implementing euc-tw encoding.
Improving doc strings.
| -rw-r--r-- | lisp/language/chinese.el | 161 |
1 file changed, 153 insertions, 8 deletions
diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el
index 498b9c635ba..7d0f85ac902 100644
--- a/lisp/language/chinese.el
+++ b/lisp/language/chinese.el
| @@ -35,7 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | (make-coding-system | 36 | (make-coding-system |
| 37 | 'iso-2022-cn 2 ?C | 37 | 'iso-2022-cn 2 ?C |
| 38 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)" | 38 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." |
| 39 | '(ascii | 39 | '(ascii |
| 40 | (nil chinese-gb2312 chinese-cns11643-1) | 40 | (nil chinese-gb2312 chinese-cns11643-1) |
| 41 | (nil chinese-cns11643-2) | 41 | (nil chinese-cns11643-2) |
| @@ -49,7 +49,7 @@ | |||
| 49 | 49 | ||
| 50 | (make-coding-system | 50 | (make-coding-system |
| 51 | 'iso-2022-cn-ext 2 ?C | 51 | 'iso-2022-cn-ext 2 ?C |
| 52 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)" | 52 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." |
| 53 | '(ascii | 53 | '(ascii |
| 54 | (nil chinese-gb2312 chinese-cns11643-1) | 54 | (nil chinese-gb2312 chinese-cns11643-1) |
| 55 | (nil chinese-cns11643-2) | 55 | (nil chinese-cns11643-2) |
| @@ -69,7 +69,7 @@ | |||
| 69 | 69 | ||
| 70 | (make-coding-system | 70 | (make-coding-system |
| 71 | 'chinese-iso-8bit 2 ?c | 71 | 'chinese-iso-8bit 2 ?c |
| 72 | "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)" | 72 | "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)." |
| 73 | '(ascii chinese-gb2312 nil nil | 73 | '(ascii chinese-gb2312 nil nil |
| 74 | nil ascii-eol ascii-cntl nil nil nil nil) | 74 | nil ascii-eol ascii-cntl nil nil nil nil) |
| 75 | '((safe-charsets ascii chinese-gb2312) | 75 | '((safe-charsets ascii chinese-gb2312) |
| @@ -83,7 +83,7 @@ | |||
| 83 | 83 | ||
| 84 | (make-coding-system | 84 | (make-coding-system |
| 85 | 'chinese-hz 0 ?z | 85 | 'chinese-hz 0 ?z |
| 86 | "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)" | 86 | "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." |
| 87 | nil | 87 | nil |
| 88 | '((safe-charsets ascii chinese-gb2312) | 88 | '((safe-charsets ascii chinese-gb2312) |
| 89 | (mime-charset . hz-gb-2312) | 89 | (mime-charset . hz-gb-2312) |
| @@ -126,7 +126,8 @@ | |||
| 126 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 126 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 127 | 127 | ||
| 128 | (make-coding-system | 128 | (make-coding-system |
| 129 | 'chinese-big5 3 ?B "BIG5 8-bit encoding for Chinese (MIME:Big5)" | 129 | 'chinese-big5 3 ?B |
| 130 | "BIG5 8-bit encoding for Chinese (MIME:Big5)." | ||
| 130 | nil | 131 | nil |
| 131 | '((safe-charsets ascii chinese-big5-1 chinese-big5-2) | 132 | '((safe-charsets ascii chinese-big5-1 chinese-big5-2) |
| 132 | (mime-charset . big5) | 133 | (mime-charset . big5) |
| @@ -168,16 +169,160 @@ | |||
| 168 | ;; Chinese CNS11643 (traditional) | 169 | ;; Chinese CNS11643 (traditional) |
| 169 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 170 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 170 | 171 | ||
| 172 | (defvar big5-to-cns (make-translation-table) | ||
| 173 | "Translation table for encoding to `euc-tw'.") | ||
| 174 | ;; Could have been done by china-util loaded before. | ||
| 175 | (unless (get 'big5-to-cns 'translation-table) | ||
| 176 | (define-translation-table 'big5-to-cns big5-to-cns)) | ||
| 177 | |||
| 178 | (define-ccl-program ccl-decode-euc-tw | ||
| 179 | ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding; | ||
| 180 | ;; CNS planes 2 to 7 always need four bytes. In internal encoding of | ||
| 181 | ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need | ||
| 182 | ;; four bytes. Thus a buffer magnification value of 2 (for both | ||
| 183 | ;; encoding and decoding) is sufficient. | ||
| 184 | `(2 | ||
| 185 | ;; we don't have enough registers to hold all charset-ids | ||
| 186 | ((r4 = ,(charset-id 'chinese-cns11643-1)) | ||
| 187 | (r5 = ,(charset-id 'chinese-cns11643-2)) | ||
| 188 | (r6 = ,(charset-id 'chinese-cns11643-3)) | ||
| 189 | (loop | ||
| 190 | (read-if (r0 < #x80) | ||
| 191 | ;; ASCII | ||
| 192 | (write-repeat r0) | ||
| 193 | ;; not ASCII | ||
| 194 | (if (r0 == #x8E) | ||
| 195 | ;; single shift | ||
| 196 | (read-if (r1 < #xA1) | ||
| 197 | ;; invalid byte | ||
| 198 | ((write r0) | ||
| 199 | (write-repeat r1)) | ||
| 200 | (if (r1 > #xA7) | ||
| 201 | ;; invalid plane | ||
| 202 | ((write r0) | ||
| 203 | (write-repeat r1)) | ||
| 204 | ;; OK, we have a plane | ||
| 205 | (read-if (r2 < #xA1) | ||
| 206 | ;; invalid first byte | ||
| 207 | ((write r0 r1) | ||
| 208 | (write-repeat r2)) | ||
| 209 | (read-if (r3 < #xA1) | ||
| 210 | ;; invalid second byte | ||
| 211 | ((write r0 r1 r2) | ||
| 212 | (write-repeat r3)) | ||
| 213 | ;; CNS 1-7, finally | ||
| 214 | ((branch (r1 - #xA1) | ||
| 215 | (r1 = r4) | ||
| 216 | (r1 = r5) | ||
| 217 | (r1 = r6) | ||
| 218 | (r1 = ,(charset-id 'chinese-cns11643-4)) | ||
| 219 | (r1 = ,(charset-id 'chinese-cns11643-5)) | ||
| 220 | (r1 = ,(charset-id 'chinese-cns11643-6)) | ||
| 221 | (r1 = ,(charset-id 'chinese-cns11643-7))) | ||
| 222 | (r2 = ((((r2 - #x80) << 7) + r3) - #x80)) | ||
| 223 | (write-multibyte-character r1 r2) | ||
| 224 | (repeat)))))) | ||
| 225 | ;; standard EUC | ||
| 226 | (if (r0 < #xA1) | ||
| 227 | ;; invalid first byte | ||
| 228 | (write-repeat r0) | ||
| 229 | (read-if (r1 < #xA1) | ||
| 230 | ;; invalid second byte | ||
| 231 | ((write r0) | ||
| 232 | (write-repeat r1)) | ||
| 233 | ;; CNS 1, finally | ||
| 234 | ((r1 = ((((r0 - #x80) << 7) + r1) - #x80)) | ||
| 235 | (write-multibyte-character r4 r1) | ||
| 236 | (repeat))))))))) | ||
| 237 | "CCL program to decode EUC-TW encoding." | ||
| 238 | ) | ||
| 239 | |||
| 240 | (define-ccl-program ccl-encode-euc-tw | ||
| 241 | `(2 | ||
| 242 | ;; we don't have enough registers to hold all charset-ids | ||
| 243 | ((r2 = ,(charset-id 'ascii)) | ||
| 244 | (r3 = ,(charset-id 'chinese-big5-1)) | ||
| 245 | (r4 = ,(charset-id 'chinese-big5-2)) | ||
| 246 | (r5 = ,(charset-id 'chinese-cns11643-1)) | ||
| 247 | (r6 = ,(charset-id 'chinese-cns11643-2)) | ||
| 248 | (loop | ||
| 249 | (read-multibyte-character r0 r1) | ||
| 250 | (if (r0 == r2) | ||
| 251 | (write-repeat r1) | ||
| 252 | (;; Big 5 encoded characters are first translated to CNS | ||
| 253 | (if (r0 == r3) | ||
| 254 | (translate-character big5-to-cns r0 r1) | ||
| 255 | (if (r0 == r4) | ||
| 256 | (translate-character big5-to-cns r0 r1))) | ||
| 257 | (if (r0 == r5) | ||
| 258 | (r0 = #xA1) | ||
| 259 | (if (r0 == r6) | ||
| 260 | (r0 = #xA2) | ||
| 261 | (if (r0 == ,(charset-id 'chinese-cns11643-3)) | ||
| 262 | (r0 = #xA3) | ||
| 263 | (if (r0 == ,(charset-id 'chinese-cns11643-4)) | ||
| 264 | (r0 = #xA4) | ||
| 265 | (if (r0 == ,(charset-id 'chinese-cns11643-5)) | ||
| 266 | (r0 = #xA5) | ||
| 267 | (if (r0 == ,(charset-id 'chinese-cns11643-6)) | ||
| 268 | (r0 = #xA6) | ||
| 269 | (if (r0 == ,(charset-id 'chinese-cns11643-7)) | ||
| 270 | (r0 = #xA7) | ||
| 271 | ;; not CNS. We use a dummy character which | ||
| 272 | ;; can't occur in EUC-TW encoding to indicate | ||
| 273 | ;; this. | ||
| 274 | (write-repeat #xFF)))))))))) | ||
| 275 | (if (r0 != #xA1) | ||
| 276 | ;; single shift and CNS plane | ||
| 277 | ((write #x8E) | ||
| 278 | (write r0))) | ||
| 279 | (write ((r1 >> 7) + #x80)) | ||
| 280 | (write ((r1 % #x80) + #x80)) | ||
| 281 | (repeat)))) | ||
| 282 | "CCL program to encode EUC-TW encoding." | ||
| 283 | ) | ||
| 284 | |||
| 285 | (defun euc-tw-pre-write-conversion (beg end) | ||
| 286 | "Semi-dummy pre-write function effectively to autoload china-util." | ||
| 287 | ;; Ensure translation table is loaded. | ||
| 288 | (require 'china-util) | ||
| 289 | ;; Don't do this again. | ||
| 290 | (coding-system-put 'euc-tw 'pre-write-conversion nil) | ||
| 291 | nil) | ||
| 292 | |||
| 293 | (make-coding-system | ||
| 294 | 'euc-tw 4 ?Z | ||
| 295 | "ISO 2022 based EUC encoding for Chinese CNS11643. | ||
| 296 | Big5 encoding is accepted for input also (which is then converted to CNS)." | ||
| 297 | '(ccl-decode-euc-tw . ccl-encode-euc-tw) | ||
| 298 | '((safe-charsets ascii | ||
| 299 | chinese-big5-1 | ||
| 300 | chinese-big5-2 | ||
| 301 | chinese-cns11643-1 | ||
| 302 | chinese-cns11643-2 | ||
| 303 | chinese-cns11643-3 | ||
| 304 | chinese-cns11643-4 | ||
| 305 | chinese-cns11643-5 | ||
| 306 | chinese-cns11643-6 | ||
| 307 | chinese-cns11643-7) | ||
| 308 | (valid-codes (0 . 255)) | ||
| 309 | (pre-write-conversion . euc-tw-pre-write-conversion))) | ||
| 310 | |||
| 311 | (define-coding-system-alias 'euc-taiwan 'euc-tw) | ||
| 312 | |||
| 171 | (set-language-info-alist | 313 | (set-language-info-alist |
| 172 | "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 | 314 | "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 |
| 173 | chinese-cns11643-3 chinese-cns11643-4 | 315 | chinese-cns11643-3 chinese-cns11643-4 |
| 174 | chinese-cns11643-5 chinese-cns11643-6 | 316 | chinese-cns11643-5 chinese-cns11643-6 |
| 175 | chinese-cns11643-7) | 317 | chinese-cns11643-7) |
| 176 | (coding-system iso-2022-cn) | 318 | (coding-system iso-2022-cn euc-tw) |
| 177 | (coding-priority iso-2022-cn chinese-big5 chinese-iso-8bit) | 319 | (coding-priority iso-2022-cn euc-tw chinese-big5 |
| 320 | chinese-iso-8bit) | ||
| 178 | (features china-util) | 321 | (features china-util) |
| 179 | (input-method . "chinese-cns-quick") | 322 | (input-method . "chinese-cns-quick") |
| 180 | (documentation . "Support for Chinese CNS character sets.")) | 323 | (documentation . "\ |
| 324 | Support for Chinese CNS character sets. Note that EUC-TW coding system | ||
| 325 | accepts Big5 for input also (which is then converted to CNS).")) | ||
| 181 | '("Chinese")) | 326 | '("Chinese")) |
| 182 | 327 | ||
| 183 | (provide 'chinese) | 328 | (provide 'chinese) |