aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKarl Heuer1997-02-20 07:02:49 +0000
committerKarl Heuer1997-02-20 07:02:49 +0000
commit4ed4686978bd18292e2bb7b87a7b0e0407ecb3b1 (patch)
tree860ad83f81c8c630fe7051e3c5379ca8a9658f69
parentadb572fb93ddfee88f9c5e9681434517fd241232 (diff)
downloademacs-4ed4686978bd18292e2bb7b87a7b0e0407ecb3b1.tar.gz
emacs-4ed4686978bd18292e2bb7b87a7b0e0407ecb3b1.zip
Initial revision
-rw-r--r--lisp/gnus/gnus-mule.el179
-rw-r--r--lisp/international/ccl.el1106
-rw-r--r--lisp/international/characters.el388
-rw-r--r--lisp/international/encoded-kb.el285
-rw-r--r--lisp/international/fontset.el336
-rw-r--r--lisp/international/isearch-x.el76
-rw-r--r--lisp/international/kinsoku.el141
-rw-r--r--lisp/international/kkc.el586
-rw-r--r--lisp/international/mule-cmds.el494
-rw-r--r--lisp/international/mule-diag.el565
-rw-r--r--lisp/international/mule-util.el419
-rw-r--r--lisp/international/mule.el529
-rw-r--r--lisp/international/quail.el1522
-rw-r--r--lisp/international/skkdic-cnv.el561
-rw-r--r--lisp/international/skkdic-utl.el198
-rw-r--r--lisp/international/titdic-cnv.el403
-rw-r--r--lisp/language/china-util.el155
-rw-r--r--lisp/language/chinese.el236
-rw-r--r--lisp/language/cyrillic.el71
-rw-r--r--lisp/language/devan-util.el1160
-rw-r--r--lisp/language/devanagari.el541
-rw-r--r--lisp/language/ethio-util.el1068
-rw-r--r--lisp/language/ethiopic.el85
-rw-r--r--lisp/language/european.el105
-rw-r--r--lisp/language/greek.el59
-rw-r--r--lisp/language/hebrew.el60
-rw-r--r--lisp/language/indian.el328
-rw-r--r--lisp/language/japan-util.el272
-rw-r--r--lisp/language/japanese.el96
-rw-r--r--lisp/language/korean.el78
-rw-r--r--lisp/language/misc-lang.el31
-rw-r--r--lisp/language/thai-util.el176
-rw-r--r--lisp/language/thai.el63
-rw-r--r--lisp/language/viet-util.el267
-rw-r--r--lisp/language/vietnamese.el254
-rw-r--r--src/category.c665
-rw-r--r--src/category.h130
-rw-r--r--src/ccl.c1140
-rw-r--r--src/ccl.h53
-rw-r--r--src/charset.c1452
-rw-r--r--src/charset.h649
-rw-r--r--src/coding.c3520
-rw-r--r--src/coding.h409
-rw-r--r--src/fontset.c819
-rw-r--r--src/fontset.h201
45 files changed, 21931 insertions, 0 deletions
diff --git a/lisp/gnus/gnus-mule.el b/lisp/gnus/gnus-mule.el
new file mode 100644
index 00000000000..b7abab722d3
--- /dev/null
+++ b/lisp/gnus/gnus-mule.el
@@ -0,0 +1,179 @@
1;; gnus-mule.el -- Provide multilingual environment to GNUS
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: gnus, mule
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; This package enables GNUS to code convert automatically
27;; accoding to a coding system specified for each news group.
28;; Please put the following line in your .emacs:
29;; (add-hook 'gnus-startup-hook 'gnus-mule-initialize)
30;; If you want to specify some coding system for a specific news
31;; group, add the fllowing line in your .emacs:
32;; (gnus-mule-add-group "xxx.yyy.zzz" 'some-coding-system)
33;;
34;; Decoding of summary buffer is not yet implemented.
35
36(require 'gnus)
37
38(defvar gnus-newsgroup-coding-systems nil
39 "Assoc list of news groups vs corresponding coding systems.
40Each element is a list of news group name and cons of coding systems
41for reading and posting.")
42
43;;;###autoload
44(defun gnus-mule-add-group (name coding-system)
45 "Specify that articles of news group NAME are encoded in CODING-SYSTEM.
46All news groups deeper than NAME are also the target.
47If CODING-SYSTEM is a cons, the car and cdr part are regarded as
48coding-system for reading and writing respectively."
49 (if (not (consp coding-system))
50 (setq coding-system (cons coding-system coding-system)))
51 (setq name (concat "^" (regexp-quote name)))
52 (let ((group (assoc name gnus-newsgroup-coding-systems)))
53 (if group
54 (setcdr group coding-system)
55 (setq gnus-newsgroup-coding-systems
56 (cons (cons name coding-system) gnus-newsgroup-coding-systems)))))
57
58(defun gnus-mule-get-coding-system (group)
59 "Return the coding system for news group GROUP."
60 (let ((groups gnus-newsgroup-coding-systems)
61 (len -1)
62 coding-system)
63 ;; Find an entry which matches GROUP the best (i.e. longest).
64 (while groups
65 (if (and (string-match (car (car groups)) group)
66 (> (match-end 0) len))
67 (setq len (match-end 0)
68 coding-system (cdr (car groups))))
69 (setq groups (cdr groups)))
70 coding-system))
71
72;; Flag to indicate if article buffer is already decoded or not.")
73(defvar gnus-mule-article-decoded nil)
74;; Codingsystem for reading articles of the current news group.
75(defvar gnus-mule-coding-system nil)
76(defvar gnus-mule-subject nil)
77(defvar gnus-mule-decoded-subject nil)
78(defvar gnus-mule-original-subject nil)
79
80;; Encode (if ENCODING is t) or decode (if ENCODING is nil) the
81;; region from START to END by CODING-SYSTEM.
82(defun gnus-mule-code-convert1 (start end coding-system encoding)
83 (if (< start end)
84 (save-excursion
85 (if encoding
86 (encode-coding-region start end coding-system)
87 (decode-coding-region start end coding-system)))))
88
89;; Encode (if ENCODING is t) or decode (if ENCODING is nil) the
90;; current buffer by CODING-SYSTEM. Try not to move positions of
91;; (window-start) and (point).
92(defun gnus-mule-code-convert (coding-system encoding)
93 (if coding-system
94 (let ((win (get-buffer-window (current-buffer))))
95 (if win
96 ;; We should keep (point) and (window-start).
97 (save-window-excursion
98 (select-window win)
99 (if encoding
100 ;; Simple way to assure point is on valid character boundary.
101 (beginning-of-line))
102 (gnus-mule-code-convert1 (point-min) (window-start)
103 coding-system encoding)
104 (gnus-mule-code-convert1 (window-start) (point)
105 coding-system encoding)
106 (gnus-mule-code-convert1 (point) (point-max)
107 coding-system encoding)
108 (if (not (pos-visible-in-window-p))
109 ;; point went out of window, move to the bottom of window.
110 (move-to-window-line -1)))
111 ;; No window for the buffer, no need to worry about (point)
112 ;; and (windos-start).
113 (gnus-mule-code-convert1 (point-min) (point-max)
114 coding-system encoding))
115 )))
116
117;; Set `gnus-mule-coding-system' to the coding system articles of the
118;; current news group is encoded. This function is set in
119;; `gnus-select-group-hook'.
120(defun gnus-mule-select-coding-system ()
121 (let ((coding-system (gnus-mule-get-coding-system gnus-newsgroup-name)))
122 (setq gnus-mule-coding-system
123 (if (and coding-system (coding-system-p (car coding-system)))
124 (car coding-system)))))
125
126;; Decode the current article. This function is set in
127;; `gnus-article-prepare-hook'.
128(defun gnus-mule-decode-article ()
129 (gnus-mule-code-convert gnus-mule-coding-system nil)
130 (setq gnus-mule-article-decoded t))
131
132;; Decode the current summary buffer. This function is set in
133;; `gnus-summary-prepare-hook'.
134(defun gnus-mule-decode-summary ()
135 ;; I have not yet implemented this function because I'm not yet
136 ;; familiar with the new Gnus codes, especialy how to extract only
137 ;; subjects from a summary buffer.
138 nil)
139
140(defun gnus-mule-toggle-article-format ()
141 "Toggle decoding/encoding of the current article buffer."
142 (interactive)
143 (let ((buf (get-buffer gnus-article-buffer)))
144 (if (and gnus-mule-coding-system buf)
145 (save-excursion
146 (set-buffer buf)
147 (let ((modif (buffer-modified-p))
148 buffer-read-only)
149 (gnus-mule-code-convert gnus-mule-coding-system
150 gnus-mule-article-decoded)
151 (setq gnus-mule-article-decoded (not gnus-mule-article-decoded))
152 (set-buffer-modified-p modif))))))
153
154;;;###autoload
155(defun gnus-mule-initialize ()
156 "Do several settings for GNUS to enable automatic code conversion."
157 ;; Convenient key definitions
158 (define-key gnus-article-mode-map "z" 'gnus-mule-toggle-article-format)
159 (define-key gnus-summary-mode-map "z" 'gnus-mule-toggle-article-format)
160 ;; Hook definition
161 (add-hook 'gnus-select-group-hook 'gnus-mule-select-coding-system)
162 (add-hook 'gnus-summary-prepare-hook 'gnus-mule-decode-summary)
163 (add-hook 'gnus-article-prepare-hook 'gnus-mule-decode-article))
164
165(gnus-mule-add-group "" 'coding-system-iso-2022-7) ;; default coding system
166(gnus-mule-add-group "alt" 'no-conversion)
167(gnus-mule-add-group "comp" 'no-conversion)
168(gnus-mule-add-group "gnu" 'no-conversion)
169(gnus-mule-add-group "rec" 'no-conversion)
170(gnus-mule-add-group "sci" 'no-conversion)
171(gnus-mule-add-group "soc" 'no-conversion)
172(gnus-mule-add-group "alt.chinese.text" 'coding-system-hz)
173(gnus-mule-add-group "alt.hk" 'coding-system-hz)
174(gnus-mule-add-group "alt.chinese.text.big5" 'coding-system-big5)
175(gnus-mule-add-group "soc.culture.vietnamese" '(nil . coding-system-viqr))
176
177(add-hook 'gnus-startup-hook 'gnus-mule-initialize)
178
179;; gnus-mule.el ends here
diff --git a/lisp/international/ccl.el b/lisp/international/ccl.el
new file mode 100644
index 00000000000..da2f1585d87
--- /dev/null
+++ b/lisp/international/ccl.el
@@ -0,0 +1,1106 @@
1;; ccl.el -- CCL (Code Conversion Language) compiler
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: CCL, mule, multilingual, character set, coding-system
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; CCL (Code Conversion Language) is a simple programming language to
27;; be used for various kind of code conversion. CCL program is
28;; compiled to CCL code (vector of integers) and executed by CCL
29;; interpreter of Emacs.
30;;
31;; CCL is used for code conversion at process I/O and file I/O for
32;; non-standard coding-system. In addition, it is used for
33;; calculating a code point of X's font from a character code.
34;; However, since CCL is designed as a powerful programming language,
35;; it can be used for more generic calculation. For instance,
36;; combination of three or more arithmetic operations can be
37;; calculated faster than Emacs Lisp.
38;;
39;; Here's the syntax of CCL program in BNF notation.
40;;
41;; CCL_PROGRAM :=
42;; (BUFFER_MAGNIFICATION
43;; CCL_MAIN_BLOCK
44;; [ CCL_EOF_BLOCK ])
45;;
46;; BUFFER_MAGNIFICATION := integer
47;; CCL_MAIN_BLOCK := CCL_BLOCK
48;; CCL_EOF_BLOCK := CCL_BLOCK
49;;
50;; CCL_BLOCK :=
51;; STATEMENT | (STATEMENT [STATEMENT ...])
52;; STATEMENT :=
53;; SET | IF | BRANCH | LOOP | REPEAT | BREAK | READ | WRITE | CALL
54;;
55;; SET :=
56;; (REG = EXPRESSION)
57;; | (REG ASSIGNMENT_OPERATOR EXPRESSION)
58;; | integer
59;;
60;; EXPRESSION := ARG | (EXPRESSION OPERATOR ARG)
61;;
62;; IF := (if EXPRESSION CCL_BLOCK CCL_BLOCK)
63;; BRANCH := (branch EXPRESSION CCL_BLOCK [CCL_BLOCK ...])
64;; LOOP := (loop STATEMENT [STATEMENT ...])
65;; BREAK := (break)
66;; REPEAT :=
67;; (repeat)
68;; | (write-repeat [REG | integer | string])
69;; | (write-read-repeat REG [integer | ARRAY])
70;; READ :=
71;; (read REG ...)
72;; | (read-if (REG OPERATOR ARG) CCL_BLOCK CCL_BLOCK)
73;; | (read-branch REG CCL_BLOCK [CCL_BLOCK ...])
74;; WRITE :=
75;; (write REG ...)
76;; | (write EXPRESSION)
77;; | (write integer) | (write string) | (write REG ARRAY)
78;; | string
79;; CALL := (call ccl-program-name)
80;; END := (end)
81;;
82;; REG := r0 | r1 | r2 | r3 | r4 | r5 | r6 | r7
83;; ARG := REG | integer
84;; OPERATOR :=
85;; + | - | * | / | % | & | '|' | ^ | << | >> | <8 | >8 | //
86;; | < | > | == | <= | >= | != | de-sjis | en-sjis
87;; ASSIGNMENT_OPERATOR :=
88;; += | -= | *= | /= | %= | &= | '|=' | ^= | <<= | >>=
89;; ARRAY := '[' interger ... ']'
90
91;;; Code:
92
93(defconst ccl-command-table
94 [if branch loop break repeat write-repeat write-read-repeat
95 read read-if read-branch write call end]
96 "*Vector of CCL commands (symbols).")
97
98;; Put a property to each symbol of CCL commands for the compiler.
99(let (op (i 0) (len (length ccl-command-table)))
100 (while (< i len)
101 (setq op (aref ccl-command-table i))
102 (put op 'ccl-compile-function (intern (format "ccl-compile-%s" op)))
103 (setq i (1+ i))))
104
105(defconst ccl-code-table
106 [set-register
107 set-short-const
108 set-const
109 set-array
110 jump
111 jump-cond
112 write-register-jump
113 write-register-read-jump
114 write-const-jump
115 write-const-read-jump
116 write-string-jump
117 write-array-read-jump
118 read-jump
119 branch
120 read-register
121 write-expr-const
122 read-branch
123 write-register
124 write-expr-register
125 call
126 write-const-string
127 write-array
128 end
129 set-assign-expr-const
130 set-assign-expr-register
131 set-expr-const
132 set-expr-register
133 jump-cond-expr-const
134 jump-cond-expr-register
135 read-jump-cond-expr-const
136 read-jump-cond-expr-register
137 ]
138 "*Vector of CCL compiled codes (symbols).")
139
140;; Put a property to each symbol of CCL codes for the disassembler.
141(let (code (i 0) (len (length ccl-code-table)))
142 (while (< i len)
143 (setq code (aref ccl-code-table i))
144 (put code 'ccl-code i)
145 (put code 'ccl-dump-function (intern (format "ccl-dump-%s" code)))
146 (setq i (1+ i))))
147
148(defconst ccl-jump-code-list
149 '(jump jump-cond write-register-jump write-register-read-jump
150 write-const-jump write-const-read-jump write-string-jump
151 write-array-read-jump read-jump))
152
153;; Put a property `jump-flag' to each CCL code which execute jump in
154;; some way.
155(let ((l ccl-jump-code-list))
156 (while l
157 (put (car l) 'jump-flag t)
158 (setq l (cdr l))))
159
160(defconst ccl-register-table
161 [r0 r1 r2 r3 r4 r5 r6 r7]
162 "*Vector of CCL registers (symbols).")
163
164;; Put a property to indicate register number to each symbol of CCL.
165;; registers.
166(let (reg (i 0) (len (length ccl-register-table)))
167 (while (< i len)
168 (setq reg (aref ccl-register-table i))
169 (put reg 'ccl-register-number i)
170 (setq i (1+ i))))
171
172(defconst ccl-arith-table
173 [+ - * / % & | ^ << >> <8 >8 // nil nil nil
174 < > == <= >= != de-sjis en-sjis]
175 "*Vector of CCL arithmetic/logical operators (symbols).")
176
177;; Put a property to each symbol of CCL operators for the compiler.
178(let (arith (i 0) (len (length ccl-arith-table)))
179 (while (< i len)
180 (setq arith (aref ccl-arith-table i))
181 (if arith (put arith 'ccl-arith-code i))
182 (setq i (1+ i))))
183
184(defconst ccl-assign-arith-table
185 [+= -= *= /= %= &= |= ^= <<= >>= <8= >8= //=]
186 "*Vector of CCL assignment operators (symbols).")
187
188;; Put a property to each symbol of CCL assignment operators for the compiler.
189(let (arith (i 0) (len (length ccl-assign-arith-table)))
190 (while (< i len)
191 (setq arith (aref ccl-assign-arith-table i))
192 (put arith 'ccl-self-arith-code i)
193 (setq i (1+ i))))
194
195(defvar ccl-program-vector nil
196 "Working vector of CCL codes produced by CCL compiler.")
197(defvar ccl-current-ic 0
198 "The current index for `ccl-program-vector'.")
199
200;; Embed integer DATA in `ccl-program-vector' at `ccl-current-ic' and
201;; increment it. If IC is specified, embed DATA at IC.
202(defun ccl-embed-data (data &optional ic)
203 (if ic
204 (aset ccl-program-vector ic data)
205 (aset ccl-program-vector ccl-current-ic data)
206 (setq ccl-current-ic (1+ ccl-current-ic))))
207
208;; Embed string STR of length LEN in `ccl-program-vector' at
209;; `ccl-current-ic'.
210(defun ccl-embed-string (len str)
211 (let ((i 0))
212 (while (< i len)
213 (ccl-embed-data (logior (ash (aref str i) 16)
214 (if (< (1+ i) len)
215 (ash (aref str (1+ i)) 8)
216 0)
217 (if (< (+ i 2) len)
218 (aref str (+ i 2))
219 0)))
220 (setq i (+ i 3)))))
221
222;; Embed a relative jump address to `ccl-current-ic' in
223;; `ccl-program-vector' at IC without altering the other bit field.
224(defun ccl-embed-current-address (ic)
225 (let ((relative (- ccl-current-ic (1+ ic))))
226 (aset ccl-program-vector ic
227 (logior (aref ccl-program-vector ic) (ash relative 8)))))
228
229;; Embed CCL code for the operation OP and arguments REG and DATA in
230;; `ccl-program-vector' at `ccl-current-ic' in the following format.
231;; |----------------- integer (28-bit) ------------------|
232;; |------------ 20-bit ------------|- 3-bit --|- 5-bit -|
233;; |------------- DATA -------------|-- REG ---|-- OP ---|
234;; If REG2 is specified, embed a code in the following format.
235;; |------- 17-bit ------|- 3-bit --|- 3-bit --|- 5-bit -|
236;; |-------- DATA -------|-- REG2 --|-- REG ---|-- OP ---|
237
238;; If REG is a CCL register symbol (e.g. r0, r1...), the register
239;; number is embedded. If OP is one of unconditional jumps, DATA is
240;; changed to an absolute jump address.
241
242(defun ccl-embed-code (op reg data &optional reg2)
243 (if (and (> data 0) (get op 'jump-flag))
244 ;; DATA is an absolute jump address. Make it relative to the
245 ;; next of jump code.
246 (setq data (- data (1+ ccl-current-ic))))
247 (let ((code (logior (get op 'ccl-code)
248 (ash
249 (if (symbolp reg) (get reg 'ccl-register-number) reg) 5)
250 (if reg2
251 (logior (ash (get reg2 'ccl-register-number) 8)
252 (ash data 11))
253 (ash data 8)))))
254 (aset ccl-program-vector ccl-current-ic code)
255 (setq ccl-current-ic (1+ ccl-current-ic))))
256
257;; Just advance `ccl-current-ic' by INC.
258(defun ccl-increment-ic (inc)
259 (setq ccl-current-ic (+ ccl-current-ic inc)))
260
261;;;###autoload
262(defun ccl-program-p (obj)
263 "T if OBJECT is a valid CCL compiled code."
264 (and (vectorp obj)
265 (let ((i 0) (len (length obj)) (flag t))
266 (if (> len 1)
267 (progn
268 (while (and flag (< i len))
269 (setq flag (integerp (aref obj i)))
270 (setq i (1+ i)))
271 flag)))))
272
273;; If non-nil, index of the start of the current loop.
274(defvar ccl-loop-head nil)
275;; If non-nil, list of absolute addresses of the breaking points of
276;; the current loop.
277(defvar ccl-breaks nil)
278
279;;;###autoload
280(defun ccl-compile (ccl-program)
281 "Return a comiled code of CCL-PROGRAM as a vector of integer."
282 (if (or (null (consp ccl-program))
283 (null (integerp (car ccl-program)))
284 (null (listp (car (cdr ccl-program)))))
285 (error "CCL: Invalid CCL program: %s" ccl-program))
286 (if (null (vectorp ccl-program-vector))
287 (setq ccl-program-vector (make-vector 8192 0)))
288 (setq ccl-loop-head nil ccl-breaks nil)
289 (setq ccl-current-ic 0)
290
291 ;; The first element is the buffer magnification.
292 (ccl-embed-data (car ccl-program))
293
294 ;; The second element is the address of the start CCL code for
295 ;; processing end of input buffer (we call it eof-processor). We
296 ;; set it later.
297 (ccl-increment-ic 1)
298
299 ;; Compile the main body of the CCL program.
300 (ccl-compile-1 (car (cdr ccl-program)))
301
302 ;; Embed the address of eof-processor.
303 (ccl-embed-data ccl-current-ic 1)
304
305 ;; Then compile eof-processor.
306 (if (nth 2 ccl-program)
307 (ccl-compile-1 (nth 2 ccl-program)))
308
309 ;; At last, embed termination code.
310 (ccl-embed-code 'end 0 0)
311
312 (let ((vec (make-vector ccl-current-ic 0))
313 (i 0))
314 (while (< i ccl-current-ic)
315 (aset vec i (aref ccl-program-vector i))
316 (setq i (1+ i)))
317 vec))
318
319;; Signal syntax error.
320(defun ccl-syntax-error (cmd)
321 (error "CCL: Syntax error: %s" cmd))
322
323;; Check if ARG is a valid CCL register.
324(defun ccl-check-register (arg cmd)
325 (if (get arg 'ccl-register-number)
326 arg
327 (error "CCL: Invalid register %s in %s." arg cmd)))
328
329;; Check if ARG is a valid CCL command.
330(defun ccl-check-compile-function (arg cmd)
331 (or (get arg 'ccl-compile-function)
332 (error "CCL: Invalid command: %s" cmd)))
333
334;; In the following code, most ccl-compile-XXXX functions return t if
335;; they end with unconditional jump, else return nil.
336
337;; Compile CCL-BLOCK (see the syntax above).
338(defun ccl-compile-1 (ccl-block)
339 (let (unconditional-jump
340 cmd)
341 (if (or (integerp ccl-block)
342 (stringp ccl-block)
343 (and ccl-block (symbolp (car ccl-block))))
344 ;; This block consists of single statement.
345 (setq ccl-block (list ccl-block)))
346
347 ;; Now CCL-BLOCK is a list of statements. Compile them one by
348 ;; one.
349 (while ccl-block
350 (setq cmd (car ccl-block))
351 (setq unconditional-jump
352 (cond ((integerp cmd)
353 ;; SET statement for the register 0.
354 (ccl-compile-set (list 'r0 '= cmd)))
355
356 ((stringp cmd)
357 ;; WRITE statement of string argument.
358 (ccl-compile-write-string cmd))
359
360 ((listp cmd)
361 ;; The other statements.
362 (cond ((eq (nth 1 cmd) '=)
363 ;; SET statement of the form `(REG = EXPRESSION)'.
364 (ccl-compile-set cmd))
365
366 ((and (symbolp (nth 1 cmd))
367 (get (nth 1 cmd) 'ccl-self-arith-code))
368 ;; SET statement with an assignment operation.
369 (ccl-compile-self-set cmd))
370
371 (t
372 (funcall (ccl-check-compile-function (car cmd) cmd)
373 cmd))))
374
375 (t
376 (ccl-syntax-error cmd))))
377 (setq ccl-block (cdr ccl-block)))
378 unconditional-jump))
379
380(defconst ccl-max-short-const (ash 1 19))
381(defconst ccl-min-short-const (ash -1 19))
382
383;; Compile SET statement.
384(defun ccl-compile-set (cmd)
385 (let ((rrr (ccl-check-register (car cmd) cmd))
386 (right (nth 2 cmd)))
387 (cond ((listp right)
388 ;; CMD has the form `(RRR = (XXX OP YYY))'.
389 (ccl-compile-expression rrr right))
390
391 ((integerp right)
392 ;; CMD has the form `(RRR = integer)'.
393 (if (and (<= right ccl-max-short-const)
394 (>= right ccl-min-short-const))
395 (ccl-embed-code 'set-short-const rrr right)
396 (ccl-embed-code 'set-const rrr 0)
397 (ccl-embed-data right)))
398
399 (t
400 ;; CMD has the form `(RRR = rrr [ array ])'.
401 (ccl-check-register right cmd)
402 (let ((ary (nth 3 cmd)))
403 (if (vectorp ary)
404 (let ((i 0) (len (length ary)))
405 (ccl-embed-code 'set-array rrr len right)
406 (while (< i len)
407 (ccl-embed-data (aref ary i))
408 (setq i (1+ i))))
409 (ccl-embed-code 'set-register rrr 0 right))))))
410 nil)
411
412;; Compile SET statement with ASSIGNMENT_OPERATOR.
413(defun ccl-compile-self-set (cmd)
414 (let ((rrr (ccl-check-register (car cmd) cmd))
415 (right (nth 2 cmd)))
416 (if (listp right)
417 ;; CMD has the form `(RRR ASSIGN_OP (XXX OP YYY))', compile
418 ;; the right hand part as `(r7 = (XXX OP YYY))' (note: the
419 ;; register 7 can be used for storing temporary value).
420 (progn
421 (ccl-compile-expression 'r7 right)
422 (setq right 'r7)))
423 ;; Now CMD has the form `(RRR ASSIGN_OP ARG)'. Compile it as
424 ;; `(RRR = (RRR OP ARG))'.
425 (ccl-compile-expression
426 rrr
427 (list rrr (intern (substring (symbol-name (nth 1 cmd)) 0 -1)) right)))
428 nil)
429
430;; Compile SET statement of the form `(RRR = EXPR)'.
431(defun ccl-compile-expression (rrr expr)
432 (let ((left (car expr))
433 (op (get (nth 1 expr) 'ccl-arith-code))
434 (right (nth 2 expr)))
435 (if (listp left)
436 (progn
437 ;; EXPR has the form `((EXPR2 OP2 ARG) OP RIGHT)'. Compile
438 ;; the first term as `(r7 = (EXPR2 OP2 ARG)).'
439 (ccl-compile-expression 'r7 left)
440 (setq left 'r7)))
441
442 ;; Now EXPR has the form (LEFT OP RIGHT).
443 (if (eq rrr left)
444 ;; Compile this SET statement as `(RRR OP= RIGHT)'.
445 (if (integerp right)
446 (progn
447 (ccl-embed-code 'set-assign-expr-const rrr (ash op 3) 'r0)
448 (ccl-embed-data right))
449 (ccl-check-register right expr)
450 (ccl-embed-code 'set-assign-expr-register rrr (ash op 3) right))
451
452 ;; Compile this SET statement as `(RRR = (LEFT OP RIGHT))'.
453 (if (integerp right)
454 (progn
455 (ccl-embed-code 'set-expr-const rrr (ash op 3) left)
456 (ccl-embed-data right))
457 (ccl-check-register right expr)
458 (ccl-embed-code 'set-expr-register
459 rrr
460 (logior (ash op 3) (get right 'ccl-register-number))
461 left)))))
462
463;; Compile WRITE statement with string argument.
464(defun ccl-compile-write-string (str)
465 (let ((len (length str)))
466 (ccl-embed-code 'write-const-string 1 len)
467 (ccl-embed-string len str))
468 nil)
469
470;; Compile IF statement of the form `(if CONDITION TRUE-PART FALSE-PART)'.
471;; If READ-FLAG is non-nil, this statement has the form
472;; `(read-if (REG OPERATOR ARG) TRUE-PART FALSE-PART)'.
473(defun ccl-compile-if (cmd &optional read-flag)
474 (if (and (/= (length cmd) 3) (/= (length cmd) 4))
475 (error "CCL: Invalid number of arguments: %s" cmd))
476 (let ((condition (nth 1 cmd))
477 (true-cmds (nth 2 cmd))
478 (false-cmds (nth 3 cmd))
479 jump-cond-address
480 false-ic)
481 (if (and (listp condition)
482 (listp (car condition)))
483 ;; If CONDITION is a nested expression, the inner expression
484 ;; should be compiled at first as SET statement, i.e.:
485 ;; `(if ((X OP2 Y) OP Z) ...)' is compiled into two statements:
486 ;; `(r7 = (X OP2 Y)) (if (r7 OP Z) ...)'.
487 (progn
488 (ccl-compile-expression 'r7 (car condition))
489 (setq condition (cons 'r7 (cdr condition)))
490 (setq cmd (cons (car cmd)
491 (cons condition (cdr (cdr cmd)))))))
492
493 (setq jump-cond-address ccl-current-ic)
494 ;; Compile CONDITION.
495 (if (symbolp condition)
496 ;; CONDITION is a register.
497 (progn
498 (ccl-check-register condition cmd)
499 (ccl-embed-code 'jump-cond condition 0))
500 ;; CONDITION is a simple expression of the form (RRR OP ARG).
501 (let ((rrr (car condition))
502 (op (get (nth 1 condition) 'ccl-arith-code))
503 (arg (nth 2 condition)))
504 (ccl-check-register rrr cmd)
505 (if (integerp arg)
506 (progn
507 (ccl-embed-code (if read-flag 'read-jump-cond-expr-const
508 'jump-cond-expr-const)
509 rrr 0)
510 (ccl-embed-data op)
511 (ccl-embed-data arg))
512 (ccl-check-register arg cmd)
513 (ccl-embed-code (if read-flag 'read-jump-cond-expr-register
514 'jump-cond-expr-register)
515 rrr 0)
516 (ccl-embed-data op)
517 (ccl-embed-data (get arg 'ccl-register-number)))))
518
519 ;; Compile TRUE-PART.
520 (let ((unconditional-jump (ccl-compile-1 true-cmds)))
521 (if (null false-cmds)
522 ;; This is the place to jump to if condition is false.
523 (ccl-embed-current-address jump-cond-address)
524 (let (end-true-part-address)
525 (if (not unconditional-jump)
526 (progn
527 ;; If TRUE-PART does not end with unconditional jump, we
528 ;; have to jump to the end of FALSE-PART from here.
529 (setq end-true-part-address ccl-current-ic)
530 (ccl-embed-code 'jump 0 0)))
531 ;; This is the place to jump to if CONDITION is false.
532 (ccl-embed-current-address jump-cond-address)
533 ;; Compile FALSE-PART.
534 (setq unconditional-jump
535 (and (ccl-compile-1 false-cmds) unconditional-jump))
536 (if end-true-part-address
537 ;; This is the place to jump to after the end of TRUE-PART.
538 (ccl-embed-current-address end-true-part-address))))
539 unconditional-jump)))
540
541;; Compile BRANCH statement.
542(defun ccl-compile-branch (cmd)
543 (if (< (length cmd) 3)
544 (error "CCL: Invalid number of arguments: %s" cmd))
545 (ccl-compile-branch-blocks 'branch
546 (ccl-compile-branch-expression (nth 1 cmd) cmd)
547 (cdr (cdr cmd))))
548
549;; Compile READ statement of the form `(read-branch EXPR BLOCK0 BLOCK1 ...)'.
550(defun ccl-compile-read-branch (cmd)
551 (if (< (length cmd) 3)
552 (error "CCL: Invalid number of arguments: %s" cmd))
553 (ccl-compile-branch-blocks 'read-branch
554 (ccl-compile-branch-expression (nth 1 cmd) cmd)
555 (cdr (cdr cmd))))
556
557;; Compile EXPRESSION part of BRANCH statement and return register
558;; which holds a value of the expression.
559(defun ccl-compile-branch-expression (expr cmd)
560 (if (listp expr)
561 ;; EXPR has the form `(EXPR2 OP ARG)'. Compile it as SET
562 ;; statement of the form `(r7 = (EXPR2 OP ARG))'.
563 (progn
564 (ccl-compile-expression 'r7 expr)
565 'r7)
566 (ccl-check-register expr cmd)))
567
568;; Compile BLOCKs of BRANCH statement. CODE is 'branch or 'read-branch.
569;; REG is a register which holds a value of EXPRESSION part. BLOCKs
570;; is a list of CCL-BLOCKs.
571(defun ccl-compile-branch-blocks (code rrr blocks)
572 (let ((branches (length blocks))
573 branch-idx
574 jump-table-head-address
575 empty-block-indexes
576 block-tail-addresses
577 block-unconditional-jump)
578 (ccl-embed-code code rrr branches)
579 (setq jump-table-head-address ccl-current-ic)
580 ;; The size of jump table is the number of blocks plus 1 (for the
581 ;; case RRR is out of range).
582 (ccl-increment-ic (1+ branches))
583 (setq empty-block-indexes (list branches))
584 ;; Compile each block.
585 (setq branch-idx 0)
586 (while blocks
587 (if (null (car blocks))
588 ;; This block is empty.
589 (setq empty-block-indexes (cons branch-idx empty-block-indexes)
590 block-unconditional-jump t)
591 ;; This block is not empty.
592 (ccl-embed-data (- ccl-current-ic jump-table-head-address)
593 (+ jump-table-head-address branch-idx))
594 (setq block-unconditional-jump (ccl-compile-1 (car blocks)))
595 (if (not block-unconditional-jump)
596 (progn
597 ;; Jump address of the end of branches are embedded later.
598 ;; For the moment, just remember where to embed them.
599 (setq block-tail-addresses
600 (cons ccl-current-ic block-tail-addresses))
601 (ccl-embed-code 'jump 0 0))))
602 (setq branch-idx (1+ branch-idx))
603 (setq blocks (cdr blocks)))
604 (if (not block-unconditional-jump)
605 ;; We don't need jump code at the end of the last block.
606 (setq block-tail-addresses (cdr block-tail-addresses)
607 ccl-current-ic (1- ccl-current-ic)))
608 ;; Embed jump address at the tailing jump commands of blocks.
609 (while block-tail-addresses
610 (ccl-embed-current-address (car block-tail-addresses))
611 (setq block-tail-addresses (cdr block-tail-addresses)))
612 ;; For empty blocks, make entries in the jump table point directly here.
613 (while empty-block-indexes
614 (ccl-embed-data (- ccl-current-ic jump-table-head-address)
615 (+ jump-table-head-address (car empty-block-indexes)))
616 (setq empty-block-indexes (cdr empty-block-indexes))))
617 ;; Branch command ends by unconditional jump if RRR is out of range.
618 nil)
619
620;; Compile LOOP statement.
621(defun ccl-compile-loop (cmd)
622 (if (< (length cmd) 2)
623 (error "CCL: Invalid number of arguments: %s" cmd))
624 (let* ((ccl-loop-head ccl-current-ic)
625 (ccl-breaks nil)
626 unconditional-jump)
627 (setq cmd (cdr cmd))
628 (if cmd
629 (progn
630 (setq unconditional-jump t)
631 (while cmd
632 (setq unconditional-jump
633 (and (ccl-compile-1 (car cmd)) unconditional-jump))
634 (setq cmd (cdr cmd)))
635 (if (not ccl-breaks)
636 unconditional-jump
637 ;; Embed jump address for break statements encountered in
638 ;; this loop.
639 (while ccl-breaks
640 (ccl-embed-current-address (car ccl-breaks))
641 (setq ccl-breaks (cdr ccl-breaks))))
642 nil))))
643
644;; Compile BREAK statement.
645(defun ccl-compile-break (cmd)
646 (if (/= (length cmd) 1)
647 (error "CCL: Invalid number of arguments: %s" cmd))
648 (if (null ccl-loop-head)
649 (error "CCL: No outer loop: %s" cmd))
650 (setq ccl-breaks (cons ccl-current-ic ccl-breaks))
651 (ccl-embed-code 'jump 0 0)
652 t)
653
654;; Compile REPEAT statement.
655(defun ccl-compile-repeat (cmd)
656 (if (/= (length cmd) 1)
657 (error "CCL: Invalid number of arguments: %s" cmd))
658 (if (null ccl-loop-head)
659 (error "CCL: No outer loop: %s" cmd))
660 (ccl-embed-code 'jump 0 ccl-loop-head)
661 t)
662
663;; Compile WRITE-REPEAT statement.
664(defun ccl-compile-write-repeat (cmd)
665 (if (/= (length cmd) 2)
666 (error "CCL: Invalid number of arguments: %s" cmd))
667 (if (null ccl-loop-head)
668 (error "CCL: No outer loop: %s" cmd))
669 (let ((arg (nth 1 cmd)))
670 (cond ((integerp arg)
671 (ccl-embed-code 'write-const-jump 0 ccl-loop-head)
672 (ccl-embed-data arg))
673 ((stringp arg)
674 (let ((len (length arg))
675 (i 0))
676 (ccl-embed-code 'write-string-jump 0 ccl-loop-head)
677 (ccl-embed-data len)
678 (ccl-embed-string len arg)))
679 (t
680 (ccl-check-register arg cmd)
681 (ccl-embed-code 'write-register-jump arg ccl-loop-head))))
682 t)
683
684;; Compile WRITE-READ-REPEAT statement.
685(defun ccl-compile-write-read-repeat (cmd)
686 (if (or (< (length cmd) 2) (> (length cmd) 3))
687 (error "CCL: Invalid number of arguments: %s" cmd))
688 (if (null ccl-loop-head)
689 (error "CCL: No outer loop: %s" cmd))
690 (let ((rrr (ccl-check-register (nth 1 cmd) cmd))
691 (arg (nth 2 cmd)))
692 (cond ((null arg)
693 (ccl-embed-code 'write-register-read-jump rrr ccl-loop-head))
694 ((integerp arg)
695 (ccl-embed-code 'write-const-read-jump rrr arg ccl-loop-head))
696 ((vectorp arg)
697 (let ((len (length arg))
698 (i 0))
699 (ccl-embed-code 'write-array-read-jump rrr ccl-loop-head)
700 (ccl-embed-data len)
701 (while (< i len)
702 (ccl-embed-data (aref arg i))
703 (setq i (1+ i)))))
704 (t
705 (error "CCL: Invalid argument %s: %s" arg cmd)))
706 (ccl-embed-code 'read-jump rrr ccl-loop-head))
707 t)
708
709;; Compile READ statement.
710(defun ccl-compile-read (cmd)
711 (if (< (length cmd) 2)
712 (error "CCL: Invalid number of arguments: %s" cmd))
713 (let* ((args (cdr cmd))
714 (i (1- (length args))))
715 (while args
716 (let ((rrr (ccl-check-register (car args) cmd)))
717 (ccl-embed-code 'read-register rrr i)
718 (setq args (cdr args) i (1- i)))))
719 nil)
720
721;; Compile READ-IF statement.
722(defun ccl-compile-read-if (cmd)
723 (ccl-compile-if cmd 'read))
724
725;; Compile WRITE statement.
726(defun ccl-compile-write (cmd)
727 (if (< (length cmd) 2)
728 (error "CCL: Invalid number of arguments: %s" cmd))
729 (let ((rrr (nth 1 cmd)))
730 (cond ((integerp rrr)
731 (ccl-embed-code 'write-const-string 0 rrr))
732 ((stringp rrr)
733 (ccl-compile-write-string rrr))
734 ((and (symbolp rrr) (vectorp (nth 2 cmd)))
735 (ccl-check-register rrr cmd)
736 ;; CMD has the form `(write REG ARRAY)'.
737 (let* ((arg (nth 2 cmd))
738 (len (length arg))
739 (i 0))
740 (ccl-embed-code 'write-array rrr len)
741 (while (< i len)
742 (if (not (integerp (aref arg i)))
743 (error "CCL: Invalid argument %s: %s" arg cmd))
744 (ccl-embed-data (aref arg i))
745 (setq i (1+ i)))))
746
747 ((symbolp rrr)
748 ;; CMD has the form `(write REG ...)'.
749 (let* ((args (cdr cmd))
750 (i (1- (length args))))
751 (while args
752 (setq rrr (ccl-check-register (car args) cmd))
753 (ccl-embed-code 'write-register rrr i)
754 (setq args (cdr args) i (1- i)))))
755
756 ((listp rrr)
757 ;; CMD has the form `(write (LEFT OP RIGHT))'.
758 (let ((left (car rrr))
759 (op (get (nth 1 rrr) 'ccl-arith-code))
760 (right (nth 2 rrr)))
761 (if (listp left)
762 (progn
763 ;; RRR has the form `((EXPR OP2 ARG) OP RIGHT)'.
764 ;; Compile the first term as `(r7 = (EXPR OP2 ARG))'.
765 (ccl-compile-expression 'r7 left)
766 (setq left 'r7)))
767 ;; Now RRR has the form `(ARG OP RIGHT)'.
768 (if (integerp right)
769 (progn
770 (ccl-embed-code 'write-expr-const 0 (ash op 3) left)
771 (ccl-embed-data right))
772 (ccl-check-register right rrr)
773 (ccl-embed-code 'write-expr-register 0
774 (logior (ash op 3)
775 (get right 'ccl-register-number))))))
776
777 (t
778 (error "CCL: Invalid argument: %s" cmd))))
779 nil)
780
781;; Compile CALL statement.
782(defun ccl-compile-call (cmd)
783 (if (/= (length cmd) 2)
784 (error "CCL: Invalid number of arguments: %s" cmd))
785 (if (not (symbolp (nth 1 cmd)))
786 (error "CCL: Subroutine should be a symbol: %s" cmd))
787 (let* ((name (nth 1 cmd))
788 (idx (get name 'ccl-program-idx)))
789 (if (not idx)
790 (error "CCL: Unknown subroutine name: %s" name))
791 (ccl-embed-code 'call 0 idx))
792 nil)
793
794;; Compile END statement.
795(defun ccl-compile-end (cmd)
796 (if (/= (length cmd) 1)
797 (error "CCL: Invalid number of arguments: %s" cmd))
798 (ccl-embed-code 'end 0 0)
799 t)
800
801;;; CCL dump staffs
802
803;; To avoid byte-compiler warning.
804(defvar ccl-code)
805
806;;;###autoload
807(defun ccl-dump (ccl-code)
808 "Disassemble compiled CCL-CODE."
809 (let ((len (length ccl-code))
810 (buffer-mag (aref ccl-code 0)))
811 (cond ((= buffer-mag 0)
812 (insert "Don't output anything.\n"))
813 ((= buffer-mag 1)
814 (insert "Out-buffer must be as large as in-buffer.\n"))
815 (t
816 (insert
817 (format "Out-buffer must be %d times bigger than in-buffer.\n"
818 buffer-mag))))
819 (insert "Main-body:\n")
820 (setq ccl-current-ic 2)
821 (if (> (aref ccl-code 1) 0)
822 (progn
823 (while (< ccl-current-ic (aref ccl-code 1))
824 (ccl-dump-1))
825 (insert "At EOF:\n")))
826 (while (< ccl-current-ic len)
827 (ccl-dump-1))
828 ))
829
830;; Return a CCL code in `ccl-code' at `ccl-current-ic'.
831(defun ccl-get-next-code ()
832 (prog1
833 (aref ccl-code ccl-current-ic)
834 (setq ccl-current-ic (1+ ccl-current-ic))))
835
836(defun ccl-dump-1 ()
837 (let* ((code (ccl-get-next-code))
838 (cmd (aref ccl-code-table (logand code 31)))
839 (rrr (ash (logand code 255) -5))
840 (cc (ash code -8)))
841 (insert (format "%5d:[%s] " (1- ccl-current-ic) cmd))
842 (funcall (get cmd 'ccl-dump-function) rrr cc)))
843
844(defun ccl-dump-set-register (rrr cc)
845 (insert (format "r%d = r%d\n" rrr cc)))
846
847(defun ccl-dump-set-short-const (rrr cc)
848 (insert (format "r%d = %d\n" rrr cc)))
849
850(defun ccl-dump-set-const (rrr ignore)
851 (insert (format "r%d = %d\n" rrr (ccl-get-next-code))))
852
853(defun ccl-dump-set-array (rrr cc)
854 (let ((rrr2 (logand cc 7))
855 (len (ash cc -3))
856 (i 0))
857 (insert (format "r%d = array[r%d] of length %d\n\t"
858 rrr rrr2 len))
859 (while (< i len)
860 (insert (format "%d " (ccl-get-next-code)))
861 (setq i (1+ i)))
862 (insert "\n")))
863
864(defun ccl-dump-jump (ignore cc &optional address)
865 (insert (format "jump to %d(" (+ (or address ccl-current-ic) cc)))
866 (if (>= cc 0)
867 (insert "+"))
868 (insert (format "%d)\n" (1+ cc))))
869
870(defun ccl-dump-jump-cond (rrr cc)
871 (insert (format "if (r%d == 0), " rrr))
872 (ccl-dump-jump nil cc))
873
874(defun ccl-dump-write-register-jump (rrr cc)
875 (insert (format "write r%d, " rrr))
876 (ccl-dump-jump nil cc))
877
878(defun ccl-dump-write-register-read-jump (rrr cc)
879 (insert (format "write r%d, read r%d, " rrr rrr))
880 (ccl-dump-jump nil cc)
881 (ccl-get-next-code) ; Skip dummy READ-JUMP
882 )
883
884(defun ccl-extract-arith-op (cc)
885 (aref ccl-arith-table (ash cc -6)))
886
887(defun ccl-dump-write-expr-const (ignore cc)
888 (insert (format "write (r%d %s %d)\n"
889 (logand cc 7)
890 (ccl-extract-arith-op cc)
891 (ccl-get-next-code))))
892
893(defun ccl-dump-write-expr-register (ignore cc)
894 (insert (format "write (r%d %s r%d)\n"
895 (logand cc 7)
896 (ccl-extract-arith-op cc)
897 (logand (ash cc -3) 7))))
898
899(defun ccl-dump-insert-char (cc)
900 (cond ((= cc ?\t) (insert " \"^I\""))
901 ((= cc ?\n) (insert " \"^J\""))
902 (t (insert (format " \"%c\"" cc)))))
903
904(defun ccl-dump-write-const-jump (ignore cc)
905 (let ((address ccl-current-ic))
906 (insert "write char")
907 (ccl-dump-insert-char (ccl-get-next-code))
908 (insert ", ")
909 (ccl-dump-jump nil cc address)))
910
911(defun ccl-dump-write-const-read-jump (rrr cc)
912 (let ((address ccl-current-ic))
913 (insert "write char")
914 (ccl-dump-insert-char (ccl-get-next-code))
915 (insert (format ", read r%d, " rrr))
916 (ccl-dump-jump cc address)
917 (ccl-get-next-code) ; Skip dummy READ-JUMP
918 ))
919
920(defun ccl-dump-write-string-jump (ignore cc)
921 (let ((address ccl-current-ic)
922 (len (ccl-get-next-code))
923 (i 0))
924 (insert "write \"")
925 (while (< i len)
926 (let ((code (ccl-get-next-code)))
927 (insert (ash code -16))
928 (if (< (1+ i) len) (insert (logand (ash code -8) 255)))
929 (if (< (+ i 2) len) (insert (logand code 255))))
930 (setq i (+ i 3)))
931 (insert "\", ")
932 (ccl-dump-jump nil cc address)))
933
934(defun ccl-dump-write-array-read-jump (rrr cc)
935 (let ((address ccl-current-ic)
936 (len (ccl-get-next-code))
937 (i 0))
938 (insert (format "write array[r%d] of length %d,\n\t" rrr len))
939 (while (< i len)
940 (ccl-dump-insert-char (ccl-get-next-code))
941 (setq i (1+ i)))
942 (insert (format "\n\tthen read r%d, " rrr))
943 (ccl-dump-jump nil cc address)
944 (ccl-get-next-code) ; Skip dummy READ-JUMP.
945 ))
946
947(defun ccl-dump-read-jump (rrr cc)
948 (insert (format "read r%d, " rrr))
949 (ccl-dump-jump nil cc))
950
951(defun ccl-dump-branch (rrr len)
952 (let ((jump-table-head ccl-current-ic)
953 (i 0))
954 (insert (format "jump to array[r%d] of length %d\n\t" rrr len))
955 (while (<= i len)
956 (insert (format "%d " (+ jump-table-head (ccl-get-next-code))))
957 (setq i (1+ i)))
958 (insert "\n")))
959
960(defun ccl-dump-read-register (rrr cc)
961 (insert (format "read r%d (%d remaining)\n" rrr cc)))
962
963(defun ccl-dump-read-branch (rrr len)
964 (insert (format "read r%d, " rrr))
965 (ccl-dump-branch rrr len))
966
967(defun ccl-dump-write-register (rrr cc)
968 (insert (format "write r%d (%d remaining)\n" rrr cc)))
969
970(defun ccl-dump-call (ignore cc)
971 (insert (format "call subroutine #%d\n" cc)))
972
973(defun ccl-dump-write-const-string (rrr cc)
974 (if (= rrr 0)
975 (progn
976 (insert "write char")
977 (ccl-dump-insert-char cc)
978 (newline))
979 (let ((len cc)
980 (i 0))
981 (insert "write \"")
982 (while (< i len)
983 (let ((code (ccl-get-next-code)))
984 (insert (format "%c" (lsh code -16)))
985 (if (< (1+ i) len)
986 (insert (format "%c" (logand (lsh code -8) 255))))
987 (if (< (+ i 2) len)
988 (insert (format "%c" (logand code 255))))
989 (setq i (+ i 3))))
990 (insert "\"\n"))))
991
992(defun ccl-dump-write-array (rrr cc)
993 (let ((i 0))
994 (insert (format "write array[r%d] of length %d\n\t" rrr cc))
995 (while (< i cc)
996 (ccl-dump-insert-char (ccl-get-next-code))
997 (setq i (1+ i)))
998 (insert "\n")))
999
1000(defun ccl-dump-end (&rest ignore)
1001 (insert "end\n"))
1002
1003(defun ccl-dump-set-assign-expr-const (rrr cc)
1004 (insert (format "r%d %s= %d\n"
1005 rrr
1006 (ccl-extract-arith-op cc)
1007 (ccl-get-next-code))))
1008
1009(defun ccl-dump-set-assign-expr-register (rrr cc)
1010 (insert (format "r%d %s= r%d\n"
1011 rrr
1012 (ccl-extract-arith-op cc)
1013 (logand cc 7))))
1014
1015(defun ccl-dump-set-expr-const (rrr cc)
1016 (insert (format "r%d = r%d %s %d\n"
1017 rrr
1018 (logand cc 7)
1019 (ccl-extract-arith-op cc)
1020 (ccl-get-next-code))))
1021
1022(defun ccl-dump-set-expr-register (rrr cc)
1023 (insert (format "r%d = r%d %s r%d\n"
1024 rrr
1025 (logand cc 7)
1026 (ccl-extract-arith-op cc)
1027 (logand (ash cc -3) 7))))
1028
1029(defun ccl-dump-jump-cond-expr-const (rrr cc)
1030 (let ((address ccl-current-ic))
1031 (insert (format "if !(r%d %s %d), "
1032 rrr
1033 (aref ccl-arith-table (ccl-get-next-code))
1034 (ccl-get-next-code)))
1035 (ccl-dump-jump nil cc address)))
1036
1037(defun ccl-dump-jump-cond-expr-register (rrr cc)
1038 (let ((address ccl-current-ic))
1039 (insert (format "if !(r%d %s r%d), "
1040 rrr
1041 (aref ccl-arith-table (ccl-get-next-code))
1042 (ccl-get-next-code)))
1043 (ccl-dump-jump nil cc address)))
1044
1045(defun ccl-dump-read-jump-cond-expr-const (rrr cc)
1046 (insert (format "read r%d, " rrr))
1047 (ccl-dump-jump-cond-expr-const rrr cc))
1048
1049(defun ccl-dump-read-jump-cond-expr-register (rrr cc)
1050 (insert (format "read r%d, " rrr))
1051 (ccl-dump-jump-cond-expr-register rrr cc))
1052
1053(defun ccl-dump-binary (ccl-code)
1054 (let ((len (length ccl-code))
1055 (i 2))
1056 (while (< i len)
1057 (let ((code (aref ccl-code i))
1058 (j 27))
1059 (while (>= j 0)
1060 (insert (if (= (logand code (ash 1 j)) 0) ?0 ?1))
1061 (setq j (1- j)))
1062 (setq code (logand code 31))
1063 (if (< code (length ccl-code-table))
1064 (insert (format ":%s" (aref ccl-code-table code))))
1065 (insert "\n"))
1066 (setq i (1+ i)))))
1067
1068;; CCL emulation staffs
1069
1070;; Not yet implemented.
1071
1072;;;###autoload
1073(defmacro declare-ccl-program (name)
1074 "Declare NAME as a name of CCL program.
1075
1076To compile a CCL program which calls another CCL program not yet
1077defined, it must be declared as a CCL program in advance."
1078 `(put ',name 'ccl-program-idx (register-ccl-program ',name nil)))
1079
1080;;;###autoload
1081(defmacro define-ccl-program (name ccl-program &optional doc)
1082 "Set NAME the compiled code of CCL-PROGRAM.
1083CCL-PROGRAM is `eval'ed before being handed to the CCL compiler `ccl-compile'.
1084The compiled code is a vector of integers."
1085 `(let ((prog ,(ccl-compile (eval ccl-program))))
1086 (defconst ,name prog ,doc)
1087 (put ',name 'ccl-program-idx (register-ccl-program ',name prog))
1088 nil))
1089
1090;;;###autoload
1091(defun ccl-execute-with-args (ccl-prog &rest args)
1092 "Execute CCL-PROGRAM with registers initialized by the remaining args.
1093The return value is a vector of resulting CCL registeres."
1094 (let ((reg (make-vector 8 0))
1095 (i 0))
1096 (while (and args (< i 8))
1097 (if (not (integerp (car args)))
1098 (error "Arguments should be integer"))
1099 (aset reg i (car args))
1100 (setq args (cdr args) i (1+ i)))
1101 (ccl-execute ccl-prog reg)
1102 reg))
1103
1104(provide 'ccl)
1105
1106;; ccl.el ends here
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
new file mode 100644
index 00000000000..8ce6e4b5638
--- /dev/null
+++ b/lisp/international/characters.el
@@ -0,0 +1,388 @@
1;;; characters.el --- set syntax and category for multibyte characters
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multibyte character, character set, syntax, category
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; This file contains multibyte characters. Save this file always in
27;; `coding-system-iso-2022-7'.
28
29;;; Predefined categories.
30
31;; For each character set.
32
33(define-category ?a "ASCII")
34(define-category ?l "Latin")
35(define-category ?t "Thai")
36(define-category ?g "Greek")
37(define-category ?b "Arabic")
38(define-category ?w "Hebrew")
39(define-category ?y "Cyrillic")
40(define-category ?k "Japanese katakana")
41(define-category ?r "Japanese roman")
42(define-category ?c "Chinese")
43(define-category ?j "Japanese")
44(define-category ?h "Korean")
45(define-category ?e "Ethiopic (Ge'ez)")
46(define-category ?v "Vietnamese")
47(define-category ?i "Indian")
48
49;; For each group (row) of 2-byte character sets.
50
51(define-category ?A "Alpha numeric characters of 2-byte character sets")
52(define-category ?C "Chinese (Han) characters of 2-byte character sets")
53(define-category ?G "Greek characters of 2-byte characters sets")
54(define-category ?H "Japanese Hiragana characters of 2-byte character sets")
55(define-category ?K "Japanese Katakana characters of 2-byte character sets")
56(define-category ?N "Korean Hangul characters of 2-byte character sets")
57(define-category ?Y "Cyrillic character of 2-byte character sets")
58(define-category ?I "Indian Glyphs")
59
60;; For phonetic classifications.
61
62(define-category ?0 "consonant")
63(define-category ?1 "base vowel")
64(define-category ?2 "upper diacritical mark (including upper vowel)")
65(define-category ?3 "lower diacritical mark (including lower vowel)")
66(define-category ?4 "tone mark")
67(define-category ?5 "vowel")
68(define-category ?6 "digit")
69(define-category ?7 "vowel-modifying diacritical mark")
70(define-category ?8 "vowel-signs.")
71
72;; For filling.
73(define-category ?| "While filling, we can break a line at this character.")
74
75;; Keep the followings for `kinsoku' processing. See comments in
76;; kinsoku.el.
77(define-category ?> "A character which can't be placed at beginning of line.")
78(define-category ?< "A character which can't be placed at end of line.")
79
80
81;;; Setting syntax and category.
82
83;; ASCII
84
85(let ((ch 32))
86 (while (< ch 127) ; All ASCII characters have
87 (modify-category-entry ch ?a) ; the category `a' (ASCII)
88 (modify-category-entry ch ?l) ; and `l' (Latin).
89 (setq ch (1+ ch))))
90
91;; Arabic character set
92
93(let ((charsets '(arabic-iso8859-6
94 arabic-digit
95 arabic-1-column
96 arabic-2-column)))
97 (while charsets
98 (modify-syntax-entry (make-char (car charsets)) "w")
99 (modify-category-entry (make-char (car charsets)) ?b)
100 (setq charsets (cdr charsets))))
101
102;; Chinese character set (GB2312)
103
104(modify-syntax-entry (make-char 'chinese-gb2312) "w")
105(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
106(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
107(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
108(modify-syntax-entry ?\$A!2(B "($A!3(B")
109(modify-syntax-entry ?\$A!4(B "($A!5(B")
110(modify-syntax-entry ?\$A!6(B "($A!7(B")
111(modify-syntax-entry ?\$A!8(B "($A!9(B")
112(modify-syntax-entry ?\$A!:(B "($A!;(B")
113(modify-syntax-entry ?\$A!<(B "($A!=(B")
114(modify-syntax-entry ?\$A!>(B "($A!?(B")
115(modify-syntax-entry ?\$A!3(B ")$A!2(B")
116(modify-syntax-entry ?\$A!5(B ")$A!4(B")
117(modify-syntax-entry ?\$A!7(B ")$A!6(B")
118(modify-syntax-entry ?\$A!9(B ")$A!8(B")
119(modify-syntax-entry ?\$A!;(B ")$A!:(B")
120(modify-syntax-entry ?\$A!=(B ")$A!<(B")
121(modify-syntax-entry ?\$A!?(B ")$A!>(B")
122
123(modify-category-entry (make-char 'chinese-gb2312) ?c)
124(modify-category-entry (make-char 'chinese-gb2312) ?\|)
125(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
126(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
127(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
128(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
129(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
130(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
131(let ((row 48))
132 (while (< row 127)
133 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
134 (setq row (1+ row))))
135
136;; Chinese character set (BIG5)
137
138(let ((generic-big5-1-char (make-char 'chinese-big5-1))
139 (generic-big5-2-char (make-char 'chinese-big5-2)))
140 (modify-syntax-entry generic-big5-1-char "w")
141 (modify-syntax-entry generic-big5-2-char "w")
142
143 (modify-category-entry generic-big5-1-char ?c)
144 (modify-category-entry generic-big5-2-char ?c)
145
146 (modify-category-entry generic-big5-1-char ?C)
147 (modify-category-entry generic-big5-2-char ?C)
148
149 (modify-category-entry generic-big5-1-char ?\|)
150 (modify-category-entry generic-big5-2-char ?\|))
151
152
153;; Chinese character set (CNS11643)
154
155(let ((cns-list '(chinese-cns11643-1
156 chinese-cns11643-2
157 chinese-cns11643-3
158 chinese-cns11643-4
159 chinese-cns11643-5
160 chinese-cns11643-6
161 chinese-cns11643-7))
162 generic-char)
163 (while cns-list
164 (setq generic-char (make-char (car cns-list)))
165 (modify-syntax-entry generic-char "w")
166 (modify-category-entry generic-char ?c)
167 (modify-category-entry generic-char ?C)
168 (modify-category-entry generic-char ?|)
169 (setq cns-list (cdr cns-list))))
170
171;; Cyrillic character set (ISO-8859-5)
172
173(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
174
175(let ((c 160))
176 (while (< c 256)
177 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 c) "w")
178 (setq c (1+ c))))
179(modify-syntax-entry ?,L-(B ".")
180(modify-syntax-entry ?,Lp(B ".")
181(modify-syntax-entry ?,L}(B ".")
182
183;; Ethiopic character set
184
185(modify-category-entry (make-char 'ethiopic) ?e)
186
187;; European character set (Latin-1,2,3,4,5)
188
189(modify-category-entry (make-char 'latin-iso8859-1) ?l)
190(modify-category-entry (make-char 'latin-iso8859-2) ?l)
191(modify-category-entry (make-char 'latin-iso8859-3) ?l)
192(modify-category-entry (make-char 'latin-iso8859-4) ?l)
193(modify-category-entry (make-char 'latin-iso8859-9) ?l)
194
195;; ISO-8859-1 (Latin-1)
196(let ((c 64))
197 (while (< c 128) ; from ',A@(B' to ',A(B'
198 (modify-syntax-entry (make-char 'latin-iso8859-1 c) "w")
199 (setq c (1+ c)))
200 (modify-syntax-entry (make-char 'latin-iso8859-1 32) "w") ; NBSP
201 (modify-syntax-entry ?,AW(B "_")
202 (modify-syntax-entry ?,Aw(B "_")
203 )
204
205;; ISO-8859-2 (Latin-2)
206(let ((c 190))
207 (while (< c 255)
208 (modify-syntax-entry (make-char 'latin-iso8859-2 c) "w")
209 (setq c (1+ c))))
210(let ((chars '(?,B!(B ?,B#(B ?,B%(B ?,B&(B ?,B)(B ?,B*(B ?,B+(B ?,B,(B ?,B.(B ?,B/(B ?,B1(B ?,B3(B ?,B5(B ?,B6(B ?,B9(B ?,B:(B ?,B;(B ?,B<(B)))
211 (while chars
212 (modify-syntax-entry (car chars) "w")
213 (setq chars (cdr chars))))
214(modify-syntax-entry (make-char 'latin-iso8859-2 160) "w") ; NBSP
215(modify-syntax-entry ?,BW(B ".")
216(modify-syntax-entry ?,Bw(B ".")
217
218;; Greek character set (ISO-8859-7)
219
220(modify-category-entry (make-char 'greek-iso8859-7) ?g)
221
222(let ((c 182))
223 (while (< c 255)
224 (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
225 (setq c (1+ c))))
226(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
227(modify-syntax-entry ?,F7(B ".")
228(modify-syntax-entry ?,F;(B ".")
229(modify-syntax-entry ?,F=(B ".")
230
231;; Hebrew character set (ISO-8859-8)
232
233(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
234
235(let ((c 224))
236 (while (< c 251)
237 (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
238 (setq c (1+ c))))
239(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
240
241;; Indian character set (IS 13194 and other Emacs original Indian charsets)
242
243(modify-category-entry (make-char 'indian-is13194) ?i)
244(modify-category-entry (make-char 'indian-2-column) ?I)
245(modify-category-entry (make-char 'indian-1-column) ?I)
246
247;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
248
249(modify-category-entry (make-char 'katakana-jisx0201) ?k)
250(modify-category-entry (make-char 'latin-jisx0201) ?r)
251(modify-category-entry (make-char 'japanese-jisx0208) ?j)
252(modify-category-entry (make-char 'japanese-jisx0212) ?j)
253(modify-category-entry (make-char 'japanese-jisx0208) ?\|)
254
255;; JISX0208
256(modify-syntax-entry (make-char 'japanese-jisx0208) "w")
257(modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
258(modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
259(modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
260(let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)))
261 (while chars
262 (modify-syntax-entry (car chars) "w")
263 (setq chars (cdr chars))))
264(modify-syntax-entry ?\$B!J(B "($B!K(B")
265(modify-syntax-entry ?\$B!N(B "($B!O(B")
266(modify-syntax-entry ?\$B!P(B "($B!Q(B")
267(modify-syntax-entry ?\$B!V(B "($B!W(B")
268(modify-syntax-entry ?\$B!X(B "($B!Y(B")
269(modify-syntax-entry ?\$B!K(B ")$B!J(B")
270(modify-syntax-entry ?\$B!O(B ")$B!N(B")
271(modify-syntax-entry ?\$B!Q(B ")$B!P(B")
272(modify-syntax-entry ?\$B!W(B ")$B!V(B")
273(modify-syntax-entry ?\$B!Y(B ")$B!X(B")
274
275(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
276(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
277(modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
278(modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
279(modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
280(let ((row 48))
281 (while (< row 127)
282 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
283 (setq row (1+ row))))
284(let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B)))
285 (while chars
286 (modify-category-entry (car chars) ?K)
287 (modify-category-entry (car chars) ?H)
288 (setq chars (cdr chars))))
289(let ((chars '(?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)))
290 (while chars
291 (modify-category-entry (car chars) ?C)
292 (setq chars (cdr chars))))
293
294;; JISX0212
295(modify-syntax-entry (make-char 'japanese-jisx0212) "w")
296(modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
297(modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
298(modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
299
300(modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
301
302;; JISX0201-Kana
303(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
304(let ((chars '(?(I!(B ?(I"(B ?(I#(B ?(I$(B ?(I%(B)))
305 (while chars
306 (modify-syntax-entry (car chars) ".")
307 (setq chars (cdr chars))))
308
309;; Korean character set (KSC5601)
310
311(modify-syntax-entry (make-char 'korean-ksc5601) "w")
312(modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
313(modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
314(modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
315(modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
316(modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
317(modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
318
319(modify-category-entry (make-char 'korean-ksc5601) ?h)
320(modify-category-entry (make-char 'korean-ksc5601 35) ?A)
321(modify-category-entry (make-char 'korean-ksc5601 37) ?G)
322(modify-category-entry (make-char 'korean-ksc5601 42) ?H)
323(modify-category-entry (make-char 'korean-ksc5601 43) ?K)
324(modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
325
326;; Thai character set (TIS620)
327
328(modify-category-entry (make-char 'thai-tis620) ?t)
329
330(let ((deflist '(;; chars syntax category
331 (",T!(B-,TCEG(B-,TN(B" "w" ?0) ; consonant
332 (",TDFPRS`(B-,Te(B" "w" ?1) ; vowel base
333 (",TQT(B-,TWgn(B" "w" ?2) ; vowel upper
334 (",TX(B-,TZ(B" "w" ?3) ; vowel lower
335 (",Th(B-,Tm(B" "w" ?4) ; tone mark
336 (",TOfp(B-,Ty(B" "w" ?0) ; digit and misc
337 (",T_oz{(B" "_" ?0) ; symbol
338 ))
339 elm chars len syntax category to ch i)
340 (while deflist
341 (setq elm (car deflist))
342 (setq chars (car elm)
343 len (length chars)
344 syntax (nth 1 elm)
345 category (nth 2 elm)
346 i 0)
347 (while (< i len)
348 (if (= (aref chars i) ?-)
349 (setq i (1+ i)
350 to (sref chars i))
351 (setq ch (sref chars i)
352 to ch))
353 (while (<= ch to)
354 (modify-syntax-entry ch syntax)
355 (modify-category-entry ch category)
356 (setq ch (1+ ch)))
357 (setq i (+ i (char-bytes to))))
358 (setq deflist (cdr deflist))))
359
360;; Vietnamese character set
361
362(let ((lower (make-char 'vietnamese-viscii-lower))
363 (upper (make-char 'vietnamese-viscii-upper)))
364 (modify-syntax-entry lower "w")
365 (modify-syntax-entry upper "w")
366 (modify-category-entry lower ?v)
367 (modify-category-entry upper ?v)
368 (modify-category-entry lower ?l) ; To make a word with
369 (modify-category-entry upper ?l) ; latin characters.
370 )
371
372
373;;; Setting word boundary.
374
375(setq word-combining-categories
376 '((?l . ?l)))
377
378(setq word-separating-categories ; (2-byte character sets)
379 '((?A . ?K) ; Alpha numeric - Katakana
380 (?A . ?C) ; Alpha numeric - Chinese
381 (?H . ?A) ; Hiragana - Alpha numeric
382 (?H . ?K) ; Hiragana - Katakana
383 (?H . ?C) ; Hiragana - Chinese
384 (?K . ?A) ; Katakana - Alpha numeric
385 (?K . ?C) ; Katakana - Chinese
386 (?C . ?A) ; Chinese - Alpha numeric
387 (?C . ?K) ; Chinese - Katakana
388 ))
diff --git a/lisp/international/encoded-kb.el b/lisp/international/encoded-kb.el
new file mode 100644
index 00000000000..2861fc45148
--- /dev/null
+++ b/lisp/international/encoded-kb.el
@@ -0,0 +1,285 @@
1;; encoded-kb.el -- handler for inputting multibyte characters encoded somehow
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; This file is part of GNU Emacs.
7
8;; GNU Emacs is free software; you can redistribute it and/or modify
9;; it under the terms of the GNU General Public License as published by
10;; the Free Software Foundation; either version 2, or (at your option)
11;; any later version.
12
13;; GNU Emacs is distributed in the hope that it will be useful,
14;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16;; GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public License
19;; along with GNU Emacs; see the file COPYING. If not, write to
20;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21
22(defvar encoded-kbd-mode nil
23 "Non-nil if in Encoded-kbd minor mode.")
24(put 'encoded-kbd-mode 'permanent-local t)
25
26(or (assq 'encoded-kbd-mode minor-mode-alist)
27 (setq minor-mode-alist
28 (cons '(encoded-kbd-mode " Encoded-kbd") minor-mode-alist)))
29
30(defvar encoded-kbd-mode-map
31 (let ((map (make-sparse-keymap))
32 (i 128))
33 (define-key map "\e" 'encoded-kbd-handle-iso2022-esc)
34 (while (< i 256)
35 (define-key map (vector i) 'encoded-kbd-handle-8bit)
36 (setq i (1+ i)))
37 map)
38 "Keymap for Encoded-kbd minor mode.")
39
40(or (assq 'encoded-kbd-mode minor-mode-map-alist)
41 (setq minor-mode-map-alist
42 (cons (cons 'encoded-kbd-mode encoded-kbd-mode-map)
43 minor-mode-map-alist)))
44
45;; Subsidiary keymaps for handling ISO2022 escape sequences.
46
47(defvar encoded-kbd-iso2022-esc-map
48 (let ((map (make-sparse-keymap)))
49 (define-key map "$" 'encoded-kbd-iso2022-esc-dollar-prefix)
50 (define-key map "(" 'encoded-kbd-iso2022-designation-prefix)
51 (define-key map ")" 'encoded-kbd-iso2022-designation-prefix)
52 (define-key map "," 'encoded-kbd-iso2022-designation-prefix)
53 (define-key map "-" 'encoded-kbd-iso2022-designation-prefix)
54 (append map '((t . encoded-kbd-outernal-command)))
55 map)
56 "Keymap for handling ESC code in Encoded-kbd mode.")
57
58(defvar encoded-kbd-iso2022-esc-dollar-map
59 (let ((map (make-sparse-keymap)))
60 (define-key map "(" 'encoded-kbd-iso2022-designation-prefix)
61 (define-key map ")" 'encoded-kbd-iso2022-designation-prefix)
62 (define-key map "," 'encoded-kbd-iso2022-designation-prefix)
63 (define-key map "-" 'encoded-kbd-iso2022-designation-prefix)
64 (define-key map "@" 'encoded-kbd-iso2022-designation)
65 (define-key map "A" 'encoded-kbd-iso2022-designation)
66 (define-key map "B" 'encoded-kbd-iso2022-designation)
67 (append map '((t . encoded-kbd-outernal-command)))
68 map)
69 "Keymap for handling ESC $ sequence handling in Encoded-kbd mode.")
70(fset 'encoded-kbd-iso2022-esc-dollar-prefix
71 encoded-kbd-iso2022-esc-dollar-map)
72
73(defvar encoded-kbd-iso2022-designation-map
74 (let ((map (make-sparse-keymap))
75 (i 48))
76 (while (< i 128)
77 (define-key map (char-to-string i) 'encoded-kbd-iso2022-designation)
78 (setq i (1+ i)))
79 (append map '((t . encoded-kbd-outernal-command)))
80 map)
81 "Keymap for handling ISO2022 designation sequence in Encoded-kbd mode.")
82(fset 'encoded-kbd-iso2022-designation-prefix
83 encoded-kbd-iso2022-designation-map)
84
85(defvar encoded-kbd-iso2022-non-ascii-map
86 (let ((map (make-keymap))
87 (i 32))
88 (while (< i 128)
89 (define-key map (char-to-string i) 'encoded-kbd-self-insert-iso2022-7bit)
90 (setq i (1+ i)))
91 map)
92 "Keymap for handling non-ASCII character set in Encoded-kbd mode.")
93
94;; One of the symbols `sjis', `iso2022-7', `iso2022-8', or `big5' to
95;; denote what kind of coding-system we are now handling in
96;; Encoded-kbd mode.
97(defvar encoded-kbd-coding nil)
98
99;; Keep information of designation state of ISO2022 encoding. This is
100;; a vector of character sets currently designated to each graphic
101;; registers (0..3).
102
103(defvar encoded-kbd-iso2022-designations nil)
104(make-variable-buffer-local 'encoded-kbd-iso2022-designations)
105(put 'encoded-kbd-iso2022-designations 'permanent-local t)
106
107;; Keep information of invocation state of ISO2022 encoding. This is
108;; a vector of graphic register numbers currently invoked to each
109;; graphic plane (0..1), the third element is a single shifted graphic
110;; register number.
111
112(defvar encoded-kbd-iso2022-invocations nil)
113(make-variable-buffer-local 'encoded-kbd-iso2022-invocations)
114(put 'encoded-kbd-iso2022-invocations 'permanent-local t)
115
116(defun encoded-kbd-iso2022-designation ()
117 "Do ISO2022 designation according to the curren key in Encoded-kbd mode.
118The following key sequence may cause multilingual text insertion."
119 (interactive)
120 (let ((key-seq (this-command-keys))
121 intermediate-char final-char
122 reg dimension chars charset)
123 (if (= (length key-seq) 3)
124 ;; (ESC) $ <intermediate-char> <final-char>
125 (setq intermediate-char (aref key-seq 1)
126 dimension 2
127 chars (if (< intermediate-char ?,) 94 96)
128 final-char (aref key-seq 2)
129 reg (mod intermediate-char 4))
130 (if (= (aref key-seq 1) ?$)
131 ;; (ESC) $ <final-char>
132 (setq dimension 2
133 chars 94
134 final-char (aref key-seq 1)
135 reg 0)
136 ;; (ESC) <intermediate-char> <final-char>
137 (setq intermediate-char (aref key-seq 0)
138 dimension 1
139 chars (if (< intermediate-char ?,) 94 96)
140 final-char (aref key-seq 1)
141 reg (mod intermediate-char 4))))
142 (if (setq charset (iso-charset dimension chars final-char))
143 (aset encoded-kbd-iso2022-designations reg charset)
144 (error "Character set of DIMENSION %s, CHARS %s, FINAL-CHAR `%c' is not supported"
145 dimension chars final-char))
146
147 (if (eq (aref encoded-kbd-iso2022-designations
148 (aref encoded-kbd-iso2022-invocations 0))
149 'ascii)
150 ;; Graphic plane 0 (0x20..0x7f) is for ASCII. We don't have
151 ;; to handle characters in this range specially.
152 (throw 'exit nil)
153 ;; Graphic plane 0 is for non-ASCII.
154 (setq overriding-local-map encoded-kbd-iso2022-non-ascii-map))))
155
156(defun encoded-kbd-handle-iso2022-esc ()
157 (interactive)
158 (let ((overriding-local-map encoded-kbd-iso2022-esc-map))
159 (recursive-edit)))
160
161(defun encoded-kbd-handle-8bit ()
162 "Handle an 8-bit character enterned in Encoded-kbd mode."
163 (interactive)
164 (cond ((eq encoded-kbd-coding 'iso2022-7)
165 (error "Can't handle the character code %d" last-command-char))
166
167 ((eq encoded-kbd-coding 'iso2022-8)
168 (cond ((= last-command-char ?\216)
169 (aset encoded-kbd-iso2022-invocations 2 2))
170
171 ((= last-command-char ?\217)
172 (aset encoded-kbd-iso2022-invocations 2 3))
173
174 ((> last-command-char ?\240)
175 (encoded-kbd-self-insert-iso2022-8bit))
176
177 (t
178 (error "Can't handle the character code %d"
179 last-command-char))))
180
181 ((eq encoded-kbd-coding 'sjis)
182 (encoded-kbd-self-insert-sjis))
183
184 (t
185 (encoded-kbd-self-insert-big5))))
186
187(defun encoded-kbd-self-insert-iso2022-7bit ()
188 (interactive)
189 (let* ((charset (aref encoded-kbd-iso2022-designations
190 (or (aref encoded-kbd-iso2022-invocations 2)
191 (aref encoded-kbd-iso2022-invocations 0))))
192 (last-command-char
193 (if (= (charset-bytes charset) 1)
194 (make-char charset last-command-char)
195 (make-char charset last-command-char (read-char-exclusive)))))
196 (self-insert-command 1)
197 (aset encoded-kbd-iso2022-invocations 2 nil)
198 ))
199
200(defun encoded-kbd-self-insert-iso2022-8bit ()
201 (interactive)
202 (let* ((charset (aref encoded-kbd-iso2022-designations
203 (or (aref encoded-kbd-iso2022-invocations 2)
204 (aref encoded-kbd-iso2022-invocations 1))))
205 (last-command-char
206 (if (= (charset-bytes charset) 1)
207 (make-char charset last-command-char)
208 (make-char charset last-command-char (read-char-exclusive)))))
209 (self-insert-command 1)
210 (aset encoded-kbd-iso2022-invocations 2 nil)
211 ))
212
213(defun encoded-kbd-self-insert-sjis ()
214 (interactive)
215 (let ((last-command-char
216 (if (or (< last-command-char ?\xA0) (>= last-command-char ?\xE0))
217 (decode-sjis-char (+ (ash last-command-char 8)
218 (read-char-exclusive)))
219 (make-char 'latin-jisx0201 last-command-char))))
220 (self-insert-command 1)))
221
222(defun encoded-kbd-self-insert-big5 ()
223 (interactive)
224 (let ((last-command-char
225 (decode-big5-char (+ (ash last-command-char 8)
226 (read-char-exclusive)))))
227 (self-insert-command 1)))
228
229(defun encoded-kbd-mode (&optional arg)
230 "Toggle Encoded-kbd minor mode.
231With arg, turn Keyboard-kbd mode on in and only if arg is positive.
232
233When in Encoded-kbd mode, a text sent from a terminal keyboard
234is accepted as a multilingual text encoded in a coding-system
235set by the command `set-keyboard-coding-system'"
236 (interactive "P")
237 (setq encoded-kbd-mode
238 (if (null arg) (null encoded-kbd-mode)
239 (> (prefix-numeric-value arg) 0)))
240 (if encoded-kbd-mode
241 (let* ((coding (coding-system-vector (keyboard-coding-system)))
242 (input-mode (current-input-mode)))
243 (cond ((null coding)
244 (setq encoded-kbd-mode nil)
245 (error "No coding-system for terminal keyboard is set"))
246
247 ((= (coding-vector-type coding) 1) ; SJIS
248 (set-input-mode (nth 0 input-mode) (nth 1 input-mode)
249 'use-8th-bit (nth 3 input-mode))
250 (setq encoded-kbd-coding 'sjis))
251
252 ((= (coding-vector-type coding) 2) ; ISO2022
253 (if (aref (coding-vector-flags coding) 7) ; 7-bit only
254 (setq encoded-kbd-coding 'iso2022-7)
255 (set-input-mode (nth 0 input-mode) (nth 1 input-mode)
256 'use-8th-bit (nth 3 input-mode))
257 (setq encoded-kbd-coding 'iso2022-8))
258 (make-variable-buffer-local 'encoded-kbd-iso2022-designations)
259 (setq encoded-kbd-iso2022-designations (make-vector 4 nil))
260 (let ((flags (coding-vector-flags coding))
261 (i 0))
262 (while (< i 4)
263 (if (and (aref flags i)
264 (> (aref flags i) 0))
265 (aset encoded-kbd-iso2022-designations i
266 (aref flags i)))
267 (setq i (1+ i))))
268 (make-variable-buffer-local 'encoded-kbd-iso2022-invocations)
269 (setq encoded-kbd-iso2022-invocations (make-vector 3 0))
270 (aset encoded-kbd-iso2022-invocations 1 1))
271
272 ((= (coding-vector-type coding) 3) ; BIG5
273 (set-input-mode (nth 0 input-mode) (nth 1 input-mode)
274 'use-8th-bit (nth 3 input-mode))
275 (setq encoded-kbd-coding 'big5))
276
277 (t
278 (setq encoded-kbd-mode nil)
279 (error "Coding-system `%s' is not supported in Encoded-kbd mode"
280 (keyboard-coding-system))))
281
282 (run-hooks 'encoded-kbd-mode-hook)))
283 (force-mode-line-update))
284
285;;; encoded-kb.el ends here
diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el
new file mode 100644
index 00000000000..65519590cc3
--- /dev/null
+++ b/lisp/international/fontset.el
@@ -0,0 +1,336 @@
1;;; fontset.el --- Commands for handling fontset.
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, fontset
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26;; Set default REGISTRY property of charset to find an appropriate
27;; font for each charset. This is used to generate a font name in a
28;; fontset. If the value contains a character `-', the string before
29;; that is embeded in `CHARSET_REGISTRY' field, and the string after
30;; that is embeded in `CHARSET_ENCODING' field. If the value does not
31;; contain `-', the whole string is embeded in `CHARSET_REGISTRY'
32;; field, and a wild card character `*' is embeded in
33;; `CHARSET_ENCODING' field.
34
35(defvar x-charset-registries
36 '((ascii . "ISO8859-1")
37 (latin-iso8859-1 . "ISO8859-1")
38 (latin-iso8859-2 . "ISO8859-2")
39 (latin-iso8859-3 . "ISO8859-3")
40 (latin-iso8859-4 . "ISO8859-4")
41 (thai-tis620 . "TIS620")
42 (greek-iso8859-7 . "ISO8859-7")
43 (arabic-iso8859-6 . "ISO8859-6")
44 (hebrew-iso8859-8 . "ISO8859-8")
45 (katakana-jisx0201 . "JISX0201")
46 (latin-jisx0201 . "JISX0201")
47 (cyrillic-iso8859-5 . "ISO8859-5")
48 (latin-iso8859-9 . "ISO8859-9")
49 (japanese-jisx0208-1978 . "JISX0208.1978")
50 (chinese-gb2312 . "GB2312")
51 (japanese-jisx0208 . "JISX0208.1983")
52 (korean-ksc5601 . "KSC5601")
53 (japanese-jisx0212 . "JISX0212")
54 (chinese-cns11643-1 . "CNS11643.1992-1")
55 (chinese-cns11643-2 . "CNS11643.1992-2")
56 (chinese-cns11643-3 . "CNS11643.1992-3")
57 (chinese-cns11643-4 . "CNS11643.1992-4")
58 (chinese-cns11643-5 . "CNS11643.1992-5")
59 (chinese-cns11643-6 . "CNS11643.1992-6")
60 (chinese-cns11643-7 . "CNS11643.1992-7")
61 (chinese-big5-1 . "Big5")
62 (chinese-big5-2 . "Big5")
63 (chinese-sisheng . "sisheng_cwnn")
64 (vietnamese-viscii-lower . "VISCII1.1")
65 (vietnamese-viscii-upper . "VISCII1.1")
66 (arabic-digit . "MuleArabic-0")
67 (arabic-1-column . "MuleArabic-1")
68 (arabic-2-column . "MuleArabic-2")
69 (ipa . "MuleIPA")
70 (ethiopic . "Ethio")
71 (ascii-right-to-left . "ISO8859-1")
72 (indian-is13194 . "IS13194-Devanagari")
73 (indian-2-column . "MuleIndian-2")
74 (indian-1-column . "MuleIndian-1")
75 (lao . "lao.mule-1")))
76
77(let ((l x-charset-registries))
78 (while l
79 (put-charset-property (car (car l)) 'x-charset-registry (cdr (car l)))
80 (setq l (cdr l))))
81
82;; Set arguments in `font-encoding-alist' (which see).
83(defun set-font-encoding (pattern charset encoding)
84 (let ((slot (assoc pattern font-encoding-alist)))
85 (if slot
86 (let ((place (assq charset (cdr slot))))
87 (if place
88 (setcdr place encoding)
89 (setcdr slot (cons (cons charset encoding) (cdr slot)))))
90 (setq font-encoding-alist
91 (cons (list pattern (cons charset encoding)) font-encoding-alist)))
92 ))
93
94(set-font-encoding "ISO8859-1" 'ascii 0)
95(set-font-encoding "JISX0201" 'latin-jisx0201 0)
96
97;; Setting for suppressing XLoadQueryFont on big fonts.
98(setq x-pixel-size-width-font-regexp
99 "gb2312\\|jisx0208\\|ksc5601\\|cns11643\\|big5")
100
101;;; XLFD (X Logical Font Description) format handler.
102
103;; Define XLFD's field index numbers. ; field name
104(defconst xlfd-regexp-foundry-subnum 0) ; FOUNDRY
105(defconst xlfd-regexp-family-subnum 1) ; FAMILY_NAME
106(defconst xlfd-regexp-weight-subnum 2) ; WEIGHT_NAME
107(defconst xlfd-regexp-slant-subnum 3) ; SLANT
108(defconst xlfd-regexp-swidth-subnum 4) ; SETWIDTH_NAME
109(defconst xlfd-regexp-adstyle-subnum 5) ; ADD_STYLE_NAME
110(defconst xlfd-regexp-pixelsize-subnum 6) ; PIXEL_SIZE
111(defconst xlfd-regexp-pointsize-subnum 7) ; POINT_SIZE
112(defconst xlfd-regexp-resx-subnum 8) ; RESOLUTION_X
113(defconst xlfd-regexp-resy-subnum 9) ; RESOLUTION_Y
114(defconst xlfd-regexp-spacing-subnum 10) ; SPACING
115(defconst xlfd-regexp-avgwidth-subnum 11) ; AVERAGE_WIDTH
116(defconst xlfd-regexp-registry-subnum 12) ; CHARSET_REGISTRY
117(defconst xlfd-regexp-encoding-subnum 13) ; CHARSET_ENCODING
118
119;; Regular expression matching against a fontname which conforms to
120;; XLFD (X Logical Font Description). All fields in XLFD should be
121;; not be omitted (but can be a wild card) to be matched.
122(defconst xlfd-tight-regexp
123 "^\
124-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)\
125-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)\
126-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)-\\([^-]*\\)$")
127
128;; List of field numbers of XLFD whose values are numeric.
129(defconst xlfd-regexp-numeric-subnums
130 (list xlfd-regexp-pixelsize-subnum ;6
131 xlfd-regexp-pointsize-subnum ;7
132 xlfd-regexp-resx-subnum ;8
133 xlfd-regexp-resy-subnum ;9
134 xlfd-regexp-avgwidth-subnum ;11
135 ))
136
137(defun x-decompose-font-name (pattern)
138 "Decompose PATTERN into XLFD's fields and return vector of the fields.
139The length of the vector is 14.
140
141If PATTERN doesn't conform to XLFD, try to get a full XLFD name from
142X server and use the information of the full name to decompose
143PATTERN. If no full XLFD name is gotten, return nil."
144 (let (xlfd-fields fontname)
145 (if (string-match xlfd-tight-regexp pattern)
146 (let ((i 0))
147 (setq xlfd-fields (make-vector 14 nil))
148 (while (< i 14)
149 (aset xlfd-fields i (match-string (1+ i) pattern))
150 (setq i (1+ i)))
151 xlfd-fields)
152 (setq fontname (condition-case nil
153 (x-resolve-font-name pattern)
154 (error)))
155 (if (and fontname
156 (string-match xlfd-tight-regexp fontname))
157 (let ((len (length pattern))
158 (i 0)
159 l)
160 (setq xlfd-fields (make-vector 14 nil))
161 (while (< i 14)
162 (aset xlfd-fields i
163 (cons (match-beginning (1+ i))
164 (match-string (1+ i) fontname)))
165 (setq i (1+ i)))
166 (setq i 0)
167 (while (< i len)
168 (let ((ch (aref pattern i)))
169 (if (= ch ??)
170 (setq pattern (concat (substring pattern 0 i)
171 "\\(.\\)"
172 (substring pattern (1+ i)))
173 len (+ len 4)
174 i (+ i 4))
175 (if (= ch ?*)
176 (setq pattern (concat (substring pattern 0 i)
177 "\\(.*\\)"
178 (substring pattern (1+ i)))
179 len (+ len 5)
180 i (+ i 5))
181 (setq i (1+ i))))))
182 (string-match pattern fontname)
183 (setq l (cdr (cdr (match-data))))
184 (setq i 0)
185 (while (< i 14)
186 (if (or (null l) (< (car (aref xlfd-fields i)) (car l)))
187 (progn
188 (aset xlfd-fields i (cdr (aref xlfd-fields i)))
189 (setq i (1+ i)))
190 (if (< (car (aref xlfd-fields i)) (car (cdr l)))
191 (progn
192 (aset xlfd-fields i nil)
193 (setq i (1+ i)))
194 (setq l (cdr (cdr l))))))
195 xlfd-fields)))))
196
197(defsubst x-compose-font-name (xlfd-fields)
198 "Compose X's fontname from FIELDS.
199FIELDS is a vector of XLFD fields.
200If a field is nil, wild-card character `*' is embedded."
201 (concat "-" (mapconcat (lambda (x) (or x "*")) xlfd-fields "-")))
202
203(defun x-complement-fontset-spec (xlfd-fields fontlist)
204 "Complement FONTLIST for all charsets based on XLFD-FIELDS and return it.
205XLFD-FIELDS is a vector of XLFD (X Logical Font Description) fields.
206FONTLIST is an alist of cons of charset and fontname.
207
208Fontnames for charsets not listed in FONTLIST are generated from
209XLFD-FIELDS and a property of x-charset-register of each charset
210automatically."
211 (let ((charsets charset-list))
212 (while charsets
213 (let ((charset (car charsets)))
214 (if (null (assq charset fontlist))
215 (let ((registry (get-charset-property charset
216 'x-charset-registry)))
217 (if (string-match "-" registry)
218 ;; REGISTRY contains `CHARSET_ENCODING' field.
219 (progn
220 (aset xlfd-fields xlfd-regexp-registry-subnum
221 (substring registry 0 (match-beginning 0)))
222 (aset xlfd-fields xlfd-regexp-encoding-subnum
223 (substring registry (match-end 0))))
224 (aset xlfd-fields xlfd-regexp-registry-subnum
225 (concat registry "*"))
226 (aset xlfd-fields xlfd-regexp-encoding-subnum "*"))
227 (setq fontlist
228 (cons (cons charset (x-compose-font-name xlfd-fields))
229 fontlist)))))
230 (setq charsets (cdr charsets))))
231 fontlist)
232
233;; Return a list to be appended to `x-fixed-font-alist' when
234;; `mouse-set-font' is called.
235(defun generate-fontset-menu ()
236 (let ((fontsets global-fontset-alist)
237 fontset-name
238 l)
239 (while fontsets
240 (setq fontset-name (car (car fontsets)) fontsets (cdr fontsets))
241 (if (string-match "fontset-\\([^-]+\\)" fontset-name)
242 ;; This fontset has a nickname. Just show it.
243 (let ((nickname (match-string 1 fontset-name)))
244 (setq l (cons (list (concat ".." nickname) fontset-name) l)))
245 (setq l (cons (list fontset-name fontset-name) l))))
246 (cons "Fontset" l)))
247
248(defun fontset-plain-name (fontset)
249 "Return a plain and descriptive name of FONTSET."
250 (let ((xlfd-fields (x-decompose-font-name fontset)))
251 (if xlfd-fields
252 (let ((weight (aref xlfd-fields xlfd-regexp-weight-subnum))
253 (slant (aref xlfd-fields xlfd-regexp-slant-subnum))
254 (swidth (aref xlfd-fields xlfd-regexp-swidth-subnum))
255 (size (aref xlfd-fields xlfd-regexp-pixelsize-subnum))
256 name)
257 (if (integerp size)
258 (setq name (format "%d " size))
259 (setq name ""))
260 (if (string-match "bold\\|demibold" weight)
261 (setq name (concat name weight " ")))
262 (cond ((string= slant "i")
263 (setq name (concat name "italic ")))
264 ((string= slant "o")
265 (setq name (concat name "slant ")))
266 ((string= slant "ri")
267 (setq name (concat name "reverse italic ")))
268 ((string= slant "ro")
269 (setq name (concat name "reverse slant "))))
270 (if (= (length name) 0)
271 ;; No descriptive fields found.
272 fontset
273 name))
274 fontset)))
275
276(defun create-fontset-from-fontset-spec (fontset-spec)
277 "Create a fontset from fontset specification string FONTSET-SPEC.
278FONTSET-SPEC is a string of the format:
279 FONTSET-NAME,CHARSET-NAME0:FONT-NAME0,CHARSET-NAME1:FONT-NAME1, ...
280Any number of SPACE, TAB, and NEWLINE can be put before and after commas."
281 (if (string-match "[^,]+" fontset-spec)
282 (let* ((idx2 (match-end 0))
283 (name (match-string 0 fontset-spec))
284 fontlist charset xlfd-fields)
285 (while (string-match "[, \t\n]*\\([^:]+\\):\\([^,]+\\)"
286 fontset-spec idx2)
287 (setq idx2 (match-end 0))
288 (setq charset (intern (match-string 1 fontset-spec)))
289 (if (charsetp charset)
290 (setq fontlist (cons (cons charset (match-string 2 fontset-spec))
291 fontlist))))
292 (if (setq xlfd-fields (x-decompose-font-name name))
293 ;; If NAME conforms to XLFD, complement FONTLIST for
294 ;; charsets not specified in FONTSET-SPEC.
295 (setq fontlist
296 (x-complement-fontset-spec xlfd-fields fontlist)))
297 (new-fontset name fontlist))))
298
299
300;; Create default fontset from 16 dots fonts which are the most widely
301;; installed fonts.
302(defvar default-fontset-spec
303 "-*-fixed-medium-r-normal-*-16-*-*-*-*-*-fontset-default,
304 chinese-gb2312:-*-medium-r-normal-*-16-*-gb2312*-*,
305 korean-ksc5601:-*-medium-r-normal-*-16-*-ksc5601*-*,
306 chinese-cns11643-1:-*-medium-r-normal-*-16-*-cns11643*-1,
307 chinese-cns11643-2:-*-medium-r-normal-*-16-*-cns11643*-2,
308 chinese-cns11643-3:-*-medium-r-normal-*-16-*-cns11643*-3,
309 chinese-cns11643-4:-*-medium-r-normal-*-16-*-cns11643*-4,
310 chinese-cns11643-5:-*-medium-r-normal-*-16-*-cns11643*-5,
311 chinese-cns11643-6:-*-medium-r-normal-*-16-*-cns11643*-6,
312 chinese-cns11643-7:-*-medium-r-normal-*-16-*-cns11643*-7"
313 "String of fontset spec of a default fontset.
314See the documentation of `create-fontset-from-fontset-spec' for the format.")
315
316;; Create fontsets from X resources of the name `fontset-N (class
317;; Fontset-N)' where N is integer 0, 1, ...
318;; The values of the resources the string of the same format as
319;; `default-fontset-spec'.
320
321(defun create-fontset-from-x-resource ()
322 (let ((idx 0)
323 fontset-spec)
324 (while (setq fontset-spec (x-get-resource (concat "fontset-" idx)
325 (concat "Fontset-" idx)))
326 (create-fontset-from-fontset-spec fontset-spec)
327 (setq idx (1+ idx)))))
328
329(defsubst fontset-list ()
330 "Returns a list of all defined fontset names."
331 (mapcar 'car global-fontset-alist))
332
333;;
334(provide 'fontset)
335
336;;; fontset.el ends here
diff --git a/lisp/international/isearch-x.el b/lisp/international/isearch-x.el
new file mode 100644
index 00000000000..fb288d16d9c
--- /dev/null
+++ b/lisp/international/isearch-x.el
@@ -0,0 +1,76 @@
1;;; isearch-x.el --- extended isearch handling commands
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, isearch
7
8;; Author: Kenichi HANDA <handa@etl.go.jp>
9;; Maintainer: Kenichi HANDA <handa@etl.go.jp>
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to
25;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26
27;;; Code:
28
29;;;###autoload
30(defun isearch-toggle-specified-input-method ()
31 "Select and toggle specified input method in interactive search."
32 (interactive)
33 ;; Let the command `toggle-input-method' ask users to select input
34 ;; method interactively.
35 (setq default-input-method nil)
36 (isearch-toggle-input-method))
37
38;;;###autoload
39(defun isearch-toggle-input-method ()
40 "Toggle input method in interactive search."
41 (interactive)
42 (if isearch-multibyte-characters-flag
43 (setq isearch-multibyte-characters-flag nil)
44 (condition-case nil
45 (progn
46 (if (null default-input-method)
47 (let ((overriding-terminal-local-map nil))
48 ;; No input method has ever been selected. Select one
49 ;; interactively now. This also sets
50 ;; `default-input-method-title' to the title of the
51 ;; selected input method.
52 (toggle-input-method)
53 ;; And, inactivate it for the moment.
54 (toggle-input-method)))
55 (setq isearch-multibyte-characters-flag t))
56 (error (ding))))
57 (isearch-update))
58
59(defun isearch-input-method-after-insert-chunk-function ()
60 (funcall inactivate-current-input-method-function))
61
62(defun isearch-process-search-multibyte-characters (last-char)
63 (let* ((overriding-terminal-local-map nil)
64 ;; Let input method exit when a chunk is inserted.
65 (input-method-after-insert-chunk-hook
66 '(isearch-input-method-after-insert-chunk-function))
67 (input-method-inactivate-hook '(exit-minibuffer))
68 ;; Let input method work rather tersely.
69 (input-method-tersely-flag t)
70 str)
71 (setq unread-command-events (cons last-char unread-command-events))
72 (setq str (read-multilingual-string (concat (isearch-message-prefix)
73 isearch-message)))
74 (isearch-process-search-string str str)))
75
76;;; isearch-x.el ends here
diff --git a/lisp/international/kinsoku.el b/lisp/international/kinsoku.el
new file mode 100644
index 00000000000..a26f61c997a
--- /dev/null
+++ b/lisp/international/kinsoku.el
@@ -0,0 +1,141 @@
1;;; kinsoku.el --- `Kinsoku' processing functions.
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: kinsoku
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; `Kinsoku' processing is to prohibit specific characters to be
27;; placed at beginning of line or at end of line. Characters not to
28;; be placed at beginning and end of line have character category `>'
29;; and `<' respectively. This restriction is dissolved by making a
30;; line longer or shorter.
31;;
32;; `Kinsoku' is a Japanese word which originally means ordering to
33;; stay in one place, and is used for the text processing described
34;; above in the context of text formatting.
35
36;;; Code:
37
38(defvar kinsoku-limit 4
39 "How many more columns we can make lines longer by `kinsoku' processing.
40The value 0 means there's no limitation.")
41
42;; Setting character category `>' for characters which should not be
43;; placed at beginning of line.
44(let* ((kinsoku-bol
45 (concat
46 ;; ASCII
47 "!)-_~}]:;',.?"
48 ;; Japanese JISX0208
49 "$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>(B\
50$B!?!@!A!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n(B\
51$B$!$#$%$'$)$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v(B"
52 ;; Chinese GB2312
53 "$A!"!##.#,!$!%!&!'!(!)!*!+!,!-!/!1#)!3!5!7!9!;!=(B\
54$A!?#;#:#?#!!@!A!B!C!c!d!e!f#/#\#"#_#~#|(e(B"
55 ;; Chinese BIG5
56 "$(0!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2(B\
57$(0!3!4!5!6!7!8!9!:!;!<!=!?!A!C!E!G!I!K(B\
58$(0!M!O!Q(B $(0!S!U!W!Y![!]!_!a!c!e!g!i!k!q(B\
59$(0"#"$"%"&"'"(")"*"+","2"3"4"j"k"l"x%7(B"))
60 (len (length kinsoku-bol))
61 (idx 0)
62 ch)
63 (while (< idx len)
64 (setq ch (sref kinsoku-bol idx)
65 idx (+ idx (char-bytes ch)))
66 (modify-category-entry ch ?>)))
67
68;; Setting character category `<' for characters which should not be
69;; placed at end of line.
70(let* ((kinsoku-eol
71 (concat
72 ;; ASCII
73 "({[`"
74 ;; Japanese JISX0208
75 "$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!n!w!x(B\
76$A!.!0#"#(!2!4!6!8!:!<!>!c!d!e#@!f!l(B"
77 ;; Chinese GB2312
78 "$A(E(F(G(H(I(J(K(L(M(N(O(P(Q(R(S(T(U(V(W(X(Y(h(B\
79$(0!>!@!B!D!F!H!J!L!N!P!R!T!V!X!Z!\!^!`!b(B"
80 ;; Chinese BIG5
81 "$(0!d!f!h!j!k!q!p"i"j"k"n"x$u$v$w$x$y$z${(B\
82$(0$|$}$~%!%"%#%$%%%&%'%(%)%*%+%:(B"))
83 (len (length kinsoku-eol))
84 (idx 0)
85 ch)
86 (while (< idx len)
87 (setq ch (sref kinsoku-eol idx)
88 idx (+ idx (char-bytes ch)))
89 (modify-category-entry ch ?<)))
90
91;; Try to resolve `kinsoku' restriction by making the current line longer.
92(defun kinsoku-longer ()
93 (let ((pos-and-column (save-excursion
94 (forward-char 1)
95 (while (aref (char-category-set (following-char)) ?>)
96 (forward-char 1))
97 (cons (point) (current-column)))))
98 (if (or (<= kinsoku-limit 0)
99 (< (cdr pos-and-column) (+ (current-fill-column) kinsoku-limit)))
100 (goto-char (car pos-and-column)))))
101
102;; Try to resolve `kinsoku' restriction by making the current line shorter.
103;; The line can't be broken before the buffer position LINEBEG."
104(defun kinsoku-shorter (linebeg)
105 (let ((pos (save-excursion
106 (forward-char -1)
107 (while (and (< linebeg (point))
108 (or (aref (char-category-set (preceding-char)) ?<)
109 (aref (char-category-set (following-char)) ?>)))
110 (forward-char -1))
111 (point))))
112 (if (< linebeg pos)
113 (goto-char pos))))
114
115;;;###autoload
116(defun kinsoku (linebeg)
117 "Go to a line breaking position near point by doing `kinsoku' processing.
118LINEBEG is a buffer position we can't break a line before.
119
120`Kinsoku' processing is to prohibit specific characters to be placed
121at beginning of line or at end of line. Characters not to be placed
122at beginning and end of line have character category `>' and `<'
123respectively. This restriction is dissolved by making a line longer or
124shorter.
125
126`Kinsoku' is a Japanese word which originally means ordering to stay
127in one place, and is used for the text processing described above in
128the context of text formatting."
129 (if (or (and
130 ;; The character after point can't be placed at beginning
131 ;; of line.
132 (aref (char-category-set (following-char)) ?>)
133 ;; We at first try to dissolve this situation by making a
134 ;; line longer. If it fails, then try making a line
135 ;; shorter.
136 (not (kinsoku-longer)))
137 ;; The character before point can't be placed at end of line.
138 (aref (char-category-set (preceding-char)) ?<))
139 (kinsoku-shorter linebeg)))
140
141;; kinsoku.el ends here
diff --git a/lisp/international/kkc.el b/lisp/international/kkc.el
new file mode 100644
index 00000000000..d004b88e58a
--- /dev/null
+++ b/lisp/international/kkc.el
@@ -0,0 +1,586 @@
1;; kkc.el -- Kana Kanji converter
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Japanese, SKK
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; These routines provide a simple and easy-to-use converter from
27;; Kana-string to Kana-Kanji-mixed-string. This converter (here after
28;; KKC) uses a SKK dictionary to get information how to convert
29;; Kana-string. Since KKC can't be fully automated, we need an
30;; interaction with a user to decide the correct conversion. For
31;; that, we provide KKC major mode.
32
33;;; Code:
34
35(require 'skkdic-utl)
36
37(defvar kkc-input-method-title "$B4A(B"
38 "String denoting KKC input method.
39This string is shown at mode line when users are in KKC mode.")
40
41(defvar kkc-init-file-name "~/.kkcrc"
42 "Name of a file which contains user's initial setup code for KKC.")
43
44;; A flag to control a file specified by `kkc-init-file-name'.
45;; The value nil means the file is not yet consulted.
46;; The value t means the file has already been consulted but there's
47;; no need of updating it yet.
48;; Any other value means that we must update the file before exiting Emacs.
49(defvar kkc-init-file-flag nil)
50
51;; Cash data for `kkc-lookup-key'. This may be initialized by loading
52;; a file specified by `kkc-init-file-name'. If any elements are
53;; modified, the data is written out to the file when exiting Emacs.
54(defvar kkc-lookup-cache '(kkc-lookup-cache))
55
56(defun kkc-save-init-file ()
57 "Save initial setup code for KKC to a file specified by `kkc-init-file-name'"
58 (if (and kkc-init-file-flag
59 (not (eq kkc-init-file-flag t)))
60 (let ((coding-system-for-write 'coding-system-iso-2022-7))
61 (write-region (format "(setq kkc-lookup-cache '%S)\n" kkc-lookup-cache)
62 nil
63 kkc-init-file-name))))
64
65;; Sequence of characters to be used for indexes for shown list. The
66;; Nth character is for the Nth conversion in the list currently shown.
67(defvar kkc-show-conversion-list-index-chars
68 "1234567890abcdefghijklmnopqrsuvwxyz")
69
70(defvar kkc-mode-map
71 (let ((map (make-keymap))
72 (i 0))
73 (while (< i ? )
74 (define-key map (char-to-string i) 'undefined)
75 (setq i (1+ i)))
76 (while (< i 128)
77 (define-key map (char-to-string i) 'kkc-non-kkc-command)
78 (setq i (1+ i)))
79 (setq i 0)
80 (let ((len (length kkc-show-conversion-list-index-chars)))
81 (while (< i len)
82 (define-key map
83 (char-to-string (aref kkc-show-conversion-list-index-chars i))
84 'kkc-select-from-list)
85 (setq i (1+ i))))
86 (define-key map " " 'kkc-next)
87 (define-key map (char-to-string help-char) 'help-command)
88 (define-key map "\r" 'kkc-terminate)
89 (define-key map "\C-@" 'kkc-first-char-only)
90 (define-key map "\C-n" 'kkc-next)
91 (define-key map "\C-p" 'kkc-prev)
92 (define-key map "\C-i" 'kkc-shorter)
93 (define-key map "\C-o" 'kkc-longer)
94 (define-key map "\C-c" 'kkc-cancel)
95 (define-key map "\C-?" 'kkc-cancel)
96 (define-key map "\C-f" 'kkc-next-phrase)
97 (define-key map "K" 'kkc-katakana)
98 (define-key map "H" 'kkc-hiragana)
99 (define-key map "l" 'kkc-show-conversion-list-or-next-group)
100 (define-key map "L" 'kkc-show-conversion-list-or-prev-group)
101 (define-key map [?\C-\ ] 'kkc-first-char-only)
102 (define-key map [delete] 'kkc-cancel)
103 (define-key map [return] 'kkc-terminate)
104 (append map '((t . kkc-non-kkc-command))))
105 "Keymap for KKC (Kana Kanji Conversion) mode.")
106
107(defun kkc-mode ()
108 "Major mode for converting Kana string to Kanji-Kana mixed string.
109Commands:
110\\{kkc-mode-map}"
111 (setq major-mode 'kkc-mode)
112 (setq mode-name "KKC")
113 (use-local-map kkc-mode-map)
114 (run-hooks 'kkc-mode-hook))
115
116;;; Internal variables used in KKC.
117
118;; The current Kana string to be converted.
119(defvar kkc-original-kana nil)
120
121;; The current key sequence (vector of Kana characters) generated from
122;; `kkc-original-kana'.
123(defvar kkc-current-key nil)
124
125;; List of the current conversions for `kkc-current-key'.
126(defvar kkc-current-conversions nil)
127
128;; Vector of the same length as `kkc-current-conversion'. The first
129;; element is a vector of:
130;; o index number of the first conversion shown previously,
131;; o index number of a conversion next of the last one shown previously,
132;; o the shown string itself.
133;; The remaining elements are widths (including columns for index
134;; numbers) of conversions stored in the same order as in
135;; `kkc-current-conversion'.
136(defvar kkc-current-conversions-width nil)
137
138(defvar kkc-show-conversion-list-count 4
139 "Count of successive `kkc-next' or `kkc-prev' to show conversion list.")
140
141;; Provided that `kkc-current-key' is [A B C D E F G H I], the current
142;; conversion target is [A B C D E F], the sequence of which
143;; conversion is found is [A B C D]:
144;;
145;; A B C D E F G H I
146;; kkc-overlay-head (black): |<--------->|
147;; kkc-overlay-tail (underline): |<------->|
148;; kkc-length-head: |<--------->|
149;; kkc-length-converted: |<----->|
150;;
151(defvar kkc-overlay-head nil)
152(defvar kkc-overlay-tail nil)
153(defvar kkc-length-head nil)
154(defvar kkc-length-converted nil)
155
156;; Cursor type (`box' or `bar') of the current frame.
157(defvar kkc-cursor-type nil)
158
159;; Flag to tell if the current conversion is canceled. If non-nil,
160;; the value is a buffer position of the head of currently active
161;; conversion region.
162(defvar kkc-canceled nil)
163
164;; Lookup SKK dictionary to set list of conversions in
165;; kkc-current-conversions for key sequence kkc-current-key of length
166;; LEN. If no conversion is found in the dictionary, don't change
167;; kkc-current-conversions and return nil.
168;; Postfixes are handled only if POSTFIX is non-nil.
169(defun kkc-lookup-key (len &optional postfix)
170 ;; At first, prepare cache data if any.
171 (if (not kkc-init-file-flag)
172 (progn
173 (setq kkc-init-file-flag t)
174 (add-hook 'kill-emacs-hook 'kkc-save-init-file)
175 (if (file-readable-p kkc-init-file-name)
176 (condition-case nil
177 (load-file "~/.kkcrc")
178 (error (message "Invalid data in %s" kkc-init-file-name)
179 (ding))))))
180 (let ((entry (lookup-nested-alist kkc-current-key kkc-lookup-cache len 0 t)))
181 (if (consp (car entry))
182 (setq kkc-length-converted len
183 kkc-current-conversions-width nil
184 kkc-current-conversions (car entry))
185 (setq entry (skkdic-lookup-key kkc-current-key len postfix))
186 (if entry
187 (progn
188 (setq kkc-length-converted len
189 kkc-current-conversions-width nil
190 kkc-current-conversions (cons 1 entry))
191 (if postfix
192 ;; Store this conversions in the cache.
193 (progn
194 (set-nested-alist kkc-current-key kkc-current-conversions
195 kkc-lookup-cache kkc-length-converted)
196 (setq kkc-init-file-flag 'kkc-lookup-cache)))
197 t)
198 (if (= len 1)
199 (setq kkc-length-converted 1
200 kkc-current-conversions-width nil
201 kkc-current-conversions (cons 0 nil)))))))
202
203;;;###autoload
204(defun kkc-region (from to)
205 "Convert Kana string in the current region to Kanji-Kana mixed string.
206After one candidate of conversion is shown in the region, users are
207put in KKC major mode to select a desirable conversion."
208 (interactive "r")
209 (setq kkc-original-kana (buffer-substring from to))
210 (goto-char from)
211
212 ;; Setup overlays.
213 (if (overlayp kkc-overlay-head)
214 (move-overlay kkc-overlay-head from to)
215 (setq kkc-overlay-head (make-overlay from to nil nil t))
216 (overlay-put kkc-overlay-head 'face 'highlight))
217 (if (overlayp kkc-overlay-tail)
218 (move-overlay kkc-overlay-tail to to)
219 (setq kkc-overlay-tail (make-overlay to to nil nil t))
220 (overlay-put kkc-overlay-tail 'face 'underline))
221
222 ;; After updating the conversion region with the first candidate of
223 ;; conversion, jump into a recursive editing environment with KKC
224 ;; mode .
225 (let ((overriding-local-map nil)
226 (previous-local-map (current-local-map))
227 (minor-mode-alist nil)
228 (minor-mode-map-alist nil)
229 (current-input-method-title kkc-input-method-title)
230 major-mode mode-name)
231 (unwind-protect
232 (progn
233 (setq kkc-canceled nil)
234 (setq kkc-current-key (string-to-vector kkc-original-kana))
235 (setq kkc-length-head (length kkc-current-key))
236 (setq kkc-length-converted 0)
237 (while (not (kkc-lookup-key kkc-length-head))
238 (setq kkc-length-head (1- kkc-length-head)))
239 (goto-char to)
240 (kkc-update-conversion 'all)
241 (kkc-mode)
242 (recursive-edit))
243 (goto-char (overlay-end kkc-overlay-tail))
244 (delete-overlay kkc-overlay-head)
245 (delete-overlay kkc-overlay-tail)
246 (use-local-map previous-local-map)))
247 kkc-canceled)
248
249(defun kkc-terminate ()
250 "Exit from KKC mode by fixing the current conversion."
251 (interactive)
252 (throw 'exit nil))
253
254(defun kkc-non-kkc-command ()
255 "Exit from KKC mode by fixing the current conversion.
256After that, handle the event which invoked this command."
257 (interactive)
258 (setq unread-command-events (list last-input-event))
259 (kkc-terminate))
260
261(defun kkc-cancel ()
262 "Exit from KKC mode by canceling any conversions."
263 (interactive)
264 (setq kkc-canceled (overlay-start kkc-overlay-head))
265 (goto-char kkc-canceled)
266 (delete-region (overlay-start kkc-overlay-head)
267 (overlay-end kkc-overlay-tail))
268 (insert kkc-original-kana)
269 (kkc-terminate))
270
271(defun kkc-first-char-only ()
272 "Select only the first character currently converted."
273 (interactive)
274 (goto-char (overlay-start kkc-overlay-head))
275 (forward-char 1)
276 (delete-region (point) (overlay-end kkc-overlay-tail))
277 (kkc-terminate))
278
279;; Count of successive invocations of `kkc-next'.
280(defvar kkc-next-count nil)
281
282(defun kkc-next ()
283 "Select the next candidate of conversion."
284 (interactive)
285 (if (eq this-command last-command)
286 (setq kkc-next-count (1+ kkc-next-count))
287 (setq kkc-next-count 1))
288 (let ((idx (1+ (car kkc-current-conversions))))
289 (if (< idx 0)
290 (setq idx 1))
291 (if (>= idx (length kkc-current-conversions))
292 (setq idx 0))
293 (setcar kkc-current-conversions idx)
294 (if (> idx 1)
295 (progn
296 (set-nested-alist kkc-current-key kkc-current-conversions
297 kkc-lookup-cache kkc-length-converted)
298 (setq kkc-init-file-flag 'kkc-lookup-cache)))
299 (if (or kkc-current-conversions-width
300 (>= kkc-next-count kkc-show-conversion-list-count))
301 (kkc-show-conversion-list-update))
302 (kkc-update-conversion)))
303
304;; Count of successive invocations of `kkc-next'.
305(defvar kkc-prev-count nil)
306
307(defun kkc-prev ()
308 "Select the previous candidate of conversion."
309 (interactive)
310 (if (eq this-command last-command)
311 (setq kkc-prev-count (1+ kkc-prev-count))
312 (setq kkc-prev-count 1))
313 (let ((idx (1- (car kkc-current-conversions))))
314 (if (< idx 0)
315 (setq idx (1- (length kkc-current-conversions))))
316 (setcar kkc-current-conversions idx)
317 (if (> idx 1)
318 (progn
319 (set-nested-alist kkc-current-key kkc-current-conversions
320 kkc-lookup-cache kkc-length-converted)
321 (setq kkc-init-file-flag 'kkc-lookup-cache)))
322 (if (or kkc-current-conversions-width
323 (>= kkc-prev-count kkc-show-conversion-list-count))
324 (kkc-show-conversion-list-update))
325 (kkc-update-conversion)))
326
327(defun kkc-select-from-list ()
328 "Select one candidate from the list currently shown in echo area."
329 (interactive)
330 (let (idx)
331 (if kkc-current-conversions-width
332 (let ((len (length kkc-show-conversion-list-index-chars))
333 (maxlen (- (aref (aref kkc-current-conversions-width 0) 1)
334 (aref (aref kkc-current-conversions-width 0) 0)))
335 (i 0))
336 (if (> len maxlen)
337 (setq len maxlen))
338 (while (< i len)
339 (if (= (aref kkc-show-conversion-list-index-chars i)
340 last-input-char)
341 (setq idx i i len)
342 (setq i (1+ i))))))
343 (if idx
344 (progn
345 (setcar kkc-current-conversions
346 (+ (aref (aref kkc-current-conversions-width 0) 0) idx))
347 (kkc-show-conversion-list-update)
348 (kkc-update-conversion))
349 (setq unread-command-events (list last-input-event))
350 (kkc-terminate))))
351
352(defun kkc-katakana ()
353 "Convert to Katakana."
354 (interactive)
355 (setcar kkc-current-conversions -1)
356 (kkc-update-conversion 'all))
357
358(defun kkc-hiragana ()
359 "Convert to hiragana."
360 (interactive)
361 (setcar kkc-current-conversions 0)
362 (kkc-update-conversion))
363
364(defun kkc-shorter ()
365 "Make the Kana string to be converted shorter."
366 (interactive)
367 (if (<= kkc-length-head 1)
368 (error "Can't be shorter")
369 (setq kkc-length-head (1- kkc-length-head))
370 (if (> kkc-length-converted kkc-length-head)
371 (let ((len kkc-length-head))
372 (setq kkc-length-converted 0)
373 (while (not (kkc-lookup-key len))
374 (setq len (1- len)))))
375 (kkc-update-conversion 'all)))
376
377(defun kkc-longer ()
378 "Make the Kana string to be converted longer."
379 (interactive)
380 (if (>= kkc-length-head (length kkc-current-key))
381 (error "Can't be longer")
382 (setq kkc-length-head (1+ kkc-length-head))
383 ;; This time, try also entries with postfixes.
384 (kkc-lookup-key kkc-length-head 'postfix)
385 (kkc-update-conversion 'all)))
386
387(defun kkc-next-phrase ()
388 "Fix the currently converted string and try to convert the remaining string."
389 (interactive)
390 (if (>= kkc-length-head (length kkc-current-key))
391 (kkc-terminate)
392 (setq kkc-length-head (- (length kkc-current-key) kkc-length-head))
393 (goto-char (overlay-end kkc-overlay-head))
394 (while (and (< (point) (overlay-end kkc-overlay-tail))
395 (looking-at "\\CH"))
396 (goto-char (match-end 0))
397 (setq kkc-length-head (1- kkc-length-head)))
398 (if (= kkc-length-head 0)
399 (kkc-terminate)
400 (let ((newkey (make-vector kkc-length-head 0))
401 (idx (- (length kkc-current-key) kkc-length-head))
402 (i 0))
403 ;; For the moment, (setq kkc-original-kana (concat newkey))
404 ;; doesn't work.
405 (setq kkc-original-kana "")
406 (while (< i kkc-length-head)
407 (aset newkey i (aref kkc-current-key (+ idx i)))
408 (setq kkc-original-kana
409 (concat kkc-original-kana (char-to-string (aref newkey i))))
410 (setq i (1+ i)))
411 (setq kkc-current-key newkey)
412 (setq kkc-length-converted 0)
413 (while (and (not (kkc-lookup-key kkc-length-head))
414 (> kkc-length-head 1))
415 (setq kkc-length-head (1- kkc-length-head)))
416 (let ((pos (point))
417 (tail (overlay-end kkc-overlay-tail)))
418 (move-overlay kkc-overlay-head pos tail)
419 (move-overlay kkc-overlay-tail tail tail))
420 (kkc-update-conversion 'all)))))
421
422;; We'll show users a list of available conversions in echo area with
423;; index numbers so that users can select one conversion with the
424;; number.
425
426;; Set `kkc-current-conversions-width'.
427(defun kkc-setup-current-conversions-width ()
428 (let ((convs (cdr kkc-current-conversions))
429 (len (length kkc-current-conversions))
430 (idx 1))
431 (setq kkc-current-conversions-width (make-vector len nil))
432 ;; To tell `kkc-show-conversion-list-update' to generate
433 ;; message from scratch.
434 (aset kkc-current-conversions-width 0 (vector len -2 nil))
435 ;; Fill the remaining slots.
436 (while convs
437 (aset kkc-current-conversions-width idx
438 (+ (string-width (car convs)) 4))
439 (setq convs (cdr convs)
440 idx (1+ idx)))))
441
442(defun kkc-show-conversion-list-or-next-group ()
443 "Show list of available conversions in echo area with index numbers.
444If the list is already shown, show the next group of conversions,
445and change the current conversion to the first one in the group."
446 (interactive)
447 (if (< (length kkc-current-conversions) 3)
448 (error "No alternative"))
449 (if kkc-current-conversions-width
450 (let ((next-idx (aref (aref kkc-current-conversions-width 0) 1)))
451 (if (< next-idx (length kkc-current-conversions-width))
452 (setcar kkc-current-conversions next-idx)
453 (setcar kkc-current-conversions 1))
454 (kkc-show-conversion-list-update)
455 (kkc-update-conversion))
456 (kkc-setup-current-conversions-width)
457 (kkc-show-conversion-list-update)))
458
459(defun kkc-show-conversion-list-or-prev-group ()
460 "Show list of available conversions in echo area with index numbers.
461If the list is already shown, show the previous group of conversions,
462and change the current conversion to the last one in the group."
463 (interactive)
464 (if (< (length kkc-current-conversions) 3)
465 (error "No alternative"))
466 (if kkc-current-conversions-width
467 (let ((this-idx (aref (aref kkc-current-conversions-width 0) 0)))
468 (if (> this-idx 1)
469 (setcar kkc-current-conversions (1- this-idx))
470 (setcar kkc-current-conversions
471 (1- (length kkc-current-conversions-width))))
472 (kkc-show-conversion-list-update)
473 (kkc-update-conversion))
474 (kkc-setup-current-conversions-width)
475 (kkc-show-conversion-list-update)))
476
477;; Update the conversion list shown in echo area.
478(defun kkc-show-conversion-list-update ()
479 (or kkc-current-conversions-width
480 (kkc-setup-current-conversions-width))
481 (let* ((current-idx (car kkc-current-conversions))
482 (first-slot (aref kkc-current-conversions-width 0))
483 (this-idx (aref first-slot 0))
484 (next-idx (aref first-slot 1))
485 (msg (aref first-slot 2)))
486 (if (< current-idx this-idx)
487 ;; The currently selected conversion is before the list shown
488 ;; previously. We must start calculation of message width
489 ;; from the start again.
490 (setq this-idx 1 msg nil)
491 (if (>= current-idx next-idx)
492 ;; The currently selected conversion is after the list shown
493 ;; previously. We start calculation of message width from
494 ;; the conversion next of TO.
495 (setq this-idx next-idx msg nil)
496 ;; The current conversion is in MSG. Just clear brackets
497 ;; around index number.
498 (if (string-match "<.>" msg)
499 (progn
500 (aset msg (match-beginning 0) ?\ )
501 (aset msg (1- (match-end 0)) ?\ )))))
502 (if (not msg)
503 (let ((len (length kkc-current-conversions))
504 (max-width (window-width (minibuffer-window)))
505 (width-table kkc-current-conversions-width)
506 (width 0)
507 (idx this-idx)
508 l)
509 (while (< idx current-idx)
510 (if (<= (+ width (aref width-table idx)) max-width)
511 (setq width (+ width (aref width-table idx)))
512 (setq this-idx idx width (aref width-table idx)))
513 (setq idx (1+ idx)
514 l (cdr l)))
515 (aset first-slot 0 this-idx)
516 (while (and (< idx len)
517 (<= (+ width (aref width-table idx)) max-width))
518 (setq width (+ width (aref width-table idx))
519 idx (1+ idx)
520 l (cdr l)))
521 (aset first-slot 1 (setq next-idx idx))
522 (setq l (nthcdr this-idx kkc-current-conversions))
523 (setq msg "")
524 (setq idx this-idx)
525 (while (< idx next-idx)
526 (setq msg (format "%s %c %s "
527 msg
528 (aref kkc-show-conversion-list-index-chars
529 (- idx this-idx))
530 (car l)))
531 (setq idx (1+ idx)
532 l (cdr l)))
533 (aset first-slot 2 msg)))
534 (if (> current-idx 0)
535 (progn
536 ;; Highlight the current conversion by brackets.
537 (string-match (format " \\(%c\\) "
538 (aref kkc-show-conversion-list-index-chars
539 (- current-idx this-idx)))
540 msg)
541 (aset msg (match-beginning 0) ?<)
542 (aset msg (1- (match-end 0)) ?>)))
543 (message "%s" msg)))
544
545;; Update the conversion area with the latest conversion selected.
546;; ALL if non nil means to update the whole area, else update only
547;; inside quail-overlay-head.
548
549(defun kkc-update-conversion (&optional all)
550 (goto-char (overlay-start kkc-overlay-head))
551 (cond ((= (car kkc-current-conversions) 0) ; Hiragana
552 (let ((i 0))
553 (while (< i kkc-length-converted)
554 (insert (aref kkc-current-key i))
555 (setq i (1+ i)))))
556 ((= (car kkc-current-conversions) -1) ; Katakana
557 (let ((i 0))
558 (while (< i kkc-length-converted)
559 (insert (japanese-katakana (aref kkc-current-key i)))
560 (setq i (1+ i)))))
561 (t
562 (insert (nth (car kkc-current-conversions) kkc-current-conversions))))
563 (delete-region (point) (overlay-start kkc-overlay-tail))
564 (if all
565 (let ((len (length kkc-current-key))
566 (i kkc-length-converted))
567 (delete-region (overlay-start kkc-overlay-tail)
568 (overlay-end kkc-overlay-head))
569 (while (< i kkc-length-head)
570 (if (= (car kkc-current-conversions) -1)
571 (insert (japanese-katakana (aref kkc-current-key i)))
572 (insert (aref kkc-current-key i)))
573 (setq i (1+ i)))
574 (let ((pos (point)))
575 (while (< i len)
576 (insert (aref kkc-current-key i))
577 (setq i (1+ i)))
578 (move-overlay kkc-overlay-head
579 (overlay-start kkc-overlay-head) pos)
580 (delete-region (point) (overlay-end kkc-overlay-tail)))))
581 (goto-char (overlay-end kkc-overlay-tail)))
582
583;;
584(provide 'kkc)
585
586;; kkc.el ends here
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
new file mode 100644
index 00000000000..ebc9663dcb0
--- /dev/null
+++ b/lisp/international/mule-cmds.el
@@ -0,0 +1,494 @@
1;;; mule-cmds.el --- Commands for mulitilingual environment
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26;;; MULE related key bindings and menus.
27
28(defvar mule-keymap (make-sparse-keymap "MULE")
29 "Keymap for MULE (Multilingual environment) specific commands.")
30(fset 'mule-prefix mule-keymap)
31
32;; Keep "C-x C-k ..." for mule specific commands.
33(define-key ctl-x-map "\C-k" 'mule-prefix)
34
35(define-key global-map [menu-bar mule] (cons "Mule" mule-keymap))
36
37(setq menu-bar-final-items (cons 'mule menu-bar-final-items))
38
39(defvar mule-describe-language-support-map
40 (make-sparse-keymap "Describe Language Support"))
41(fset 'mule-describe-language-support-prefix
42 mule-describe-language-support-map)
43
44(define-key mule-keymap "m" 'toggle-enable-multibyte-characters)
45(define-key mule-keymap "f" 'set-buffer-file-coding-system)
46(define-key mule-keymap "t" 'set-terminal-coding-system)
47(define-key mule-keymap "k" 'set-keyboard-coding-system)
48(define-key mule-keymap "p" 'set-current-process-coding-system)
49(define-key mule-keymap "i" 'select-input-method)
50
51(define-key help-map "\C-L" 'describe-language-support)
52(define-key help-map "\C-\\" 'describe-input-method)
53(define-key help-map "C" 'describe-current-coding-system)
54(define-key help-map "h" 'view-hello-file)
55
56(define-key mule-keymap [set-process-coding-system]
57 '(" ... of process" . set-current-process-coding-system))
58(define-key mule-keymap [set-keyboard-coding-system]
59 '(" ... of keyboard" . set-keyboard-coding-system))
60(define-key mule-keymap [set-terminal-coding-system]
61 '(" ... of terminal" . set-terminal-coding-system))
62(define-key mule-keymap [set-buffer-file-coding-system]
63 '(" ... of visiting file" . set-buffer-file-coding-system))
64(define-key mule-keymap [separator-mule]
65 '("Setting coding systems"))
66(define-key mule-keymap [describe-current-coding-system]
67 '("Describe current coding systems" . describe-current-coding-system))
68(define-key mule-keymap [describe-language-support]
69 '("Describe language support" . mule-describe-language-support-prefix))
70(define-key mule-keymap [view-hello-file]
71 '("Show many languages" . view-hello-file))
72(define-key mule-keymap [describe-input-method]
73 '("Describe input method" . describe-input-method))
74(define-key mule-keymap [select-input-method]
75 '("Select input method" . select-input-method))
76(define-key mule-keymap [toggle-input-method]
77 '("Toggle input method" . toggle-input-method))
78(define-key mule-keymap [toggle-mule]
79 '("Toggle MULE" . toggle-enable-multibyte-characters))
80
81;; These are meaningless when running under X.
82(put 'set-keyboard-coding-system 'menu-enable
83 '(null window-system))
84(put 'set-terminal-coding-system 'menu-enable
85 '(null window-system))
86
87
88;; This should be a single character key binding because users use it
89;; very frequently while editing multilingual text. Now we can use
90;; only two such keys: "\C-\\" and "\C-^", but the latter is not
91;; convenient because it requires shifting on most keyboards. An
92;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
93;; but it won't be used that frequently.
94(define-key global-map "\C-\\" 'toggle-input-method)
95
96(defun toggle-enable-multibyte-characters (&optional arg)
97 "Change whether this buffer enables multibyte characters.
98With arg, make them enable iff arg is positive."
99 (interactive "P")
100 (setq enable-multibyte-characters
101 (if (null arg) (null enable-multibyte-characters)
102 (> (prefix-numeric-value arg) 0)))
103 (force-mode-line-update))
104
105(defun view-hello-file ()
106 "Display the HELLO file which list up many languages and characters."
107 (interactive)
108 (find-file-read-only (expand-file-name "HELLO" data-directory)))
109
110
111;;; Language support staffs.
112
113(defvar primary-language "English"
114 "Name of a user's primary language.
115Emacs provide various language supports based on this variable.")
116
117(defvar language-info-alist nil
118 "Alist of language names vs the corresponding information of various kind.
119Each element looks like:
120 (LANGUAGE-NAME . ((KEY . INFO) ...))
121where LANGUAGE-NAME is a string,
122KEY is a symbol denoting the kind of information,
123INFO is any Lisp object which contains the actual information related
124to KEY.")
125
126(defun get-language-info (language-name key)
127 "Return the information for LANGUAGE-NAME of the kind KEY.
128LANGUAGE-NAME is a string.
129KEY is a symbol denoting the kind of required information."
130 (let ((lang-slot (assoc language-name language-info-alist)))
131 (if lang-slot
132 (cdr (assq key (cdr lang-slot))))))
133
134;; Return a lambda form which calls `describe-language-support' with
135;; argument LANG.
136(defun build-describe-language-support-function (lang)
137 `(lambda ()
138 (interactive)
139 (describe-language-support ,lang)))
140
141(defun set-language-info (language-name key info)
142 "Set for LANGUAGE-NAME the information INFO under KEY.
143LANGUAGE-NAME is a string
144KEY is a symbol denoting the kind of information.
145INFO is any Lisp object which contains the actual information.
146
147Currently, the following KEYs are used by Emacs:
148charset: list of symbols whose values are charsets specific to the language.
149coding-system: list of coding systems specific to the langauge.
150setup-function: see the documentation of `set-language-envrionment'.
151tutorial: a tutorial file name written in the language.
152sample-text: one line short text containing characters of the language.
153documentation: a docstring describing how the language is supported,
154 or a fuction to call to describe it,
155 or t which means call `describe-language-support' to describe it.
156input-method: alist of input method names for the language vs information
157 for activating them. Use `register-input-method' (which see)
158 to add a new input method to the alist.
159
160Emacs will use more KEYs in the future. To avoid the conflition, users
161should use prefix \"user-\" in the name of KEY."
162 (let (lang-slot key-slot)
163 (setq lang-slot (assoc language-name language-info-alist))
164 (if (null lang-slot) ; If no slot for the language, add it.
165 (setq lang-slot (list language-name)
166 language-info-alist (cons lang-slot language-info-alist)))
167 (setq key-slot (assq key lang-slot))
168 (if (null key-slot) ; If no slot for the key, add it.
169 (progn
170 (setq key-slot (list key))
171 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
172 (setcdr key-slot info)
173 ;; Setup menu.
174 (if (eq key 'documentation)
175 (define-key mule-describe-language-support-map
176 (vector (intern language-name))
177 (cons language-name
178 (build-describe-language-support-function language-name))))
179 ))
180
181(defun set-language-info-alist (language-name alist)
182 "Set for LANGUAGE-NAME the information in ALIST.
183ALIST is an alist of KEY and INFO. See the documentation of
184`set-langauge-info' for the meanings of KEY and INFO."
185 (while alist
186 (set-language-info language-name (car (car alist)) (cdr (car alist)))
187 (setq alist (cdr alist))))
188
189(defun read-language-name (key prompt &optional initial-input)
190 "Read language name which has information for KEY, prompting with PROMPT."
191 (let* ((completion-ignore-case t)
192 (name (completing-read prompt
193 language-info-alist
194 (function (lambda (elm) (assq key elm)))
195 t
196 initial-input)))
197 ;; In spite of the documentation, completing-read returns null
198 ;; string instead of nil if input is null.
199 (and (> (length name) 0) name)))
200
201;;; Multilingual input methods.
202
203(defvar current-input-method nil
204 "The current input method for multilingual text.
205The value is a cons of language name and input method name.
206If nil, it means no input method is activated now.")
207(make-variable-buffer-local 'current-input-method)
208(put 'current-input-method 'permanent-local t)
209
210(defvar current-input-method-title nil
211 "Title string of the current input method shown in mode line.
212Every input method should set this an appropriate value when activated.")
213(make-variable-buffer-local 'current-input-method-title)
214(put 'current-input-method-title 'permanent-local t)
215
216(defvar default-input-method nil
217 "Default input method.
218The default input method is the one activated automatically by the command
219`toggle-input-method' (\\[toggle-input-method]).
220The value is a cons of language name and input method name.")
221
222(defvar default-input-method-title nil
223 "Title string of the default input method.")
224
225(defvar previous-input-method nil
226 "Input method selected previously.
227This is the one selected before the current input method is selected.
228See also the documentation of `default-input-method'.")
229
230(defvar inactivate-current-input-method-function nil
231 "Function to call for inactivating the current input method.
232Every input method should set this to an appropriate value when activated.
233This function is called with no argument.")
234(make-variable-buffer-local 'inactivate-current-input-method-function)
235(put 'inactivate-current-input-method-function 'permanent-local t)
236
237(defvar describe-current-input-method-function nil
238 "Function to call for describing the current input method.
239This function is called with no argument.")
240(make-variable-buffer-local 'describe-current-input-method-function)
241(put 'describe-current-input-method-function 'permanent-local t)
242
243(defun register-input-method (language-name input-method)
244 "Register INPUT-METHOD as an input method of LANGUAGE-NAME.
245LANGUAGE-NAME is a string.
246INPUT-METHOD is a list of the form:
247 (METHOD-NAME ACTIVATE-FUNC ARG ...)
248where METHOD-NAME is the name of this method,
249ACTIVATE-FUNC is the function to call for activating this method.
250Arguments for the function are METHOD-NAME and ARGs."
251 (let ((slot (get-language-info language-name 'input-method))
252 method-slot)
253 (if (null slot)
254 (set-language-info language-name 'input-method (list input-method))
255 (setq method-slot (assoc (car input-method) slot))
256 (if method-slot
257 (setcdr method-slot (cdr input-method))
258 (set-language-info language-name 'input-method
259 (cons input-method slot))))))
260
261(defun read-language-and-input-method-name ()
262 "Read a language names and the corresponding input method from a minibuffer.
263Return a cons of those names."
264 (let ((language-name (read-language-name
265 'input-method
266 "Language: "
267 (if previous-input-method
268 (cons (car previous-input-method) 0)))))
269 (if (null language-name)
270 (error "No input method for the specified language"))
271 (let* ((completion-ignore-case t)
272 (key-slot
273 (assq 'input-method
274 (cdr (assoc language-name language-info-alist))))
275 (method-name
276 (completing-read "Input method: " (cdr key-slot) nil t
277 (if (and previous-input-method
278 (string= language-name
279 (car previous-input-method)))
280 (cons (cdr previous-input-method) 0)))))
281 ;; In spite of the documentation, completing-read returns
282 ;; null string instead of nil if input is null.
283 (if (= (length method-name) 0)
284 (error "No input method specified"))
285 (list language-name method-name))))
286
287(defun set-default-input-method (language-name method-name)
288 "Set the default input method to METHOD-NAME for inputting LANGUAGE-NAME.
289The default input method is the one activated automatically by the command
290`toggle-input-method' (\\[toggle-input-method]).
291This doesn't affect the currently activated input method."
292 (interactive (read-language-and-input-method-name))
293 (let* ((key-slot (get-language-info language-name 'input-method))
294 (method-slot (assoc method-name key-slot)))
295 (if (null method-slot)
296 (error "No input method `%s' for %s" method-name language-name))
297 (setq default-input-method (cons language-name method-name))))
298
299(defun select-input-method (language-name method-name)
300 "Select and activate input method METHOD-NAME for inputting LANGUAGE-NAME.
301The information for activating METHOD-NAME is stored
302in `language-info-alist' under the key 'input-method.
303The format of the information has the form:
304 ((METHOD-NAME ACTIVATE-FUNC ARG ...) ...)
305where ACTIVATE-FUNC is a function to call for activating this method.
306Arguments for the function are METHOD-NAME and ARGs."
307 (interactive (read-language-and-input-method-name))
308 (let* ((key-slot (get-language-info language-name 'input-method))
309 (method-slot (assoc method-name key-slot)))
310 (if (null method-slot)
311 (error "No input method `%s' for %s" method-name language-name))
312 (if current-input-method
313 (progn
314 (if (not (equal previous-input-method current-input-method))
315 (setq previous-input-method current-input-method))
316 (funcall inactivate-current-input-method-function)))
317 (setq method-slot (cdr method-slot))
318 (apply (car method-slot) method-name (cdr method-slot))
319 (setq default-input-method
320 (setq current-input-method (cons language-name method-name)))
321 (setq default-input-method-title current-input-method-title)
322 (setq current-input-method default-input-method)))
323
324(defun toggle-input-method (&optional arg)
325 "Toggle whether a multilingual input method is activated in this buffer.
326With arg, activate an input method specified interactively.
327Without arg, the method being activated is the one selected most recently,
328 but if no input method has ever been selected, select one interactively."
329 (interactive "P")
330 (if arg
331 (call-interactively 'select-input-method)
332 (if (null current-input-method)
333 (if default-input-method
334 (select-input-method (car default-input-method)
335 (cdr default-input-method))
336 (call-interactively 'select-input-method))
337 (funcall inactivate-current-input-method-function)
338 (setq current-input-method nil))))
339
340(defun describe-input-method ()
341 "Describe the current input method."
342 (interactive)
343 (if current-input-method
344 (if (and (symbolp describe-current-input-method-function)
345 (fboundp describe-current-input-method-function))
346 (funcall describe-current-input-method-function)
347 (message "No way to describe the current input method `%s'"
348 (cdr current-input-method))
349 (ding))
350 (message "No input method is activated now")
351 (ding)))
352
353(defun read-multilingual-string (prompt &optional initial-input
354 language-name method-name)
355 "Read a multilingual string from minibuffer, prompting with string PROMPT.
356The input method selected last time is activated in minibuffer.
357If non-nil, second arg INITIAL-INPUT is a string to insert before reading.
358Optional 3rd and 4th arguments LANGUAGE-NAME and METHOD-NAME specify
359 the input method to be activated instead of the one selected last time."
360 (let ((minibuffer-setup-hook '(toggle-input-method))
361 (default-input-method default-input-method))
362 (if (and language-name method-name)
363 (set-default-input-method language-name method-name))
364 (read-string prompt initial-input)))
365
366;; Variables to control behavior of input methods. All input methods
367;; should react to these variables.
368
369(defvar input-method-tersely-flag nil
370 "*If this flag is non-nil, input method works rather tersely.
371
372For instance, Quail input method does not show guidance buffer while
373inputting at minibuffer if this flag is t.")
374
375(defvar input-method-activate-hook nil
376 "Normal hook run just after an input method is activated.")
377
378(defvar input-method-inactivate-hook nil
379 "Normal hook run just after an input method is inactivated.")
380
381(defvar input-method-after-insert-chunk-hook nil
382 "Normal hook run just after an input method insert some chunk of text.")
383
384
385;;; Language specific setup functions.
386(defun set-language-environment (language-name)
387 "Setup a user's environment for LANGUAGE-NAME.
388
389To setup, a fucntion returned by:
390 (get-language-info LANGUAGE-NAME 'setup-function)
391is called."
392 (interactive (list (read-language-name 'setup-function "Language: ")))
393 (let (func)
394 (if (or (null language-name)
395 (null (setq func
396 (get-language-info language-name 'setup-function))))
397 (error "No way to setup environment for the specified language"))
398 (funcall func)))
399
400;; Print all arguments with `princ', then print "\n".
401(defsubst princ-list (&rest args)
402 (while args (princ (car args)) (setq args (cdr args)))
403 (princ "\n"))
404
405(defun describe-language-support (language-name)
406 "Show documentation about how Emacs supports LANGUAGE-NAME."
407 (interactive (list (read-language-name 'documentation "Language: ")))
408 (let (doc)
409 (if (or (null language-name)
410 (null (setq doc
411 (get-language-info language-name 'documentation))))
412 (error "No documentation for the specified language"))
413 (with-output-to-temp-buffer "*Help*"
414 (if (not (eq doc t))
415 (cond ((stringp doc)
416 (princ doc))
417 ((and (symbolp doc) (fboundp doc))
418 (funcall doc))
419 (t
420 (error "Invalid documentation data for %s" language-name)))
421 (princ-list "List of items specific to "
422 language-name
423 " environment")
424 (princ "-----------------------------------------------------------\n")
425 (let ((str (get-language-info language-name 'sample-text)))
426 (if (stringp str)
427 (progn
428 (princ "<sample text>\n")
429 (princ-list " " str))))
430 (princ "<input methods>\n")
431 (let ((l (get-language-info language-name 'input-method)))
432 (while l
433 (princ-list " " (car (car l)))
434 (setq l (cdr l))))
435 (princ "<character sets>\n")
436 (let ((l (get-language-info language-name 'charset)))
437 (if (null l)
438 (princ-list " nothing specific to " language-name)
439 (while l
440 (princ-list " " (car l)
441 (format ":%3d:\n\t" (charset-id (car l)))
442 (charset-description (car l)))
443 (setq l (cdr l)))))
444 (princ "<coding systems>\n")
445 (let ((l (get-language-info language-name 'coding-system)))
446 (if (null l)
447 (princ-list " nothing specific to " language-name)
448 (while l
449 (princ-list " " (car l) ":\n\t"
450 (coding-system-docstring (car l)))
451 (setq l (cdr l)))))))))
452
453;;; Charset property
454
455(defsubst get-charset-property (charset propname)
456 "Return the value of CHARSET's PROPNAME property.
457This is the last value stored with
458`(put-charset-property CHARSET PROPNAME VALUE)'."
459 (plist-get (charset-plist charset) propname))
460
461(defsubst put-charset-property (charset propname value)
462 "Store CHARSETS's PROPNAME property with value VALUE.
463It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
464 (set-charset-plist charset
465 (plist-put (charset-plist charset) propname value)))
466
467;;; Character code property
468(put 'char-code-property-table 'char-table-extra-slots 0)
469
470(defvar char-code-property-table
471 (make-char-table 'char-code-property-table)
472 "Char-table containing a property list of each character code.
473
474See also the documentation of `get-char-code-property' and
475`put-char-code-property'")
476
477(defun get-char-code-property (char propname)
478 "Return the value of CHAR's PROPNAME property in `char-code-property-table'."
479 (let ((plist (aref char-code-property-table char)))
480 (if (listp plist)
481 (car (cdr (memq propname plist))))))
482
483(defun put-char-code-property (char propname value)
484 "Store CHAR's PROPNAME property with VALUE in `char-code-property-table'.
485It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
486 (let ((plist (aref char-code-property-table char)))
487 (if plist
488 (let ((slot (memq propname plist)))
489 (if slot
490 (setcar (cdr slot) value)
491 (nconc plist (list propname value))))
492 (aset char-code-property-table char (list propname value)))))
493
494;;; mule-cmds.el ends here
diff --git a/lisp/international/mule-diag.el b/lisp/international/mule-diag.el
new file mode 100644
index 00000000000..ecee2b4615e
--- /dev/null
+++ b/lisp/international/mule-diag.el
@@ -0,0 +1,565 @@
1;; mule-diag.el -- show diagnosis of multilingual environment (MULE)
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, charset, coding system, fontset, diagnosis
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; General utility function
25
26;; Print all arguments with single space separator in one line.
27(defun print-list (&rest args)
28 (while (cdr args)
29 (if (car args)
30 (progn (princ (car args)) (princ " ")))
31 (setq args (cdr args)))
32 (princ (car args))
33 (princ "\n"))
34
35;;; CHARSET
36
37;;;###autoload
38(defun list-character-sets ()
39 "Display a list of all charsets."
40 (interactive)
41 (with-output-to-temp-buffer "*Help*"
42 (print-character-sets)))
43
44(defvar charset-other-info-func nil)
45
46(defun print-character-sets ()
47 "Print information on all charsets in a machine readable format."
48 (princ "\
49#########################
50## LIST OF CHARSETS
51## Each line corresponds to one charset.
52## The following attributes are listed in this order
53## separated by a colon `:' in one line.
54## CHARSET-SYMBOL-NAME,
55## CHARSET-ID,
56## DIMENSION (1 or 2)
57## CHARS (94 or 96)
58## BYTES (of multibyte form: 1, 2, 3, or 4),
59## WIDTH (occupied column numbers: 1 or 2),
60## DIRECTION (0:left-to-right, 1:right-to-left),
61## ISO-FINAL-CHAR (character code of ISO-2022's final character)
62## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR)
63## DESCRIPTION (describing string of the charset)
64")
65 (let ((charsets charset-list)
66 charset)
67 (while charsets
68 (setq charset (car charsets))
69 (princ (format "%s:%03d:%d:%d:%d:%d:%d:%d:%d:%s\n"
70 charset
71 (charset-id charset)
72 (charset-dimension charset)
73 (charset-chars charset)
74 (charset-bytes charset)
75 (charset-width charset)
76 (charset-direction charset)
77 (charset-iso-final-char charset)
78 (charset-iso-graphic-plane charset)
79 (charset-description charset)))
80 (setq charsets (cdr charsets)))))
81
82
83;;; CODING-SYSTEM
84
85;; Print information of designation of each graphic register in FLAGS
86;; in human readable format. See the documentation of
87;; `make-coding-system' for the meaning of FLAGS.
88(defun print-designation (flags)
89 (let ((graphic-register 0)
90 charset)
91 (while (< graphic-register 4)
92 (setq charset (aref flags graphic-register))
93 (princ (format
94 " G%d -- %s\n"
95 graphic-register
96 (cond ((null charset)
97 "never used")
98 ((eq charset t)
99 "no initial designation, and used by any charsets")
100 ((symbolp charset)
101 (format "%s:%s"
102 charset (charset-description charset)))
103 ((listp charset)
104 (if (charsetp (car charset))
105 (format "%s:%s, and also used by the followings:"
106 (car charset)
107 (charset-description (car charset)))
108 "no initial designation, and used by the followings:"))
109 (t
110 "invalid designation information"))))
111 (if (listp charset)
112 (progn
113 (setq charset (cdr charset))
114 (while charset
115 (cond ((eq (car charset) t)
116 (princ "\tany other charsets\n"))
117 ((charsetp (car charset))
118 (princ (format "\t%s:%s\n"
119 (car charset)
120 (charset-description (car charset)))))
121 (t
122 "invalid designation information"))
123 (setq charset (cdr charset)))))
124 (setq graphic-register (1+ graphic-register)))))
125
126;;;###autoload
127(defun describe-coding-system (coding-system)
128 "Display information of CODING-SYSTEM."
129 (interactive "zCoding-system: ")
130 (check-coding-system coding-system)
131 (with-output-to-temp-buffer "*Help*"
132 (let ((coding-vector (coding-system-vector coding-system)))
133 (princ "Coding-system ")
134 (princ coding-system)
135 (princ " [")
136 (princ (char-to-string (coding-vector-mnemonic coding-vector)))
137 (princ "]: \n")
138 (princ " ")
139 (princ (coding-vector-docstring coding-vector))
140 (princ "\nType: ")
141 (let ((type (coding-vector-type coding-vector))
142 (flags (coding-vector-flags coding-vector)))
143 (princ type)
144 (princ ", which means ")
145 (cond ((eq type nil)
146 (princ "do no conversion."))
147 ((eq type t)
148 (princ "do automatic conversion."))
149 ((eq type 0)
150 (princ "Emacs internal multibyte form."))
151 ((eq type 1)
152 (princ "Shift-JIS (MS-KANJI)."))
153 ((eq type 2)
154 (princ "a variant of ISO-2022.\n")
155 (princ "Initial designations:\n")
156 (print-designation flags)
157 (princ "Other Form: \n")
158 (princ (if (aref flags 4) "short-form" "long-form"))
159 (if (aref flags 5) (princ ", ASCII@EOL"))
160 (if (aref flags 6) (princ ", ASCII@CNTL"))
161 (princ (if (aref flags 7) ", 7-bit" ", 8-bit"))
162 (if (aref flags 8) (princ ", use-locking-shift"))
163 (if (aref flags 9) (princ ", use-single-shift"))
164 (if (aref flags 10) (princ ", use-roman"))
165 (if (aref flags 10) (princ ", use-old-jis"))
166 (if (aref flags 11) (princ ", no-ISO6429"))
167 (princ "."))
168 ((eq type 3)
169 (princ "Big5."))
170 ((eq type 4)
171 (princ "do conversion by CCL program."))
172 (t (princ "invalid coding-system."))))
173 (princ "\nEOL-Type: ")
174 (let ((eol-type (coding-system-eoltype coding-system)))
175 (cond ((vectorp eol-type)
176 (princ "Automatic selection from ")
177 (princ eol-type)
178 (princ "\n"))
179 ((or (null eol-type) (eq eol-type 0)) (princ "LF\n"))
180 ((eq eol-type 1) (princ "CRLF\n"))
181 ((eq eol-type 2) (princ "CR\n"))
182 (t (princ "invalid\n"))))
183 )))
184
185;;;###autoload
186(defun describe-current-coding-system-briefly ()
187 "Display coding systems currently used in a brief format in mini-buffer.
188
189The format is \"current: [FKTPp=........] default: [FPp=......]\",
190where mnemonics of the following coding systems come in this order
191at the place of `...':
192 buffer-file-coding-system (of the current buffer)
193 eol-type of buffer-file-coding-system (of the current buffer)
194 keyboard-coding-system
195 terminal-coding-system
196 process-coding-system for read (of the current buffer, if any)
197 eol-type of process-coding-system for read (of the current buffer, if any)
198 process-coding-system for write (of the current buffer, if any)
199 eol-type of process-coding-system for write (of the current buffer, if any)
200 default buffer-file-coding-system
201 eol-type of default buffer-file-coding-system
202 default process-coding-system for read
203 default eol-type of process-coding-system for read
204 default process-coding-system for write
205 default eol-type of process-coding-system"
206 (interactive)
207 (let* ((proc (get-buffer-process (current-buffer)))
208 (process-coding-systems (if proc (process-coding-system proc))))
209 (message
210 "current: [FKTPp=%c%c%c%c%c%c%c%c] default: [FPp=%c%c%c%c%c%c]"
211 (coding-system-mnemonic buffer-file-coding-system)
212 (coding-system-eoltype-mnemonic buffer-file-coding-system)
213 (coding-system-mnemonic (keyboard-coding-system))
214 (coding-system-mnemonic (terminal-coding-system))
215 (coding-system-mnemonic (car process-coding-systems))
216 (coding-system-eoltype-mnemonic (car process-coding-systems))
217 (coding-system-mnemonic (cdr process-coding-systems))
218 (coding-system-eoltype-mnemonic (cdr process-coding-systems))
219 (coding-system-mnemonic (default-value 'buffer-file-coding-system))
220 (coding-system-eoltype-mnemonic (default-value 'buffer-file-coding-system))
221 (coding-system-mnemonic (car default-process-coding-system))
222 (coding-system-eoltype-mnemonic (car default-process-coding-system))
223 (coding-system-mnemonic (cdr default-process-coding-system))
224 (coding-system-eoltype-mnemonic (cdr default-process-coding-system))
225 )))
226
227;; Print symbol name and mnemonics of CODING-SYSTEM by `princ'.
228(defsubst print-coding-system-briefly (coding-system)
229 (print-list ":"
230 coding-system
231 (format "[%c%c]"
232 (coding-system-mnemonic coding-system)
233 (coding-system-eoltype-mnemonic coding-system))))
234
235;;;###autoload
236(defun describe-current-coding-system ()
237 "Display coding systems currently used in a detailed format."
238 (interactive)
239 (with-output-to-temp-buffer "*Help*"
240 (let* ((proc (get-buffer-process (current-buffer)))
241 (process-coding-systems (if proc (process-coding-system proc))))
242 (princ "Current:\n buffer-file-coding-system")
243 (print-coding-system-briefly buffer-file-coding-system)
244 (princ " keyboard-coding-system")
245 (print-coding-system-briefly (keyboard-coding-system))
246 (princ " terminal-coding-system")
247 (print-coding-system-briefly (terminal-coding-system))
248 (if process-coding-systems
249 (progn (princ " process-coding-system (read)")
250 (print-coding-system-briefly (car process-coding-systems))
251 (princ " process-coding-system (write)")
252 (print-coding-system-briefly (cdr process-coding-systems))))
253 (princ "Default:\n buffer-file-coding-system")
254 (print-coding-system-briefly (default-value 'buffer-file-coding-system))
255 (princ " process-coding-system (read)")
256 (print-coding-system-briefly (car default-process-coding-system))
257 (princ " process-coding-system (write)")
258 (print-coding-system-briefly (cdr default-process-coding-system))
259 (princ "coding-system-alist:\n")
260 (pp coding-system-alist))
261 (let ((l coding-category-list))
262 (princ "\nCoding categories (in the order of priority):\n")
263 (while l
264 (princ (format "%s -> %s\n" (car l) (symbol-value (car l))))
265 (setq l (cdr l))))))
266
267;; Print detailed information on CODING-SYSTEM.
268(defun print-coding-system (coding-system)
269 (let ((type (coding-system-type coding-system))
270 (eol-type (coding-system-eoltype coding-system))
271 (flags (coding-system-flags coding-system)))
272 (princ (format "%s:%s:%c:%d:"
273 coding-system
274 type
275 (coding-system-mnemonic coding-system)
276 (if (integerp eol-type) eol-type 3)))
277 (cond ((eq type 2) ; ISO-2022
278 (let ((idx 0)
279 charset)
280 (while (< idx 4)
281 (setq charset (aref flags idx))
282 (cond ((null charset)
283 (princ -1))
284 ((eq charset t)
285 (princ -2))
286 ((charsetp charset)
287 (princ charset))
288 ((listp charset)
289 (princ "(")
290 (princ (car charset))
291 (setq charset (cdr charset))
292 (while charset
293 (princ ",")
294 (princ (car charset))
295 (setq charset (cdr charset)))
296 (princ ")")))
297 (princ ",")
298 (setq idx (1+ idx)))
299 (while (< idx 12)
300 (princ (if (aref flags idx) 1 0))
301 (princ ",")
302 (setq idx (1+ idx)))
303 (princ (if (aref flags idx) 1 0))))
304 ((eq type 4) ; CCL
305 (let (i len)
306 (setq i 0 len (length (car flags)))
307 (while (< i len)
308 (princ (format " %x" (aref (car flags) i)))
309 (setq i (1+ i)))
310 (princ ",")
311 (setq i 0 len (length (cdr flags)))
312 (while (< i len)
313 (princ (format " %x" (aref (cdr flags) i)))
314 (setq i (1+ i)))))
315 (t (princ 0)))
316 (princ ":")
317 (princ (coding-system-docstring coding-system))
318 (princ "\n")))
319
320(defun list-coding-systems ()
321 "Print information on all coding systems in a machine readable format."
322 (with-output-to-temp-buffer "*Help*"
323 (princ "\
324#########################
325## LIST OF CODING SYSTEMS
326## Each line corresponds to one coding system
327## Format of a line is:
328## NAME:TYPE:MNEMONIC:EOL:FLAGS:DOCSTRING,
329## where
330## TYPE = nil (no conversion), t (auto conversion),
331## 0 (Mule internal), 1 (SJIS), 2 (ISO2022), 3 (BIG5), or 4 (CCL)
332## EOL = 0 (LF), 1 (CRLF), 2 (CR), or 3 (Automatic detection)
333## FLAGS =
334## if TYPE = 2 then
335## comma (`,') separated data of the followings:
336## G0, G1, G2, G3, SHORT-FORM, ASCII-EOL, ASCII-CNTL, SEVEN,
337## LOCKING-SHIFT, SINGLE-SHIFT, USE-ROMAN, USE-OLDJIS, NO-ISO6429
338## else if TYPE = 4 then
339## comma (`,') separated CCL programs for read and write
340## else
341## 0
342##
343")
344 (let ((codings (make-vector 7 nil)))
345 (mapatoms
346 (function
347 (lambda (arg)
348 (if (and arg
349 (coding-system-p arg)
350 (null (get arg 'pre-write-conversion))
351 (null (get arg 'post-read-conversion)))
352 (let* ((type (coding-system-type arg))
353 (idx (if (null type) 0 (if (eq type t) 1 (+ type 2)))))
354 (if (or (= idx 0)
355 (vectorp (coding-system-eoltype arg)))
356 (aset codings idx (cons arg (aref codings idx)))))))))
357 (let ((idx 0) elt)
358 (while (< idx 7)
359 (setq elt (aref codings idx))
360 (while elt
361 (print-coding-system (car elt))
362 (setq elt (cdr elt)))
363 (setq idx (1+ idx)))))
364 (princ "\
365############################
366## LIST OF CODING CATEGORIES (ordered by priority)
367## CATEGORY:CODING-SYSTEM
368##
369")
370 (let ((l coding-category-list))
371 (while l
372 (princ (format "%s:%s\n" (car l) (symbol-value (car l))))
373 (setq l (cdr l))))
374 ))
375
376;;; FONT
377
378;; Print information of a font in FONTINFO.
379(defun describe-font-internal (font-info &optional verbose)
380 (print-list "name (opened by):" (aref font-info 0))
381 (print-list " full name:" (aref font-info 1))
382 (let ((charset (aref font-info 2)))
383 (print-list " charset:"
384 (format "%s (%s)" charset (charset-description charset))))
385 (print-list " size:" (format "%d" (aref font-info 3)))
386 (print-list " height:" (format "%d" (aref font-info 4)))
387 (print-list " baseline-offset:" (format "%d" (aref font-info 5)))
388 (print-list "relative-compose:" (format "%d" (aref font-info 6))))
389
390;;;###autoload
391(defun describe-font (fontname)
392 "Display information about fonts which partially match FONTNAME."
393 (interactive "sFontname: ")
394 (or window-system
395 (error "No window system being used"))
396 (let ((font-info (font-info fontname)))
397 (if (null font-info)
398 (message "No matching font")
399 (with-output-to-temp-buffer "*Help*"
400 (describe-font-internal font-info 'verbose)))))
401
402;; Print information in FONTINFO of a fontset named FONTSET.
403(defun describe-fontset-internal (fontset fontset-info)
404 (print-list "Fontset:" fontset)
405 (let ((size (aref fontset-info 0)))
406 (print-list " size:" (format "%d" size)
407 (if (= size 0) "... which means not yet used" "")))
408 (print-list " height:" (format "%d" (aref fontset-info 1)))
409 (print-list " fonts: (charset : font name)")
410 (let* ((fonts (aref fontset-info 2))
411 elt charset requested opened)
412 (while fonts
413 (setq elt (car fonts)
414 charset (car elt)
415 requested (nth 1 elt)
416 opened (nth 2 elt))
417 (print-list " " charset ":" requested)
418 (if (stringp opened)
419 (print-list " Opened as: " opened)
420 (if (null opened) " -- open failed --"))
421 (setq fonts (cdr fonts)))))
422
423;;;###autoload
424(defun describe-fontset (fontset)
425 "Display information about FONTSET."
426 (interactive
427 (if (not window-system)
428 (error "No window system being used")
429 (let ((fontset-list (mapcar '(lambda (x) (list x)) (fontset-list))))
430 (list (completing-read "Fontset: " fontset-list)))))
431 (setq fontset (query-fontset fontset))
432 (if (null fontset)
433 (error "No matching fontset")
434 (let ((fontset-info (fontset-info fontset)))
435 (with-output-to-temp-buffer "*Help*"
436 (describe-fontset-internal fontset fontset-info)))))
437
438
439;;; DIAGNOSIS
440
441(defun insert-list (args)
442 (while (cdr args)
443 (insert (or (car args) "nil") " ")
444 (setq args (cdr args)))
445 (if args (insert (or (car args) "nil")))
446 (insert "\n"))
447
448(defun insert-section (sec title)
449 (insert "########################################\n"
450 "# Section " (format "%d" sec) ". " title "\n"
451 "########################################\n\n"))
452
453;;;###autoload
454(defun mule-diag ()
455 "Show diagnosis of the running Mule."
456 (interactive)
457 (let ((buf (get-buffer-create "*Diagnosis*")))
458 (save-excursion
459 (set-buffer buf)
460 (erase-buffer)
461 (insert "\t###############################\n"
462 "\t### Diagnosis of your Emacs ###\n"
463 "\t###############################\n\n"
464 "CONTENTS: Section 1. General Information\n"
465 " Section 2. Display\n"
466 " Section 3. Input methods\n"
467 " Section 4. Coding systems\n"
468 " Section 5. Charsets\n")
469 (if window-system
470 (insert " Section 6. Fontset list\n"))
471 (insert "\n")
472
473 (insert-section 1 "General Information")
474 (insert "Version of this emacs:\n " (emacs-version) "\n"
475 "Primary language:\n " primary-language "\n\n")
476
477 (insert-section 2 "Display")
478 (if window-system
479 (insert "Window-system: "
480 (symbol-name window-system)
481 (format "%s" window-system-version))
482 (insert "Terminal: " (getenv "TERM")))
483 (insert "\n\n")
484
485 (if (eq window-system 'x)
486 (let ((font (cdr (assq 'font (frame-parameters)))))
487 (insert "The selected frame is using the "
488 (if (query-fontset font) "fontset" "font")
489 ":\n\t" font))
490 (insert "Coding system of the terminal: "
491 (symbol-name (terminal-coding-system))))
492 (insert "\n\n")
493
494 (insert-section 3 "Input methods")
495 (insert "language\tinput-method\n"
496 "--------\t------------\n")
497 (let ((alist language-info-alist))
498 (while alist
499 (insert (car (car alist)))
500 (indent-to 16)
501 (let ((methods (get-language-info (car (car alist)) 'input-method)))
502 (if methods
503 (insert-list (mapcar 'car methods))
504 (insert "none\n")))
505 (setq alist (cdr alist))))
506 (insert "\n")
507 (if default-input-method
508 (insert "The input method used last time is: "
509 (cdr default-input-method)
510 "\n"
511 " for inputting the language: "
512 (car default-input-method)
513 "\n")
514 (insert "No input method has ever been selected.\n"))
515
516 (insert "\n")
517
518 (insert-section 4 "Coding systems")
519 (save-excursion (list-coding-systems))
520 (insert-buffer "*Help*")
521 (goto-char (point-max))
522 (insert "\n")
523
524 (insert-section 5 "Charsets")
525 (save-excursion (list-character-sets))
526 (insert-buffer "*Help*")
527 (goto-char (point-max))
528 (insert "\n")
529
530 (if window-system
531 (let ((fontsets (fontset-list)))
532 (insert-section 6 "Fontset list")
533 (while fontsets
534 (describe-fontset (car fontsets))
535 (insert-buffer "*Help*")
536 (setq fontsets (cdr fontsets)))))
537
538 (set-buffer-modified-p nil)
539 )
540 (let ((win (display-buffer buf)))
541 (set-window-point win 1)
542 (set-window-start win 1))
543 ))
544
545
546;;; DUMP DATA FILE
547
548;;;###autoload
549(defun dump-charsets ()
550 "Dump information of all charsets into the file \"charsets.dat\"."
551 (list-character-sets)
552 (set-buffer (get-buffer "*Help*"))
553 (let (make-backup-files)
554 (write-region (point-min) (point-max) "charsets.dat"))
555 (kill-emacs))
556
557;;;###autoload
558(defun dump-codings ()
559 "Dump information of all coding systems into the file \"codings.dat\"."
560 (list-coding-systems)
561 (set-buffer (get-buffer "*Help*"))
562 (let (make-backup-files)
563 (write-region (point-min) (point-max) "codings.dat"))
564 (kill-emacs))
565
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
new file mode 100644
index 00000000000..27c64aa3a2b
--- /dev/null
+++ b/lisp/international/mule-util.el
@@ -0,0 +1,419 @@
1;;; mule-util.el --- Utility functions for mulitilingual environment (mule)
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26;;; String manipulations while paying attention to multibyte
27;;; characters.
28
29;;;###autoload
30(defun string-to-sequence (string type)
31 "Convert STRING to a sequence of TYPE which contains characters in STRING.
32TYPE should be `list' or `vector'.
33Multibyte characters are conserned."
34 (or (eq type 'list) (eq type 'vector)
35 (error "Invalid type: %s" type))
36 (let* ((len (length string))
37 (i 0)
38 l ch)
39 (while (< i len)
40 (setq ch (sref string i))
41 (setq l (cons ch l))
42 (setq i (+ i (char-bytes ch))))
43 (setq l (nreverse l))
44 (if (eq type 'list)
45 l
46 (vconcat l))))
47
48;;;###autoload
49(defsubst string-to-list (string)
50 "Return a list of characters in STRING."
51 (string-to-sequence string 'list))
52
53;;;###autoload
54(defsubst string-to-vector (string)
55 "Return a vector of characters in STRING."
56 (string-to-sequence string 'vector))
57
58;;;###autoload
59(defun store-substring (string idx obj)
60 "Embed OBJ (string or character) at index IDX of STRING."
61 (let* ((str (cond ((stringp obj) obj)
62 ((integerp obj) (char-to-string obj))
63 (t (error
64 "Invalid argument (should be string or character): %s"
65 obj))))
66 (string-len (length string))
67 (len (length str))
68 (i 0))
69 (while (and (< i len) (< idx string-len))
70 (aset string idx (aref str i))
71 (setq idx (1+ idx) i (1+ i)))
72 string))
73
74;;;###autoload
75(defun truncate-string-to-width (str width &optional start-column padding)
76 "Truncate string STR to fit in WIDTH columns.
77Optional 1st arg START-COLUMN if non-nil specifies the starting column.
78Optional 2nd arg PADDING if non-nil, space characters are padded at
79the head and tail of the resulting string to fit in WIDTH if necessary.
80If PADDING is nil, the resulting string may be narrower than WIDTH."
81 (or start-column
82 (setq start-column 0))
83 (let ((len (length str))
84 (idx 0)
85 (column 0)
86 (head-padding "") (tail-padding "")
87 ch last-column last-idx from-idx)
88 (condition-case nil
89 (while (< column start-column)
90 (setq ch (sref str idx)
91 column (+ column (char-width ch))
92 idx (+ idx (char-bytes ch))))
93 (args-out-of-range (setq idx len)))
94 (if (< column start-column)
95 (if padding (make-string width ?\ ) "")
96 (if (and padding (> column start-column))
97 (setq head-padding (make-string (- column start-column) ?\ )))
98 (setq from-idx idx)
99 (condition-case nil
100 (while (< column width)
101 (setq last-column column
102 last-idx idx
103 ch (sref str idx)
104 column (+ column (char-width ch))
105 idx (+ idx (char-bytes ch))))
106 (args-out-of-range (setq idx len)))
107 (if (> column width)
108 (setq column last-column idx last-idx))
109 (if (and padding (< column width))
110 (setq tail-padding (make-string (- width column) ?\ )))
111 (setq str (substring str from-idx idx))
112 (if padding
113 (concat head-padding str tail-padding)
114 str))))
115
116;;; For backward compatiblity ...
117;;;###autoload
118(defalias 'truncate-string 'truncate-string-to-width)
119(make-obsolete 'truncate-string 'truncate-string-to-width)
120
121;;; Nested alist handler. Nested alist is alist whose elements are
122;;; also nested alist.
123
124;;;###autoload
125(defsubst nested-alist-p (obj)
126 "Return t if OBJ is a nesetd alist.
127
128Nested alist is a list of the form (ENTRY . BRANCHES), where ENTRY is
129any Lisp object, and BRANCHES is a list of cons cells of the form
130(KEY-ELEMENT . NESTED-ALIST).
131
132You can use a nested alist to store any Lisp object (ENTRY) for a key
133sequence KEYSEQ, where KEYSEQ is a sequence of KEY-ELEMENT. KEYSEQ
134can be a string, a vector, or a list."
135 (and obj (listp obj) (listp (cdr obj))))
136
137;;;###autoload
138(defun set-nested-alist (keyseq entry alist &optional len branches)
139 "Set ENTRY for KEYSEQ in a nested alist ALIST.
140Optional 4th arg LEN non-nil means the firlst LEN elements in KEYSEQ
141 is considered.
142Optional argument BRANCHES if non-nil is branches for a keyseq
143longer than KEYSEQ.
144See the documentation of `nested-alist-p' for more detail."
145 (or (nested-alist-p alist)
146 (error "Invalid arguement %s" alist))
147 (let ((islist (listp keyseq))
148 (len (or len (length keyseq)))
149 (i 0)
150 key-elt slot)
151 (while (< i len)
152 (if (null (nested-alist-p alist))
153 (error "Keyseq %s is too long for this nested alist" keyseq))
154 (setq key-elt (if islist (nth i keyseq) (aref keyseq i)))
155 (setq slot (assoc key-elt (cdr alist)))
156 (if (null slot)
157 (progn
158 (setq slot (cons key-elt (list t)))
159 (setcdr alist (cons slot (cdr alist)))))
160 (setq alist (cdr slot))
161 (setq i (1+ i)))
162 (setcar alist entry)
163 (if branches
164 (if (cdr alist)
165 (error "Can't set branches for keyseq %s" keyseq)
166 (setcdr alist branches)))))
167
168;;;###autoload
169(defun lookup-nested-alist (keyseq alist &optional len start nil-for-too-long)
170 "Look up key sequence KEYSEQ in nested alist ALIST. Return the definition.
171Optional 1st argument LEN specifies the length of KEYSEQ.
172Optional 2nd argument START specifies index of the starting key.
173The returned value is normally a nested alist of which
174car part is the entry for KEYSEQ.
175If ALIST is not deep enough for KEYSEQ, return number which is
176 how many key elements at the front of KEYSEQ it takes
177 to reach a leaf in ALIST.
178Optional 3rd argument NIL-FOR-TOO-LONG non-nil means return nil
179 even if ALIST is not deep enough."
180 (or (nested-alist-p alist)
181 (error "invalid arguement %s" alist))
182 (or len
183 (setq len (length keyseq)))
184 (let ((i (or start 0)))
185 (if (catch 'lookup-nested-alist-tag
186 (if (listp keyseq)
187 (while (< i len)
188 (if (setq alist (cdr (assoc (nth i keyseq) (cdr alist))))
189 (setq i (1+ i))
190 (throw 'lookup-nested-alist-tag t))))
191 (while (< i len)
192 (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
193 (setq i (1+ i))
194 (throw 'lookup-nested-alist-tag t))))
195 ;; KEYSEQ is too long.
196 (if nil-for-too-long nil i)
197 alist)))
198
199;; Coding system related functions.
200
201;;;###autoload
202(defun set-coding-system-alist (target-type regexp coding-system
203 &optional operation)
204 "Update `coding-system-alist' according to the arguments.
205TARGET-TYPE specifies a type of the target: `file', `process', or `network'.
206 TARGET-TYPE tells which slots of coding-system-alist should be affected.
207 If `file', it affects slots for insert-file-contents and write-region.
208 If `process', it affects slots for call-process, call-process-region, and
209 start-process.
210 If `network', it affects a slot for open-network-process.
211REGEXP is a regular expression matching a target of I/O operation.
212CODING-SYSTEM is a coding system to perform code conversion
213 on the I/O operation, or a cons of coding systems for decoding and
214 encoding respectively, or a function symbol which returns the cons.
215Optional arg OPERATION if non-nil specifies directly one of slots above.
216 The valid value is: insert-file-contents, write-region,
217 call-process, call-process-region, start-process, or open-network-stream.
218If OPERATION is specified, TARGET-TYPE is ignored.
219See the documentation of `coding-system-alist' for more detail."
220 (or (stringp regexp)
221 (error "Invalid regular expression: %s" regexp))
222 (or (memq target-type '(file process network))
223 (error "Invalid target type: %s" target-type))
224 (if (symbolp coding-system)
225 (if (not (fboundp coding-system))
226 (progn
227 (check-coding-system coding-system)
228 (setq coding-system (cons coding-system coding-system))))
229 (check-coding-system (car coding-system))
230 (check-coding-system (cdr coding-system)))
231 (let ((op-list (if operation (list operation)
232 (cond ((eq target-type 'file)
233 '(insert-file-contents write-region))
234 ((eq target-type 'process)
235 '(call-process call-process-region start-process))
236 (t ; i.e. (eq target-type network)
237 '(open-network-stream)))))
238 slot)
239 (while op-list
240 (setq slot (assq (car op-list) coding-system-alist))
241 (if slot
242 (let ((chain (cdr slot)))
243 (if (catch 'tag
244 (while chain
245 (if (string= regexp (car (car chain)))
246 (progn
247 (setcdr (car chain) coding-system)
248 (throw 'tag nil)))
249 (setq chain (cdr chain)))
250 t)
251 (setcdr slot (cons (cons regexp coding-system) (cdr slot)))))
252 (setq coding-system-alist
253 (cons (cons (car op-list) (list (cons regexp coding-system)))
254 coding-system-alist)))
255 (setq op-list (cdr op-list)))))
256
257;;;###autoload
258(defun coding-system-list ()
259 "Return a list of all existing coding systems."
260 (let (l)
261 (mapatoms (lambda (x) (if (get x 'coding-system) (setq l (cons x l)))))
262 l))
263
264
265;;; Composite charcater manipulations.
266
267;;;###autoload
268(defun compose-region (start end)
269 "Compose all characters in the current region into one composite character.
270When called from a program, expects two arguments,
271positions (integers or markers) specifying the region."
272 (interactive "r")
273 (save-excursion
274 (let ((str (buffer-substring start end)))
275 (goto-char start)
276 (delete-region start end)
277 (insert (compose-string str)))))
278
279;;;###autoload
280(defun decompose-region (start end)
281 "Decompose all composite characters in the current region.
282Composite characters are broken up into individual components.
283When called from a program, expects two arguments,
284positions (integers or markers) specifying the region."
285 (interactive "r")
286 (save-restriction
287 (narrow-to-region start end)
288 (goto-char (point-min))
289 (let ((enable-multibyte-characters nil)
290 ;; This matches the whole bytes of single composite character.
291 (re-cmpchar "\200[\240-\377]+")
292 p ch str)
293 (while (re-search-forward re-cmpchar nil t)
294 (setq str (buffer-substring (match-beginning 0) (match-end 0)))
295 (delete-region (match-beginning 0) (match-end 0))
296 (insert (decompose-composite-char (string-to-char str)))))))
297
298;;;###autoload
299(defconst reference-point-alist
300 '((tl . 0) (tc . 1) (tr . 2)
301 (ml . 3) (mc . 4) (mr . 5)
302 (bl . 6) (bc . 7) (br . 8)
303 (top-left . 0) (top-center . 1) (top-right . 2)
304 (mid-left . 3) (mid-center . 4) (mid-right . 5)
305 (bottom-left . 6) (bottom-center . 7) (bottom-right . 8)
306 (0 . 0) (1 . 1) (2 . 2)
307 (3 . 3) (4 . 4) (5 . 5)
308 (6 . 6) (7 . 7) (8 . 8))
309 "Alist of reference point symbols vs reference point codes.
310Meanings of reference point codes are as follows:
311
312 0----1----2 <-- ascent 0:tl or top-left
313 | | 1:tc or top-center
314 | | 2:tr or top-right
315 | | 3:ml or mid-left
316 | 4 <--+---- center 4:mc or mid-center
317 | | 5:mr or mid-right
318--- 3 5 <-- baseline 6:bl or bottom-left
319 | | 7:bc or bottom-center
320 6----7----8 <-- descent 8:br or bottom-right
321
322Reference point symbols are to be used to specify composition rule of
323the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where GLOBAL-REF-POINT
324is a reference point in the overall glyphs already composed, and
325NEW-REF-POINT is a reference point in the new glyph to be added.
326
327For instance, if GLOBAL-REF-POINT is 8 and NEW-REF-POINT is 1, the
328overall glyph is updated as follows:
329
330 +-------+--+ <--- new ascent
331 | | |
332 | global| |
333 | glyph | |
334--- | | | <--- baseline (doesn't change)
335 +----+--+--+
336 | | new |
337 | |glyph|
338 +----+-----+ <--- new descent
339")
340
341;; Return a string for char CH to be embedded in multibyte form of
342;; composite character.
343(defun compose-chars-component (ch)
344 (if (< ch 128)
345 (format "\240%c" (+ ch 128))
346 (let ((str (char-to-string ch)))
347 (if (cmpcharp ch)
348 (if (/= (aref str 1) ?\xFF)
349 (error "Char %c can't be composed" ch)
350 (substring str 2))
351 (aset str 0 (+ (aref str 0) ?\x20))
352 str))))
353
354;; Return a string for composition rule RULE to be embedded in
355;; multibyte form of composite character.
356(defsubst compose-chars-rule (rule)
357 (char-to-string (+ ?\xA0
358 (* (cdr (assq (car rule) reference-point-alist)) 9)
359 (cdr (assq (cdr rule) reference-point-alist)))))
360
361;;;###autoload
362(defun compose-chars (first-component &rest args)
363 "Return one char string composed from the arguments.
364Each argument is a character (including a composite chararacter)
365or a composition rule.
366A composition rule has the form \(GLOBAL-REF-POINT . NEW-REF-POINT).
367See the documentation of `reference-point-alist' for more detail."
368 (if (= (length args) 0)
369 (char-to-string first-component)
370 (let* ((with-rule (consp (car args)))
371 (str (if with-rule (concat (vector leading-code-composition ?\xFF))
372 (char-to-string leading-code-composition))))
373 (setq str (concat str (compose-chars-component first-component)))
374 (while args
375 (if with-rule
376 (progn
377 (if (not (consp (car args)))
378 (error "Invalid composition rule: %s" (car args)))
379 (setq str (concat str (compose-chars-rule (car args))
380 (compose-chars-component (car (cdr args))))
381 args (cdr (cdr args))))
382 (setq str (concat str (compose-chars-component (car args)))
383 args (cdr args))))
384 str)))
385
386;;;###autoload
387(defun decompose-composite-char (char &optional type with-composition-rule)
388 "Convert composite character CHAR to a string containing components of CHAR.
389Optional 1st arg TYPE specifies the type of sequence returned.
390It should be `string' (default), `list', or `vector'.
391Optional 2nd arg WITH-COMPOSITION-RULE non-nil means the returned
392sequence contains embedded composition rules if any. In this case, the
393order of elements in the sequence is the same as arguments for
394`compose-chars' to create CHAR.
395If TYPE is omitted or is `string', composition rules are omitted
396even if WITH-COMPOSITION-RULE is t."
397 (or type
398 (setq type 'string))
399 (let* ((len (composite-char-component-count char))
400 (i (1- len))
401 l)
402 (setq with-composition-rule (and with-composition-rule
403 (not (eq type 'string))
404 (composite-char-composition-rule-p char)))
405 (while (> i 0)
406 (setq l (cons (composite-char-component char i) l))
407 (if with-composition-rule
408 (let ((rule (- (composite-char-composition-rule char i) ?\xA0)))
409 (setq l (cons (cons (/ rule 9) (% rule 9)) l))))
410 (setq i (1- i)))
411 (setq l (cons (composite-char-component char 0) l))
412 (cond ((eq type 'string)
413 (apply 'concat-chars l))
414 ((eq type 'list)
415 l)
416 (t ; i.e. TYPE is vector
417 (vconcat l)))))
418
419;;; mule-util.el ends here
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
new file mode 100644
index 00000000000..82c2679455a
--- /dev/null
+++ b/lisp/international/mule.el
@@ -0,0 +1,529 @@
1;;; mule.el --- basic commands for mulitilingual environment
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, character set, coding system
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26(defconst mule-version "3.0 (MOMIJINOGA)" "\
27Version number and name of this version of MULE (multilingual environment).")
28
29(defconst mule-version-date "1998.1.1" "\
30Distribution date of this version of MULE (multilingual environment).")
31
32(defun load-with-code-conversion (fullname file &optional noerror nomessage)
33 "Execute a file of Lisp code named FILE whose absolute path is FULLNAME.
34The FILE is decoded before evaluation if necessary.
35If optional second arg NOERROR is non-nil,
36 report no error if FILE doesn't exist.
37Print messages at start and end of loading unless
38 optional third arg NOMESSAGE is non-nil.
39Return t if file exists."
40 (if (null (file-readable-p fullname))
41 (and (null noerror)
42 (signal 'file-error (list "Cannot open load file" file)))
43 ;; Read file with code conversion, and then eval.
44 (let* ((buffer
45 ;; To avoid any autoloading, set default-major-mode to
46 ;; fundamental-mode.
47 (let ((default-major-mode 'fundamental-mode))
48 ;; We can't use `generate-new-buffer' because files.el
49 ;; is not yet loaded.
50 (get-buffer-create (generate-new-buffer-name " *load*"))))
51 (load-in-progress t))
52 (or nomessage (message "Loading %s..." file))
53 (unwind-protect
54 (progn
55 (save-excursion
56 (set-buffer buffer)
57 (insert-file-contents fullname)
58 ;; We must set `buffer-file-name' for `eval-buffer' and
59 ;; `load-history'.
60 (setq buffer-file-name file)
61 ;; Make `kill-buffer' quiet.
62 (set-buffer-modified-p nil))
63 ;; Eval in the original buffer.
64 (eval-buffer buffer))
65 (kill-buffer buffer))
66 (let ((hook (assoc file after-load-alist)))
67 (if hook
68 (mapcar (function eval) (cdr hook))))
69 (or nomessage noninteractive
70 (message "Loading %s...done" file))
71 t)))
72
73;; API (Application Program Interface) for charsets.
74
75;; Return t if OBJ is a quoted symbol.
76(defsubst quoted-symbol-p (obj)
77 (and (listp obj) (eq (car obj) 'quote)))
78
79(defsubst charsetp (object)
80 "T is OBJECT is a charset."
81 (and (symbolp object) (vectorp (get object 'charset))))
82
83(defsubst charset-info (charset)
84 "Return a vector of information of CHARSET.
85The elements of the vector are:
86 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
87 LEADING-CODE-BASE, LEADING-CODE-EXT,
88 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
89 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
90 PLIST,
91where
92CHARSET-ID (integer) is the identification number of the charset.
93DIMENSION (integer) is the number of bytes to represent a character of
94the charset: 1 or 2.
95CHARS (integer) is the number of characters in a dimension: 94 or 96.
96BYTE (integer) is the length of multi-byte form of a character in
97 the charset: one of 1, 2, 3, and 4.
98WIDTH (integer) is the number of columns a character in the charset
99 occupies on the screen: one of 0, 1, and 2.
100DIRECTION (integer) is the rendering direction of characters in the
101 charset when rendering. If 0, render from right to left, else
102 render from left to right.
103LEADING-CODE-BASE (integer) is the base leading-code for the
104 charset.
105LEADING-CODE-EXT (integer) is the extended leading-code for the
106 charset. All charsets of less than 0xA0 has the value 0.
107ISO-FINAL-CHAR (character) is the final character of the
108 corresponding ISO 2022 charset.
109ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
110 while encoding to variants of ISO 2022 coding system, one of the
111 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
112REVERSE-CHARSET (integer) is the charset which differs only in
113 LEFT-TO-RIGHT value from the charset. If there's no such a
114 charset, the value is -1.
115SHORT-NAME (string) is the short name to refer to the charset.
116LONG-NAME (string) is the long name to refer to the charset
117DESCRIPTION (string) is the description string of the charset.
118PLIST (property list) may contain any type of information a user
119 want to put and get by functions `put-charset-property' and
120 `get-charset-property' respectively."
121 (get charset 'charset))
122
123(defmacro charset-id (charset)
124 "Return charset identification number of CHARSET."
125 (if (and (listp charset) (eq (car charset) 'quote))
126 (aref (charset-info (nth 1 charset)) 0)
127 `(aref (charset-info ,charset) 0)))
128
129(defmacro charset-bytes (charset)
130 (if (quoted-symbol-p charset)
131 (aref (charset-info (nth 1 charset)) 1)
132 `(aref (charset-info ,charset) 1)))
133
134(defmacro charset-dimension (charset)
135 (if (quoted-symbol-p charset)
136 (aref (charset-info (nth 1 charset)) 2)
137 `(aref (charset-info ,charset) 2)))
138
139(defmacro charset-chars (charset)
140 (if (quoted-symbol-p charset)
141 (aref (charset-info (nth 1 charset)) 3)
142 `(aref (charset-info ,charset) 3)))
143
144(defmacro charset-width (charset)
145 (if (quoted-symbol-p charset)
146 (aref (charset-info (nth 1 charset)) 4)
147 `(aref (charset-info ,charset) 4)))
148
149(defmacro charset-direction (charset)
150 (if (quoted-symbol-p charset)
151 (aref (charset-info (nth 1 charset)) 5)
152 `(aref (charset-info ,charset) 5)))
153
154(defmacro charset-iso-final-char (charset)
155 (if (quoted-symbol-p charset)
156 (aref (charset-info (nth 1 charset)) 8)
157 `(aref (charset-info ,charset) 8)))
158
159(defmacro charset-iso-graphic-plane (charset)
160 (if (quoted-symbol-p charset)
161 (aref (charset-info (nth 1 charset)) 9)
162 `(aref (charset-info ,charset) 9)))
163
164(defmacro charset-reverse-charset (charset)
165 (if (quoted-symbol-p charset)
166 (aref (charset-info (nth 1 charset)) 10)
167 `(aref (charset-info ,charset) 10)))
168
169(defmacro charset-short-name (charset)
170 (if (quoted-symbol-p charset)
171 (aref (charset-info (nth 1 charset)) 11)
172 `(aref (charset-info ,charset) 11)))
173
174(defmacro charset-long-name (charset)
175 (if (quoted-symbol-p charset)
176 (aref (charset-info (nth 1 charset)) 12)
177 `(aref (charset-info ,charset) 12)))
178
179(defmacro charset-description (charset)
180 (if (quoted-symbol-p charset)
181 (aref (charset-info (nth 1 charset)) 13)
182 `(aref (charset-info ,charset) 13)))
183
184(defmacro charset-plist (charset)
185 (if (quoted-symbol-p charset)
186 (aref (charset-info (nth 1 charset)) 14)
187 `(aref (charset-info ,charset) 14)))
188
189(defun set-charset-plist (charset plist)
190 (aset (charset-info charset) 14 plist))
191
192(defmacro make-char (charset &optional c1 c2)
193 (if (quoted-symbol-p charset)
194 `(make-char-internal ,(charset-id (nth 1 charset)) ,c1 ,c2)
195 `(make-char-internal (charset-id ,charset) ,c1 ,c2)))
196
197;; Coding-system staffs
198
199;; Coding-system object is a symbol that has the property
200;; `coding-system' and `eol-type'.
201;;
202;; The value of the property `coding-system' is a coding-vector of the
203;; format: [TYPE MNEMONIC DOCSTRING NOT-USED-NOW FLAGS].
204;; See comments in src/coding.c for more detail. The property value
205;; may be another coding-system, in which case, the coding-vector
206;; should be taken from that coding-system.
207;;
208;; The value of the property `eol-type' is integer 0..2 or a vector of
209;; length 3. The integer value 0, 1, and 2 indicate the format of
210;; end-of-line LF, CRLF, and CR respectively. The vector value
211;; indicates that the format of end-of-line should be detected
212;; automatically. Nth element of the vector is the subsidiary
213;; coding-system whose `eol-type' property is integer value.
214;;
215;; Coding-system may also have properties `post-read-conversion' and
216;; `pre-write-conversion and the values are functions.
217;;
218;; The function in `post-read-conversion' is called after some text is
219;; inserted and decoded along the coding-system and before any
220;; functions in `after-insert-functions' are called. The arguments to
221;; this function is the same as those of a function in
222;; `after-insert-functions', i.e. LENGTH of a text while putting point
223;; at the head of the text to be decoded
224;;
225;; The function in `pre-write-conversion' is called after all
226;; functions in `write-region-annotate-functions' and
227;; `buffer-file-format' are called, and before the text is encoded by
228;; the coding-system. The arguments to this function is the same as
229;; those of a function in `write-region-annotate-functions', i.e. FROM
230;; and TO specifying region of a text.
231
232(defsubst coding-vector-type (vec) (aref vec 0))
233(defsubst coding-vector-mnemonic (vec) (aref vec 1))
234(defsubst coding-vector-docstring (vec) (aref vec 2))
235(defsubst coding-vector-flags (vec) (aref vec 4))
236
237;; Return type of CODING-SYSTEM.
238(defun coding-system-type (coding-system)
239 (let ((vec (coding-system-vector coding-system)))
240 (if vec (coding-vector-type vec))))
241
242;; Return mnemonic character of CODING-SYSTEM.
243(defun coding-system-mnemonic (coding-system)
244 (let ((vec (coding-system-vector coding-system)))
245 (if vec (coding-vector-mnemonic vec)
246 ?-)))
247
248;; Return docstring of CODING-SYSTEM.
249(defun coding-system-docstring (coding-system)
250 (let ((vec (coding-system-vector coding-system)))
251 (if vec (coding-vector-docstring vec))))
252
253;; Return flags of CODING-SYSTEM.
254(defun coding-system-flags (coding-system)
255 (let ((vec (coding-system-vector coding-system)))
256 (if vec (coding-vector-flags vec))))
257
258;; Return eol-type of CODING-SYSTEM.
259(defun coding-system-eoltype (coding-system)
260 (and coding-system
261 (or (get coding-system 'eol-type)
262 (coding-system-eoltype (get coding-system 'coding-system)))))
263
264;; Return mnemonic character of eol-type of CODING-SYSTEM.
265(defun coding-system-eoltype-mnemonic (coding-system)
266 (let ((eol-type (coding-system-eoltype coding-system)))
267 (cond ((vectorp eol-type) eol-mnemonic-undecided)
268 ((eq eol-type 0) eol-mnemonic-unix)
269 ((eq eol-type 1) eol-mnemonic-unix)
270 ((eq eol-type 2) eol-mnemonic-unix)
271 (t ?-))))
272
273;; Return function for post-read-conversion of CODING-SYSTEM.
274(defun coding-system-post-read-conversion (coding-system)
275 (and coding-system
276 (symbolp coding-system)
277 (or (get coding-system 'post-read-conversion)
278 (coding-system-post-read-conversion
279 (get coding-system 'coding-system)))))
280
281;; Return function for pre-write-conversion of CODING-SYSTEM.
282(defun coding-system-pre-write-conversion (coding-system)
283 (and coding-system
284 (symbolp coding-system)
285 (or (get coding-system 'pre-write-conversion)
286 (coding-system-pre-write-conversion
287 (get coding-system 'coding-system)))))
288
289(defun make-coding-system (coding-system type mnemonic docstring
290 &optional flags)
291 "Define a new CODING-SYSTEM (symbol).
292Remaining arguments are TYPE, MNEMONIC, DOCSTRING, and FLAGS (optional).
293TYPE is an integer value indicating the type of coding-system as follows:
294 0: Emacs internal format,
295 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
296 2: ISO-2022 including many variants,
297 3: Big5 used mainly on Chinese PC,
298 4: private, CCL programs provide encoding/decoding algorithm.
299MNEMONIC is a character to be displayed on mode line for the coding-system.
300DOCSTRING is a documentation string for the coding-system.
301FLAGS specifies more precise information of each TYPE.
302 If TYPE is 2 (ISO-2022), FLAGS should be a list of:
303 CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
304 ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
305 USE-ROMAN, USE-OLDJIS, NO-ISO6429.
306 CHARSETn are character sets initially designated to Gn graphic registers.
307 If CHARSETn is nil, Gn is never used.
308 If CHARSETn is t, Gn can be used but nothing designated initially.
309 If CHARSETn is a list of character sets, those character sets are
310 designated to Gn on output, but nothing designated to Gn initially.
311 SHORT-FORM non-nil means use short designation sequence on output.
312 ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
313 ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
314 SPACE on output.
315 SEVEN non-nil means use 7-bit code only on output.
316 LOCKING-SHIFT non-nil means use locking-shift.
317 SINGLE-SHIFT non-nil means use single-shift.
318 USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
319 USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
320 NO-ISO6429 non-nil means not use ISO6429's direction specification.
321 If TYPE is 4 (private), FLAGS should be a cons of CCL programs,
322 for encoding and decoding. See the documentation of CCL for more detail."
323
324 ;; At first, set a value of `coding-system' property.
325 (let ((coding-vector (make-vector 5 nil)))
326 (aset coding-vector 0 type)
327 (aset coding-vector 1
328 ;; MNEMONIC must be a printable character.
329 (if (and (> mnemonic ? ) (< mnemonic 127)) mnemonic ? ))
330 (aset coding-vector 2 (if (stringp docstring) docstring ""))
331 (aset coding-vector 3 nil) ; obsolete element
332 (cond ((eq type 2) ; ISO2022
333 (let ((i 0)
334 (vec (make-vector 32 nil)))
335 (while (< i 4)
336 (let ((charset (car flags)))
337 (if (and charset (not (eq charset t)))
338 (if (symbolp charset)
339 (setq charset (charset-id charset))
340 (let (elt l)
341 (while charset
342 (setq elt (car charset))
343 (if (and elt (not (eq elt t)))
344 (setq elt (charset-id elt)))
345 (setq l (cons elt l))
346 (setq charset (cdr charset)))
347 (setq charset (nreverse l)))))
348 (aset vec i charset))
349 (setq flags (cdr flags) i (1+ i)))
350 (while (and (< i 32) flags)
351 (aset vec i (car flags))
352 (setq flags (cdr flags) i (1+ i)))
353 (aset coding-vector 4 vec)))
354 ((eq type 4) ; private
355 (if (and (consp flags)
356 (vectorp (car flags))
357 (vectorp (cdr flags)))
358 (aset coding-vector 4 flags)
359 (error "Invalid FLAGS argument for TYPE 4 (CCL)")))
360 (t (aset coding-vector 4 flags)))
361 (put coding-system 'coding-system coding-vector))
362
363 ;; Next, set a value of `eol-type' property. The value is a vector
364 ;; of subsidiary coding-systems, each corresponds to a coding-system
365 ;; for the detected end-of-line format.
366 (let ((codings (vector (intern (format "%s-unix" coding-system))
367 (intern (format "%s-dos" coding-system))
368 (intern (format "%s-mac" coding-system))))
369 (i 0))
370 (while (< i 3)
371 (put (aref codings i) 'coding-system coding-system)
372 (put (aref codings i) 'eol-type i)
373 (setq i (1+ i)))
374 (put coding-system 'eol-type codings))
375 )
376
377(defun define-coding-system-alias (symbol new-symbol)
378 "Define NEW-SYMBOL as the same coding system as SYMBOL."
379 (check-coding-system symbol)
380 (put new-symbol 'coding-system (get symbol 'coding-system))
381 (let ((eol-type (get symbol 'eol-type)))
382 (if (vectorp eol-type)
383 (let* ((name (symbol-name new-symbol))
384 (new (vector (intern (concat name "-unix"))
385 (intern (concat name "-dos"))
386 (intern (concat name "-mac"))))
387 (i 0))
388 (while (< i 3)
389 (define-coding-system-alias (aref eol-type i) (aref new i))
390 (setq i (1+ i)))
391 (setq eol-type new)))
392 (put new-symbol 'eol-type eol-type)))
393
394(defvar buffer-file-coding-system nil
395 "Coding-system of the file which the current-buffer is visiting.")
396(make-variable-buffer-local 'buffer-file-coding-system)
397;; This value should not be reset by changing major mode.
398(put 'buffer-file-coding-system 'permanent-local t)
399
400(defun set-buffer-file-coding-system (coding-system &optional force)
401 "Set buffer-file-coding-system of the current buffer to CODING-SYSTEM.
402If eol-type of the current buffer-file-coding-system is an integer value N, and
403 eol-type of CODING-SYSTEM is a vector, the Nth element of the vector is set
404 instead of CODING-SYSTEM itself.
405Optional prefix argument FORCE non-nil means CODING-SYSTEM is set
406 regardless of eol-type of the current buffer-file-coding-system."
407 (interactive "zBuffer-file-coding-system: \nP")
408 (check-coding-system coding-system)
409 (if (null force)
410 (let ((x (coding-system-eoltype buffer-file-coding-system))
411 (y (coding-system-eoltype coding-system)))
412 (if (and (numberp x) (>= x 0) (<= x 2) (vectorp y))
413 (setq coding-system (aref y x)))))
414 (setq buffer-file-coding-system coding-system)
415 (set-buffer-modified-p t)
416 (force-mode-line-update))
417
418(defun set-current-process-coding-system (input output)
419 (interactive
420 "zCoding-system for process input: \nzCoding-system for process output: ")
421 (let ((proc (get-buffer-process (current-buffer))))
422 (if (null proc)
423 (error "no process")
424 (check-coding-system input)
425 (check-coding-system output)
426 (set-process-coding-system proc input output)))
427 (force-mode-line-update))
428
429(defvar default-process-coding-system (cons nil nil)
430 "Cons of default values used to read from and write to process.")
431
432(defun set-coding-priority (arg)
433 "Set priority of coding-category according to LIST.
434LIST is a list of coding-categories ordered by priority."
435 (let (l)
436 ;; Put coding-categories listed in ARG to L while checking the
437 ;; validity. We assume that `coding-category-list' contains whole
438 ;; coding-categories.
439 (while arg
440 (if (null (memq (car arg) coding-category-list))
441 (error "Invalid element in argument: %s" (car arg)))
442 (setq l (cons (car arg) l))
443 (setq arg (cdr arg)))
444 ;; Put coding-category not listed in ARG to L.
445 (while coding-category-list
446 (if (null (memq (car coding-category-list) l))
447 (setq l (cons (car coding-category-list) l)))
448 (setq coding-category-list (cdr coding-category-list)))
449 ;; Update `coding-category-list' and return it.
450 (setq coding-category-list (nreverse l))))
451
452;;; FILE I/O
453
454;; Set buffer-file-coding-system of the current buffer after some text
455;; is inserted.
456(defun after-insert-file-set-buffer-file-coding-system (inserted)
457 (if last-coding-system-used
458 (let ((coding-system
459 (find-new-buffer-file-coding-system last-coding-system-used))
460 (modified-p (buffer-modified-p)))
461 (if coding-system
462 (set-buffer-file-coding-system coding-system))
463 (set-buffer-modified-p modified-p)))
464 nil)
465
466(setq after-insert-file-functions
467 (cons 'after-insert-file-set-buffer-file-coding-system
468 after-insert-file-functions))
469
470;; The coding-vector and eol-type of coding-system returned is decided
471;; independently in the following order.
472;; 1. That of buffer-file-coding-system locally bound.
473;; 2. That of CODING.
474
475(defun find-new-buffer-file-coding-system (coding)
476 "Return a coding system for a buffer when a file of CODING is inserted.
477The returned value is set to `buffer-file-coding-system' of the
478current buffer. Return nil if there's no need of setting new
479buffer-file-coding-system."
480 (let (local-coding local-eol
481 found-eol
482 new-coding new-eol)
483 (if (null coding)
484 ;; Nothing found about coding.
485 nil
486
487 ;; Get information of the current local value of
488 ;; `buffer-file-coding-system' in LOCAL-EOL and LOCAL-CODING.
489 (if (local-variable-p 'buffer-file-coding-system)
490 ;; Something already set locally.
491 (progn
492 (setq local-eol (coding-system-eoltype buffer-file-coding-system))
493 (if (null (numberp local-eol))
494 ;; But eol-type is not yet set.
495 (setq local-eol nil))
496 (if (null (eq (coding-system-type buffer-file-coding-system) t))
497 ;; This is not automatic-conversion.
498 (progn
499 (setq local-coding buffer-file-coding-system)
500 (while (symbolp (get local-coding 'coding-system))
501 (setq local-coding (get local-coding 'coding-system))))
502 )))
503
504 (if (and local-eol local-coding)
505 ;; The current buffer has already set full coding-system, we
506 ;; had better not change it.
507 nil
508
509 (setq found-eol (coding-system-eoltype coding))
510 (if (null (numberp found-eol))
511 ;; But eol-type is not found.
512 (setq found-eol nil))
513 (if (eq (coding-system-type coding) t)
514 ;; This is automatic-conversion, which means nothing found
515 ;; except for eol-type.
516 (setq coding nil))
517
518 ;; The local setting takes precedence over the found one.
519 (setq new-coding (or local-coding coding))
520 (setq new-eol (or local-eol found-eol))
521 (if (and (numberp new-eol)
522 (vectorp (coding-system-eoltype new-coding)))
523 (setq new-coding
524 (aref (coding-system-eoltype new-coding) new-eol)))
525 new-coding))))
526
527(provide 'mule)
528
529;;; mule.el ends here
diff --git a/lisp/international/quail.el b/lisp/international/quail.el
new file mode 100644
index 00000000000..e44767a7e4d
--- /dev/null
+++ b/lisp/international/quail.el
@@ -0,0 +1,1522 @@
1;;; quail.el -- provides simple input method for multilingual text
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Author: Kenichi HANDA <handa@etl.go.jp>
7;; Naoto TAKAHASHI <ntakahas@etl.go.jp>
8;; Maintainer: Kenichi HANDA <handa@etl.go.jp>
9;; Keywords: mule, multilingual, input method
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to
25;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26
27;;; Commentary:
28
29;; In Quail minor mode, you can input multilingual text easily. By
30;; defining a translation table (named Quail map) which maps ASCII key
31;; string to multilingual character or string, you can input any text
32;; from ASCII keyboard.
33;;
34;; We use words "translation" and "conversion" differently. The
35;; former is done by Quail package itself, the latter is the further
36;; process of converting a translated text to some more desirable
37;; text. For instance, Quail package for Japanese (`quail-jp')
38;; translates Roman text (transliteration of Japanese in Latin
39;; alphabets) to Hiragana text, which is then converted to
40;; Kanji-and-Kana mixed text or Katakana text by commands specified in
41;; CONVERSION-KEYS argument of the Quail package.
42
43;;; Code:
44
45(require 'faces)
46
47;; Buffer local variables
48
49(defvar quail-current-package nil
50 "The current Quail package to input multilingual text in Quail minor mode.
51See the documentation of `quail-package-alist' for the format.")
52(make-variable-buffer-local 'quail-current-package)
53(put 'quail-current-package 'permanent-local t)
54
55;; Quail uses the following two buffers to assist users.
56;; A buffer to show available key sequence or translation list.
57(defvar quail-guidance-buf nil)
58;; A buffer to show completion list of the current key sequence.
59(defvar quail-completion-buf nil)
60
61(defvar quail-mode nil
62 "Non-nil if in Quail minor mode.")
63(make-variable-buffer-local 'quail-mode)
64(put 'quail-mode 'permanent-local t)
65
66(defvar quail-overlay nil
67 "Overlay which covers the current translation region of Quail.")
68(make-variable-buffer-local 'quail-overlay)
69
70(defvar quail-conv-overlay nil
71 "Overlay which covers the text to be converted in Quail mode.")
72(make-variable-buffer-local 'quail-conv-overlay)
73
74(defvar quail-current-key nil
75 "Current key for translation in Quail mode.")
76
77(defvar quail-current-str nil
78 "Currently selected translation of the current key.")
79
80(defvar quail-current-translations nil
81 "Cons of indices and vector of possible translations of the current key.")
82
83;; A flag to control conversion region. Normally nil, but if set to
84;; t, it means we must start the new conversion region if new key to
85;; be translated is input.
86(defvar quail-reset-conversion-region nil)
87
88;; Quail package handlers.
89
90(defvar quail-package-alist nil
91 "List of Quail packages.
92A Quail package is a list of these elements:
93 NAME, TITLE, QUAIL-MAP, GUIDANCE, DOCSTRING, TRANSLATION-KEYS,
94 FORGET-LAST-SELECTION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT,
95 DECODE-MAP, MAXIMUM-SHORTEST, OVERLAY-PLIST, UPDATE-TRANSLATION-FUNCTION,
96 CONVERSION-KEYS.
97
98QUAIL-MAP is a data structure to map key strings to translations. For
99the format, see the documentation of `quail-map-p'.
100
101DECODE-MAP is an alist of translations and corresponding keys.
102
103See the documentation of `quail-define-package' for the other elements.")
104
105;; Return various slots in the current quail-package.
106
107(defsubst quail-name ()
108 "Return the name of the current Quail package."
109 (nth 0 quail-current-package))
110(defsubst quail-title ()
111 "Return the title of the current Quail package."
112 (nth 1 quail-current-package))
113(defsubst quail-map ()
114 "Return the translation map of the current Quail package."
115 (nth 2 quail-current-package))
116(defsubst quail-guidance ()
117 "Return an object used for `guidance' feature of the current Quail package.
118See also the documentation of `quail-define-package'."
119 (nth 3 quail-current-package))
120(defsubst quail-docstring ()
121 "Return the documentation string of the current Quail package."
122 (nth 4 quail-current-package))
123(defsubst quail-translation-keymap ()
124 "Return translation keymap in the current Quail package.
125Translation keymap is a keymap used while translation region is active."
126 (nth 5 quail-current-package))
127(defsubst quail-forget-last-selection ()
128 "Return `forget-last-selection' flag of the current Quail package.
129See also the documentation of `quail-define-package'."
130 (nth 6 quail-current-package))
131(defsubst quail-deterministic ()
132 "Return `deterministic' flag of the current Quail package.
133See also the documentation of `quail-define-package'."
134 (nth 7 quail-current-package))
135(defsubst quail-kbd-translate ()
136 "Return `kbd-translate' flag of the current Quail package.
137See also the documentation of `quail-define-package'."
138 (nth 8 quail-current-package))
139(defsubst quail-show-layout ()
140 "Return `show-layout' flag of the current Quail package.
141See also the documentation of `quail-define-package'."
142 (nth 9 quail-current-package))
143(defsubst quail-decode-map ()
144 "Return decode map of the current Quail package.
145It is an alist of translations and corresponding keys."
146 (nth 10 quail-current-package))
147(defsubst quail-maximum-shortest ()
148 "Return `maximum-shortest' flag of the current Quail package.
149See also the documentation of `quail-define-package'."
150 (nth 11 quail-current-package))
151(defsubst quail-overlay-plist ()
152 "Return property list of an overly used in the current Quail package."
153 (nth 12 quail-current-package))
154(defsubst quail-update-translation-function ()
155 "Return a function for updating translation in the current Quail package."
156 (nth 13 quail-current-package))
157(defsubst quail-conversion-keymap ()
158 "Return conversion keymap in the current Quail package.
159Conversion keymap is a keymap used while conversion region is active
160 but translation region is not active."
161 (nth 14 quail-current-package))
162
163(defsubst quail-package (name)
164 "Return Quail package named NAME."
165 (assoc name quail-package-alist))
166
167(defun quail-add-package (package)
168 "Add Quail package PACKAGE to `quail-package-alist'."
169 (let ((pac (quail-package (car package))))
170 (if pac
171 (setcdr pac (cdr package))
172 (setq quail-package-alist (cons package quail-package-alist)))))
173
174(defun quail-select-package (name)
175 "Select Quail package named NAME as the current Quail package."
176 (let ((package (quail-package name)))
177 (if (null package)
178 (error "No Quail package `%s'" name))
179 (setq quail-current-package package)
180 (setq-default quail-current-package package)
181 name))
182
183;;;###autoload
184(defun quail-use-package (package-name &rest libraries)
185 "Start using Quail package PACKAGE-NAME.
186The remaining arguments are libraries to be loaded before using the package."
187 (while libraries
188 (if (not (load (car libraries) t))
189 (progn
190 (with-output-to-temp-buffer "*Help*"
191 (princ "Quail package \"")
192 (princ package-name)
193 (princ "\" can't be activated\n because library \"")
194 (princ (car libraries))
195 (princ "\" is not in `load-path'.
196
197The most common case is that you have not yet installed appropriate
198libraries in LEIM (Libraries of Emacs Input Method) which is
199distributed separately from Emacs.
200
201Installation of LEIM for Quail is very simple, just copy Quail
202packages (byte-compiled Emacs Lisp files) to somewhere in your
203`load-path'.
204
205LEIM is available from the same ftp directory as Emacs."))
206 (error ""))
207 (setq libraries (cdr libraries))))
208 (quail-select-package package-name)
209 (setq current-input-method-title (quail-title))
210 (quail-mode 1))
211
212(defun quail-inactivate ()
213 "Turn off Quail input method."
214 (interactive)
215 (throw 'quail-tag t))
216
217(or (assq 'quail-mode minor-mode-alist)
218 (setq minor-mode-alist
219 (cons '(quail-mode " Quail") minor-mode-alist)))
220
221(defvar quail-mode-map
222 (let ((map (make-keymap))
223 (i ? ))
224 (while (< i 127)
225 (define-key map (char-to-string i) 'quail-start-translation)
226 (setq i (1+ i)))
227 map)
228 "Keymap for Quail mode.")
229
230(or (assq 'quail-mode minor-mode-map-alist)
231 (setq minor-mode-map-alist
232 (cons (cons 'quail-mode quail-mode-map) minor-mode-map-alist)))
233
234(defvar quail-translation-keymap
235 (let ((map (make-keymap))
236 (i 0))
237 (while (< i ?\ )
238 (define-key map (char-to-string i) 'quail-execute-non-quail-command)
239 (setq i (1+ i)))
240 (while (< i 127)
241 (define-key map (char-to-string i) 'quail-self-insert-command)
242 (setq i (1+ i)))
243 (define-key map "\177" 'quail-delete-last-char)
244 (define-key map "\C-\\" 'quail-inactivate)
245 (define-key map "\C-f" 'quail-next-translation)
246 (define-key map "\C-b" 'quail-prev-translation)
247 (define-key map "\C-n" 'quail-next-translation-block)
248 (define-key map "\C-p" 'quail-prev-translation-block)
249 (define-key map "\C-i" 'quail-completion)
250 (define-key map "\C-@" 'quail-select-current)
251 (define-key map "\C-c" 'quail-abort-translation)
252 (define-key map "\C-h" 'quail-translation-help)
253 (define-key map [tab] 'quail-completion)
254 (define-key map [delete] 'quail-delete-last-char)
255 (define-key map [backspace] 'quail-delete-last-char)
256 ;; At last, define default key binding.
257 (append map '((t . quail-execute-non-quail-command))))
258 "Keymap used processing translation in Quail mode.
259This map is activated while translation region is active.")
260
261(defvar quail-conversion-keymap
262 (let ((map (make-keymap))
263 (i 0))
264 (while (< i ?\ )
265 (define-key map (char-to-string i) 'quail-execute-non-quail-command)
266 (setq i (1+ i)))
267 (while (< i 127)
268 (define-key map (char-to-string i)
269 'quail-start-translation-in-conversion-mode)
270 (setq i (1+ i)))
271 (define-key map "\C-b" 'quail-conversion-backward-char)
272 (define-key map "\C-f" 'quail-conversion-forward-char)
273 (define-key map "\C-a" 'quail-conversion-beginning-of-region)
274 (define-key map "\C-e" 'quail-conversion-end-of-region)
275 (define-key map "\C-d" 'quail-conversion-delete-char)
276 (define-key map "\C-h" 'quail-conversion-help)
277 (define-key map "\C-\\" 'quail-inactivate)
278 (define-key map "\177" 'quail-conversion-backward-delete-char)
279 (define-key map [delete] 'quail-conversion-backward-delete-char)
280 (define-key map [backspace] 'quail-conversion-backward-delete-char)
281 ;; At last, define default key binding.
282 (append map '((t . quail-execute-non-quail-command))))
283 "Keymap used for processing conversion in Quail mode.
284This map is activated while convesion region is active but translation
285region is not active.")
286
287(defun quail-define-package (name language title
288 &optional guidance docstring translation-keys
289 forget-last-selection deterministic
290 kbd-translate show-layout create-decode-map
291 maximum-shortest overlay-plist
292 update-translation-function
293 conversion-keys)
294 "Define NAME as a new Quail package for input LANGUAGE.
295TITLE is a string to be displayed at mode-line to indicate this package.
296Optional arguments are GUIDANCE, DOCSTRING, TRANLSATION-KEYS,
297 FORGET-LAST-SELECTION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT,
298 CREATE-DECODE-MAP, MAXIMUM-SHORTEST, OVERLAY-PLIST,
299 UPDATE-TRANSLATION-FUNCTION, and CONVERSION-KEYS.
300
301GUIDANCE specifies how a guidance string is shown in echo area.
302If it is t, list of all possible translations for the current key is shown
303 with the currently selected translation being highlighted.
304If it is an alist, the element has the form (CHAR . STRING). Each character
305 in the current key is searched in the list and the corresponding string is
306 shown.
307If it is nil, the current key is shown.
308
309DOCSTRING is the documentation string of this package.
310
311TRANSLATION-KEYS specifies additional key bindings used while translation
312region is active. It is an alist of single key character vs. corresponding
313command to be called.
314
315FORGET-LAST-SELECTION non-nil means a selected translation is not kept
316for the future to translate the same key. If this flag is nil, a
317translation selected for a key is remembered so that it can be the
318first candidate when the same key is entered later.
319
320DETERMINISTIC non-nil means the first candidate of translation is
321selected automatically without allowing users to select another
322translation for a key. In this case, unselected translations are of
323no use for an interactive use of Quail but can be used by some other
324programs. If this flag is non-nil, FORGET-LAST-SELECTION is also set
325to t.
326
327KBD-TRANSLATE non-nil means input characters are translated from a
328user's keyboard layout to the standard keyboard layout. See the
329documentation of `quail-keyboard-layout' and
330`quail-keyboard-layout-standard' for more detail.
331
332SHOW-LAYOUT non-nil means the `quail-help' command should show
333the user's keyboard layout visually with translated characters.
334If KBD-TRANSLATE is set, it is desirable to set also this flag unless
335this package defines no translations for single character keys.
336
337CREATE-DECODE-MAP non-nil means decode map is also created. A decode
338map is an alist of translations and corresponding original keys.
339Although this map is not used by Quail itself, it can be used by some
340other programs. For instance, Vietnamese supporting needs this map to
341convert Vietnamese text to VIQR format which uses only ASCII
342characters to represent Vietnamese characters.
343
344MAXIMUM-SHORTEST non-nil means break key sequence to get maximum
345length of the shortest sequence. When we don't have a translation of
346key \"..ABCD\" but have translations of \"..AB\" and \"CD..\", break
347the key at \"..AB\" and start translation of \"CD..\". Hangul
348packages, for instance, use this facility. If this flag is nil, we
349break the key just at \"..ABC\" and start translation of \"D..\".
350
351OVERLAY-PLIST if non-nil is a property list put on an overlay which
352covers Quail translation region.
353
354UPDATE-TRANSLATION-FUNCTION if non-nil is a function to call to update
355the current translation region accoding to a new translation data. By
356default, a tranlated text or a user's key sequence (if no transltion
357for it) is inserted.
358
359CONVERSION-KEYS specifies additional key bindings used while
360conversion region is active. It is an alist of single key character
361vs. corresponding command to be called."
362 (let (translation-keymap conversion-keymap)
363 (if deterministic (setq forget-last-selection t))
364 (if translation-keys
365 (progn
366 (setq translation-keymap (copy-keymap quail-translation-keymap))
367 (while translation-keys
368 (define-key translation-keymap
369 (car (car translation-keys)) (cdr (car translation-keys)))
370 (setq translation-keys (cdr translation-keys))))
371 (setq translation-keymap quail-translation-keymap))
372 (if conversion-keys
373 (progn
374 (setq conversion-keymap (copy-keymap quail-conversion-keymap))
375 (while conversion-keys
376 (define-key conversion-keymap
377 (car (car conversion-keys)) (cdr (car conversion-keys)))
378 (setq conversion-keys (cdr conversion-keys)))))
379 (quail-add-package
380 (list name title (list nil) guidance (or docstring "")
381 translation-keymap
382 forget-last-selection deterministic kbd-translate show-layout
383 (if create-decode-map (list 'decode-map) nil)
384 maximum-shortest overlay-plist update-translation-function
385 conversion-keymap)))
386 (register-input-method language (list name 'quail-use-package))
387 (quail-select-package name))
388
389;; Quail minor mode handlers.
390
391;; Setup overlays used in Quail mode.
392(defun quail-setup-overlays ()
393 (let ((pos (point)))
394 (if (overlayp quail-overlay)
395 (move-overlay quail-overlay pos pos)
396 (setq quail-overlay (make-overlay pos pos nil nil t))
397 (overlay-put quail-overlay 'face 'underline)
398 (let ((l (quail-overlay-plist)))
399 (while l
400 (overlay-put quail-overlay (car l) (car (cdr l)))
401 (setq l (cdr (cdr l))))))
402 (if (overlayp quail-conv-overlay)
403 (move-overlay quail-conv-overlay pos pos)
404 (setq quail-conv-overlay (make-overlay pos pos nil nil t))
405 (overlay-put quail-conv-overlay 'face 'underline)
406 ;;(overlay-put quail-conv-overlay 'modification-hooks
407 ;;'(quail-conv-overlay-modification-hook))
408 )))
409
410;; Delete overlays used in Quail mode.
411(defun quail-delete-overlays ()
412 (if (overlayp quail-overlay)
413 (delete-overlay quail-overlay))
414 (if (overlayp quail-conv-overlay)
415 (delete-overlay quail-conv-overlay)))
416
417;; While translating and converting, we enter the recursive edit and
418;; exit it frequently, which results in frequent and annoying change
419;; of and annoying in mode line. To avoid it, we use a modified
420;; mode-line-format.
421(defvar quail-mode-line-format nil)
422
423;; Return a modified mode-line-format which doesn't show the recursive
424;; editing level. But, we only pay attention to the top level
425;; elements of the current mode-line-format.
426(defun quail-generate-mode-line-format ()
427 (if (listp mode-line-format)
428 (let ((new (copy-sequence mode-line-format))
429 l elt idx)
430 (setq l new)
431 (while l
432 (setq elt (car l))
433 (if (and (stringp elt)
434 (or (setq idx (string-match "%\\[" elt))
435 (setq idx (string-match "%\\]" elt))))
436 (setcar l (concat (substring elt 0 idx)
437 (substring elt (+ idx 2)))))
438 (setq l (cdr l)))
439 new)
440 mode-line-format))
441
442(defun quail-mode (&optional arg)
443 "Toggle Quail minor mode.
444With arg, turn Quail mode on if and only if arg is positive.
445Try \\[describe-bindings] in Quail mode to see the available key binding.
446The command \\[describe-input-method] describes the current Quail package."
447 (interactive "P")
448 (setq quail-mode
449 (if (null arg) (null quail-mode)
450 (> (prefix-numeric-value arg) 0)))
451 (if (null quail-mode)
452 ;; Let's turn off Quail mode.
453 (progn
454 (quail-hide-guidance-buf)
455 (quail-delete-overlays)
456 (setq describe-current-input-method-function nil)
457 (setq current-input-method nil)
458 (run-hooks 'quail-mode-exit-hook)
459 (run-hooks 'input-method-inactivate-hook))
460 ;; Let's turn on Quail mode.
461 (if (null quail-current-package)
462 ;; Quail package is not yet selected. Select one now.
463 (let (name)
464 (if quail-package-alist
465 (setq name (car (car quail-package-alist)))
466 (setq quail-mode nil)
467 (error "No Quail package loaded"))
468 (quail-select-package name)))
469 (setq inactivate-current-input-method-function 'quail-mode)
470 (setq describe-current-input-method-function 'quail-help)
471 (setq quail-mode-line-format (quail-generate-mode-line-format))
472 (quail-delete-overlays)
473 (quail-show-guidance-buf)
474 ;; If we are in minibuffer, turn off Quail mode before exiting.
475 (if (eq (selected-window) (minibuffer-window))
476 (add-hook 'minibuffer-exit-hook 'quail-exit-from-minibuffer))
477 (make-local-hook 'post-command-hook)
478 (run-hooks 'quail-mode-hook)
479 (run-hooks 'input-method-activate-hook))
480 (force-mode-line-update))
481
482(defun quail-exit-from-minibuffer ()
483 (if quail-mode (quail-mode -1))
484 (if (<= (minibuffer-depth) 1)
485 (remove-hook 'minibuffer-exit-hook 'quail-exit-from-minibuffer)))
486
487(defvar quail-saved-overriding-local-map nil)
488(defvar quail-saved-current-buffer nil)
489
490;; Toggle `quail-mode'. This function is added to `post-command-hook'
491;; in Quail mode, to turn Quail mode temporarily off, or back on
492;; after one non-Quail command.
493(defun quail-toggle-mode-temporarily ()
494 (if quail-mode
495 ;; We are going to handle following events out of Quail mode.
496 (setq quail-mode nil
497 quail-saved-overriding-local-map overriding-local-map
498 quail-saved-current-buffer (current-buffer)
499 overriding-local-map nil)
500 ;; We have just executed one non-Quail command. We don't need
501 ;; this hook any more.
502 (remove-hook 'post-command-hook 'quail-toggle-mode-temporarily t)
503 ;; If the command changed the current buffer, we should not go
504 ;; back to Quail mode.
505 (if (not (eq (current-buffer) quail-saved-current-buffer))
506 (throw 'quail-tag nil)
507 ;; Let's go back to Quail mode.
508 (setq quail-mode t)
509 (setq overriding-local-map quail-saved-overriding-local-map)
510 ;; If whole text in conversion area was deleted, exit from the
511 ;; recursive edit.
512 (let ((start (overlay-start quail-conv-overlay)))
513 (if (and start (= start (overlay-end quail-conv-overlay)))
514 (throw 'quail-tag nil)))
515 )))
516
517(defun quail-execute-non-quail-command ()
518 "Execute one non-Quail command in Quail mode.
519The current translation and conversion are terminated."
520 (interactive)
521 (setq unread-command-events (cons last-input-event unread-command-events))
522 (quail-delete-overlays)
523 (if (buffer-live-p quail-guidance-buf)
524 (save-excursion
525 (set-buffer quail-guidance-buf)
526 (erase-buffer)))
527 (throw 'quail-tag nil))
528
529;; Keyboard layout translation handlers.
530
531;; Some Quail packages provide localized keyboard simulation which
532;; requires a particular keyboard layout. In this case, what we need
533;; is locations of keys the user entered, not character codes
534;; generated by those keys. However, for the moment, there's no
535;; common way to get such information. So, we ask a user to give
536;; information of his own keyboard layout, then translate it to the
537;; standard layout which we defined so that all Quail packages depend
538;; just on it.
539
540(defconst quail-keyboard-layout-standard
541 "\
542 1!2@3#4$5%6^7&8*9(0)-_=+`~ \
543 qQwWeErRtTyYuUiIoOpP[{]} \
544 aAsSdDfFgGhHjJkKlL;:'\"\\| \
545 zZxXcCvVbBnNmM,<.>/? "
546 "Standard keyboard layout of printable characters Quail assumes.
547See the documentation of `quail-keyboard-layout' for this format.
548This layout is almost the same as that of VT100,
549 but the location of key \\ (backslash) is just right of key ' (single-quote),
550 not right of RETURN key.")
551
552(defvar quail-keyboard-layout quail-keyboard-layout-standard
553 "A string which represents physical key layout of a particular keyboard.
554We assume there are four rows and each row has 15 keys (columns),
555 the first column of the first row is left of key '1',
556 the first column of the second row is left of key `q',
557 the first column of the third row is left of key `a',
558 the first column of the fourth row is left of key `z'.
559Nth (N is even) and (N+1)th characters in the string are non-shifted
560 and shifted characters respectively at the same location.
561The location of Nth character is row (N / 30) and column ((N mod 30) / 2).")
562
563(defconst quail-keyboard-layout-len 120)
564
565;; Here we provide several examples of famous keyboard layouts.
566
567(defvar quail-keyboard-layout-alist
568 (list
569 '("sun-type3" . "\
570 1!2@3#4$5%6^7&8*9(0)-_=+\\|`~\
571 qQwWeErRtTyYuUiIoOpP[{]} \
572 aAsSdDfFgGhHjJkKlL;:'\" \
573 zZxXcCvVbBnNmM,<.>/? ")
574 (cons "standard" quail-keyboard-layout-standard))
575 "Alist of keyboard names and corresponding layout strings.
576See the documentation of `quail-keyboard-layout' for the format of
577 the layout string.")
578
579(defun quail-set-keyboard-layout (kbd-type)
580 "Set the current keyboard layout to the same as keyboard KBD-TYPE.
581
582Since some Quail packages depends on a physical layout of keys (not
583characters generated by them), those are created by assuming the
584standard layout defined in `quail-keyboard-layout-standard'. This
585function tells Quail system the layout of your keyboard so that what
586you type is correctly handled."
587 (interactive
588 (let* ((completing-ignore-case t)
589 (type (completing-read "Keyboard type: "
590 quail-keyboard-layout-alist)))
591 (list type)))
592 (let ((layout (assoc kbd-type quail-keyboard-layout-alist)))
593 (if (null layout)
594 ;; Here, we had better ask a user to define his own keyboard
595 ;; layout interactively.
596 (error "Unknown keyboard type `%s'" kbd-type))
597 (setq quail-keyboard-layout (cdr layout))))
598
599(defun quail-keyboard-translate (ch)
600 "Translate CHAR according to `quail-keyboard-layout' and return the result."
601 (if (eq quail-keyboard-layout quail-keyboard-layout-standard)
602 ch
603 (let ((i 0))
604 (while (and (< i quail-keyboard-layout-len)
605 (/= ch (aref quail-keyboard-layout i)))
606 (setq i (1+ i)))
607 (if (= i quail-keyboard-layout-len)
608 (error "Character `%c' not found in your keyboard layout" ch))
609 (aref quail-keyboard-layout-standard i))))
610
611;; Quail map
612
613(defsubst quail-map-p (object)
614 "Return t if OBJECT is a Quail map.
615
616A Quail map holds information how a particular key should be translated.
617Its format is (TRANSLATION . ALIST).
618TRANSLATION is either a character, or a cons (INDEX . VECTOR).
619In the latter case, each element of VECTOR is a candidate for the translation,
620and INDEX points the currently selected translation.
621
622ALIST is normally a list of elements that look like (CHAR . DEFN),
623where DEFN is another Quail map for a longer key (CHAR added to the
624current key). It may also be a symbol of a function which returns an
625alist of the above format.
626
627Just after a Quail package is read, TRANSLATION may be a string or a
628vector. Then each element of the string or vector is a candidate for
629the translation. These objects are transformed to cons cells in the
630format \(INDEX . VECTOR), as described above."
631 (and (consp object)
632 (let ((translation (car object)))
633 (or (integerp translation) (consp translation) (null translation)
634 (vectorp translation) (stringp translation)
635 (symbolp translation)))
636 (let ((alist (cdr object)))
637 (or (listp alist) (symbolp alist)))))
638
639(defmacro quail-define-rules (&rest rules)
640 "Define translation rules of the current Quail package.
641Each argument is a list of KEY and TRANSLATION.
642KEY is a string meaning a sequence of keystrokes to be translated.
643TRANSLATION is a character, a string, a vector, a Quail map, or a function.
644It it is a character, it is the sole translation of KEY.
645If it is a string, each character is a candidate for the translation.
646If it is a vector, each element (string or character) is a candidate
647 for the translation.
648In these cases, a key specific Quail map is generated and assigned to KEY.
649
650If TRANSLATION is a Quail map or a function symbol which returns a Quail map,
651 it is used to handle KEY."
652 `(quail-install-map
653 ',(let ((l rules)
654 (map (list nil)))
655 (while l
656 (quail-defrule-internal (car (car l)) (car (cdr (car l))) map)
657 (setq l (cdr l)))
658 map)))
659
660(defun quail-install-map (map)
661 "Install the Quail map MAP in the current Quail package.
662The installed map can be referred by the function `quail-map'."
663 (if (null quail-current-package)
664 (error "No current Quail package"))
665 (if (null (quail-map-p map))
666 (error "Invalid Quail map `%s'" map))
667 (setcar (cdr (cdr quail-current-package)) map))
668
669(defun quail-defrule (key translation &optional name)
670 "Add one translation rule, KEY to TRANSLATION, in the current Quail package.
671KEY is a string meaning a sequence of keystrokes to be translated.
672TRANSLATION is a character, a string, a vector, a Quail map, or a function.
673It it is a character, it is the sole translation of KEY.
674If it is a string, each character is a candidate for the translation.
675If it is a vector, each element (string or character) is a candidate
676 for the translation.
677In these cases, a key specific Quail map is generated and assigned to KEY.
678
679If TRANSLATION is a Quail map or a function symbol which returns a Quail map,
680 it is used to handle KEY.
681Optional argument NAME, if specified, says which Quail package
682to define this translation rule in. The default is to define it in the
683current Quail package."
684 (if name
685 (let ((package (quail-package name)))
686 (if (null package)
687 (error "No Quail package `%s'" name))
688 (setq quail-current-package package)))
689 (quail-defrule-internal key translation (quail-map)))
690
691;; Define KEY as TRANS in a Quail map MAP.
692(defun quail-defrule-internal (key trans map)
693 (if (null (stringp key))
694 "Invalid Quail key `%s'" key)
695 (if (not (or (numberp trans) (stringp trans) (vectorp trans)
696 (symbolp trans)
697 (quail-map-p trans)))
698 (error "Invalid Quail translation `%s'" trans))
699 (if (null (quail-map-p map))
700 (error "Invalid Quail map `%s'" map))
701 (let ((len (length key))
702 (idx 0)
703 ch entry)
704 (while (< idx len)
705 (if (null (consp map))
706 ;; We come here, for example, when we try to define a rule
707 ;; for "ABC" but a rule for "AB" is already defined as a
708 ;; symbol.
709 (error "Quail key %s is too long" key))
710 (setq ch (aref key idx)
711 entry (assq ch (cdr map)))
712 (if (null entry)
713 (progn
714 (setq entry (cons ch (list nil)))
715 (setcdr map (cons entry (cdr map)))))
716 (setq map (cdr entry))
717 (setq idx (1+ idx)))
718 (if (symbolp trans)
719 (if (cdr map)
720 ;; We come here, for example, when we try to define a rule
721 ;; for "AB" as a symbol but a rule for "ABC" is already
722 ;; defined.
723 (error "Quail key %s is too short" key)
724 (setcdr entry trans))
725 (if (quail-map-p trans)
726 (if (not (listp (cdr map)))
727 ;; We come here, for example, when we try to define a rule
728 ;; for "AB" as a symbol but a rule for "ABC" is already
729 ;; defined.
730 (error "Quail key %s is too short" key)
731 (if (not (listp (cdr trans)))
732 (if (cdr map)
733 ;; We come here, for example, when we try to
734 ;; define a rule for "AB" as a symbol but a rule
735 ;; for "ABC" is already defined.
736 (error "Quail key %s is too short" key)
737 (setcdr entry trans))
738 (setcdr entry (append trans (cdr map)))))
739 (setcar map trans)))))
740
741(defun quail-get-translation (map key len)
742 "Return the translation specified in Quail map MAP for KEY of length LEN.
743The translation is either a character or a cons of the form (INDEX . VECTOR),
744where VECTOR is a vector of candidates (character or string) for
745the translation, and INDEX points into VECTOR to specify the currently
746selected translation."
747 (let ((def (car map)))
748 (if (and def (symbolp def))
749 ;; DEF is a symbol of a function which returns valid translation.
750 (setq def (funcall def key len)))
751 (cond
752 ((or (integerp def) (consp def))
753 def)
754
755 ((null def)
756 ;; No translation.
757 nil)
758
759 ((stringp def)
760 ;; Each character in DEF is a candidate of translation. Reform
761 ;; it as (INDEX . VECTOR).
762 (setq def (string-to-vector def))
763 ;; But if the length is 1, we don't need vector but a single
764 ;; character as the translation.
765 (if (= (length def) 1)
766 (aref def 0)
767 (cons 0 def)))
768
769 ((vectorp def)
770 ;; Each element (string or character) in DEF is a candidate of
771 ;; translation. Reform it as (INDEX . VECTOR).
772 (cons 0 def))
773
774 (t
775 (error "Invalid object in Quail map: %s" def)))))
776
777(defun quail-lookup-key (key len)
778 "Lookup KEY of length LEN in the current Quail map and return the definition.
779The returned value is a Quail map specific to KEY."
780 (let ((idx 0)
781 (map (quail-map))
782 (kbd-translate (quail-kbd-translate))
783 slot ch translation)
784 (while (and map (< idx len))
785 (setq ch (if kbd-translate (quail-keyboard-translate (aref key idx))
786 (aref key idx)))
787 (setq idx (1+ idx))
788 (if (and (cdr map) (symbolp (cdr map)))
789 (setcdr map (funcall (cdr map) key idx)))
790 (setq slot (assq ch (cdr map)))
791 (if (and (cdr slot) (symbolp (cdr slot)))
792 (setcdr slot (funcall (cdr slot) key idx)))
793 (setq map (cdr slot)))
794 (if (and map (setq translation (quail-get-translation map key len)))
795 (progn
796 ;; We may have to reform car part of MAP.
797 (if (not (equal (car map) translation))
798 (setcar map translation))
799 (if (consp translation)
800 (progn
801 (setq quail-current-translations translation)
802 (if (quail-forget-last-selection)
803 (setcar quail-current-translations 0))))
804 ;; We may have to reform cdr part of MAP.
805 (if (and (cdr map) (symbolp (cdr map)))
806 (progn
807 (setcdr map (funcall (cdr map) key len))))
808 ))
809 map))
810
811(defun quail-conv-overlay-modification-hook (overlay after &rest ignore)
812 (if (and after
813 (= (overlay-start overlay) (overlay-end overlay)))
814 ;; Whole text in conversion area was deleted. Let's exit from
815 ;; the recursive edit.
816 (throw 'exit nil)))
817
818(defvar quail-suppress-conversion nil
819 "If non-nil, suppress converting facility of the current Quail package.")
820
821;; If set to non-nil, exit conversion mode before starting new translation.
822(defvar quail-exit-conversion-mode nil)
823
824(defun quail-start-translation ()
825 "Start translating the typed character in Quail mode."
826 (interactive "*")
827 (setq unread-command-events
828 (cons last-command-event unread-command-events))
829 ;; Check the possibility of translating the last key.
830 (if (assq last-command-event (cdr (quail-map)))
831 ;; Ok, we can start translation.
832 (let ((mode-line-format quail-mode-line-format))
833 (quail-setup-overlays)
834 (if (catch 'quail-tag
835 (if (and (not quail-suppress-conversion)
836 (quail-conversion-keymap))
837 ;; We must start translation in conversion mode.
838 (let ((overriding-local-map (quail-conversion-keymap)))
839 (setq quail-exit-conversion-mode nil)
840 (recursive-edit)
841 (if (and auto-fill-function
842 (> (current-column) (current-fill-column)))
843 (run-hooks 'auto-fill-function)))
844 (let ((overriding-local-map (quail-translation-keymap)))
845 (setq quail-current-key "")
846 (recursive-edit)))
847 (if (prog1 (< (overlay-start quail-conv-overlay)
848 (overlay-end quail-conv-overlay))
849 (delete-overlay quail-conv-overlay))
850 (run-hooks 'input-method-after-insert-chunk-hook))
851 nil)
852 ;; Someone has thrown a tag with value t, which means
853 ;; we should turn Quail mode off.
854 (quail-mode -1)))
855 ;; Since the typed character doesn't start any translation, handle
856 ;; it out of Quail mode. We come back to Quail mode later because
857 ;; function `quail-toggle-mode-temporarily' is in
858 ;; `post-command-hook'.
859 (add-hook 'post-command-hook 'quail-toggle-mode-temporarily nil t)))
860
861(defsubst quail-point-in-conversion-region ()
862 "Return non-nil value if the point is in conversion region of Quail mode."
863 (let (start pos)
864 (and (setq start (overlay-start quail-conv-overlay))
865 (>= (setq pos (point)) start)
866 (<= pos (overlay-end quail-conv-overlay)))))
867
868(defun quail-start-translation-in-conversion-mode ()
869 "Start translating the typed character in conversion mode of Quail mode."
870 (interactive "*")
871 (setq unread-command-events
872 (cons last-command-event unread-command-events))
873 (if (or quail-exit-conversion-mode
874 (not (quail-point-in-conversion-region)))
875 (progn
876 ;; We must start translation with new conversion region.
877 (setq quail-exit-conversion-mode nil)
878 (throw 'exit nil)))
879 ;; Check the possibility of translating the last key.
880 (if (assq last-command-event (cdr (quail-map)))
881 ;; Ok, we can start translation.
882 (let ((overriding-local-map (quail-translation-keymap)))
883 (setq quail-current-key "")
884 (move-overlay quail-overlay (point) (point))
885 (recursive-edit))
886 ;; Since the typed character doesn't start any translation, handle
887 ;; it out of Quail mode. We come back to Quail mode later because
888 ;; function `quail-toggle-mode-temporarily' is in
889 ;; `post-command-hook'.
890 (add-hook 'post-command-hook 'quail-toggle-mode-temporarily nil t)))
891
892(defun quail-terminate-translation ()
893 "Terminate the translation of the current key."
894 (let ((start (overlay-start quail-overlay)))
895 (if (and start
896 (< start (overlay-end quail-overlay)))
897 ;; Here we simulate self-insert-command.
898 (let (last-command-char)
899 (goto-char start)
900 ;; The first one might want to expand an abbrev.
901 (setq last-command-char (following-char))
902 (delete-char 1)
903 (self-insert-command 1)
904 (if (< (point) (overlay-end quail-overlay))
905 (if overwrite-mode
906 (while (< (point) (overlay-end quail-overlay))
907 (setq last-command-char (following-char))
908 (delete-char 1)
909 (self-insert-command 1))
910 ;; The last one might still want to auto-fill.
911 (goto-char (overlay-end quail-overlay))
912 (let ((last-command-char (preceding-char)))
913 (delete-char -1)
914 (self-insert-command 1)))))))
915 (delete-overlay quail-overlay)
916 (if (buffer-live-p quail-guidance-buf)
917 (save-excursion
918 (set-buffer quail-guidance-buf)
919 (erase-buffer)))
920 (throw 'exit nil))
921
922(defsubst quail-delete-region ()
923 "Delete the text in the current translation region of Quail."
924 (delete-region (overlay-start quail-overlay) (overlay-end quail-overlay)))
925
926(defun quail-select-current ()
927 "Select the current text shown in Quail translation region."
928 (interactive)
929 (quail-terminate-translation))
930
931;; Update the current translation status according to CONTROL-FLAG.
932;; If CONTROL-FLAG is integer value, it is the number of keys in the
933;; head quail-current-key which can be translated. The remaining keys
934;; are put back to unread-command-events to be handled again.
935;; If CONTROL-FLAG is t, terminate the translation for the whole keys
936;; in quail-current-key.
937;; If CONTROL-FLAG is nil, proceed the translation with more keys.
938
939(defun quail-update-translation (control-flag)
940 (quail-delete-region)
941 (let ((func (quail-update-translation-function)))
942 (if func
943 (funcall func control-flag)
944 (if (numberp control-flag)
945 (let ((len (length quail-current-key)))
946 (while (> len control-flag)
947 (setq len (1- len))
948 (setq unread-command-events
949 (cons (aref quail-current-key len)
950 unread-command-events)))
951 (insert (or quail-current-str
952 (substring quail-current-key 0 len))))
953 (insert (or quail-current-str quail-current-key)))))
954 (quail-update-guidance)
955 (if control-flag
956 (quail-terminate-translation)))
957
958(defun quail-self-insert-command ()
959 "Add the typed character to the key for translation."
960 (interactive "*")
961 (setq quail-current-key
962 (concat quail-current-key (char-to-string last-command-event)))
963 (quail-update-translation (quail-translate-key)))
964
965(defun quail-translate-key ()
966 "Translate the current key sequence according to the current Quail map.
967Return t if we can terminate the translation.
968Return nil if the current key sequence may be followed by more keys.
969Return number if we can't find any translation for the current key
970sequence. The number is the count of valid keys in the current
971sequence counting from the head."
972 (let* ((len (length quail-current-key))
973 (map (quail-lookup-key quail-current-key len))
974 def ch)
975 (if map
976 (let ((def (car map)))
977 (setq quail-current-str
978 (if (consp def) (aref (cdr def) (car def)) def))
979 ;; Return t only if we can terminate the current translation.
980 (and
981 ;; No alternative translations.
982 (or (null (consp def)) (= (length (cdr def)) 1))
983 ;; No translation for the longer key.
984 (null (cdr map))
985 ;; No shorter breaking point.
986 (or (null (quail-maximum-shortest))
987 (< len 3)
988 (null (quail-lookup-key quail-current-key (1- len)))
989 (null (quail-lookup-key
990 (substring quail-current-key -2 -1) 1)))))
991
992 ;; There's no translation for the current key sequence. Before
993 ;; giving up, we must check two possibilities.
994 (cond ((and
995 (quail-maximum-shortest)
996 (>= len 4)
997 (setq def (car (quail-lookup-key quail-current-key (- len 2))))
998 (quail-lookup-key (substring quail-current-key -2) 2))
999 ;; Now the sequence is "...ABCD", which can be split into
1000 ;; "...AB" and "CD..." to get valid translation.
1001 ;; At first, get translation of "...AB".
1002 (setq quail-current-str
1003 (if (consp def) (aref (cdr def) (car def)) def))
1004 ;; Then, return the length of "...AB".
1005 (- len 2))
1006
1007 ((and quail-current-translations
1008 (not (quail-deterministic))
1009 (setq ch (aref quail-current-key (1- len)))
1010 (>= ch ?0) (<= ch ?9))
1011 ;; A numeric key is entered to select a desirable translation.
1012 (setq quail-current-key (substring quail-current-key 0 -1))
1013 (quail-select-translation
1014 (+ (* (/ (car quail-current-translations) 10) 10)
1015 ;; We treat key 1,2..,9,0 as specifying 0,1,..8,9.
1016 (if (= ch ?0) 9 (- ch ?1))))
1017 ;; And, we can terminate the current translation.
1018 t)
1019
1020 (t
1021 ;; No way to handle the last character in this context.
1022 (1- len))))))
1023
1024(defun quail-next-translation ()
1025 "Select next translation in the current batch of candidates."
1026 (interactive)
1027 (if quail-current-translations
1028 (progn
1029 (quail-select-translation (1+ (car quail-current-translations)))
1030 (quail-update-translation nil))
1031 (beep)))
1032
1033(defun quail-prev-translation ()
1034 "Select previous translation in the current batch of candidates."
1035 (interactive)
1036 (if quail-current-translations
1037 (progn
1038 (quail-select-translation (1- (car quail-current-translations)))
1039 (quail-update-translation nil))
1040 (beep)))
1041
1042(defun quail-next-translation-block ()
1043 "Select the next batch of 10 translation candidates."
1044 (interactive)
1045 (if quail-current-translations
1046 (let ((limit (1- (length (cdr quail-current-translations))))
1047 (n (car quail-current-translations)))
1048 (if (< (/ n 10) (/ limit 10))
1049 (progn
1050 (quail-select-translation (min (+ n 10) limit))
1051 (quail-update-translation nil))
1052 ;; We are already at the last block.
1053 (beep)))
1054 (beep)))
1055
1056(defun quail-prev-translation-block ()
1057 "Select the previous batch of 10 translation candidates."
1058 (interactive)
1059 (if (and quail-current-translations
1060 (>= (car quail-current-translations) 10))
1061 (progn
1062 (quail-select-translation (- (car quail-current-translations) 10))
1063 (quail-update-translation nil))
1064 (beep)))
1065
1066(defun quail-select-translation (n)
1067 "Select Nth translation in the current batch of translation candidates."
1068 (if (or (< n 0) (>= n (length (cdr quail-current-translations))))
1069 (beep)
1070 (setcar quail-current-translations n)
1071 (setq quail-current-str (aref (cdr quail-current-translations) n))))
1072
1073(defun quail-abort-translation ()
1074 "Abort translation and delete the current Quail key sequence."
1075 (interactive)
1076 (quail-delete-region)
1077 (quail-terminate-translation))
1078
1079(defun quail-delete-last-char ()
1080 "Delete the last input character from the current Quail key sequence."
1081 (interactive)
1082 (if (= (length quail-current-key) 1)
1083 (quail-abort-translation)
1084 (setq quail-current-key (substring quail-current-key 0 -1))
1085 (quail-update-translation (quail-translate-key))))
1086
1087;; For conversion mode.
1088
1089(defun quail-conversion-backward-char ()
1090 (interactive)
1091 (if (<= (point) (overlay-start quail-conv-overlay))
1092 (error "Beginning of conversion region"))
1093 (forward-char -1))
1094
1095(defun quail-conversion-forward-char ()
1096 (interactive)
1097 (if (>= (point) (overlay-end quail-conv-overlay))
1098 (error "End of conversion region"))
1099 (forward-char 1))
1100
1101(defun quail-conversion-beginning-of-region ()
1102 (interactive)
1103 (goto-char (overlay-start quail-conv-overlay)))
1104
1105(defun quail-conversion-end-of-region ()
1106 (interactive)
1107 (goto-char (overlay-end quail-conv-overlay)))
1108
1109(defun quail-conversion-delete-char ()
1110 (interactive)
1111 (if (>= (point) (overlay-end quail-conv-overlay))
1112 (error "End of conversion region"))
1113 (delete-char 1)
1114 (if (= (overlay-start quail-conv-overlay)
1115 (overlay-end quail-conv-overlay))
1116 (throw 'quail-tag nil)))
1117
1118(defun quail-conversion-backward-delete-char ()
1119 (interactive)
1120 (if (<= (point) (overlay-start quail-conv-overlay))
1121 (error "Beginning of conversion region"))
1122 (delete-char -1)
1123 (if (= (overlay-start quail-conv-overlay)
1124 (overlay-end quail-conv-overlay))
1125 (throw 'quail-tag nil)))
1126
1127(defun quail-do-conversion (func &rest args)
1128 "Call FUNC to convert text in the current conversion region of Quail.
1129Remaining args are for FUNC."
1130 (delete-overlay quail-overlay)
1131 (apply func args))
1132
1133(defun quail-no-conversion ()
1134 "Do no conversion of the current conversion region of Quail."
1135 (interactive)
1136 (throw 'exit nil))
1137
1138;; Guidance, Completion, and Help buffer handlers.
1139
1140(defun quail-show-guidance-buf ()
1141 "Display a Quail guidance buffer in some window.
1142Create the buffer if it does not exist yet.
1143The window is normally shown in a minibuffer,
1144but if the selected window is a minibuffer, it is shown in
1145the bottommost ordinary window."
1146
1147 (if (or (null input-method-tersely-flag)
1148 (not (eq (selected-window) (minibuffer-window))))
1149 (progn
1150 ;; At first, setup a guidance buffer.
1151 (or (buffer-live-p quail-guidance-buf)
1152 (setq quail-guidance-buf
1153 (get-buffer-create " *Quail-guidance*")))
1154 (save-excursion
1155 (let ((title (quail-title)))
1156 (set-buffer quail-guidance-buf)
1157 ;; Show the title of Quail package in the left of mode-line.
1158 (setq current-input-method nil)
1159 (setq current-input-method-title title)
1160 (setq mode-line-format (cons '("[" current-input-method-title "]")
1161 default-mode-line-format))
1162 (erase-buffer)
1163 (or (overlayp quail-overlay)
1164 (progn
1165 (setq quail-overlay (make-overlay 1 1))
1166 (overlay-put quail-overlay 'face 'highlight)))
1167 (delete-overlay quail-overlay)
1168 (set-buffer-modified-p nil)))
1169 (bury-buffer quail-guidance-buf)
1170
1171 ;; Then, display it in an appropriate window.
1172 (if (not (get-buffer-window quail-guidance-buf))
1173 ;; Guidance buffer is not yet shown in any window.
1174 (let ((win (minibuffer-window)))
1175 (if (eq (selected-window) win)
1176 ;; Since we are in minibuffer, we can't use it for guidance.
1177 ;; Let's find the bottom window.
1178 (let (height)
1179 (setq win (window-at 0 (- (frame-height) 2)))
1180 (setq height (window-height win))
1181 ;; If WIN is too tall, split it vertically and use
1182 ;; the lower one.
1183 (if (>= height 4)
1184 (let ((window-min-height 2))
1185 ;; Here, `split-window' returns a lower window
1186 ;; which is what we wanted.
1187 (setq win (split-window win (- height 2)))))
1188 (set-window-buffer win quail-guidance-buf)
1189 (set-window-dedicated-p win t))
1190 (set-window-buffer win quail-guidance-buf))))))
1191
1192 ;; And, create a buffer for completion.
1193 (or (buffer-live-p quail-completion-buf)
1194 (progn
1195 (setq quail-completion-buf (get-buffer-create "*Quail Completions*"))
1196 (save-excursion
1197 (set-buffer quail-completion-buf)
1198 (setq quail-overlay (make-overlay 1 1))
1199 (overlay-put quail-overlay 'face 'highlight))))
1200 (bury-buffer quail-completion-buf))
1201
1202(defun quail-hide-guidance-buf ()
1203 "Hide the Quail guidance buffer."
1204 (let* ((win (minibuffer-window))
1205 (buf (window-buffer win)))
1206 (if (eq buf quail-guidance-buf)
1207 ;; Quail guidance buffer is at echo area. Vacate it to the
1208 ;; deepest minibuffer.
1209 (set-window-buffer win (format " *Minibuf-%d*" (minibuffer-depth)))
1210 ;; Delete the window for guidance buffer.
1211 (if (or (null input-method-tersely-flag)
1212 (not (eq (selected-window) (minibuffer-window))))
1213 (progn
1214 (setq win (get-buffer-window quail-guidance-buf))
1215 (set-window-dedicated-p win nil)
1216 (delete-window win))))))
1217
1218(defun quail-update-guidance ()
1219 "Update the Quail guidance buffer and completion buffer (if displayed now)."
1220 ;; Update guidance buffer.
1221 (if (or (null input-method-tersely-flag)
1222 (not (eq (selected-window) (minibuffer-window))))
1223 (let ((guidance (quail-guidance)))
1224 (cond ((eq guidance t)
1225 ;; Show the current possible translations.
1226 (quail-show-translations))
1227 ((null guidance)
1228 ;; Show the current input keys.
1229 (let ((key quail-current-key))
1230 (save-excursion
1231 (set-buffer quail-guidance-buf)
1232 (erase-buffer)
1233 (insert key))))
1234 ((listp guidance)
1235 ;; Show alternative characters specified in this alist.
1236 (let* ((key quail-current-key)
1237 (len (length key))
1238 (i 0)
1239 ch alternative)
1240 (save-excursion
1241 (set-buffer quail-guidance-buf)
1242 (erase-buffer)
1243 (while (< i len)
1244 (setq ch (aref key i))
1245 (setq alternative (cdr (assoc ch guidance)))
1246 (insert (or alternative ch))
1247 (setq i (1+ i)))))))))
1248
1249 ;; Update completion buffer if displayed now. We highlight the
1250 ;; selected candidate string in *Completion* buffer if any.
1251 (let ((win (get-buffer-window quail-completion-buf))
1252 key str pos)
1253 (if win
1254 (save-excursion
1255 (setq str (if (stringp quail-current-str)
1256 quail-current-str
1257 (if (numberp quail-current-str)
1258 (char-to-string quail-current-str)))
1259 key quail-current-key)
1260 (set-buffer quail-completion-buf)
1261 (goto-char (point-min))
1262 (if (null (search-forward (concat " " key ":") nil t))
1263 (delete-overlay quail-overlay)
1264 (setq pos (point))
1265 (if (and str (search-forward (concat "." str) nil t))
1266 (move-overlay quail-overlay (1+ (match-beginning 0)) (point))
1267 (move-overlay quail-overlay (match-beginning 0) (point)))
1268 ;; Now POS points end of KEY and (point) points end of STR.
1269 (if (pos-visible-in-window-p (point) win)
1270 ;; STR is already visible.
1271 nil
1272 ;; We want to make both KEY and STR visible, but if the
1273 ;; window is too short, make at least STR visible.
1274 (setq pos (progn (point) (goto-char pos)))
1275 (beginning-of-line)
1276 (set-window-start win (point))
1277 (if (not (pos-visible-in-window-p pos win))
1278 (set-window-start win pos))
1279 ))))))
1280
1281(defun quail-show-translations ()
1282 "Show the current possible translations."
1283 (let ((key quail-current-key)
1284 (map (quail-lookup-key quail-current-key (length quail-current-key))))
1285 (save-excursion
1286 (set-buffer quail-guidance-buf)
1287 (erase-buffer)
1288
1289 ;; Show the current key.
1290 (insert key)
1291
1292 ;; Show possible following keys.
1293 (if (cdr map)
1294 (let ((l (cdr map)))
1295 (insert "[")
1296 (while l
1297 (insert (car (car l)))
1298 (setq l (cdr l)))
1299 (insert "]")))
1300
1301 ;; Show list of translations.
1302 (if (consp (car map))
1303 (let* ((idx (car (car map)))
1304 (translations (cdr (car map)))
1305 (from (* (/ idx 10) 10))
1306 (to (min (+ from 10) (length translations))))
1307 (indent-to 10)
1308 (insert (format "(%d/%d)"
1309 (1+ (/ from 10))
1310 (1+ (/ (length translations) 10))))
1311 (while (< from to)
1312 ;; We show the last digit of FROM, but by changing
1313 ;; 0,1,..,9 to 1,2,..,0.
1314 (insert (format " %d."
1315 (if (= (% from 10) 9) 0 (1+ (% from 10)))))
1316 (let ((pos (point)))
1317 (insert (aref translations from))
1318 (if (= idx from)
1319 (move-overlay quail-overlay pos (point))))
1320 (setq from (1+ from)))))
1321 )))
1322
1323(defun quail-completion ()
1324 "List all completions for the current key.
1325All possible translations of the current key and whole possible longer keys
1326 are shown."
1327 (interactive)
1328 (let ((key quail-current-key)
1329 (map (quail-lookup-key quail-current-key (length quail-current-key))))
1330 (save-excursion
1331 (set-buffer quail-completion-buf)
1332 (erase-buffer)
1333 (insert "Possible completion and corresponding translations are:\n")
1334 (quail-completion-1 key map 1)
1335 (goto-char (point-min))
1336 (display-buffer (current-buffer)))
1337 (quail-update-guidance)))
1338
1339;; List all completions of KEY in MAP with indentation INDENT.
1340(defun quail-completion-1 (key map indent)
1341 (let ((len (length key)))
1342 (indent-to indent)
1343 (insert key ":")
1344 (if (and (symbolp map) (fboundp map))
1345 (setq map (funcall map key len)))
1346 (if (car map)
1347 (quail-completion-list-translations map key (+ indent len 1))
1348 (insert " -\n"))
1349 (setq indent (+ indent 2))
1350 (if (cdr map)
1351 (let ((l (cdr map))
1352 (newkey (make-string (1+ len) 0))
1353 (i 0))
1354 ;; Set KEY in the first LEN characters of NEWKEY.
1355 (while (< i len)
1356 (aset newkey i (aref key i))
1357 (setq i (1+ i)))
1358 (while l ; L = ((CHAR . DEFN) ....) ;
1359 (aset newkey len (car (car l)))
1360 (quail-completion-1 newkey (cdr (car l)) indent)
1361 (setq l (cdr l)))))))
1362
1363;; List all possible translations of KEY in Quail map MAP with
1364;; indentation INDENT."
1365(defun quail-completion-list-translations (map key indent)
1366 (let ((translations
1367 (quail-get-translation map key (length key))))
1368 (if (integerp translations)
1369 (insert "(1/1) 1." translations "\n")
1370 ;; We need only vector part.
1371 (setq translations (cdr translations))
1372 ;; Insert every 10 elements with indices in a line.
1373 (let ((len (length translations))
1374 (i 0)
1375 (first t)
1376 num)
1377 (while (< i len)
1378 (if first
1379 (progn
1380 (insert "(1/1)")
1381 (setq first nil))
1382 (if (= (% i 10) 0)
1383 (progn
1384 (newline)
1385 (indent-to indent)
1386 (insert (format "(%d/%d)" (1+ (/ i 10)) (1+ (/ len 10)))))))
1387 ;; We show the last digit of FROM while converting
1388 ;; 0,1,..,9 to 1,2,..,0.
1389 (insert (format " %d." (if (= (% i 10) 9) 0 (1+ (% i 10)))))
1390 (insert (aref translations i))
1391 (setq i (1+ i)))
1392 (newline)))))
1393
1394(defun quail-help ()
1395 "Show brief description of the current Quail package."
1396 (interactive)
1397 (let ((package quail-current-package)
1398 (buf (get-buffer-create "*Quail-help*")))
1399 (save-excursion
1400 (set-buffer buf)
1401 (erase-buffer)
1402 (setq quail-current-package package)
1403 (insert "Quail input method (name:"
1404 (quail-name)
1405 ", mode line indicator:["
1406 (quail-title)
1407 "])\n---- Documentation ----\n"
1408 (quail-docstring))
1409 (newline)
1410 (if (quail-show-layout) (quail-show-kbd-layout))
1411 (insert )
1412 (quail-help-insert-keymap-description
1413 quail-mode-map
1414 "---- Key bindings (before starting translation) ----
1415key binding
1416--- -------\n")
1417 (quail-help-insert-keymap-description
1418 (quail-translation-keymap)
1419 "--- Key bindings (while translating) ---
1420key binding
1421--- -------\n")
1422 (if (quail-conversion-keymap)
1423 (quail-help-insert-keymap-description
1424 (quail-conversion-keymap)
1425 "--- Key bindings (while converting) ---
1426key binding
1427--- -------\n"))
1428 (goto-char (point-min))
1429 (set-buffer-modified-p nil)
1430 (help-mode))
1431 (display-buffer buf)))
1432
1433(defun quail-help-insert-keymap-description (keymap &optional header)
1434 (let (from to)
1435 (if header
1436 (insert header))
1437 (save-excursion
1438 (save-window-excursion
1439 (let ((overriding-local-map keymap))
1440 (describe-bindings))
1441 (set-buffer "*Help*")
1442 (goto-char (point-min))
1443 (forward-line 4)
1444 (setq from (point))
1445 (search-forward "Global Bindings:" nil 'move)
1446 (beginning-of-line)
1447 (setq to (point))))
1448 (insert-buffer-substring "*Help*" from to)))
1449
1450(defun quail-show-kbd-layout ()
1451 "Show keyboard layout with key tops of multilingual characters."
1452 (insert "--- Keyboard layout ---\n")
1453 (let* ((i 0) ch)
1454 (while (< i quail-keyboard-layout-len)
1455 (if (= (% i 30) 0)
1456 (progn
1457 (newline)
1458 (indent-to (/ i 30)))
1459 (if (= (% i 2) 0)
1460 (insert " ")))
1461 (setq ch (aref quail-keyboard-layout i))
1462 (if (= ch ?\ )
1463 (insert ch)
1464 (let ((map (cdr (assq ch (cdr (quail-map))))))
1465 (if map
1466 (let ((translation
1467 (quail-get-translation map (char-to-string ch) 1)))
1468 (if (integerp translation)
1469 (insert translation)
1470 (insert (aref (cdr translation) (car translation)))))
1471 (insert ch))))
1472 (setq i (1+ i))))
1473 (newline))
1474
1475(defun quail-translation-help ()
1476 "Show help message while translating in Quail mode."
1477 (interactive)
1478 (let ((package quail-current-package)
1479 (current-key quail-current-key)
1480 (buf (get-buffer-create "*Quail-Help*")))
1481 (save-excursion
1482 (set-buffer buf)
1483 (erase-buffer)
1484 (setq quail-current-package package)
1485 (insert
1486 (format "You are translating the key sequence \"%s\" in Quail mode.\n"
1487 quail-current-key))
1488 (quail-help-insert-keymap-description
1489 (quail-translation-keymap)
1490 "-----------------------
1491key binding
1492--- -------\n")
1493 (goto-char (point-min))
1494 (set-buffer-modified-p nil))
1495 (display-buffer buf)))
1496
1497(defun quail-conversion-help ()
1498 "Show help message while converting in Quail mode."
1499 (interactive)
1500 (let ((package quail-current-package)
1501 (str (buffer-substring (overlay-start quail-conv-overlay)
1502 (overlay-end quail-conv-overlay)))
1503 (buf (get-buffer-create "*Quail-Help*")))
1504 (save-excursion
1505 (set-buffer buf)
1506 (erase-buffer)
1507 (setq quail-current-package package)
1508 (insert
1509 (format "You are converting the string \"%s\" in Quail mode.\n" str))
1510 (quail-help-insert-keymap-description
1511 (quail-conversion-keymap)
1512 "-----------------------
1513key binding
1514--- -------\n")
1515 (goto-char (point-min))
1516 (set-buffer-modified-p nil))
1517 (display-buffer buf)))
1518
1519;;
1520(provide 'quail)
1521
1522;;; quail.el ends here
diff --git a/lisp/international/skkdic-cnv.el b/lisp/international/skkdic-cnv.el
new file mode 100644
index 00000000000..4f227204bdc
--- /dev/null
+++ b/lisp/international/skkdic-cnv.el
@@ -0,0 +1,561 @@
1;; skkdic-cnv.el -- convert a SKK dictionary for `skkdic-utl'
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Japanese, SKK
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; SKK is a Japanese input method running on Mule created by Masahiko
27;; Sato <masahiko@sato.riec.tohoku.ac.jp>. Here we provide utilities
28;; to handle a dictionary distributed with SKK so that a different
29;; input method (e.g. quail-japanese) can utilize the dictionary.
30
31;; The format of SKK dictionary is quite simple. Each line has the
32;; form "KANASTRING /CONV1/CONV2/.../" which means KANASTRING ($B2>L>J8(B
33;; $B;zNs(B) can be converted to one of CONVi. CONVi is a Kanji ($B4A;z(B)
34;; and Kana ($B2>L>(B) mixed string.
35;;
36;; KANASTRING may have a trailing ASCII letter for Okurigana ($BAw$j2>L>(B)
37;; information. For instance, the trailing letter `k' means that one
38;; of the following Okurigana is allowed: $B$+$-$/$1$3(B. So, in that
39;; case, the string "KANASTRING$B$/(B" can be converted to one of "CONV1$B$/(B",
40;; CONV2$B$/(B, ...
41
42;;; Code:
43
44;; Name of a file to generate from SKK dictionary.
45(defvar skkdic-filename "skkdic.el")
46
47;; To make a generated skkdic.el smaller.
48(make-coding-system
49 'coding-system-iso-2022-7-short
50 2 ?J
51 "Like `coding-system-iso-2022' but no ASCII designation before SPC."
52 '(ascii nil nil nil t t nil t))
53
54(defconst skkdic-jbytes
55 (charset-bytes 'japanese-jisx0208))
56
57(defun skkdic-convert-okuri-ari (skkbuf buf)
58 (message "Processing OKURI-ARI entries ...")
59 (goto-char (point-min))
60 (save-excursion
61 (set-buffer buf)
62 (insert ";; Setting okuri-ari entries.\n"
63 "(skkdic-set-okuri-ari\n"))
64 (while (not (eobp))
65 (let ((from (point))
66 to)
67 (end-of-line)
68 (setq to (point))
69
70 (save-excursion
71 (set-buffer buf)
72 (insert-buffer-substring skkbuf from to)
73 (beginning-of-line)
74 (insert "\"")
75 (search-forward " ")
76 (delete-char 1) ; delete the first '/'
77 (let ((p (point)))
78 (end-of-line)
79 (delete-char -1) ; delete the last '/'
80 (subst-char-in-region p (point) ?/ ? 'noundo))
81 (insert "\"\n"))
82
83 (forward-line 1)))
84 (save-excursion
85 (set-buffer buf)
86 (insert ")\n\n")))
87
88(defconst skkdic-postfix-list '(skkdic-postfix-list))
89
90(defconst skkdic-postfix-data
91 '(("$B$$$-(B" "$B9T(B")
92 ("$B$,$+$j(B" "$B78(B")
93 ("$B$,$/(B" "$B3X(B")
94 ("$B$,$o(B" "$B@n(B")
95 ("$B$7$c(B" "$B<R(B")
96 ("$B$7$e$&(B" "$B=8(B")
97 ("$B$7$g$&(B" "$B>^(B" "$B>k(B")
98 ("$B$8$g$&(B" "$B>k(B")
99 ("$B$;$s(B" "$B@~(B")
100 ("$B$@$1(B" "$B3Y(B")
101 ("$B$A$c$/(B" "$BCe(B")
102 ("$B$F$s(B" "$BE9(B")
103 ("$B$H$&$2(B" "$BF=(B")
104 ("$B$I$*$j(B" "$BDL$j(B")
105 ("$B$d$^(B" "$B;3(B")
106 ("$B$P$7(B" "$B66(B")
107 ("$B$O$D(B" "$BH/(B")
108 ("$B$b$/(B" "$BL\(B")
109 ("$B$f$-(B" "$B9T(B")))
110
111(defun skkdic-convert-postfix (skkbuf buf)
112 (message "Processing POSTFIX entries ...")
113 (goto-char (point-min))
114 (save-excursion
115 (set-buffer buf)
116 (insert ";; Setting postfix entries.\n"
117 "(skkdic-set-postfix\n"))
118
119 ;; Initialize SKKDIC-POSTFIX-LIST by predefined data
120 ;; SKKDIC-POSTFIX-DATA.
121 (save-excursion
122 (set-buffer buf)
123 (let ((l skkdic-postfix-data)
124 kana candidates entry)
125 (while l
126 (setq kana (car (car l)) candidates (cdr (car l)))
127 (insert "\"" kana)
128 (while candidates
129 (insert " " (car candidates))
130 (setq entry (lookup-nested-alist (car candidates)
131 skkdic-postfix-list nil nil t))
132 (if (consp (car entry))
133 (setcar entry (cons kana (car entry)))
134 (set-nested-alist (car candidates) (list kana)
135 skkdic-postfix-list))
136 (setq candidates (cdr candidates)))
137 (insert "\"\n")
138 (setq l (cdr l)))))
139
140 ;; Search postfix entries.
141 (while (re-search-forward "^[#<>?]\\(\\cH+\\) " nil t)
142 (let ((kana (match-string 1))
143 str candidates)
144 (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
145 (setq str (match-string 1))
146 (if (not (member str candidates))
147 (setq candidates (cons str candidates)))
148 (goto-char (match-end 1)))
149 (save-excursion
150 (set-buffer buf)
151 (insert "\"" kana)
152 (while candidates
153 (insert " " (car candidates))
154 (let ((entry (lookup-nested-alist (car candidates)
155 skkdic-postfix-list nil nil t)))
156 (if (consp (car entry))
157 (if (not (member kana (car entry)))
158 (setcar entry (cons kana (car entry))))
159 (set-nested-alist (car candidates) (list kana)
160 skkdic-postfix-list)))
161 (setq candidates (cdr candidates)))
162 (insert "\"\n"))))
163 (save-excursion
164 (set-buffer buf)
165 (insert ")\n\n")))
166
167(defconst skkdic-prefix-list '(skkdic-prefix-list))
168
169(defun skkdic-convert-prefix (skkbuf buf)
170 (message "Processing PREFIX entries ...")
171 (goto-char (point-min))
172 (save-excursion
173 (set-buffer buf)
174 (insert ";; Setting prefix entries.\n"
175 "(skkdic-set-prefix\n"))
176 (save-excursion
177 (while (re-search-forward "^\\(\\cH+\\)[<>?] " nil t)
178 (let ((kana (match-string 1))
179 str candidates)
180 (while (looking-at "/\\([^/\n]+\\)/")
181 (setq str (match-string 1))
182 (if (not (member str candidates))
183 (setq candidates (cons str candidates)))
184 (goto-char (match-end 1)))
185 (save-excursion
186 (set-buffer buf)
187 (insert "\"" kana)
188 (while candidates
189 (insert " " (car candidates))
190 (set-nested-alist (car candidates) kana skkdic-prefix-list)
191 (setq candidates (cdr candidates)))
192 (insert "\"\n")))))
193 (save-excursion
194 (set-buffer buf)
195 (insert ")\n\n")))
196
197;; FROM and TO point the head and tail of "/J../J../.../".
198(defun skkdic-get-candidate-list (from to)
199 (let (candidates)
200 (goto-char from)
201 (while (re-search-forward "/\\cj+" to t)
202 (setq candidates (cons (buffer-substring (1+ (match-beginning 0))
203 (match-end 0))
204 candidates)))
205 candidates))
206
207;; Return entry for STR from nested alist ALIST.
208(defsubst skkdic-get-entry (str alist)
209 (car (lookup-nested-alist str alist nil nil t)))
210
211
212(defconst skkdic-word-list '(skkdic-word-list))
213
214;; Return t if substring of STR (between FROM and TO) can be broken up
215;; to chunks all of which can be derived from another entry in SKK
216;; dictionary. SKKBUF is the buffer where the original SKK dictionary
217;; is visited, KANA is the current entry for STR. FIRST is t iff this
218;; is called at top level.
219
220(defun skkdic-breakup-string (skkbuf kana str from to &optional first)
221 (let ((len (- to from)))
222 (or (and (>= len (* skkdic-jbytes 2))
223 (let ((min-idx (+ from (* skkdic-jbytes 2)))
224 (idx (if first (- to skkdic-jbytes) to))
225 (found nil))
226 (while (and (not found) (>= idx min-idx))
227 (let ((kana2-list (skkdic-get-entry
228 (substring str from idx)
229 skkdic-word-list)))
230 (if (or (and (consp kana2-list)
231 (let ((kana-len (length kana))
232 kana2)
233 (catch 'skkdic-tag
234 (while kana2-list
235 (setq kana2 (car kana2-list))
236 (if (string-match kana2 kana)
237 (throw 'skkdic-tag t))
238 (setq kana2-list (cdr kana2-list)))))
239 (or (= idx to)
240 (skkdic-breakup-string skkbuf kana str
241 idx to)))
242 (and (stringp kana2-list)
243 (string-match kana2-list kana)))
244 (setq found t)
245 (setq idx (- idx skkdic-jbytes)))))
246 found))
247 (and first
248 (> len (* skkdic-jbytes 2))
249 (let ((kana2 (skkdic-get-entry
250 (substring str from (+ from skkdic-jbytes))
251 skkdic-prefix-list)))
252 (and (stringp kana2)
253 (eq (string-match kana2 kana) 0)))
254 (skkdic-breakup-string skkbuf kana str (+ from skkdic-jbytes) to))
255 (and (not first)
256 (>= len skkdic-jbytes)
257 (let ((kana2-list (skkdic-get-entry
258 (substring str from to)
259 skkdic-postfix-list)))
260 (and (consp kana2-list)
261 (let (kana2)
262 (catch 'skkdic-tag
263 (while kana2-list
264 (setq kana2 (car kana2-list))
265 (if (string= kana2
266 (substring kana (- (length kana2))))
267 (throw 'skkdic-tag t))
268 (setq kana2-list (cdr kana2-list)))))))))))
269
270;; Return list of candidates which excludes some from CANDIDATES.
271;; Excluded candidates can be derived from another entry.
272
273(defun skkdic-reduced-candidates (skkbuf kana candidates)
274 (let (elt l)
275 (while candidates
276 (setq elt (car candidates))
277 (if (or (= (length elt) skkdic-jbytes)
278 (and (string-match "^\\cj" elt)
279 (not (skkdic-breakup-string skkbuf kana elt 0 (length elt)
280 'first))))
281 (setq l (cons elt l)))
282 (setq candidates (cdr candidates)))
283 (nreverse l)))
284
285(defconst skkdic-okuri-nasi-entries (list nil))
286(defconst skkdic-okuri-nasi-entries-count 0)
287
288(defun skkdic-collect-okuri-nasi ()
289 (message "Collecting OKURI-NASI entries ...")
290 (save-excursion
291 (let ((prev-ratio 0)
292 ratio)
293 (while (re-search-forward "^\\(\\cH+\\) \\(/\\cj.*\\)/$" nil t)
294 (let ((kana (match-string 1))
295 (candidates (skkdic-get-candidate-list (match-beginning 2)
296 (match-end 2))))
297 (setq skkdic-okuri-nasi-entries
298 (cons (cons kana candidates) skkdic-okuri-nasi-entries)
299 skkdic-okuri-nasi-entries-count
300 (1+ skkdic-okuri-nasi-entries-count))
301 (setq ratio (floor (/ (* (point) 100.0) (point-max))))
302 (if (/= ratio prev-ratio)
303 (progn
304 (message "collected %2d%% %s ..." ratio kana)
305 (setq prev-ratio ratio)))
306 (while candidates
307 (let ((entry (lookup-nested-alist (car candidates)
308 skkdic-word-list nil nil t)))
309 (if (consp (car entry))
310 (setcar entry (cons kana (car entry)))
311 (set-nested-alist (car candidates) (list kana)
312 skkdic-word-list)))
313 (setq candidates (cdr candidates))))))))
314
315(defun skkdic-convert-okuri-nasi (skkbuf buf)
316 (message "Processing OKURI-NASI entries ...")
317 (save-excursion
318 (set-buffer buf)
319 (insert ";; Setting okuri-nasi entries.\n"
320 "(skkdic-set-okuri-nasi\n")
321 (let ((l (nreverse skkdic-okuri-nasi-entries))
322 (count 0)
323 (prev-ratio 0)
324 ratio)
325 (while l
326 (let ((kana (car (car l)))
327 (candidates (cdr (car l))))
328 (setq ratio (/ (* count 1000) skkdic-okuri-nasi-entries-count)
329 count (1+ count))
330 (if (/= prev-ratio (/ ratio 10))
331 (progn
332 (message "processed %2d%% %s ..." (/ ratio 10) kana)
333 (setq prev-ratio (/ ratio 10))))
334 (if (setq candidates
335 (skkdic-reduced-candidates skkbuf kana candidates))
336 (progn
337 (insert "\"" kana)
338 (while candidates
339 (insert " " (car candidates))
340 (setq candidates (cdr candidates)))
341 (insert "\"\n"))))
342 (setq l (cdr l))))
343 (insert ")\n\n")))
344
345(defun skkdic-convert (filename &optional dirname)
346 "Convert SKK dictionary of FILENAME into the file \"skkdic.el\".
347Optional argument DIRNAME if specified is the directory name under which
348the generated \"skkdic.el\" is saved."
349 (interactive "FSKK dictionary file: ")
350 (message "Reading file \"%s\" ..." filename)
351 (let ((skkbuf(find-file-noselect (expand-file-name filename)))
352 (buf (get-buffer-create "*skkdic-work*")))
353 (save-excursion
354 ;; Setup and generate the header part of working buffer.
355 (set-buffer buf)
356 (erase-buffer)
357 (buffer-disable-undo)
358 (insert ";; skkdic.el -- dictionary for Japanese input method\n"
359 ";;\tGenerated by the command `skkdic-convert'\n"
360 ";;\tDate: " (current-time-string) "\n"
361 ";;\tOriginal SKK dictionary file: "
362 (file-name-nondirectory filename)
363 "\n\n"
364 ";;; Comment:\n\n"
365 ";; Do byte-compile this file again after any modification.\n\n"
366 ";;; Start of the header of the original TIT dictionary.\n\n")
367 (set-buffer skkbuf)
368 (widen)
369 (goto-char 1)
370 (let (pos)
371 (search-forward ";; okuri-ari")
372 (forward-line 1)
373 (setq pos (point))
374 (set-buffer buf)
375 (insert-buffer-substring skkbuf 1 pos))
376 (insert "\n"
377 ";;; Code:\n\n")
378
379 ;; Generate the body part of working buffer.
380 (set-buffer skkbuf)
381 (let ((from (point))
382 to)
383 ;; Convert okuri-ari entries.
384 (search-forward ";; okuri-nasi")
385 (beginning-of-line)
386 (setq to (point))
387 (narrow-to-region from to)
388 (skkdic-convert-okuri-ari skkbuf buf)
389 (widen)
390
391 ;; Convert okuri-nasi postfix entries.
392 (goto-char to)
393 (forward-line 1)
394 (setq from (point))
395 (re-search-forward "^\\cH")
396 (setq to (match-beginning 0))
397 (narrow-to-region from to)
398 (skkdic-convert-postfix skkbuf buf)
399 (widen)
400
401 ;; Convert okuri-nasi prefix entries.
402 (goto-char to)
403 (skkdic-convert-prefix skkbuf buf)
404
405 ;;
406 (skkdic-collect-okuri-nasi)
407
408 ;; Convert okuri-nasi general entries.
409 (skkdic-convert-okuri-nasi skkbuf buf)
410
411 ;; Postfix
412 (save-excursion
413 (set-buffer buf)
414 (goto-char (point-max))
415 (insert ";;\n(provide 'skkdic)\n\n;; skkdic.el ends here\n")))
416
417 ;; Save the working buffer.
418 (set-buffer buf)
419 (set-visited-file-name (expand-file-name skkdic-filename dirname) t)
420 (set-buffer-file-coding-system 'coding-system-iso-2022-7-short)
421 (save-buffer 0))
422 (kill-buffer skkbuf)
423 (switch-to-buffer buf)))
424
425(defun batch-skkdic-convert ()
426 "Run `skkdic-convert' on the files remaining on the command line.
427Use this from the command line, with `-batch';
428it won't work in an interactive Emacs.
429For example, invoke:
430 % emacs -batch -l skkconv -f batch-skkdic-convert SKK-JISYO.L
431to generate \"skkdic.el\" from SKK dictionary file \"SKK-JISYO.L\".
432To get complete usage, invoke:
433 % emacs -batch -l skkconv -f batch-skkdic-convert -h"
434 (defvar command-line-args-left) ; Avoid compiler warning.
435 (if (not noninteractive)
436 (error "`batch-skkdic-convert' should be used only with -batch"))
437 (if (string= (car command-line-args-left) "-h")
438 (progn
439 (message "To convert SKK-JISYO.L into skkdic.el:")
440 (message " %% emacs -batch -l skkdic-conv -f batch-skkdic-convert SKK-JISYO.L")
441 (message "To convert SKK-JISYO.L into DIR/skkdic.el:")
442 (message " %% emacs -batch -l skkdic-conv -f batch-skkdic-convert -dir DIR SKK-JISYO.L"))
443 (let (targetdir filename)
444 (if (string= (car command-line-args-left) "-dir")
445 (progn
446 (setq command-line-args-left (cdr command-line-args-left))
447 (setq targetdir (expand-file-name (car command-line-args-left)))
448 (setq command-line-args-left (cdr command-line-args-left))))
449 (setq filename (expand-file-name (car command-line-args-left)))
450 (message "Converting %s to skkdic.el ..." filename)
451 (message "It takes around 10 minutes even on Sun SS20.")
452 (skkdic-convert filename targetdir)
453 (message "Do byte-compile the created file by:")
454 (message " %% emacs -batch -l skkdic-cnv -f batch-byte-compile skkdic.el")
455 (message " ^^^^^^^^^^^^^ -- Don't forget this option!")
456 ))
457 (kill-emacs 0))
458
459
460;; The following macros are expanded at byte-compiling time so that
461;; compiled code can be loaded quickly.
462
463(defun skkdic-get-kana-compact-codes (kana)
464 (let* ((len (length kana))
465 (vec (make-vector (/ (+ len (1- skkdic-jbytes)) skkdic-jbytes) 0))
466 (i 0)
467 ch)
468 (while (< i len)
469 (setq ch (sref kana i))
470 (aset vec (/ i 3)
471 (if (< ch 128) ; CH is an ASCII letter for OKURIGANA,
472 (- ch) ; represented by a negative code.
473 (if (= ch ?$B!<(B) ; `$B!<(B' is represented by 0.
474 0
475 (- (nth 2 (split-char ch)) 32))))
476 (setq i (+ i 3)))
477 vec))
478
479(defun skkdic-extract-conversion-data (entry)
480 (string-match "^\\cH+[a-z]* " entry)
481 (let ((kana (substring entry (match-beginning 0) (1- (match-end 0))))
482 (i (match-end 0))
483 candidates)
484 (while (string-match "[^ ]+" entry i)
485 (setq candidates (cons (match-string 0 entry) candidates))
486 (setq i (match-end 0)))
487 (cons (skkdic-get-kana-compact-codes kana) candidates)))
488
489(defmacro skkdic-set-okuri-ari (&rest entries)
490 `(defconst skkdic-okuri-ari
491 ',(let ((l entries)
492 (map '(skkdic-okuri-ari))
493 entry)
494 (while l
495 (setq entry (skkdic-extract-conversion-data (car l)))
496 (set-nested-alist (car entry) (cdr entry) map)
497 (setq l (cdr l)))
498 map)))
499
500(defmacro skkdic-set-postfix (&rest entries)
501 `(defconst skkdic-postfix
502 ',(let ((l entries)
503 (map '(nil))
504 (longest 1)
505 len entry)
506 (while l
507 (setq entry (skkdic-extract-conversion-data (car l)))
508 (setq len (length (car entry)))
509 (if (> len longest)
510 (setq longest len))
511 (let ((entry2 (lookup-nested-alist (car entry) map nil nil t)))
512 (if (consp (car entry2))
513 (let ((conversions (cdr entry)))
514 (while conversions
515 (if (not (member (car conversions) (car entry2)))
516 (setcar entry2 (cons (car conversions) (car entry2))))
517 (setq conversions (cdr conversions))))
518 (set-nested-alist (car entry) (cdr entry) map)))
519 (setq l (cdr l)))
520 (setcar map longest)
521 map)))
522
523(defmacro skkdic-set-prefix (&rest entries)
524 `(defconst skkdic-prefix
525 ',(let ((l entries)
526 (map '(nil))
527 (longest 1)
528 len entry)
529 (while l
530 (setq entry (skkdic-extract-conversion-data (car l)))
531 (setq len (length (car entry)))
532 (if (> len longest)
533 (setq longest len))
534 (let ((entry2 (lookup-nested-alist (car entry) map len nil t)))
535 (if (consp (car entry2))
536 (let ((conversions (cdr entry)))
537 (while conversions
538 (if (not (member (car conversions) (car entry2)))
539 (setcar entry2 (cons (car conversions) (car entry2))))
540 (setq conversions (cdr conversions))))
541 (set-nested-alist (car entry) (cdr entry) map len)))
542 (setq l (cdr l)))
543 (setcar map longest)
544 map)))
545
546(defmacro skkdic-set-okuri-nasi (&rest entries)
547 `(defconst skkdic-okuri-nasi
548 ',(let ((l entries)
549 (map '(skdic-okuri-nasi))
550 (count 0)
551 entry)
552 (while l
553 (setq count (1+ count))
554 (if (= (% count 10) 0)
555 (message (format "%d entries" count)))
556 (setq entry (skkdic-extract-conversion-data (car l)))
557 (set-nested-alist (car entry) (cdr entry) map)
558 (setq l (cdr l)))
559 map)))
560
561;; skkdic-cnv.el ends here
diff --git a/lisp/international/skkdic-utl.el b/lisp/international/skkdic-utl.el
new file mode 100644
index 00000000000..d5b0c1e6149
--- /dev/null
+++ b/lisp/international/skkdic-utl.el
@@ -0,0 +1,198 @@
1;; skkdic-utl.el -- utility functions for handling skkdic.el
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Japanese, SKK
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; SKK is a free Japanese input method running on Mule created by
27;; Masahiko Sato <masahiko@sato.riec.tohoku.ac.jp>. A dictionary of
28;; SKK can be converted by `skkdic-convert' (skkdic-conv.el) to a file
29;; "skkdic.el" in which the dictionary entries are defined in the
30;; format which can be handled by the following functions.
31
32;;; Code:
33
34(defvar skkdic-okuri-ari nil
35 "OKURI-ARI entries of SKK dictionary.")
36(defvar skkdic-postfix nil
37 "POSTFIX entries of SKK dictionary.")
38(defvar skkdic-prefix nil
39 "PREFIX entries of SKK dictionary.")
40(defvar skkdic-okuri-nasi nil
41 "OKURI-NASI entries of SKK dictionary.")
42
43;; Alist of Okuriganas vs trailing ASCII letters in OKURI-ARI entry.
44(defconst skkdic-okurigana-table
45 '((?$B$!(B . ?a) (?$B$"(B . ?a) (?$B$#(B . ?i) (?$B$$(B . ?i) (?$B$%(B . ?u)
46 (?$B$&(B . ?u) (?$B$'(B . ?e) (?$B$((B . ?e) (?$B$)(B . ?o) (?$B$*(B . ?o)
47 (?$B$+(B . ?k) (?$B$,(B . ?g) (?$B$-(B . ?k) (?$B$.(B . ?g) (?$B$/(B . ?k)
48 (?$B$0(B . ?g) (?$B$1(B . ?k) (?$B$2(B . ?g) (?$B$3(B . ?k) (?$B$4(B . ?g)
49 (?$B$5(B . ?s) (?$B$6(B . ?z) (?$B$7(B . ?s) (?$B$8(B . ?j) (?$B$9(B . ?s)
50 (?$B$:(B . ?z) (?$B$;(B . ?s) (?$B$<(B . ?z) (?$B$=(B . ?s) (?$B$>(B . ?z)
51 (?$B$?(B . ?t) (?$B$@(B . ?d) (?$B$A(B . ?t) (?$B$B(B . ?d) (?$B$C(B . ?t)
52 (?$B$D(B . ?t) (?$B$E(B . ?d) (?$B$F(B . ?t) (?$B$G(B . ?d) (?$B$H(B . ?t) (?$B$I(B . ?d)
53 (?$B$J(B . ?n) (?$B$K(B . ?n) (?$B$L(B . ?n) (?$B$M(B . ?n) (?$B$N(B . ?n)
54 (?$B$O(B . ?h) (?$B$P(B . ?b) (?$B$Q(B . ?p) (?$B$R(B . ?h) (?$B$S(B . ?b)
55 (?$B$T(B . ?p) (?$B$U(B . ?h) (?$B$V(B . ?b) (?$B$W(B . ?p) (?$B$X(B . ?h)
56 (?$B$Y(B . ?b) (?$B$Z(B . ?p) (?$B$[(B . ?h) (?$B$\(B . ?b) (?$B$](B . ?p)
57 (?$B$^(B . ?m) (?$B$_(B . ?m) (?$B$`(B . ?m) (?$B$a(B . ?m) (?$B$b(B . ?m)
58 (?$B$c(B . ?y) (?$B$d(B . ?y) (?$B$e(B . ?y) (?$B$f(B . ?y) (?$B$g(B . ?y) (?$B$h(B . ?y)
59 (?$B$i(B . ?r) (?$B$j(B . ?r) (?$B$k(B . ?r) (?$B$l(B . ?r) (?$B$m(B . ?r)
60 (?$B$o(B . ?w) (?$B$p(B . ?w) (?$B$q(B . ?w) (?$B$r(B . ?w)
61 (?$B$s(B . ?n)
62 ))
63
64(defconst skkdic-jbytes
65 (charset-bytes 'japanese-jisx0208))
66
67(defun skkdic-merge-head-and-tail (heads tails postfix)
68 (let ((min-len (* skkdic-jbytes 2))
69 l)
70 (while heads
71 (if (or (not postfix)
72 (>= (length (car heads)) min-len))
73 (let ((tail tails))
74 (while tail
75 (if (or postfix
76 (>= (length (car tail)) min-len))
77 (setq l (cons (concat (car heads) (car tail)) l)))
78 (setq tail (cdr tail)))))
79 (setq heads (cdr heads)))
80 l))
81
82(defconst skkdic-jisx0208-hiragana-block (nth 1 (split-char ?$B$"(B)))
83
84(defun skkdic-lookup-key (seq len &optional postfix)
85 "Return a list of conversion string for sequence SEQ of length LEN.
86
87SEQ is a vector of Kana characters to be converted by SKK dictionary.
88If LEN is shorter than the length of KEYSEQ, the first LEN keys in SEQ
89are took into account.
90
91Postfixes are handled only if the optional argument POSTFIX is non-nil."
92 (or skkdic-okuri-nasi
93 (condition-case err
94 (load-library "skk/skkdic")
95 (error (ding)
96 (with-output-to-temp-buffer "*Help*"
97 (princ "The library `skkdic' can't be loaded.
98
99The most common case is that you have not yet installed the library
100included in LEIM (Libraries of Emacs Input Method) which is
101distributed separately from Emacs.
102
103LEIM is available from the same ftp directory as Emacs."))
104 (signal (car err) (cdr err)))))
105
106 (let ((vec (make-vector len 0))
107 (i 0)
108 entry)
109 ;; At first, generate vector VEC from SEQ for looking up SKK
110 ;; alists. Nth element in VEC corresponds to Nth element in SEQ.
111 ;; The values are decided as follows.
112 ;; If SEQ[N] is `$B!<(B', VEC[N] is 0,
113 ;; Else if SEQ[N] is a Hiragana character, VEC[N] is:
114 ;; ((The 2nd position code o SEQ[N]) - 32),
115 ;; ELse VEC[N] is 128.
116 (while (< i len)
117 (let ((ch (aref seq i))
118 elts)
119 (if (= ch ?$B!<(B)
120 (aset vec i 0)
121 (setq elts (split-char ch))
122 (if (and (eq (car elts) 'japanese-jisx0208)
123 (= (nth 1 elts) skkdic-jisx0208-hiragana-block))
124 (aset vec i (- (nth 2 elts) 32))
125 (aset vec i 128))))
126 (setq i (1+ i)))
127
128 ;; Search OKURI-NASI entries.
129 (setq entry (lookup-nested-alist vec skkdic-okuri-nasi len 0 t))
130 (if (consp (car entry))
131 (setq entry (copy-sequence (car entry)))
132 (setq entry nil))
133
134 (if postfix
135 ;; Search OKURI-NASI entries with postfixes.
136 (let ((break (max (- len (car skkdic-postfix)) 1))
137 entry-head entry-postfix entry2)
138 (while (< break len)
139 (if (and (setq entry-head
140 (lookup-nested-alist vec skkdic-okuri-nasi
141 break 0 t))
142 (consp (car entry-head))
143 (setq entry-postfix
144 (lookup-nested-alist vec skkdic-postfix
145 len break t))
146 (consp (car entry-postfix))
147 (setq entry2 (skkdic-merge-head-and-tail
148 (car entry-head) (car entry-postfix) t)))
149 (if entry
150 (nconc entry entry2)
151 (setq entry entry2)))
152 (setq break (1+ break)))))
153
154 ;; Search OKURI-NASI entries with prefixes.
155 (let ((break (min (car skkdic-prefix) (- len 2)))
156 entry-prefix entry-tail entry2)
157 (while (> break 0)
158 (if (and (setq entry-prefix
159 (lookup-nested-alist vec skkdic-prefix break 0 t))
160 (consp (car entry-prefix))
161 (setq entry-tail
162 (lookup-nested-alist vec skkdic-okuri-nasi len break t))
163 (consp (car entry-tail))
164 (setq entry2 (skkdic-merge-head-and-tail
165 (car entry-prefix) (car entry-tail) nil)))
166 (if entry
167 (nconc entry entry2)
168 (setq entry entry2)))
169 (setq break (1- break))))
170
171 ;; Search OKURI-ARI entries.
172 (let ((okurigana (assq (aref seq (1- len)) skkdic-okurigana-table))
173 orig-element entry2)
174 (if okurigana
175 (progn
176 (setq orig-element (aref vec (1- len)))
177 (aset vec (1- len) (- (cdr okurigana)))
178 (if (and (setq entry2 (lookup-nested-alist vec skkdic-okuri-ari
179 len 0 t))
180 (consp (car entry2)))
181 (progn
182 (setq entry2 (copy-sequence (car entry2)))
183 (let ((l entry2)
184 (okuri (char-to-string (aref seq (1- len)))))
185 (while l
186 (setcar l (concat (car l) okuri))
187 (setq l (cdr l)))
188 (if entry
189 (nconc entry entry2)
190 (setq entry entry2)))))
191 (aset vec (1- len) orig-element))))
192
193 entry))
194
195;;
196(provide 'skkdic-utl)
197
198;; skkdic-utl.el ends here
diff --git a/lisp/international/titdic-cnv.el b/lisp/international/titdic-cnv.el
new file mode 100644
index 00000000000..382a666735f
--- /dev/null
+++ b/lisp/international/titdic-cnv.el
@@ -0,0 +1,403 @@
1;;; titdic-cnv.el --- convert TIT dictionary to Quail package
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: Quail, TIT, cxterm
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Comments:
25
26;; Convert TIT format dictionary (of cxterm) to quail-package.
27;;
28;; Usage (within Emacs):
29;; M-x titdic-convert<CR>TIT-FILE-NAME<CR>
30;; Usage (from shell):
31;; % emacs -batch -l titdic-convert -f batch-titdic-convert\
32;; [-dir DIR] [DIR | FILE] ...
33;;
34;; When you run titdic-convert within Emacs, you have a chance to
35;; modify arguments of `quail-define-package' before saving the
36;; converted file. For instance, you are likely to modify TITLE,
37;; DOCSTRING, and KEY-BINDINGS.
38
39;; TIT dictionary file (*.tit) is a line-oriented text (English,
40;; Chinese, Japanese, and Korean) file. The whole file contains of
41;; two parts, the definition part (`header' here after) followed by
42;; the dictionary part (`body' here after). All lines begin with
43;; leading '#' are ignored.
44;;
45;; Each line in the header part has two fields, KEY and VALUE. These
46;; fields are separated by one or more white characters.
47;;
48;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
49;; These fields are separated by one or more white characters.
50;;
51;; See the manual page of `tit2cit' of cxterm distribution for more
52;; detail.
53
54;;; Code:
55
56(require 'quail)
57
58;; List of values of key "ENCODE:" and the corresponding Emacs'
59;; coding-system and language environment name.
60(defvar tit-encode-list
61 '(("GB" coding-system-euc-china "Chinese-GB")
62 ("BIG5" coding-system-big5 "Chinese-BIG5")
63 ("JIS" coding-system-euc-japan "Japanese")
64 ("KS" coding-system-euc-korea "Korean")))
65
66;; Return a value of the key in the current line.
67(defsubst tit-read-key-value ()
68 (if (looking-at "[^ \t\n]+")
69 (car (read-from-string (concat "\"" (match-string 0) "\"")))))
70
71;; Return an appropriate quail-package filename from FILENAME (TIT
72;; dictionary filename). For instance, ".../ZOZY.tit" -> "zozy.el".
73(defun tit-make-quail-package-name (filename &optional dirname)
74 (expand-file-name
75 (concat (downcase (file-name-nondirectory (substring filename 0 -4))) ".el")
76 dirname))
77
78;; This value is t if we are processing phrase dictionary.
79(defvar tit-phrase nil)
80(defvar tit-encode nil)
81(defvar tit-default-encode "GB")
82
83;; Generate elements of KEY-BINDINGS arg for `quail-define-package' so
84;; that each characters in KEYS invokes FUNCTION-SYMBOL.
85(defun tit-generate-key-bindings (keys function-symbol)
86 (let ((len (length keys))
87 (i 0)
88 key)
89 (while (< i len)
90 (setq key (aref keys i))
91 (indent-to 3)
92 (if (< key ?\ )
93 (if (eq (lookup-key quail-translation-keymap (char-to-string key))
94 'quail-execute-non-quail-command)
95 (insert (format "(\"\\C-%c\" . %s)\n"
96 (+ key ?@) function-symbol)))
97 (if (< key 127)
98 (insert (format "(\"%c\" . %s)\n" key function-symbol))
99 (insert (format "(\"\\C-?\" . %s)\n" function-symbol))))
100 (setq i (1+ i)))))
101
102;; Analyze header part of TIT dictionary and generate an appropriate
103;; `quail-define-package' function call.
104(defun tit-process-header (filename)
105 (message "Processing header part...")
106 (goto-char (point-min))
107
108 (let (;; TIT keywords and the corresponding default values.
109 (tit-multichoice t)
110 (tit-prompt "")
111 (tit-comments nil)
112 (tit-backspace "\010\177")
113 (tit-deleteall "\015\025")
114 (tit-moveright ".>")
115 (tit-moveleft ",<")
116 (tit-keyprompt nil))
117 ;; At first, collect information from the header.
118 (while (not (eobp))
119 (insert ";; ")
120 (let ((ch (following-char)))
121 (cond ((= ch ?C) ; COMMENT
122 (cond ((looking-at "COMMENT")
123 (let ((pos (match-end 0)))
124 (end-of-line)
125 (while (re-search-backward "[\"\\]" pos t)
126 (insert "\\")
127 (forward-char -1))
128 (end-of-line)
129 (setq tit-comments (cons (buffer-substring pos (point))
130 tit-comments))))))
131 ((= ch ?M) ; MULTICHOICE, MOVERIGHT, MOVELEFT
132 (cond ((looking-at "MULTICHOICE:[ \t]*")
133 (goto-char (match-end 0))
134 (setq tit-multichoice (looking-at "YES")))
135 ((looking-at "MOVERIGHT:[ \t]*")
136 (goto-char (match-end 0))
137 (setq tit-moveright (tit-read-key-value)))
138 ((looking-at "MOVELEFT:[ \t]*")
139 (goto-char (match-end 0))
140 (setq tit-moveleft (tit-read-key-value)))))
141 ((= ch ?P) ; PROMPT
142 (cond ((looking-at "PROMPT:[ \t]*")
143 (goto-char (match-end 0))
144 (setq tit-prompt (tit-read-key-value)))))
145 ((= ch ?B) ; BACKSPACE, BEGINDICTIONARY,
146 ; BEGINPHRASE
147 (cond ((looking-at "BACKSPACE:[ \t]*")
148 (goto-char (match-end 0))
149 (setq tit-backspace (tit-read-key-value)))
150 ((looking-at "BEGINDICTIONARY")
151 (setq tit-phrase nil))
152 ((looking-at "BEGINPHRASE")
153 (setq tit-phrase t))))
154 ((= ch ?K) ; KEYPROMPT
155 (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
156 (let ((key-char (match-string 1)))
157 (goto-char (match-end 0))
158 (setq tit-keyprompt
159 (cons (cons key-char (tit-read-key-value))
160 tit-keyprompt))))))))
161 (forward-line 1))
162
163 ;; Then, generate header part of the Quail package.
164 (goto-char (point-min))
165 (insert ";; Quail package `"
166 (substring (file-name-nondirectory buffer-file-name) 0 -3)
167 "' generated by the command `titdic-convert'\n"
168 ";;\tDate: " (current-time-string) "\n"
169 ";;\tOriginal TIT dictionary file: "
170 (file-name-nondirectory filename)
171 "\n\n"
172 ";;; Comment:\n\n"
173 ";; Do byte-compile this file again after any modification.\n\n"
174 ";;; Start of the header of original TIT dictionary.\n\n")
175
176 (goto-char (point-max))
177 (insert "\n"
178 ";;; End of the header of original TIT dictionary.\n\n"
179 ";;; Code:\n\n"
180 "(require 'quail)\n\n")
181
182 (insert "(quail-define-package ")
183 ;; Args NAME, LANGUAGE, TITLE
184 (insert
185 "\""
186 (concat "quail-"
187 (substring (file-name-nondirectory buffer-file-name) 0 -3))
188 "\" \"" (nth 2 (assoc tit-encode tit-encode-list))
189 "\" \""
190 (if (string-match "[:$A!K$(0!(!J(B]+\\([^:$A!K$(0!(!K(B]+\\)" tit-prompt)
191 (substring tit-prompt (match-beginning 1) (match-end 1))
192 tit-prompt)
193 "\"\n")
194
195 ;; Arg GUIDANCE
196 (if tit-keyprompt
197 (progn
198 (insert " '(")
199 (while tit-keyprompt
200 (indent-to 3)
201 (insert (format "(%d . \"%s\")\n"
202 (string-to-char (car (car tit-keyprompt)))
203 (cdr (car tit-keyprompt))))
204 (setq tit-keyprompt (cdr tit-keyprompt)))
205 (forward-char -1)
206 (insert ")")
207 (forward-char 1))
208 (insert " t\n"))
209
210 ;; Arg DOCSTRING
211 (insert "\"" tit-prompt "\n")
212 (let ((l (nreverse tit-comments)))
213 (while l
214 (insert (format "%s\n" (car l)))
215 (setq l (cdr l))))
216 (insert "\"\n")
217
218 ;; Arg KEY-BINDINGS
219 (insert " '(")
220 (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
221 (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
222 (tit-generate-key-bindings tit-moveright 'quail-next-translation)
223 (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
224 (forward-char -1)
225 (insert ")")
226 (forward-char 1)
227
228 ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
229 ;; The remaining args are all nil.
230 (insert " nil"
231 (if tit-multichoice " nil" " t")
232 (if tit-keyprompt " t t)\n\n" " nil nil)\n\n")))
233
234 ;; Return the position of end of the header.
235 (point-max))
236
237;; Convert body part of TIT dictionary into `quail-define-rules'
238;; function call.
239(defun tit-process-body ()
240 (message "Formatting translation rules...")
241 (let ((enable-multibyte-characters nil)
242 (keyseq "\000")
243 pos)
244 (insert "(quail-define-rules\n")
245 (while (null (eobp))
246 (if (or (= (following-char) ?#) (= (following-char) ?\n))
247 (insert ";; ")
248 (insert "(\"")
249 (setq pos (point))
250 (skip-chars-forward "^ \t")
251 (setq keyseq
252 (concat (regexp-quote (buffer-substring pos (point))) "[ \t]+"))
253 (save-excursion
254 (while (re-search-backward "[\\\"]" pos t)
255 (insert "\\")
256 (forward-char -1)))
257 (insert "\"")
258 (skip-chars-forward " \t")
259
260 ;; Now point is at the start of translations. Remember it in
261 ;; POS and combine lines of the same key sequence while
262 ;; deleting trailing white spaces and comments (start with
263 ;; '#'). POS doesn't has to be a marker because we never
264 ;; modify region before POS.
265 (setq pos (point))
266 (if (looking-at "[^ \t]*\\([ \t]*#.*\\)")
267 (delete-region (match-beginning 1) (match-end 1)))
268 (while (and (= (forward-line 1) 0)
269 (looking-at keyseq))
270 (let ((p (match-end 0)))
271 (skip-chars-backward " \t\n")
272 (delete-region (point) p)
273 (if tit-phrase (insert " "))
274 (if (looking-at "[^ \t]*\\([ \t]*#.*\\)")
275 (delete-region (match-beginning 1) (match-end 1)))
276 ))
277
278 ;; Modify the current line to meet the syntax of Quail package.
279 (goto-char pos)
280 (if tit-phrase
281 (progn
282 ;; PHRASE1 PHRASE2 ... => ["PHRASE1" "PHRASE2" ...]
283 (insert "[\"")
284 (skip-chars-forward "^ \t\n")
285 (while (not (eolp))
286 (insert "\"")
287 (forward-char 1)
288 (insert "\"")
289 (skip-chars-forward "^ \t\n"))
290 (insert "\"])"))
291 ;; TRANSLATIONS => "TRANSLATIONS"
292 (insert "\"")
293 (end-of-line)
294 (insert "\")")))
295 (forward-line 1))
296 (insert ")\n")))
297
298;;;###autoload
299(defun titdic-convert (filename &optional dirname)
300 "Convert a TIT dictionary of FILENAME into a Quail package.
301Optional argument DIRNAME if specified is the directory name under which
302the generated Quail package is saved."
303 (interactive "FTIT dictionary file: ")
304 (let ((buf (get-buffer-create "*tit-work*")))
305 (save-excursion
306 ;; Setup the buffer.
307 (set-buffer buf)
308 (erase-buffer)
309 (let ((coding-system-for-read 'no-conversion))
310 (insert-file-contents (expand-file-name filename)))
311 (set-visited-file-name (tit-make-quail-package-name filename dirname) t)
312 (set-buffer-file-coding-system 'coding-system-iso-2022-7)
313
314 ;; Decode the buffer contents from the encoding specified by a
315 ;; value of the key "ENCODE:".
316 (let (coding-system)
317 (save-excursion
318 (if (search-forward "\nBEGIN" nil t)
319 (let ((limit (point))
320 slot)
321 (goto-char 1)
322 (if (re-search-forward "^ENCODE:[ \t]*" limit t)
323 (progn
324 (goto-char (match-end 0))
325 (setq tit-encode (tit-read-key-value)))
326 (setq tit-encode tit-default-encode))
327 (setq slot (assoc tit-encode tit-encode-list))
328 (if slot
329 (setq coding-system (nth 1 slot))
330 (error "Invalid ENCODE: value in TIT dictionary")))
331 (error "TIT dictionary doesn't have body part")))
332 (message "Decoding %s..." coding-system)
333 (goto-char 1)
334 (decode-coding-region 1 (point-max) coding-system))
335
336 ;; Set point the starting position of the body part.
337 (goto-char 1)
338 (if (search-forward "\nBEGIN" nil t)
339 (forward-line 1)
340 (error "TIT dictionary can't be decoded correctly"))
341
342 ;; Now process the header and body parts.
343 (goto-char
344 (save-excursion
345 (save-restriction
346 (narrow-to-region 1 (point))
347 (tit-process-header filename))))
348 (tit-process-body))
349
350 (if noninteractive
351 ;; Save the Quail package file.
352 (save-excursion
353 (set-buffer buf)
354 (save-buffer 0))
355 ;; Show the Quail package just generated.
356 (switch-to-buffer buf)
357 (goto-char 1)
358 (message "Save this buffer after you make any modification"))))
359
360;;;###autoload
361(defun batch-titdic-convert ()
362 "Run `titdic-convert' on the files remaining on the command line.
363Use this from the command line, with `-batch';
364it won't work in an interactive Emacs.
365For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
366 generate Quail package file \"xxx.el\" from TIT dictionary file \"XXX.tit\".
367To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
368 (defvar command-line-args-left) ; Avoid compiler warning.
369 (if (not noninteractive)
370 (error "`batch-titdic-convert' should be used only with -batch"))
371 (if (string= (car command-line-args-left) "-h")
372 (progn
373 (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
374 (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
375 (message "To convert XXX.tit into DIR/xxx.el:")
376 (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
377 (let (targetdir filename files file)
378 (if (string= (car command-line-args-left) "-dir")
379 (progn
380 (setq command-line-args-left (cdr command-line-args-left))
381 (setq targetdir (car command-line-args-left))
382 (setq command-line-args-left (cdr command-line-args-left))))
383 (while command-line-args-left
384 (setq filename (expand-file-name (car command-line-args-left)))
385 (if (file-directory-p filename)
386 (progn
387 (message "Converting all tit files in the directory %s" filename)
388 (setq files (directory-files filename t "\\.tit$")))
389 (setq files (list filename)))
390 (while files
391 (setq file (expand-file-name (car files)))
392 (if (file-newer-than-file-p
393 file (tit-make-quail-package-name file targetdir))
394 (progn
395 (message "Converting %s to quail-package..." file)
396 (titdic-convert file targetdir)))
397 (setq files (cdr files)))
398 (setq command-line-args-left (cdr command-line-args-left)))
399 (message "Do byte-compile the created files by:")
400 (message " %% emacs -batch -f batch-byte-compile XXX.el")))
401 (kill-emacs 0))
402
403;;; titdic-cnv.el ends here
diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el
new file mode 100644
index 00000000000..902919db440
--- /dev/null
+++ b/lisp/language/china-util.el
@@ -0,0 +1,155 @@
1;; china-util.el -- utilities for Chinese
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Chinese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26;; Hz/ZW encoding stuffs
27
28;; HZ is an encoding method for Chinese character set GB2312 used
29;; widely in Internet. It is very similar to 7-bit environment of
30;; ISO-2022. The difference is that HZ uses the sequence "~{" and
31;; "~}" for designating GB2312 and ASCII respectively, hence, it
32;; doesn't uses ESC (0x1B) code.
33
34;; ZW is another encoding method for Chinese character set GB2312. It
35;; encodes Chinese characters line by line by starting each line with
36;; the sequence "zW". It also uses only 7-bit as HZ.
37
38;; ISO-2022 escape sequence to designate GB2312.
39(defvar iso2022-gb-designation "\e$A")
40;; HZ escape sequence to designate GB2312.
41(defvar hz-gb-designnation "~{")
42;; ISO-2022 escape sequence to designate ASCII.
43(defvar iso2022-ascii-designation "\e(B")
44;; HZ escape sequence to designate ASCII.
45(defvar hz-ascii-designnation "~}")
46;; Regexp of ZW sequence to start GB2312.
47(defvar zw-start-gb "^zW")
48;; Regexp for start of GB2312 in an encoding mixture of HZ and ZW.
49(defvar hz/zw-start-gb (concat hz-gb-designnation "\\|" zw-start-gb))
50
51(defvar decode-hz-line-continuation nil
52 "Flag to tell if we should care line continuation convention of Hz.")
53
54;;;###autoload
55(defun decode-hz-region (beg end)
56 "Decode HZ/ZW encoded text in the current region.
57Return the length of resulting text."
58 (interactive "r")
59 (save-excursion
60 (save-restriction
61 (narrow-to-region beg end)
62
63 ;; We, at first, convert HZ/ZW to `coding-system-iso-2022-7',
64 ;; then decode it.
65
66 ;; "~\n" -> "\n"
67 (goto-char (point-min))
68 (while (search-forward "~" nil t)
69 (if (= (following-char) ?\n) (delete-char -1))
70 (if (not (eobp)) (forward-char 1)))
71
72 ;; "^zW...\n" -> Chinese GB2312
73 ;; "~{...~}" -> Chinese GB2312
74 (goto-char (point-min))
75 (let ((chinese-found nil))
76 (while (re-search-forward hz/zw-start-gb nil t)
77 (if (= (char-after (match-beginning 0)) ?z)
78 ;; ZW -> coding-system-iso-20227-7
79 (progn
80 (delete-char -2)
81 (insert iso2022-gb-designation)
82 (end-of-line)
83 (insert iso2022-ascii-designation))
84 ;; HZ -> coding-system-iso-20227-7
85 (delete-char -2)
86 (insert iso2022-gb-designation)
87 (let ((pos (save-excursion (end-of-line) (point))))
88 (if (search-forward hz-ascii-designnation pos t)
89 (replace-match iso2022-ascii-designation)
90 (if (not decode-hz-line-continuation)
91 (insert iso2022-ascii-designation)))))
92 (setq chinese-found t))
93 (if (or chinese-found
94 (let ((enable-multibyte-characters nil))
95 ;; Here we check if the text contains EUC (China) codes.
96 ;; If any, we had better decode them also.
97 (goto-char (point-min))
98 (re-search-forward "[\240-\377]" nil t)))
99 (decode-coding-region (point-min) (point-max)
100 'coding-system-euc-china)))
101
102 ;; "~~" -> "~"
103 (goto-char (point-min))
104 (while (search-forward "~~" nil t) (delete-char -1))
105 (- (point-max) (point-min)))))
106
107;;;###autoload
108(defun decode-hz-buffer ()
109 "Decode HZ/ZW encoded text in the current buffer."
110 (interactive)
111 (decode-hz-region (point-min) (point-max)))
112
113;;;###autoload
114(defun encode-hz-region (beg end)
115 "Encode the text in the current region to HZ.
116Return the length of resulting text."
117 (interactive "r")
118 (save-excursion
119 (save-restriction
120 (narrow-to-region beg end)
121
122 ;; "~" -> "~~"
123 (goto-char (point-min))
124 (while (search-forward "~" nil t) (insert ?~))
125
126 ;; Chinese GB2312 -> "~{...~}"
127 (goto-char (point-min))
128 (if (re-search-forward "\\cc" nil t)
129 (let ((enable-multibyte-characters nil)
130 pos)
131 (goto-char (setq pos (match-beginning 0)))
132 (encode-coding-region pos (point-max) 'coding-system-iso-2022-7)
133 (goto-char pos)
134 (while (search-forward iso2022-gb-designation nil t)
135 (delete-char -3)
136 (insert hz-gb-designnation))
137 (goto-char pos)
138 (while (search-forward iso2022-ascii-designation nil t)
139 (delete-char -3)
140 (insert hz-ascii-designnation))))
141 (- (point-max) (point-min)))))
142
143;;;###autoload
144(defun encode-hz-buffer ()
145 "Encode the text in the current buffer to HZ."
146 (interactive)
147 (encode-hz-region (point-min) (point-max)))
148
149;;
150(provide 'language/china-util)
151
152;;; Local Variables:
153;;; generated-autoload-file: "../loaddefs.el"
154;;; End:
155;;; china-util.el ends here
diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el
new file mode 100644
index 00000000000..5e90963d11b
--- /dev/null
+++ b/lisp/language/chinese.el
@@ -0,0 +1,236 @@
1;;; chinese.el --- Support for Chinese
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Chinese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are
27;; supported.
28
29;;; Code:
30
31;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32;;; Chinese (general)
33;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34
35(make-coding-system
36 'coding-system-iso-2022-cn 2 ?C
37 "Coding system ISO-2022-CN for Chinese (GB and CNS character sets)."
38 '(ascii
39 (nil chinese-gb2312 chinese-cns11643-1)
40 (nil chinese-cns11643-2)
41 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
42 chinese-cns11643-6 chinese-cns11643-7)
43 nil ascii-eol ascii-cntl seven locking-shift single-shift))
44
45;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46;;; Chinese GB2312 (simplified)
47;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48
49(make-coding-system
50 'coding-system-euc-china 2 ?C
51 "Coding-system of Chinese EUC (so called GB Encoding)."
52 '((ascii t) chinese-gb2312 chinese-sisheng nil
53 nil ascii-eol ascii-cntl nil nil single-shift nil))
54
55(make-coding-system
56 'coding-system-hz 0 ?z
57 "Codins-system of Hz/ZW used for Chinese (GB)."
58 nil)
59(put 'coding-system-hz 'post-read-conversion 'post-read-decode-hz)
60(put 'coding-system-hz 'pre-write-conversion 'pre-write-encode-hz)
61
62(defun post-read-decode-hz (len)
63 (let ((pos (point)))
64 (decode-hz-region pos (+ pos len))))
65
66(defun pre-write-encode-hz (from to)
67 (let ((buf (current-buffer))
68 (work (get-buffer-create " *pre-write-encoding-work*")))
69 (set-buffer work)
70 (widen)
71 (erase-buffer)
72 (insert-buffer-substring buf from to)
73 (encode-hz-region 1 (point-max))
74 nil))
75
76(register-input-method
77 "Chinese-GB" '("quail-ccdospy" quail-use-package "quail/ccdospy"))
78(register-input-method
79 "Chinese-GB" '("quail-ctlau" quail-use-package "quail/ctlau"))
80(register-input-method
81 "Chinese-GB" '("quail-punct" quail-use-package "quail/punct"))
82(register-input-method
83 "Chinese-GB" '("quail-qj" quail-use-package "quail/qj"))
84(register-input-method
85 "Chinese-GB" '("quail-sw" quail-use-package "quail/sw"))
86(register-input-method
87 "Chinese-GB" '("quail-ziranma" quail-use-package "quail/ziranma"))
88(register-input-method
89 "Chinese-GB" '("quail-tonepy" quail-use-package "quail/tonepy"))
90(register-input-method
91 "Chinese-GB" '("quail-py" quail-use-package "quail/py"))
92
93(defun setup-chinese-gb-environment ()
94 (setq primary-language "Chinese-GB")
95
96 (setq coding-category-iso-8-2 'coding-system-euc-china)
97 (setq coding-category-iso-else 'coding-system-iso-2022-cn)
98
99 (set-coding-priority
100 '(coding-category-iso-7
101 coding-category-iso-else
102 coding-category-iso-8-2
103 coding-category-big5
104 coding-category-iso-8-1
105 coding-category-internal
106 ))
107
108 (setq-default buffer-file-coding-system 'coding-system-euc-china)
109 (set-terminal-coding-system 'coding-system-euc-china)
110 (set-keyboard-coding-system 'coding-system-euc-china)
111
112 (setq default-input-method '("Chinese-GB" . "quail-py"))
113 )
114
115(set-language-info-alist
116 "Chinese" '((documentation .
117"Emacs provides three kinds of Chinese support: Chinese-GB,
118Chinese-BIG5, and Chinese-CNS. Please specify one of them to get more
119information.")
120 (setup-function . setup-chinese-gb-environment)
121 (charset . (chinese-gb2312 chinese-sisheng))
122 (coding-system . (coding-system-euc-china
123 coding-system-hz
124 coding-system-iso-2022-cn))
125 (documentation . t)
126 (sample-text . "Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B")))
127
128;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
129;; Chinese BIG5 (traditional)
130;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
131
132(make-coding-system
133 'coding-system-big5 3 ?B
134 "Coding-system of BIG5.")
135
136;; Big5 font requires special encoding.
137(define-ccl-program ccl-encode-big5-font
138 `(0
139 ;; In: R0:chinese-big5-1 or chinese-big5-2
140 ;; R1:position code 1
141 ;; R2:position code 2
142 ;; Out: R1:font code point 1
143 ;; R2:font code point 2
144 ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21))
145 (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
146 (r1 = ((r2 / 157) + ?\xA1))
147 (r2 %= 157)
148 (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62))))
149 "CCL program to encode a Big5 code to code point of Big5 font.")
150
151(setq font-ccl-encoder-alist
152 (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist))
153
154(register-input-method
155 "Chinese-BIG5" '("quail-qj-b5" quail-use-package "quail/qj-b5"))
156(register-input-method
157 "Chinese-BIG5" '("quail-zozy" quail-use-package "quail/zozy"))
158(register-input-method
159 "Chinese-BIG5" '("quail-tsangchi-b5" quail-use-package "quail/tsangchi-b5"))
160(register-input-method
161 "Chinese-BIG5" '("quail-py-b5" quail-use-package "quail/py-b5"))
162(register-input-method
163 "Chinese-BIG5" '("quail-quick-b5" quail-use-package "quail/quick-bt"))
164(register-input-method
165 "Chinese-BIG5" '("quail-etzy" quail-use-package "quail/etzy"))
166(register-input-method
167 "Chinese-BIG5" '("quail-ecdict" quail-use-package "quail/ecdict"))
168(register-input-method
169 "Chinese-BIG5" '("quail-ctlaub" quail-use-package "quail/ctlaub"))
170(register-input-method
171 "Chinese-BIG5" '("quail-array30" quail-use-package "quail/array30"))
172(register-input-method
173 "Chinese-BIG5" '("quail-4corner" quail-use-package "quail/4corner"))
174
175(defun setup-chinese-big5-environment ()
176 (setq primary-language "Chinese-BIG5")
177
178 (setq coding-category-big5 'coding-system-big5)
179 (setq coding-category-iso-else 'coding-system-iso-2022-cn)
180
181 (set-coding-priority
182 '(coding-category-iso-7
183 coding-category-iso-else
184 coding-category-big5))
185
186 (setq-default buffer-file-coding-system 'coding-system-big5)
187 (set-terminal-coding-system 'coding-system-big5)
188 (set-keyboard-coding-system 'coding-system-big5)
189
190 (setq default-input-method '("Chinese-BIG5" . "quail-py-b5"))
191 )
192
193(set-language-info-alist
194 "Chinese-BIG5" '((setup-function . setup-chinese-big5-environment)
195 (charset . (chinese-big5-1 chinese-big5-2))
196 (coding-system . (coding-system-big5
197 coding-system-iso-2022-cn))
198 (documentation . t)
199 (sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B")))
200
201;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
202;; Chinese CNS11643 (traditional)
203;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
204
205(register-input-method
206 "Chinese-CNS" '("quail-quick-cns" quail-use-package "quail/quick-cns"))
207(register-input-method
208 "Chinese-CNS" '("quail-tsangchi-cns" quail-use-package "quail/tsangchi-cns"))
209
210(defun setup-chinese-cns-environment ()
211 (setq primary-language "Chinese-CNS")
212
213 (setq coding-category-iso-else 'coding-system-iso-2022-cn)
214
215 (set-coding-priority
216 '(coding-category-iso-7
217 coding-category-iso-else
218 coding-category-big5))
219
220 (setq-default buffer-file-coding-system 'coding-system-iso-2022-7)
221 (set-terminal-coding-system 'coding-system-iso-2022-7)
222 (set-keyboard-coding-system 'coding-system-iso-2022-7)
223
224 (setq default-input-method '("Chinese-CNS" . "quail-py-cns"))
225 )
226
227(set-language-info-alist
228 "Chinese-CNS" '((setup-function . setup-chinese-cns-environment)
229 (charset . (chinese-cns11643-1 chinese-cns11643-2
230 chinese-cns11643-3 chinese-cns11643-4
231 chinese-cns11643-5 chinese-cns11643-6
232 chinese-cns11643-7))
233 (coding-system . (coding-system-iso-2022-cn))
234 (documentation . t)))
235
236;;; chinese.el ends here
diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el
new file mode 100644
index 00000000000..1ff084a5362
--- /dev/null
+++ b/lisp/language/cyrillic.el
@@ -0,0 +1,71 @@
1;;; cyrillic.el --- Support for languages which use Cyrillic characters
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Cyrillic
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; The character set ISO8859-5 is supported.
27
28;;; Code:
29
30(make-coding-system
31 'coding-system-iso-8859-5 2 ?5 "MIME ISO-8859-5"
32 '((ascii t) (cyrillic-iso8859-5 t) nil nil
33 nil ascii-eol ascii-cntl nil nil nil nil))
34
35(register-input-method
36 "Cyrillic" '("quail-jcuken" quail-use-package "quail/cyrillic"))
37(register-input-method
38 "Cyrillic" '("quail-macedonian" quail-use-package "quail/cyrillic"))
39(register-input-method
40 "Cyrillic" '("quail-serbian" quail-use-package "quail/cyrillic"))
41(register-input-method
42 "Cyrillic" '("quail-beylorussian" quail-use-package "quail/cyrillic"))
43(register-input-method
44 "Cyrillic" '("quail-ukrainian" quail-use-package "quail/cyrillic"))
45(register-input-method
46 "Cyrillic" '("quail-yawerty" quail-use-package "quail/cyrillic"))
47
48(defun setup-cyrillic-environment ()
49 (setq primary-language "Cyrillic")
50
51 (setq coding-category-iso-8-1 'coding-system-iso-8859-5)
52
53 (set-coding-priority
54 '(coding-category-iso-7
55 coding-category-iso-8-1))
56
57 (setq-default buffer-file-coding-system 'coding-system-iso-8859-5)
58 (set-terminal-coding-system 'coding-system-iso-8859-5)
59 (set-keyboard-coding-system 'coding-system-iso-8859-5)
60
61 (setq default-input-method '("Cyrillic" . "quail-yawerty"))
62 )
63
64(set-language-info-alist
65 "Cyrillic" '((setup-function . setup-cyrillic-environment)
66 (charset . (cyrillic-iso8859-5))
67 (coding-system . (coding-system-iso-8859-5))
68 (documentation . t)
69 (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")))
70
71;;; cyrillic.el ends here
diff --git a/lisp/language/devan-util.el b/lisp/language/devan-util.el
new file mode 100644
index 00000000000..9bdffa4586e
--- /dev/null
+++ b/lisp/language/devan-util.el
@@ -0,0 +1,1160 @@
1;;; devan-util.el --- Support for Devanagari Script Composition
2
3;; Copyright (C) 1996 Free Software Foundation, Inc.
4
5;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
6
7;; Keywords: multilingual, Indian, Devanagari
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to
23;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24
25;;; Commentary:
26
27;; History:
28;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
29
30;; Devanagari script composition rules and related programs.
31
32;;; Code:
33
34;;;
35;;; Steps toward composition of Devanagari Characters.
36;;;
37
38;;; Basic functions.
39
40;;;###autoload
41(defun indian-to-devanagari (ch)
42 "Convert IS 13194 characters to Devanagari basic characters."
43 (let ((charcodes (split-char ch)))
44 (if (eq (car charcodes) 'indian-is13194)
45 (make-char 'indian-2-column ?\x21 (nth 1 charcodes))
46 ch)))
47
48;;;###autoload
49(defun devanagari-to-indian (ch)
50 "Convert Devanagari basic characters to IS 13194 characters."
51 (let* ((charcodes (split-char ch))
52 (charset (car charcodes))
53 (code-h (car (cdr charcodes))))
54 (if (and (eq (car charcodes) 'indian-2-column)
55 (= (nth 1 charcodes) ?\x21))
56 (make-char 'indian-is13194 (nth 2 charcodes))
57 ch)))
58
59;;;###autoload
60(defun indian-to-devanagari-region (from to)
61 "Convert IS 13194 characters in region to Devanagari basic characters."
62 (interactive "r")
63 (save-restriction
64 (narrow-to-region from to)
65 (goto-char (point-min))
66 (while (re-search-forward "\\cd" nil t)
67 (let* ((devanagari-char (indian-to-devanagari (preceding-char))))
68 (delete-char -1)
69 (insert devanagari-char)))))
70
71;;;###autoload
72(defun devanagari-to-indian-region (from to)
73 "Convert Devanagari basic characters in region to Indian characters."
74 (interactive "r")
75 (save-restriction
76 (narrow-to-region from to)
77 (goto-char (point-min))
78 (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code.
79 (let* ((indian-char (devanagari-to-indian (preceding-char))))
80 (delete-char -1)
81 (insert indian-char)))))
82
83;;;###autoload
84(defun indian-to-devanagari-string (str)
85 "Convert Indian String to Devanagari Basic Character String."
86 (let ((pos 0) (dst "") (src str) char)
87 (while (not (equal src ""))
88 (setq char (string-to-char src))
89 (setq src (substring src (char-bytes char)))
90 (setq dst (concat dst (char-to-string (indian-to-devanagari char)))))
91 dst))
92
93;; Phase 0 - Determine whether the characters can be composed.
94;;
95;;;
96;;; Regular expressions to split characters for composition.
97;;;
98;;
99;; Indian script word contains one or more syllables.
100;; In BNF, it can be expressed as follows:
101;;
102;; Word ::= {Syllable} [Cons-Syllable]
103;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
104;; Vowel-Syllable ::= V[D]
105;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
106;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] Pure-Cons
107;; Pure-Cons ::= Full-Cons H
108;; Full-Cons ::= C [N]
109;;
110;; {} repeat, [] optional
111;;
112;; C - Consonant ($(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E(B
113;; $(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X(B)
114;; N - Nukta ($(5!i(B)
115;; H - Halant($(5!h(B)
116;; V - Vowel ($(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2(B)
117;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarg ($(5!!!"(B)
118;; M - Matra ($(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g(B)
119;;
120;; In Emacs, one syllable of Indian language is considered to be one
121;; composite glyph. If we expand the above expression, it would be:
122;;
123;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
124;;
125;; Therefore, in worst case, the consonant syllabe will consist of
126;; following characters.
127;;
128;; C N H C N H C N H C N M D
129;;
130;; On the other hand, incomplete consonant syllable before inputting
131;; base consonant must satisfy the following condition:
132;;
133;; [C [N] H] [C [N] H] C [N] H
134;;
135;; This is acceptable BEFORE proper consonant-syllable is input. The
136;; string which doesn't match with the above expression is invalid and
137;; thus must be fixed.
138;;
139;; Note:
140;; Third case can be considered, which is acceptable syllable and can
141;; not add any code more.
142;;
143;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] D
144;;
145;; However, to make editing possible even in this condition, we will
146;; not consider about this case.
147
148(defconst devanagari-cons-syllable-examine
149 "\\(\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?[$(5!Z(B-$(5!g(B]?[$(5!!!"(B]?"
150 "Regexp matching to one Devanagari consonant syllable.")
151
152(defconst devanagari-cons-syllable-incomplete-examine
153 "\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B$"
154 "Regexp matching to one Devanagari incomplete consonant syllable.")
155
156(defconst devanagari-vowel-syllable-examine
157 "[$(5!$(B-$(5!2(B][$(5!!!"!#(B]?"
158 "Regexp matching to one Devanagari vowel syllable.")
159
160;;
161;; Also, digits and virams should be processed other than syllables.
162;;
163;; In IS 13194, Avagrah is obtained by Nukta after Viram, and
164;; OM is obtained by Nukta after Chandrabindu
165;;
166(defconst devanagari-digit-viram-examine
167 "[$(5!q(B-$(5!z!j(B]")
168(defconst devanagari-other-sign-examine
169 "[$(5!!!j(B]$(5!i(B")
170
171(defconst devanagari-composite-glyph-unit-examine
172 (concat "\\(" devanagari-cons-syllable-incomplete-examine
173 "\\)\\|\\(" devanagari-vowel-syllable-examine
174 "\\)\\|\\(" devanagari-digit-viram-examine
175 "\\)\\|\\(" devanagari-cons-syllable-examine
176 "\\)\\|\\(" devanagari-other-sign-examine"\\)")
177 "Regexp matching to Devanagari string to be composed form one glyph.")
178
179;;(put-charset-property charset-devanagari-1-column
180;; 'char-to-glyph 'devanagari-compose-string)
181;;(put-charset-property charset-devanagari-2-column
182;; 'char-to-glyph 'devanagari-compose-string)
183
184;; Sample
185;;
186;;(string-match devanagari-cons-syllable-examine "$(5!X![(B") => 0
187;;(string-match devanagari-cons-syllable-examine "$(5!F!h!D!\(B") => 0
188;;(string-match devanagari-cons-syllable-examine "$(5!X![!F!h!D!\(B") => 0
189
190;;
191;; Steps toward the composition
192;; Converting Character Code to Composite Glyph.
193;;
194;; Example : $(5!X![(B/$(5!F!h!D!\(B
195;;
196;; First, convert Characters to appropriate glyphs.
197;;
198;; => $(5!X![(B/$(5"F!D!\(B
199;;
200;; Then, determine the base glyph, apply-orders and apply-rules.
201;;
202;; => $(5!X(B (ml.mr) $(5![(B / $(5!D(B (ml.mr) $(5"F(B (mr ml) $(5!\(B
203;;
204;; Finally, convert 2-column glyphs to 1-column glyph
205;; if such a glyph exist.
206;;
207;; => $(6![(B (ml.mr) $(6!X(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B
208;;
209;; Compose the glyph.
210;;
211;; => 2$(6!X@![1(B/2$(6!D@"FP!\1(B
212;; => 2$(6!X@![12!D@"FP!\1(B
213;;
214
215;;
216;; Phase 1: Converting Character Code to Glyph Code.
217;;
218;;
219;; IMPORTANT:
220;; There may be many rules which you many want to be suppressed.
221;; In that case, please comment out that rule.
222;;
223;; RULES WILL BE EVALUATED FROM FIRST TO LAST.
224;; PUT MORE SPECIFIC RULES FIRST.
225;;
226;; TO DO:
227;; Prepare multiple specific list of rules for each languages
228;; which adopts Devanagari script.
229;;
230
231
232(defconst devanagari-char-to-glyph-rules
233 '(
234 ;; special form for "ru".
235 ("\\($(5!O!](B\\)" . "$(5",(B")
236 ("\\($(5!O!^(B\\)" . "$(5"-(B")
237 ("\\($(5!P!](B\\)" . "$(5".(B")
238 ("\\($(5!P!^(B\\)" . "$(5"/(B")
239
240 ;; `r' at the top of syllable and followed by other consonants.
241 ;; ("[^$(5!h(B]\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"p(B")
242 ("^\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"p(B")
243
244 ;; If "r" is preceded by the vowel-suppressed consonant
245 ;; (especially those with vertical line), it will be written as
246 ;; slanted line below the preceding consonant character. Some of
247 ;; them are pre-composed as one glyph.
248
249 ("\\($(5!:!i!h!O(B\\)" . "$(5"!(B")
250 ("\\($(5!I!i!h!O(B\\)" . "$(5""(B")
251 ("\\($(5!3!h!O(B\\)" . "$(5"#(B")
252 ("\\($(5!:!h!O(B\\)" . "$(5"$(B")
253 ("\\($(5!B!h!O(B\\)" . "$(5"%(B")
254 ("\\($(5!H!h!O(B\\)" . "$(5"&(B")
255 ("\\($(5!I!h!O(B\\)" . "$(5"'(B")
256 ("\\($(5!U!h!O(B\\)" . "$(5"((B")
257 ("\\($(5!W!h!O(B\\)" . "$(5")(B")
258
259 ;; Ligature Rules
260 ("\\($(5!3!h!B!h!O!h!M(B\\)" . "$(5$!(B")
261 ("\\($(5!3!h!B!h!T(B\\)" . "$(5$"(B")
262 ("\\($(5!3!h!B!h!M(B\\)" . "$(5$#(B")
263 ("\\($(5!3!h!F!h!M(B\\)" . "$(5$$(B")
264 ("\\($(5!3!h!O!h!M(B\\)" . "$(5$%(B")
265 ("\\($(5!3!h!T!h!M(B\\)" . "$(5$&(B")
266 ("\\($(5!3!h!3(B\\)" . "$(5$'(B")
267 ("\\($(5!3!h!B(B\\)" . "$(5$((B")
268 ("\\($(5!3!h!F(B\\)" . "$(5$)(B")
269 ("\\($(5!3!h!L(B\\)" . "$(5$*(B")
270 ("\\($(5!3!h!M(B\\)" . "$(5$+(B")
271 ("\\($(5!3!h!Q(B\\)" . "$(5$,(B")
272 ("\\($(5!3!h!T(B\\)" . "$(5$-(B")
273 ("\\($(5!3!h!V(B\\)" . "$(5$.(B")
274 ("\\($(5!6!h!F(B\\)" . "$(5$/(B")
275 ("\\($(5!7!h!3!h!B!h!M(B\\)" . "$(5$0(B")
276 ("\\($(5!7!h!3!h!V!h!T(B\\)" . "$(5$1(B")
277 ("\\($(5!7!h!3!h!B(B\\)" . "$(5$2(B")
278 ("\\($(5!7!h!3!h!V(B\\)" . "$(5$3(B")
279 ("\\($(5!7!h!6!h!O(B\\)" . "$(5$4(B")
280 ("\\($(5!7!h!3!h!M(B\\)" . "$(5$5(B")
281 ("\\($(5!7!h!4!h!M(B\\)" . "$(5$6(B")
282 ("\\($(5!7!h!5!h!M(B\\)" . "$(5$7(B")
283 ("\\($(5!7!h!6!h!M(B\\)" . "$(5$8(B")
284 ("\\($(5!7!h!3(B\\)" . "$(5$9(B")
285 ("\\($(5!7!h!4(B\\)" . "$(5$:(B")
286 ("\\($(5!7!h!5(B\\)" . "$(5$;(B")
287 ("\\($(5!7!h!6(B\\)" . "$(5$<(B")
288 ("\\($(5!7!h!7(B\\)" . "$(5$=(B")
289 ("\\($(5!7!h!F(B\\)" . "$(5$>(B")
290 ("\\($(5!7!h!L(B\\)" . "$(5$?(B")
291 ("\\($(5!7!h!M(B\\)" . "$(5$@(B")
292 ("\\($(5!8!h!8(B\\)" . "$(5$A(B")
293 ("\\($(5!8!h!<(B\\)" . "$(5$B(B")
294 ("\\($(5!9!h!M(B\\)" . "$(5$C(B")
295 ("\\($(5!:!h!O(B\\)" . "$(5$D(B")
296 ("\\($(5!:!h!h(B\\)" . "$(5$E(B")
297 ("\\($(5!<!h!8(B\\)" . "$(5$F(B")
298 ("\\($(5!<!h!:(B\\)" . "$(5$G(B")
299 ("\\($(5!=!h!3(B\\)" . "$(5$H(B")
300 ("\\($(5!=!h!=(B\\)" . "$(5$I(B")
301 ("\\($(5!=!h!>(B\\)" . "$(5$J(B")
302 ("\\($(5!=!h!M(B\\)" . "$(5$K(B")
303 ("\\($(5!>!h!M(B\\)" . "$(5$L(B")
304 ("\\($(5!?!h!5!h!M(B\\)" . "$(5$M(B")
305 ("\\($(5!?!h!6!h!O(B\\)" . "$(5$N(B")
306 ("\\($(5!?!h!O!h!M(B\\)" . "$(5$O(B")
307 ("\\($(5!?!h!5(B\\)" . "$(5$P(B")
308 ("\\($(5!?!h!6(B\\)" . "$(5$Q(B")
309 ("\\($(5!?!h!?(B\\)" . "$(5$R(B")
310 ("\\($(5!?!h!L(B\\)" . "$(5$S(B")
311 ("\\($(5!?!h!M(B\\)" . "$(5$T(B")
312 ("\\($(5!@!h!M(B\\)" . "$(5$`(B")
313 ("\\($(5!B!h!B(B\\)" . "$(5$a(B")
314 ("\\($(5!B!h!F(B\\)" . "$(5$b(B")
315 ("\\($(5!D!h!D!h!M(B\\)" . "$(5$c(B")
316 ("\\($(5!D!h!E!h!M(B\\)" . "$(5$d(B")
317 ("\\($(5!D!h!K!h!M(B\\)" . "$(5$e(B")
318 ("\\($(5!D!h!O!h!M(B\\)" . "$(5$f(B")
319 ("\\($(5!D!h!T!h!M(B\\)" . "$(5$g(B")
320 ("\\($(5!D!h!5!h!O(B\\)" . "$(5$h(B")
321 ("\\($(5!D!h!6!h!O(B\\)" . "$(5$i(B")
322 ("\\($(5!D!h!D!h!T(B\\)" . "$(5$j(B")
323 ("\\($(5!D!h!E!h!T(B\\)" . "$(5$k(B")
324 ("\\($(5!D!h!5(B\\)" . "$(5$l(B")
325 ("\\($(5!D!h!6(B\\)" . "$(5$m(B")
326 ("\\($(5!D!h!D(B\\)" . "$(5$n(B")
327 ("\\($(5!D!h!E(B\\)" . "$(5$o(B")
328 ("\\($(5!D!h!F(B\\)" . "$(5$p(B")
329 ("\\($(5!D!h!J(B\\)" . "$(5$q(B")
330 ("\\($(5!D!h!K(B\\)" . "$(5$r(B")
331 ("\\($(5!D!h!L(B\\)" . "$(5$s(B")
332 ("\\($(5!D!h!M(B\\)" . "$(5$t(B")
333 ("\\($(5!D!h!T(B\\)" . "$(5$u(B")
334 ("\\($(5!E!h!F(B\\)" . "$(5$v(B")
335 ("\\($(5!F!h!F(B\\)" . "$(5$w(B")
336 ("\\($(5!H!h!B(B\\)" . "$(5$x(B")
337 ("\\($(5!H!h!F(B\\)" . "$(5$y(B")
338 ("\\($(5!H!h!Q(B\\)" . "$(5$z(B")
339 ("\\($(5!J!h!F(B\\)" . "$(5${(B")
340 ("\\($(5!J!h!J(B\\)" . "$(5$|(B")
341 ("\\($(5!J!h!T(B\\)" . "$(5$}(B")
342 ("\\($(5!K!h!F(B\\)" . "$(5$~(B")
343 ("\\($(5!L!h!F(B\\)" . "$(5#P(B")
344 ("\\($(5!L!h!Q(B\\)" . "$(5#Q(B")
345 ("\\($(5!Q!h!Q(B\\)" . "$(5#`(B")
346 ("\\($(5!T!h!F(B\\)" . "$(5#a(B")
347 ("\\($(5!T!h!T(B\\)" . "$(5#b(B")
348 ("\\($(5!U!h!8(B\\)" . "$(5#c(B")
349 ("\\($(5!U!h!F(B\\)" . "$(5#d(B")
350 ("\\($(5!U!h!J(B\\)" . "$(5#e(B")
351 ("\\($(5!U!h!Q(B\\)" . "$(5#f(B")
352 ("\\($(5!U!h!T(B\\)" . "$(5#g(B")
353 ("\\($(5!V!h!=!h!O!h!M(B\\)" . "$(5#h(B")
354 ("\\($(5!V!h!=!h!M(B\\)" . "$(5#i(B")
355 ("\\($(5!V!h!=!h!T(B\\)" . "$(5#j(B")
356 ("\\($(5!V!h!=(B\\)" . "$(5#k(B")
357 ("\\($(5!V!h!>(B\\)" . "$(5#l(B")
358 ("\\($(5!W!h!F(B\\)" . "$(5#m(B")
359 ("\\($(5!W!h!O(B\\)" . "$(5#n(B")
360 ("\\($(5!X!h!A(B\\)" . "$(5#p(B")
361 ("\\($(5!X!h!F(B\\)" . "$(5#q(B")
362 ("\\($(5!X!h!L(B\\)" . "$(5#r(B")
363 ("\\($(5!X!h!M(B\\)" . "$(5#s(B")
364 ("\\($(5!X!h!O(B\\)" . "$(5#t(B")
365 ("\\($(5!X!h!Q(B\\)" . "$(5#u(B")
366 ("\\($(5!X!h!T(B\\)" . "$(5#v(B")
367 ;; Special Ligature Rules
368 ("\\($(5!X!_(B\\)" . "$(5#R(B")
369
370 ;; Half form with ligature. Special "r" case is included. "r"
371 ;; connection which is not listed here has not been examined yet.
372 ;; I don't know what to do with them.
373 ;;
374 ;; special forms
375 ("\\($(5!3!h!V!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"l(B")
376 ("\\($(5!:!h!<!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"m(B")
377 ;; ordinary forms
378 ("\\($(5!5!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"`(B")
379 ("\\($(5!6!h!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"a(B")
380 ;; ("\\($(5!<!h!8!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"c(B") ; Mistake, must check later.
381 ("\\($(5!B!h!B!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"c(B")
382 ("\\($(5!B!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"d(B")
383 ("\\($(5!E!h!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"e(B")
384 ("\\($(5!E!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"f(B")
385 ("\\($(5!H!h!B!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"g(B")
386 ("\\($(5!U!h!8!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"h(B")
387 ("\\($(5!U!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"i(B")
388 ("\\($(5!U!h!T!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"j(B")
389 ;; ("\\($(5!U!h!T!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"k(B") ; must check later.
390 ;; Conjunction form associated with Nukta sign.
391 ("\\($(5!3!i!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"s(B")
392 ("\\($(5!4!i!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"t(B")
393 ("\\($(5!5!i!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"u(B")
394 ("\\($(5!:!i!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"z(B")
395 ("\\($(5!I!i!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"y(B")
396
397 ;; For consonants other than listed above, glyph-composition will
398 ;; be applied. If the consonant which is preceding "$(5!O(B" does not
399 ;; have the vertical line (such as "$(5!?(B"), "$(5"r(B" is put beneath the
400 ;; consonant.
401 ;;
402 ;; ("cons-not-yet-listed-up\\($(5!h!O(B\\)" . "$(5"q(B")
403 ("[$(5!7!9!=!>!?!@!D!O!P!R!S!X(B]\\($(5!h!O(B\\)" . "$(5"r(B")
404 ("$(5!?!i(B\\($(5!h!O(B\\)" . "$(5"r(B")
405 ("$(5!@!i(B\\($(5!h!O(B\\)" . "$(5"r(B")
406
407 ;; Nukta
408 ("\\($(5!!!i(B\\)" . "$(5#!(B")
409 ("\\($(5!&!i(B\\)" . "$(5#&(B")
410 ("\\($(5!'!i(B\\)" . "$(5#'(B")
411 ("\\($(5!*!i(B\\)" . "$(5#*(B")
412 ("\\($(5!3!i(B\\)" . "$(5#3(B")
413 ("\\($(5!4!i(B\\)" . "$(5#4(B")
414 ("\\($(5!5!i(B\\)" . "$(5#5(B")
415 ("\\($(5!:!i(B\\)" . "$(5#:(B")
416 ("\\($(5!?!i(B\\)" . "$(5#?(B")
417 ("\\($(5!@!i(B\\)" . "$(5#@(B")
418 ("\\($(5!I!i(B\\)" . "$(5#I(B")
419 ("\\($(5!j!i(B\\)" . "$(5#J(B")
420
421 ;; Half forms.
422 ("\\($(5!3!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"3(B")
423 ("\\($(5!4!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"4(B")
424 ("\\($(5!5!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"5(B")
425 ("\\($(5!6!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"6(B")
426 ("\\($(5!8!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"8(B")
427 ("\\($(5!:!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5":(B")
428 ("\\($(5!;!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5";(B")
429 ("\\($(5!<!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"<(B")
430 ("\\($(5!A!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"A(B")
431 ("\\($(5!B!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"B(B")
432 ("\\($(5!C!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"C(B")
433 ("\\($(5!E!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"E(B")
434 ("\\($(5!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"F(B")
435 ("\\($(5!G!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"G(B")
436 ("\\($(5!H!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"H(B")
437 ("\\($(5!I!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"I(B")
438 ("\\($(5!J!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"J(B")
439 ("\\($(5!K!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"K(B")
440 ("\\($(5!L!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"L(B")
441 ("\\($(5!M!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"M(B")
442 ("\\($(5!N!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"N(B")
443 ("\\($(5!Q!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"Q(B")
444 ("\\($(5!R!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"R(B")
445 ("\\($(5!S!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"S(B")
446 ("\\($(5!T!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"T(B")
447 ("\\($(5!U!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"U(B")
448 ("\\($(5!V!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"V(B")
449 ("\\($(5!W!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"W(B")
450 )
451 "Alist of regexps of Devanagari character sequences vs composed characters.")
452
453;; Example:
454;;("\\($(5!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"F(B")
455;;(string-match "\\($(5!F!h(B\\)[$(5!3(B-$(5!X(B]" "$(5!X![!F!h!D!\(B") => 8
456;;(match-end 1) => 16
457
458;;
459;; Defining character properties : char-to-glyph, glyph-to-char
460;;
461;; * If char-to-glyph is non-nil, it would be one of the following forms.
462;;
463;; (("character-regexp" . "glyphs")
464;; .....) or
465;; (("character-regexp" . ?glyph)
466;; .....) or
467;; ("characters-regexp" . "glyphs")
468;; or
469;; ?glyph
470;;
471;; * If glyph-to-char is non-nil, it would be one of the following forms.
472;;
473;; (("glyph-regexp" . "characters") ;; This is the only case in Devanagari
474;; ....) or
475;; (("glyph-regexp" . ?character)
476;; ....) or
477;; ("glyph-regexp" . "characters")
478;; or
479;; "characters"
480;; or
481;; ?character
482;;
483
484(let ((rules devanagari-char-to-glyph-rules))
485 (while rules
486 (let ((rule (car rules))
487 (chars) (char) (glyph))
488 (setq rules (cdr rules))
489 (string-match "\\\\(\\(.+\\)\\\\)" (car rule))
490 (setq chars (substring (car rule) (match-beginning 1) (match-end 1)))
491 (setq char (string-to-char chars))
492 (setq glyph (string-to-char (cdr rule))) ; assume one glyph in devan.
493 (put-char-code-property
494 char 'char-to-glyph
495 (append (get-char-code-property char 'char-to-glyph) (list rule)))
496 (put-char-code-property glyph 'glyph-to-char chars))))
497
498;;
499;; Convert Character Code to Glyph Code
500;;
501
502;;;###autoload
503(defun char-to-glyph-devanagari (src-str)
504 "Convert Devanagari characters in the string to Devanagari glyphs.
505Ligatures and special rules are processed."
506 (let ((pos 0)
507 (dst-str ""))
508 (while (< pos (length src-str))
509 (let ((found nil)
510 (rules (get-char-code-property
511 (string-to-char
512 ;; caution. other forms not supported for now.
513 (substring src-str pos)) 'char-to-glyph)))
514 (while rules
515 (let* ((rule (car rules))
516 (regexp (car rule)))
517 (if (string-match regexp src-str)
518 (if (= (match-beginning 1) pos)
519 (progn
520 (setq dst-str (concat dst-str (cdr rule)))
521 (setq rules nil) ; Get out of the loop.
522 (setq found t)
523 ;; proceed `pos' for replaced characters.
524 (setq pos (match-end 1)))
525 (setq rules (cdr rules)))
526 (setq rules (cdr rules)))))
527 ;; proceed to next position
528 (if (not found)
529 (let ((nextchar (string-to-char (substring src-str pos))))
530 (setq pos (+ pos
531 (char-bytes (string-to-char (substring src-str pos)))))
532 (setq dst-str (concat dst-str (char-to-string nextchar)))))))
533 dst-str))
534
535;; Example:
536;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B"
537;;(char-to-glyph-devanagari "$(5!O!Z!V!h!=!h!O![!M(B") => ???
538
539;;
540;; Phase 2: Compose Glyphs to form One Glyph.
541;;
542
543;; Each list consist of glyph, application-priority and application-direction.
544;;
545;; Glyphs will be ordered from low priority number to high priority number.
546;; If application-priority is omitted, it is assumed to be 0.
547;; If application-direction is omitted, it is asumbed to be '(mr . ml).
548
549(defconst devanagari-composition-rules
550 '((?$(5!!(B 60 (tr . br))
551 (?$(5!"(B 60 (tr . br))
552 (?$(5!#(B 60)
553 (?$(5!$(B 0)
554 (?$(5!%(B 0)
555 (?$(5!&(B 0)
556 (?$(5!'(B 0)
557 (?$(5!((B 0)
558 (?$(5!)(B 0)
559 (?$(5!*(B 0)
560 (?$(5!+(B 0)
561 (?$(5!,(B 0)
562 (?$(5!-(B 0)
563 (?$(5!.(B 0)
564 (?$(5!/(B 0)
565 (?$(5!0(B 0)
566 (?$(5!1(B 0)
567 (?$(5!2(B 0)
568 (?$(5!3(B 0)
569 (?$(5!4(B 0)
570 (?$(5!5(B 0)
571 (?$(5!6(B 0)
572 (?$(5!7(B 0)
573 (?$(5!8(B 0)
574 (?$(5!9(B 0)
575 (?$(5!:(B 0)
576 (?$(5!;(B 0)
577 (?$(5!<(B 0)
578 (?$(5!=(B 0)
579 (?$(5!>(B 0)
580 (?$(5!?(B 0)
581 (?$(5!@(B 0)
582 (?$(5!A(B 0)
583 (?$(5!B(B 0)
584 (?$(5!C(B 0)
585 (?$(5!D(B 0)
586 (?$(5!E(B 0)
587 (?$(5!F(B 0)
588 (?$(5!G(B 0)
589 (?$(5!H(B 0)
590 (?$(5!I(B 0)
591 (?$(5!J(B 0)
592 (?$(5!K(B 0)
593 (?$(5!L(B 0)
594 (?$(5!M(B 0)
595 (?$(5!N(B 0)
596 (?$(5!O(B 0)
597 (?$(5!P(B 0)
598 (?$(5!Q(B 0)
599 (?$(5!R(B 0)
600 (?$(5!S(B 0)
601 (?$(5!T(B 0)
602 (?$(5!U(B 0)
603 (?$(5!V(B 0)
604 (?$(5!W(B 0)
605 (?$(5!X(B 0)
606 (?$(5!Y(B 0)
607 (?$(5!Z(B 40)
608 (?$(5![(B 40 (ml . mr))
609 (?$(5!\(B 40)
610 (?$(5!](B 40 (bc . tc))
611 (?$(5!^(B 40 (bc . tc))
612 (?$(5!_(B 40 (bc . tc))
613 (?$(5!`(B 40 (tc . bc))
614 (?$(5!a(B 40 (tc . bc))
615 (?$(5!b(B 40 (tc . bc))
616 (?$(5!c(B 40 (tc . bc))
617 (?$(5!d(B 40)
618 (?$(5!e(B 40)
619 (?$(5!f(B 40)
620 (?$(5!g(B 40)
621 (?$(5!h(B 0 (br . tr)) ; Halant's special treatment.
622 (?$(5!i(B 0 (br . tr)) ; Nukta's special treatment.
623 (?$(5!j(B 0)
624 (nil 0)
625 (nil 0)
626 (nil 0)
627 (nil 0)
628 (nil 0)
629 (nil 0)
630 (?$(5!q(B 0)
631 (?$(5!r(B 0)
632 (?$(5!s(B 0)
633 (?$(5!t(B 0)
634 (?$(5!u(B 0)
635 (?$(5!v(B 0)
636 (?$(5!w(B 0)
637 (?$(5!x(B 0)
638 (?$(5!y(B 0)
639 (?$(5!z(B 0)
640 (nil 0)
641 (nil 0)
642 (nil 0)
643 (nil 0)
644 (?$(5"!(B 0)
645 (?$(5""(B 0)
646 (?$(5"#(B 0)
647 (?$(5"$(B 0)
648 (?$(5"%(B 0)
649 (?$(5"&(B 0)
650 (?$(5"'(B 0)
651 (?$(5"((B 0)
652 (?$(5")(B 0)
653 (?$(5"*(B 0)
654 (?$(5"+(B 0)
655 (?$(5",(B 0)
656 (?$(5"-(B 0)
657 (?$(5".(B 0)
658 (?$(5"/(B 0)
659 (?$(5"0(B 0)
660 (?$(5"1(B 0)
661 (?$(5"2(B 0)
662 (?$(5"3(B 0)
663 (?$(5"4(B 0)
664 (?$(5"5(B 0)
665 (?$(5"6(B 0)
666 (?$(5"7(B 0)
667 (?$(5"8(B 0)
668 (?$(5"9(B 0)
669 (?$(5":(B 0)
670 (?$(5";(B 0)
671 (?$(5"<(B 0)
672 (?$(5"=(B 0)
673 (?$(5">(B 0)
674 (?$(5"?(B 0)
675 (?$(5"@(B 0)
676 (?$(5"A(B 0)
677 (?$(5"B(B 0)
678 (?$(5"C(B 0)
679 (?$(5"D(B 0)
680 (?$(5"E(B 0)
681 (?$(5"F(B 0)
682 (?$(5"G(B 0)
683 (?$(5"H(B 0)
684 (?$(5"I(B 0)
685 (?$(5"J(B 0)
686 (?$(5"K(B 0)
687 (?$(5"L(B 0)
688 (?$(5"M(B 0)
689 (?$(5"N(B 0)
690 (?$(5"O(B 0)
691 (?$(5"P(B 0)
692 (?$(5"Q(B 0)
693 (?$(5"R(B 0)
694 (?$(5"S(B 0)
695 (?$(5"T(B 0)
696 (?$(5"U(B 0)
697 (?$(5"V(B 0)
698 (?$(5"W(B 0)
699 (?$(5"X(B 0)
700 (?$(5"Y(B 0)
701 (?$(5"Z(B 0)
702 (?$(5"[(B 0)
703 (?$(5"\(B 0)
704 (?$(5"](B 0)
705 (?$(5"^(B 0)
706 (?$(5"_(B 0)
707 (?$(5"`(B 0)
708 (?$(5"a(B 0)
709 (?$(5"b(B 0)
710 (?$(5"c(B 0)
711 (?$(5"d(B 0)
712 (?$(5"e(B 0)
713 (?$(5"f(B 0)
714 (?$(5"g(B 0)
715 (?$(5"h(B 0)
716 (?$(5"i(B 0)
717 (?$(5"j(B 0)
718 (?$(5"k(B 0)
719 (?$(5"l(B 0)
720 (?$(5"m(B 0)
721 (?$(5"n(B 0)
722 (?$(5"o(B 0)
723 (?$(5"p(B 20 (tr . br))
724 (?$(5"q(B 20 (br . tr))
725 (?$(5"r(B 20 (br . tr))
726 (?$(5"s(B 0)
727 (?$(5"t(B 0)
728 (?$(5"u(B 0)
729 (?$(5"v(B 0)
730 (?$(5"w(B 0)
731 (?$(5"x(B 0)
732 (?$(5"y(B 0)
733 (?$(5"z(B 0)
734 (?$(5"{(B 0)
735 (?$(5"|(B 0)
736 (?$(5"}(B 0)
737 (?$(5"~(B 0)
738 (?$(5#!(B 0)
739 (?$(5#"(B 0)
740 (?$(5##(B 0)
741 (?$(5#$(B 0)
742 (?$(5#%(B 0)
743 (?$(5#&(B 0)
744 (?$(5#'(B 0)
745 (?$(5#((B 0)
746 (?$(5#)(B 0)
747 (?$(5#*(B 0)
748 (?$(5#+(B 0)
749 (?$(5#,(B 0)
750 (?$(5#-(B 0)
751 (?$(5#.(B 0)
752 (?$(5#/(B 0)
753 (?$(5#0(B 0)
754 (?$(5#1(B 0)
755 (?$(5#2(B 0)
756 (?$(5#3(B 0)
757 (?$(5#4(B 0)
758 (?$(5#5(B 0)
759 (?$(5#6(B 0)
760 (?$(5#7(B 0)
761 (?$(5#8(B 0)
762 (?$(5#9(B 0)
763 (?$(5#:(B 0)
764 (?$(5#;(B 0)
765 (?$(5#<(B 0)
766 (?$(5#=(B 0)
767 (?$(5#>(B 0)
768 (?$(5#?(B 0)
769 (?$(5#@(B 0)
770 (?$(5#A(B 0)
771 (?$(5#B(B 0)
772 (?$(5#C(B 0)
773 (?$(5#D(B 0)
774 (?$(5#E(B 0)
775 (?$(5#F(B 0)
776 (?$(5#G(B 0)
777 (?$(5#H(B 0)
778 (?$(5#I(B 0)
779 (?$(5#J(B 0)
780 (?$(5#K(B 0)
781 (?$(5#L(B 0)
782 (?$(5#M(B 0)
783 (?$(5#N(B 0)
784 (?$(5#O(B 0)
785 (?$(5#P(B 0)
786 (?$(5#Q(B 0)
787 (?$(5#R(B 0)
788 (?$(5#S(B 0)
789 (?$(5#T(B 0)
790 (?$(5#U(B 0)
791 (?$(5#V(B 0)
792 (?$(5#W(B 0)
793 (?$(5#X(B 0)
794 (?$(5#Y(B 0)
795 (?$(5#Z(B 0)
796 (?$(5#[(B 0)
797 (?$(5#\(B 0)
798 (?$(5#](B 0)
799 (?$(5#^(B 0)
800 (?$(5#_(B 0)
801 (?$(5#`(B 0)
802 (?$(5#a(B 0)
803 (?$(5#b(B 0)
804 (?$(5#c(B 0)
805 (?$(5#d(B 0)
806 (?$(5#e(B 0)
807 (?$(5#f(B 0)
808 (?$(5#g(B 0)
809 (?$(5#h(B 0)
810 (?$(5#i(B 0)
811 (?$(5#j(B 0)
812 (?$(5#k(B 0)
813 (?$(5#l(B 0)
814 (?$(5#m(B 0)
815 (?$(5#n(B 0)
816 (?$(5#o(B 0)
817 (?$(5#p(B 0)
818 (?$(5#q(B 0)
819 (?$(5#r(B 0)
820 (?$(5#s(B 0)
821 (?$(5#t(B 0)
822 (?$(5#u(B 0)
823 (?$(5#v(B 0)
824 (?$(5#w(B 0)
825 (?$(5#x(B 0)
826 (?$(5#y(B 0)
827 (?$(5#z(B 0)
828 (?$(5#{(B 0)
829 (?$(5#|(B 0)
830 (?$(5#}(B 0)
831 (?$(5#~(B 0)
832 (?$(5$!(B 0)
833 (?$(5$"(B 0)
834 (?$(5$#(B 0)
835 (?$(5$$(B 0)
836 (?$(5$%(B 0)
837 (?$(5$&(B 0)
838 (?$(5$'(B 0)
839 (?$(5$((B 0)
840 (?$(5$)(B 0)
841 (?$(5$*(B 0)
842 (?$(5$+(B 0)
843 (?$(5$,(B 0)
844 (?$(5$-(B 0)
845 (?$(5$.(B 0)
846 (?$(5$/(B 0)
847 (?$(5$0(B 0)
848 (?$(5$1(B 0)
849 (?$(5$2(B 0)
850 (?$(5$3(B 0)
851 (?$(5$4(B 0)
852 (?$(5$5(B 0)
853 (?$(5$6(B 0)
854 (?$(5$7(B 0)
855 (?$(5$8(B 0)
856 (?$(5$9(B 0)
857 (?$(5$:(B 0)
858 (?$(5$;(B 0)
859 (?$(5$<(B 0)
860 (?$(5$=(B 0)
861 (?$(5$>(B 0)
862 (?$(5$?(B 0)
863 (?$(5$@(B 0)
864 (?$(5$A(B 0)
865 (?$(5$B(B 0)
866 (?$(5$C(B 0)
867 (?$(5$D(B 0)
868 (?$(5$E(B 0)
869 (?$(5$F(B 0)
870 (?$(5$G(B 0)
871 (?$(5$H(B 0)
872 (?$(5$I(B 0)
873 (?$(5$J(B 0)
874 (?$(5$K(B 0)
875 (?$(5$L(B 0)
876 (?$(5$M(B 0)
877 (?$(5$N(B 0)
878 (?$(5$O(B 0)
879 (?$(5$P(B 0)
880 (?$(5$Q(B 0)
881 (?$(5$R(B 0)
882 (?$(5$S(B 0)
883 (?$(5$T(B 0)
884 (?$(5$U(B 0)
885 (?$(5$V(B 0)
886 (?$(5$W(B 0)
887 (?$(5$X(B 0)
888 (?$(5$Y(B 0)
889 (?$(5$Z(B 0)
890 (?$(5$[(B 0)
891 (?$(5$\(B 0)
892 (?$(5$](B 0)
893 (?$(5$^(B 0)
894 (?$(5$_(B 0)
895 (?$(5$`(B 0)
896 (?$(5$a(B 0)
897 (?$(5$b(B 0)
898 (?$(5$c(B 0)
899 (?$(5$d(B 0)
900 (?$(5$e(B 0)
901 (?$(5$f(B 0)
902 (?$(5$g(B 0)
903 (?$(5$h(B 0)
904 (?$(5$i(B 0)
905 (?$(5$j(B 0)
906 (?$(5$k(B 0)
907 (?$(5$l(B 0)
908 (?$(5$m(B 0)
909 (?$(5$n(B 0)
910 (?$(5$o(B 0)
911 (?$(5$p(B 0)
912 (?$(5$q(B 0)
913 (?$(5$r(B 0)
914 (?$(5$s(B 0)
915 (?$(5$t(B 0)
916 (?$(5$u(B 0)
917 (?$(5$v(B 0)
918 (?$(5$w(B 0)
919 (?$(5$x(B 0)
920 (?$(5$y(B 0)
921 (?$(5$z(B 0)
922 (?$(5${(B 0)
923 (?$(5$|(B 0)
924 (?$(5$}(B 0)
925 (?$(5$~(B 0)
926 ))
927
928;; Determine composition priority and rule of the array of Glyphs.
929;; Sort the glyphs with their priority.
930
931;; Example:
932;;(devanagari-reorder-glyph-for-composition '[?$(5"5(B ?$(5!X(B ?$(5![(B])
933;; => ((446680 0) (446773 0) (446683 50 (ml . mr)))
934
935(defun devanagari-reorder-glyph-for-composition (glyph-alist)
936 (let* ((pos 0)
937 (ordered-glyphs '()))
938 (while (< pos (length glyph-alist))
939 (let* ((glyph (aref glyph-alist pos)))
940 (setq pos (1+ pos))
941 (setq ordered-glyphs
942 (append ordered-glyphs (list (assq glyph devanagari-composition-rules))))))
943 (sort ordered-glyphs '(lambda (x y) (< (car (cdr x)) (car (cdr y)))))))
944
945;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "2$(6!XP"5@![1(B"
946
947(defun devanagari-compose-to-one-glyph (devanagari-string)
948 (let* ((o-glyph-list (devanagari-reorder-glyph-for-composition
949 (string-to-vector devanagari-string)))
950 ;; List of glyphs to be composed.
951 (cmp-glyph-list (list (car (car o-glyph-list))))
952 (o-glyph-list (cdr o-glyph-list)))
953 (while o-glyph-list
954 (let* ((o-glyph (car o-glyph-list))
955 (glyph (if (< 2 (length o-glyph))
956 ;; default composition
957 (list (car (cdr (cdr o-glyph))) (car o-glyph))
958 ;; composition with a specified rule
959 (list '(mr . ml) (car o-glyph)))))
960 (setq o-glyph-list (cdr o-glyph-list))
961 (setq cmp-glyph-list (append cmp-glyph-list glyph))))
962 ;; Before applying compose-chars, convert glyphs to
963 ;; 1-column width if possible.
964 (setq cmp-glyph-list (devanagari-wide-to-narrow cmp-glyph-list))
965 (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list))
966 (apply 'compose-chars cmp-glyph-list))))
967
968
969;;
970;; Phase 2.5 Convert Appropriate Character to 1-column shape.
971;;
972;; This is temporary and should be removed out when Emacs supports
973;; variable width characters.
974;;
975;; This will convert the composing glyphs (2 column glyphs)
976;; to narrow (1 column) glyphs if they exist.
977;;
978;; devanagari-wide-to-narrow-old converts glyphs simply.
979;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs
980;; with 2 column base-glyph.
981;;
982;; Execution Examples
983;;(devanagari-wide-to-narrow '(446680 446773 (ml . mr) 446683))
984;;(devanagari-wide-to-narrow '(?$(5!6(B (ml . ml) 446773 (tc . mr) 446683))
985
986(defun devanagari-wide-to-narrow (src-list)
987 (if (null src-list) '()
988 (cons
989 (if (and (numberp (car src-list))
990 (cdr (assq (car src-list) devanagari-1-column-char)))
991 (cdr (assq (car src-list) devanagari-1-column-char))
992 (car src-list))
993 (devanagari-wide-to-narrow (cdr src-list)))))
994
995;; Make this function obsolete temporary Because now Emacs supports
996;; attaching 1 column character at the center 2 column char. However,
997;; there are still problems attempting to attach Halant or Nukta sign
998;; at the non-vowel consonant. This problem can not be solved until
999;; Emacs supports attaching the glyph at `temporary-preserved metric'.
1000
1001(defun devanagari-wide-to-narrow-old (src-list)
1002 (if (null src-list) (progn (error "devanagari-wide-to-narrow error") nil)
1003 (let* ((base-glyph (cdr (assq (car src-list) devanagari-1-column-char)))
1004 (wide-base-glyph nil)
1005 (apply-glyph-list (cdr src-list)))
1006 (if (null base-glyph)
1007 (progn
1008 (setq wide-base-glyph t)
1009 (setq base-glyph (car src-list))))
1010 (cons base-glyph
1011 (devanagari-wide-to-narrow-iter apply-glyph-list wide-base-glyph))
1012 )))
1013
1014;; Convert apply-glyph-list from 2-column to 1-column.
1015;; wide-base-glyph is t when base-glyph is 2-column.
1016;; When apply-glyph is put at the top or bottom of 2-column base-glyph,
1017;; they must be 2-column glyph, too. Otherwise, they will be
1018;; converted to 1-column glyph if possible.
1019
1020(defun devanagari-wide-to-narrow-iter (apply-glyph-list wide-base-glyph)
1021 (if (< (length apply-glyph-list) 2) '()
1022 (let* ((apply-dir (car apply-glyph-list))
1023 (apply-glyph (car (cdr apply-glyph-list)))
1024 (apply-rest (cdr (cdr apply-glyph-list)))
1025 (put-t-or-b (member (car apply-dir) '(tl tc tr bl bc br)))
1026 (narrow-glyph (cdr (assq apply-glyph devanagari-1-column-char))))
1027 (append
1028 (list apply-dir
1029 (if (or (and wide-base-glyph put-t-or-b)
1030 (null narrow-glyph))
1031 apply-glyph narrow-glyph))
1032 (devanagari-wide-to-narrow-iter apply-rest wide-base-glyph)))))
1033
1034;;
1035;; Summary
1036;;
1037
1038;;;###autoload
1039(defun devanagari-compose-string (str)
1040 (let ((len (length str))
1041 (src str) (dst "") rest match-b match-e)
1042 (while (string-match devanagari-composite-glyph-unit-examine src)
1043 (setq match-b (match-beginning 0) match-e (match-end 0))
1044 (setq dst
1045 (concat dst
1046 (substring src 0 match-b)
1047 (devanagari-compose-to-one-glyph
1048 (char-to-glyph-devanagari
1049 (substring src match-b match-e)))))
1050 (setq src (substring src match-e)))
1051 (setq dst (concat dst src))
1052 dst))
1053
1054;;;###autoload
1055(defun devanagari-compose-region (from to)
1056 (interactive "r")
1057 (save-restriction
1058 (narrow-to-region from to)
1059 (goto-char (point-min))
1060 (while (re-search-forward devanagari-composite-glyph-unit-examine nil t)
1061 (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
1062 (cmps (devanagari-compose-to-one-glyph
1063 (char-to-glyph-devanagari
1064 (buffer-substring match-b match-e)))))
1065 (delete-region match-b match-e)
1066 (insert cmps)))))
1067
1068;;
1069;; Decomposition of composite font.
1070;;
1071
1072(defun devanagari-normalize-narrow-glyph (charlist)
1073 (let ((wide-char (car (rassoc (car charlist) devanagari-1-column-char))))
1074 (if (null charlist) nil
1075 (cons (if (null wide-char) (car charlist) wide-char)
1076 (devanagari-normalize-narrow-glyph (cdr charlist))))))
1077
1078(defvar devanagari-decomposition-rules
1079 '(
1080 (?$(5"p(B -20)
1081 )
1082 )
1083
1084(defun devanagari-reorder-glyph-for-decomposition (glyphlist)
1085 "This function re-orders glyph list."
1086 (sort glyphlist
1087 '(lambda (x y)
1088 (let ((xx (assoc x devanagari-decomposition-rules))
1089 (yy (assoc y devanagari-decomposition-rules)))
1090 (if (null xx) (setq xx 0))
1091 (if (null yy) (setq yy 0))
1092 (< xx yy)))))
1093
1094(defun devanagari-decompose-char (char)
1095 "This function decomposes one Devanagari composite character to
1096 basic Devanagari character."
1097 (let ((glyphlist (decompose-composite-char char)))
1098 (if (not (listp glyphlist))
1099 (setq glyphlist (list glyphlist)))
1100 (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist))
1101 (mapconcat '(lambda (x) (let ((char (get-char-code-property
1102 x 'glyph-to-char)))
1103 (if (null char) (char-to-string x) char)))
1104 (devanagari-reorder-glyph-for-decomposition glyphlist)
1105 "")))
1106
1107;;;###autoload
1108(defun devanagari-decompose-string (str)
1109 "This function Decomposes Devanagari glyph string to
1110basic Devanagari character string."
1111 (let ((src str) (dst ""))
1112 (while (not (equal src ""))
1113 (let* ((char (string-to-char src))
1114 (clen (char-bytes char)))
1115 (setq src (substring src clen))
1116 (setq dst (concat dst
1117 (devanagari-decompose-char char)))))
1118 dst))
1119
1120;;;###autoload
1121(defun devanagari-decompose-region (from to)
1122 (interactive "r")
1123 (save-restriction
1124 (narrow-to-region from to)
1125 (goto-char (point-min))
1126 (while (re-search-forward "." nil t)
1127 (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
1128 (decmps (devanagari-decompose-string (buffer-substring match-b match-e))))
1129 (delete-char -1)
1130 (insert decmps)))))
1131
1132
1133
1134;; For pre-write and post-read conversion
1135
1136;;;###autoload
1137(defun devanagari-compose-from-is13194-region (from to)
1138 "Compose IS 13194 characters in the region to Devanagari characters."
1139 (interactive "r")
1140 (save-restriction
1141 (narrow-to-region from to)
1142 (indian-to-devanagari-region (point-min) (point-max))
1143 (devanagari-compose-region (point-min) (point-max))))
1144
1145;;;###autoload
1146(defun devanagari-decompose-to-is13194-region (from to)
1147 "Decompose Devanagari characters in the region to IS 13194 characters."
1148 (interactive "r")
1149 (save-restriction
1150 (narrow-to-region from to)
1151 (devanagari-decompose-region (point-min) (point-max))
1152 (devanagari-to-indian-region (point-min) (point-max))))
1153
1154;;
1155(provide 'language/devan-util)
1156
1157;;; Local Variables:
1158;;; generated-autoload-file: "../loaddefs.el"
1159;;; End:
1160;;; devan-util.el ends here
diff --git a/lisp/language/devanagari.el b/lisp/language/devanagari.el
new file mode 100644
index 00000000000..2f2ab1160aa
--- /dev/null
+++ b/lisp/language/devanagari.el
@@ -0,0 +1,541 @@
1;;; devanagari.el --- Support for Devanagari Languages
2
3;; Copyright (C) 1996 Free Software Foundation, Inc.
4
5;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
6
7;; Keywords: multilingual, Indian, Devanagari
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to
23;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24
25;;; Commentary:
26
27;; History:
28;; 1996.9.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
29
30;;; Code:
31
32(make-coding-system
33 'coding-system-is13194-devanagari 2 ?D
34 "Coding-system used for ASCII(MSB=0) & IS13194-Devanagari(MSB=1)."
35 '((ascii t) (indian-is13194 t) nil nil
36 nil ascii-eol))
37(put 'coding-system-is13194-devanagari
38 'post-read-conversion 'devanagari-compose-from-is13194-region)
39(put 'coding-system-is13194-devanagari
40 'pre-write-conversion 'devanagari-decompose-to-is13194-region)
41
42(register-input-method
43 "Devanagari" '("quail-devanagari-transliteration" quail-use-package
44 "quail/devanagari"))
45
46(register-input-method
47 "Devanagari" '("quail-devanagari-keyboard-a" quail-use-package
48 "quail/devanagari"))
49
50(register-input-method
51 "Devanagari" '("quail-devanagari-itrans" quail-use-package
52 "quail/devanagari"))
53
54(defun setup-devanagari-environment ()
55 (setq coding-category-iso-8-1 'coding-system-is13194-devanagari)
56
57 (set-coding-priority
58 '(coding-category-iso-7
59 coding-category-iso-8-1))
60
61 (setq-default buffer-file-coding-system 'coding-system-is13194-devanagari)
62
63 (setq default-input-method '("Devanagari" . "quail-devanagari-itrans"))
64 )
65
66(set-language-info-alist
67 "Devanagari" '((setup-function . setup-devanagari-environment)
68 (charset . (indian-is13194 indian-2-column indian-1-column))
69 (coding-system . (coding-system-is13194-devanagari))
70 (documentation . t)))
71
72(let ((deflist '(;; chars syntax category
73 ("$(5!!!"!#(B" "w" ?7) ; vowel-modifying diacritical mark
74 ; chandrabindu, anuswar, visarga
75 ("$(5!$(B-$(5!2(B" "w" ?5) ; independent vowel
76 ("$(5!3(B-$(5!X(B" "w" ?0) ; consonant
77 ("$(5!Z(B-$(5!g(B" "w" ?8) ; matra
78 ("$(5!q(B-$(5!z(B" "w" ?6) ; digit
79 ))
80 elm chars len syntax category to ch i)
81 (while deflist
82 (setq elm (car deflist))
83 (setq chars (car elm)
84 len (length chars)
85 syntax (nth 1 elm)
86 category (nth 2 elm)
87 i 0)
88 (while (< i len)
89 (if (= (aref chars i) ?-)
90 (setq i (1+ i)
91 to (sref chars i))
92 (setq ch (sref chars i)
93 to ch))
94 (while (<= ch to)
95 (modify-syntax-entry ch syntax)
96 (modify-category-entry ch category)
97 (setq ch (1+ ch)))
98 (setq i (+ i (char-bytes to))))
99 (setq deflist (cdr deflist))))
100
101;;
102;; Devanagari Glyph List
103;;
104;;2120 $(5!!!"!#!$!%!&!'!(!)!*!+!,!-!.!/(B
105;;2130 $(5!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?(B
106;;2140 $(5!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O(B
107;;2150 $(5!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_(B
108;;2160 $(5!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o(B
109;;2170 $(5!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~(B
110;;
111;;2220 $(5"!"""#"$"%"&"'"(")"*"+","-"."/(B
112;;2230 $(5"0"1"2"3"4"5"6"7"8"9":";"<"=">"?(B
113;;2240 $(5"@"A"B"C"D"E"F"G"H"I"J"K"L"M"N"O(B
114;;2250 $(5"P"Q"R"S"T"U"V"W"X"Y"Z"["\"]"^"_(B
115;;2260 $(5"`"a"b"c"d"e"f"g"h"i"j"k"l"m"n"o(B
116;;2270 $(5"p"q"r"s"t"u"v"w"x"y"z"{"|"}"~(B
117;;
118;;2320 $(5#!#"###$#%#&#'#(#)#*#+#,#-#.#/(B
119;;2330 $(5#0#1#2#3#4#5#6#7#8#9#:#;#<#=#>#?(B
120;;2340 $(5#@#A#B#C#D#E#F#G#H#I#J#K#L#M#N#O(B
121;;2350 $(5#P#Q#R#S#T#U#V#W#X#Y#Z#[#\#]#^#_(B
122;;2360 $(5#`#a#b#c#d#e#f#g#h#i#j#k#l#m#n#o(B
123;;2370 $(5#p#q#r#s#t#u#v#w#x#y#z#{#|#}#~(B
124;;
125;;2420 $(5$!$"$#$$$%$&$'$($)$*$+$,$-$.$/(B
126;;2430 $(5$0$1$2$3$4$5$6$7$8$9$:$;$<$=$>$?(B
127;;2440 $(5$@$A$B$C$D$E$F$G$H$I$J$K$L$M$N$O(B
128;;2450 $(5$P$Q$R$S$T$U$V$W$X$Y$Z$[$\$]$^$_(B
129;;2460 $(5$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o(B
130;;2470 $(5$p$q$r$s$t$u$v$w$x$y$z${$|$}$~(B
131;;
132;;2120 $(6!!!"!#!$!%!&!'!(!)!*!+!,!-!.!/(B
133;;2130 $(6!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?(B
134;;2140 $(6!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O(B
135;;2150 $(6!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_(B
136;;2160 $(6!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o(B
137;;2170 $(6!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~(B
138;;
139;;2220 $(6"!"""#"$"%"&"'"(")"*"+","-"."/(B
140;;2230 $(6"0"1"2"3"4"5"6"7"8"9":";"<"=">"?(B
141;;2240 $(6"@"A"B"C"D"E"F"G"H"I"J"K"L"M"N"O(B
142;;2250 $(6"P"Q"R"S"T"U"V"W"X"Y"Z"["\"]"^"_(B
143;;2260 $(6"`"a"b"c"d"e"f"g"h"i"j"k"l"m"n"o(B
144;;2270 $(6"p"q"r"s"t"u"v"w"x"y"z"{"|"}"~(B
145;;2320 $(6#!#"###$#%#&#'#(#)#*#+#,#-#.#/(B
146;;2330 $(6#0#1#2#3#4#5#6#7#8#9#:#;#<#=#>#?(B
147;;2340 $(6#@#A#B#C#D#E#F#G#H#I#J#K#L#M#N#O(B
148;;2350 $(6#P#Q#R#S#T#U#V#W#X#Y#Z#[#\#]#^#_(B
149;;2360 $(6#`#a#b#c#d#e#f#g#h#i#j#k#l#m#n#o(B
150;;2370 $(6#p#q#r#s#t#u#v#w#x#y#z#{#|#}#~(B
151;;
152;;2320 $(6$!$"$#$$$%$&$'$($)$*$+$,$-$.$/(B
153;;2430 $(6$0$1$2$3$4$5$6$7$8$9$:$;$<$=$>$?(B
154;;2440 $(6$@$A$B$C$D$E$F$G$H$I$J$K$L$M$N$O(B
155;;2450 $(6$P$Q$R$S$T$U$V$W$X$Y$Z$[$\$]$^$_(B
156;;2460 $(6$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o(B
157;;2470 $(6$p$q$r$s$t$u$v$w$x$y$z${$|$}$~(B
158;;
159;;
160;; Modify the following table if you change the set of 1-column font.
161;;
162(defconst devanagari-1-column-char
163 '((?$(5!!(B . ?$(6!!(B)
164 (?$(5!"(B . ?$(6!"(B)
165 (?$(5!#(B . ?$(6!#(B)
166 (?$(5!$(B . nil)
167 (?$(5!%(B . nil)
168 (?$(5!&(B . ?$(6!&(B)
169 (?$(5!'(B . ?$(6!'(B)
170 (?$(5!((B . ?$(6!((B)
171 (?$(5!)(B . nil)
172 (?$(5!*(B . nil)
173 (?$(5!+(B . nil)
174 (?$(5!,(B . nil)
175 (?$(5!-(B . nil)
176 (?$(5!.(B . nil)
177 (?$(5!/(B . nil)
178 (?$(5!0(B . nil)
179 (?$(5!1(B . nil)
180 (?$(5!2(B . nil)
181 (?$(5!3(B . nil)
182 (?$(5!4(B . nil)
183 (?$(5!5(B . ?$(6!5(B)
184 (?$(5!6(B . nil)
185 (?$(5!7(B . nil)
186 (?$(5!8(B . nil)
187 (?$(5!9(B . nil)
188 (?$(5!:(B . nil)
189 (?$(5!;(B . nil)
190 (?$(5!<(B . nil)
191 (?$(5!=(B . ?$(6!=(B)
192 (?$(5!>(B . ?$(6!>(B)
193 (?$(5!?(B . ?$(6!?(B)
194 (?$(5!@(B . ?$(6!@(B)
195 (?$(5!A(B . nil)
196 (?$(5!B(B . ?$(6!B(B)
197 (?$(5!C(B . ?$(6!C(B)
198 (?$(5!D(B . ?$(6!D(B)
199 (?$(5!E(B . ?$(6!E(B)
200 (?$(5!F(B . ?$(6!F(B)
201 (?$(5!G(B . ?$(6!G(B)
202 (?$(5!H(B . ?$(6!H(B)
203 (?$(5!I(B . nil)
204 (?$(5!J(B . ?$(6!J(B)
205 (?$(5!K(B . ?$(6!K(B)
206 (?$(5!L(B . ?$(6!L(B)
207 (?$(5!M(B . ?$(6!M(B)
208 (?$(5!N(B . ?$(6!N(B)
209 (?$(5!O(B . ?$(6!O(B)
210 (?$(5!P(B . ?$(6!P(B)
211 (?$(5!Q(B . nil)
212 (?$(5!R(B . nil)
213 (?$(5!S(B . nil)
214 (?$(5!T(B . ?$(6!T(B)
215 (?$(5!U(B . nil)
216 (?$(5!V(B . ?$(6!V(B)
217 (?$(5!W(B . ?$(6!W(B)
218 (?$(5!X(B . ?$(6!X(B)
219 (?$(5!Y(B . nil)
220 (?$(5!Z(B . ?$(6!Z(B)
221 (?$(5![(B . ?$(6![(B)
222 (?$(5!\(B . ?$(6!\(B)
223 (?$(5!](B . ?$(6!](B)
224 (?$(5!^(B . ?$(6!^(B)
225 (?$(5!_(B . ?$(6!_(B)
226 (?$(5!`(B . ?$(6!`(B)
227 (?$(5!a(B . ?$(6!a(B)
228 (?$(5!b(B . ?$(6!b(B)
229 (?$(5!c(B . ?$(6!c(B)
230 (?$(5!d(B . ?$(6!d(B)
231 (?$(5!e(B . ?$(6!e(B)
232 (?$(5!f(B . ?$(6!f(B)
233 (?$(5!g(B . ?$(6!g(B)
234 (?$(5!h(B . ?$(6!h(B)
235 (?$(5!i(B . ?$(6!i(B)
236 (?$(5!j(B . ?$(6!j(B)
237 (nil . nil)
238 (nil . nil)
239 (nil . nil)
240 (nil . nil)
241 (nil . nil)
242 (nil . nil)
243 (?$(5!q(B . ?$(6!q(B)
244 (?$(5!r(B . ?$(6!r(B)
245 (?$(5!s(B . ?$(6!s(B)
246 (?$(5!t(B . ?$(6!t(B)
247 (?$(5!u(B . ?$(6!u(B)
248 (?$(5!v(B . ?$(6!v(B)
249 (?$(5!w(B . ?$(6!w(B)
250 (?$(5!x(B . ?$(6!x(B)
251 (?$(5!y(B . ?$(6!y(B)
252 (?$(5!z(B . ?$(6!z(B)
253 (nil . nil)
254 (nil . nil)
255 (nil . nil)
256 (nil . nil)
257 (?$(5"!(B . nil)
258 (?$(5""(B . nil)
259 (?$(5"#(B . nil)
260 (?$(5"$(B . nil)
261 (?$(5"%(B . ?$(6"%(B)
262 (?$(5"&(B . ?$(6"&(B)
263 (?$(5"'(B . nil)
264 (?$(5"((B . nil)
265 (?$(5")(B . nil)
266 (?$(5"*(B . nil)
267 (?$(5"+(B . nil)
268 (?$(5",(B . ?$(6",(B)
269 (?$(5"-(B . nil)
270 (?$(5".(B . ?$(6".(B)
271 (?$(5"/(B . nil)
272 (?$(5"0(B . nil)
273 (?$(5"1(B . nil)
274 (?$(5"2(B . nil)
275 (?$(5"3(B . ?$(6"3(B)
276 (?$(5"4(B . ?$(6"4(B)
277 (?$(5"5(B . ?$(6"5(B)
278 (?$(5"6(B . ?$(6"6(B)
279 (?$(5"7(B . nil)
280 (?$(5"8(B . ?$(6"8(B)
281 (?$(5"9(B . nil)
282 (?$(5":(B . ?$(6":(B)
283 (?$(5";(B . ?$(6";(B)
284 (?$(5"<(B . ?$(6"<(B)
285 (?$(5"=(B . nil)
286 (?$(5">(B . nil)
287 (?$(5"?(B . nil)
288 (?$(5"@(B . nil)
289 (?$(5"A(B . ?$(6"A(B)
290 (?$(5"B(B . ?$(6"B(B)
291 (?$(5"C(B . ?$(6"C(B)
292 (?$(5"D(B . nil)
293 (?$(5"E(B . ?$(6"E(B)
294 (?$(5"F(B . ?$(6"F(B)
295 (?$(5"G(B . ?$(6"G(B)
296 (?$(5"H(B . ?$(6"H(B)
297 (?$(5"I(B . ?$(6"I(B)
298 (?$(5"J(B . ?$(6"J(B)
299 (?$(5"K(B . ?$(6"K(B)
300 (?$(5"L(B . ?$(6"L(B)
301 (?$(5"M(B . ?$(6"M(B)
302 (?$(5"N(B . ?$(6"N(B)
303 (?$(5"O(B . nil)
304 (?$(5"P(B . nil)
305 (?$(5"Q(B . ?$(6"Q(B)
306 (?$(5"R(B . nil)
307 (?$(5"S(B . nil)
308 (?$(5"T(B . ?$(6"T(B)
309 (?$(5"U(B . ?$(6"U(B)
310 (?$(5"V(B . ?$(6"V(B)
311 (?$(5"W(B . ?$(6"W(B)
312 (?$(5"X(B . nil)
313 (?$(5"Y(B . nil)
314 (?$(5"Z(B . nil)
315 (?$(5"[(B . nil)
316 (?$(5"\(B . nil)
317 (?$(5"](B . nil)
318 (?$(5"^(B . nil)
319 (?$(5"_(B . nil)
320 (?$(5"`(B . ?$(6"`(B)
321 (?$(5"a(B . ?$(6"a(B)
322 (?$(5"b(B . ?$(6"b(B)
323 (?$(5"c(B . ?$(6"c(B)
324 (?$(5"d(B . ?$(6"d(B)
325 (?$(5"e(B . ?$(6"e(B)
326 (?$(5"f(B . ?$(6"f(B)
327 (?$(5"g(B . ?$(6"g(B)
328 (?$(5"h(B . ?$(6"h(B)
329 (?$(5"i(B . ?$(6"i(B)
330 (?$(5"j(B . ?$(6"j(B)
331 (?$(5"k(B . nil)
332 (?$(5"l(B . ?$(6"l(B)
333 (?$(5"m(B . ?$(6"m(B)
334 (?$(5"n(B . nil)
335 (?$(5"o(B . nil)
336 (?$(5"p(B . ?$(6"p(B)
337 (?$(5"q(B . ?$(6"q(B)
338 (?$(5"r(B . ?$(6"r(B)
339 (?$(5"s(B . ?$(6"s(B)
340 (?$(5"t(B . ?$(6"t(B)
341 (?$(5"u(B . ?$(6"u(B)
342 (?$(5"v(B . nil)
343 (?$(5"w(B . nil)
344 (?$(5"x(B . nil)
345 (?$(5"y(B . ?$(6"y(B)
346 (?$(5"z(B . ?$(6"z(B)
347 (?$(5"{(B . nil)
348 (?$(5"|(B . nil)
349 (?$(5"}(B . nil)
350 (?$(5"~(B . nil)
351 (?$(5#!(B . nil)
352 (?$(5#"(B . nil)
353 (?$(5##(B . nil)
354 (?$(5#$(B . nil)
355 (?$(5#%(B . nil)
356 (?$(5#&(B . ?$(6#&(B)
357 (?$(5#'(B . ?$(6#'(B)
358 (?$(5#((B . nil)
359 (?$(5#)(B . nil)
360 (?$(5#*(B . ?$(6#*(B)
361 (?$(5#+(B . nil)
362 (?$(5#,(B . nil)
363 (?$(5#-(B . nil)
364 (?$(5#.(B . nil)
365 (?$(5#/(B . nil)
366 (?$(5#0(B . nil)
367 (?$(5#1(B . nil)
368 (?$(5#2(B . nil)
369 (?$(5#3(B . nil)
370 (?$(5#4(B . nil)
371 (?$(5#5(B . ?$(6#5(B)
372 (?$(5#6(B . nil)
373 (?$(5#7(B . nil)
374 (?$(5#8(B . nil)
375 (?$(5#9(B . nil)
376 (?$(5#:(B . nil)
377 (?$(5#;(B . nil)
378 (?$(5#<(B . nil)
379 (?$(5#=(B . nil)
380 (?$(5#>(B . nil)
381 (?$(5#?(B . ?$(6#?(B)
382 (?$(5#@(B . ?$(6#@(B)
383 (?$(5#A(B . nil)
384 (?$(5#B(B . nil)
385 (?$(5#C(B . nil)
386 (?$(5#D(B . nil)
387 (?$(5#E(B . nil)
388 (?$(5#F(B . nil)
389 (?$(5#G(B . nil)
390 (?$(5#H(B . nil)
391 (?$(5#I(B . nil)
392 (?$(5#J(B . ?$(6#J(B)
393 (?$(5#K(B . nil)
394 (?$(5#L(B . nil)
395 (?$(5#M(B . nil)
396 (?$(5#N(B . nil)
397 (?$(5#O(B . nil)
398 (?$(5#P(B . nil)
399 (?$(5#Q(B . nil)
400 (?$(5#R(B . ?$(6#R(B)
401 (?$(5#S(B . nil)
402 (?$(5#T(B . nil)
403 (?$(5#U(B . nil)
404 (?$(5#V(B . nil)
405 (?$(5#W(B . nil)
406 (?$(5#X(B . nil)
407 (?$(5#Y(B . nil)
408 (?$(5#Z(B . nil)
409 (?$(5#[(B . nil)
410 (?$(5#\(B . nil)
411 (?$(5#](B . nil)
412 (?$(5#^(B . nil)
413 (?$(5#_(B . nil)
414 (?$(5#`(B . nil)
415 (?$(5#a(B . ?$(6#a(B)
416 (?$(5#b(B . ?$(6#b(B)
417 (?$(5#c(B . nil)
418 (?$(5#d(B . nil)
419 (?$(5#e(B . nil)
420 (?$(5#f(B . nil)
421 (?$(5#g(B . nil)
422 (?$(5#h(B . nil)
423 (?$(5#i(B . nil)
424 (?$(5#j(B . ?$(6#j(B)
425 (?$(5#k(B . ?$(6#k(B)
426 (?$(5#l(B . ?$(6#l(B)
427 (?$(5#m(B . nil)
428 (?$(5#n(B . nil)
429 (?$(5#o(B . nil)
430 (?$(5#p(B . nil)
431 (?$(5#q(B . nil)
432 (?$(5#r(B . nil)
433 (?$(5#s(B . nil)
434 (?$(5#t(B . nil)
435 (?$(5#u(B . nil)
436 (?$(5#v(B . nil)
437 (?$(5#w(B . nil)
438 (?$(5#x(B . nil)
439 (?$(5#y(B . nil)
440 (?$(5#z(B . nil)
441 (?$(5#{(B . nil)
442 (?$(5#|(B . nil)
443 (?$(5#}(B . nil)
444 (?$(5#~(B . nil)
445 (?$(5$!(B . nil)
446 (?$(5$"(B . nil)
447 (?$(5$#(B . nil)
448 (?$(5$$(B . nil)
449 (?$(5$%(B . nil)
450 (?$(5$&(B . nil)
451 (?$(5$'(B . nil)
452 (?$(5$((B . nil)
453 (?$(5$)(B . nil)
454 (?$(5$*(B . nil)
455 (?$(5$+(B . nil)
456 (?$(5$,(B . nil)
457 (?$(5$-(B . nil)
458 (?$(5$.(B . nil)
459 (?$(5$/(B . nil)
460 (?$(5$0(B . nil)
461 (?$(5$1(B . nil)
462 (?$(5$2(B . nil)
463 (?$(5$3(B . nil)
464 (?$(5$4(B . nil)
465 (?$(5$5(B . nil)
466 (?$(5$6(B . nil)
467 (?$(5$7(B . nil)
468 (?$(5$8(B . nil)
469 (?$(5$9(B . nil)
470 (?$(5$:(B . nil)
471 (?$(5$;(B . nil)
472 (?$(5$<(B . nil)
473 (?$(5$=(B . nil)
474 (?$(5$>(B . nil)
475 (?$(5$?(B . nil)
476 (?$(5$@(B . nil)
477 (?$(5$A(B . ?$(6$A(B)
478 (?$(5$B(B . nil)
479 (?$(5$C(B . nil)
480 (?$(5$D(B . nil)
481 (?$(5$E(B . ?$(6$E(B)
482 (?$(5$F(B . nil)
483 (?$(5$G(B . nil)
484 (?$(5$H(B . ?$(6$H(B)
485 (?$(5$I(B . ?$(6$I(B)
486 (?$(5$J(B . ?$(6$J(B)
487 (?$(5$K(B . nil)
488 (?$(5$L(B . nil)
489 (?$(5$M(B . nil)
490 (?$(5$N(B . ?$(6$N(B)
491 (?$(5$O(B . nil)
492 (?$(5$P(B . ?$(6$P(B)
493 (?$(5$Q(B . ?$(6$Q(B)
494 (?$(5$R(B . ?$(6$R(B)
495 (?$(5$S(B . nil)
496 (?$(5$T(B . nil)
497 (?$(5$U(B . nil)
498 (?$(5$V(B . nil)
499 (?$(5$W(B . nil)
500 (?$(5$X(B . nil)
501 (?$(5$Y(B . nil)
502 (?$(5$Z(B . nil)
503 (?$(5$[(B . nil)
504 (?$(5$\(B . nil)
505 (?$(5$](B . nil)
506 (?$(5$^(B . nil)
507 (?$(5$_(B . nil)
508 (?$(5$`(B . nil)
509 (?$(5$a(B . nil)
510 (?$(5$b(B . nil)
511 (?$(5$c(B . nil)
512 (?$(5$d(B . nil)
513 (?$(5$e(B . nil)
514 (?$(5$f(B . nil)
515 (?$(5$g(B . nil)
516 (?$(5$h(B . ?$(6$h(B)
517 (?$(5$i(B . ?$(6$i(B)
518 (?$(5$j(B . ?$(6$j(B)
519 (?$(5$k(B . nil)
520 (?$(5$l(B . ?$(6$l(B)
521 (?$(5$m(B . ?$(6$m(B)
522 (?$(5$n(B . ?$(6$n(B)
523 (?$(5$o(B . nil)
524 (?$(5$p(B . ?$(6$p(B)
525 (?$(5$q(B . ?$(6$q(B)
526 (?$(5$r(B . ?$(6$r(B)
527 (?$(5$s(B . nil)
528 (?$(5$t(B . nil)
529 (?$(5$u(B . ?$(6$u(B)
530 (?$(5$v(B . ?$(6$v(B)
531 (?$(5$w(B . nil)
532 (?$(5$x(B . ?$(6$x(B)
533 (?$(5$y(B . ?$(6$y(B)
534 (?$(5$z(B . nil)
535 (?$(5${(B . nil)
536 (?$(5$|(B . nil)
537 (?$(5$}(B . nil)
538 (?$(5$~(B . nil)
539 ))
540
541;;; devanagari.el ends here
diff --git a/lisp/language/ethio-util.el b/lisp/language/ethio-util.el
new file mode 100644
index 00000000000..7fa0a4d7c04
--- /dev/null
+++ b/lisp/language/ethio-util.el
@@ -0,0 +1,1068 @@
1;; ethio-util.el -- utilities for Ethiopic
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Chinese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26;;
27;; ETHIOPIC UTILITY FUNCTIONS
28;;
29
30;; To automatically convert Ethiopic text to SERA format when sending mail,
31;; (add-hook 'mail-send-hook 'fidel-to-sera-mail)
32;;
33;; To automatically convert SERA format to Ethiopic when receiving mail,
34;; (add-hook 'rmail-show-message-hook 'sera-to-fidel-mail)
35;;
36;; To automatically convert Ethiopic text to SERA format when posting news,
37;; (add-hook 'news-inews-hook 'fidel-to-sera-mail)
38;;
39;; If the filename ends in ".sera", editing will be done in fidel
40;; while file I/O will be done in sera.
41
42;;
43;; SERA to FIDEL
44;;
45
46(defconst sera-to-fidel-table
47 [
48 nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
49 nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
50;;; SP ! " # $ % & ' ( ) * + , - . /
51 nil nil nil nil nil nil nil ("") nil nil nil nil ("$(2$Q(B") nil ("$(2$P(B") nil
52;;; 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
53 nil nil nil nil nil nil nil nil nil nil ("$(2$S(B") ("$(2$R(B") nil nil nil nil nil
54;;; A
55 ("$(2"V(B" (?2 "$(2#b(B"))
56;;; B
57 ("$(2!F(B" (?e "$(2!A(B") (?u "$(2!B(B") (?i "$(2!C(B") (?a "$(2!D(B") (?E "$(2!E(B") (?o "$(2!G(B") (?| "$(2!F(B")
58 (?W "$(2!H(B" (?a "$(2!H(B")
59 (?e "$(2!F#L(B") (?u "$(2!F#M(B") (?i "$(2!F#N(B") (?E "$(2!F#P(B") (?' "$(2!F#M(B")))
60;;; C
61 ("$(2"8(B" (?e "$(2"3(B") (?u "$(2"4(B") (?i "$(2"5(B") (?a "$(2"6(B") (?E "$(2"7(B") (?o "$(2"9(B") (?| "$(2"8(B")
62 (?W "$(2":(B" (?a "$(2":(B")
63 (?e "$(2"8#L(B") (?u "$(2"8#M(B") (?i "$(2"8#N(B") (?E "$(2"8#P(B") (?' "$(2"8#M(B")))
64;;; D
65 ("$(2$0(B" (?e "$(2$+(B") (?u "$(2$,(B") (?i "$(2$-(B") (?a "$(2$.(B") (?E "$(2$/(B") (?o "$(2$1(B") (?| "$(2$0(B"))
66;;; E
67 ("$(2"W(B" (?2 "$(2#c(B"))
68;;; F
69 ("$(2"@(B" (?e "$(2";(B") (?u "$(2"<(B") (?i "$(2"=(B") (?a "$(2">(B") (?E "$(2"?(B") (?o "$(2"A(B") (?| "$(2"@(B")
70 (?W "$(2"B(B" (?a "$(2"B(B")
71 (?e "$(2"@#L(B") (?u "$(2"@#M(B") (?i "$(2"@#N(B") (?E "$(2"@#P(B") (?' "$(2"@#M(B")))
72;;; G
73 ("$(2$>(B" (?e "$(2$9(B") (?u "$(2$:(B") (?i "$(2$;(B") (?a "$(2$<(B") (?E "$(2$=(B") (?o "$(2$?(B") (?| "$(2$>(B"))
74;;; H
75 ("$(2$"(B" (?e "$(2#{(B") (?u "$(2#|(B") (?i "$(2#}(B") (?a "$(2#~(B") (?E "$(2$!(B") (?o "$(2$#(B") (?| "$(2$"(B"))
76;;; I
77 ("$(2"X(B" (?2 "$(2#d(B"))
78;;; J
79 ("$(2$7(B" (?e "$(2$2(B") (?u "$(2$3(B") (?i "$(2$4(B") (?a "$(2$5(B") (?E "$(2$6(B") (?o "$(2$8(B") (?| "$(2$7(B"))
80;;; K
81 ("$(2"x(B" (?e "$(2"s(B") (?u "$(2"t(B") (?i "$(2"u(B") (?a "$(2"v(B") (?E "$(2"w(B") (?o "$(2"y(B") (?| "$(2"x(B")
82 (?W "$(2"{(B" (?e "$(2"z(B") (?u "$(2"{(B") (?i "$(2"|(B") (?a "$(2"}(B") (?E "$(2"~(B")))
83;;; L
84 ("$(2!&(B" (?e "$(2!!(B") (?u "$(2!"(B") (?i "$(2!#(B") (?a "$(2!$(B") (?E "$(2!%(B") (?o "$(2!'(B") (?| "$(2!&(B")
85 (?W "$(2!((B" (?a "$(2!((B")
86 (?e "$(2!&#L(B") (?u "$(2!&#M(B") (?i "$(2!&#N(B") (?E "$(2!&#P(B") (?' "$(2!&#M(B")))
87;;; M
88 ("$(2!.(B" (?e "$(2!)(B") (?u "$(2!*(B") (?i "$(2!+(B") (?a "$(2!,(B") (?E "$(2!-(B") (?o "$(2!/(B") (?| "$(2!.(B")
89 (?W "$(2!0(B" (?a "$(2!0(B")
90 (?e "$(2!.#L(B") (?u "$(2!.#M(B") (?i "$(2!.#N(B") (?E "$(2!.#P(B") (?' "$(2!.#M(B")))
91;;; N
92 ("$(2!n(B" (?e "$(2!i(B") (?u "$(2!j(B") (?i "$(2!k(B") (?a "$(2!l(B") (?E "$(2!m(B") (?o "$(2!o(B") (?| "$(2!n(B")
93 (?W "$(2!p(B" (?a "$(2!p(B")
94 (?e "$(2!n#L(B") (?u "$(2!n#M(B") (?i "$(2!n#N(B") (?E "$(2!n#P(B") (?' "$(2!n#M(B")))
95;;; O
96 ("$(2"Y(B" (?2 "$(2#e(B"))
97;;; P
98 ("$(2$E(B" (?e "$(2$@(B") (?u "$(2$A(B") (?i "$(2$B(B") (?a "$(2$C(B") (?E "$(2$D(B") (?o "$(2$F(B") (?| "$(2$E(B"))
99;;; Q
100 ("$(2#2(B" (?e "$(2#-(B") (?u "$(2#.(B") (?i "$(2#/(B") (?a "$(2#0(B") (?E "$(2#1(B") (?o "$(2#3(B") (?| "$(2#2(B")
101 (?W "$(2#5(B" (?e "$(2#4(B") (?u "$(2#5(B") (?i "$(2#6(B") (?a "$(2#7(B") (?E "$(2#8(B")))
102;;; R
103 ("$(2!6(B" (?e "$(2!1(B") (?u "$(2!2(B") (?i "$(2!3(B") (?a "$(2!4(B") (?E "$(2!5(B") (?o "$(2!7(B") (?| "$(2!6(B")
104 (?W "$(2!8(B" (?a "$(2!8(B")
105 (?e "$(2!6#L(B") (?u "$(2!6#M(B") (?i "$(2!6#N(B") (?E "$(2!6#P(B") (?' "$(2!6#M(B")))
106;;; S
107 ("$(2"P(B" (?e "$(2"K(B") (?u "$(2"L(B") (?i "$(2"M(B") (?a "$(2"N(B") (?E "$(2"O(B") (?o "$(2"Q(B") (?| "$(2"P(B")
108 (?W "$(2"R(B" (?a "$(2"R(B")
109 (?e "$(2"P#L(B") (?u "$(2"P#M(B") (?i "$(2"P#N(B") (?E "$(2"P#P(B") (?' "$(2"P#M(B"))
110 (?2 "$(2#](B" (?| "$(2#](B")
111 (?e "$(2#X(B") (?u "$(2#Y(B") (?i "$(2#Z(B") (?a "$(2#[(B") (?E "$(2#\(B") (?o "$(2#^(B")
112 (?W "$(2"R(B"
113 (?a "$(2"R(B")
114 (?e "$(2#]#L(B") (?u "$(2#]#M(B") (?i "$(2#]#N(B") (?E "$(2#]#P(B") (?' "$(2#]#M(B"))))
115
116;;; T
117 ("$(2"0(B" (?e "$(2"+(B") (?u "$(2",(B") (?i "$(2"-(B") (?a "$(2".(B") (?E "$(2"/(B") (?o "$(2"1(B") (?| "$(2"0(B")
118 (?W "$(2"2(B" (?a "$(2"2(B")
119 (?e "$(2"0#L(B") (?u "$(2"0#M(B") (?i "$(2"0#N(B") (?E "$(2"0#P(B") (?' "$(2"0#M(B")))
120;;; U
121 ("$(2"T(B" (?2 "$(2#`(B"))
122;;; V
123 ("$(2!N(B" (?e "$(2!I(B") (?u "$(2!J(B") (?i "$(2!K(B") (?a "$(2!L(B") (?E "$(2!M(B") (?o "$(2!O(B") (?| "$(2!N(B")
124 (?W "$(2!P(B" (?a "$(2!P(B")
125 (?e "$(2!N#L(B") (?u "$(2!N#M(B") (?i "$(2!N#N(B") (?E "$(2!N#P(B") (?' "$(2!N#M(B")))
126;;; W
127 ("$(2#M(B" (?e "$(2#L(B") (?u "$(2#M(B") (?i "$(2#N(B") (?a "$(2#O(B") (?E "$(2#P(B"))
128;;; X
129 ("$(2#y(B" (?e "$(2#t(B") (?u "$(2#u(B") (?i "$(2#v(B") (?a "$(2#w(B") (?E "$(2#x(B") (?o "$(2#z(B") (?| "$(2#y(B"))
130;;; Y
131 ("$(2$)(B" (?e "$(2$$(B") (?u "$(2$%(B") (?i "$(2$&(B") (?a "$(2$'(B") (?E "$(2$((B") (?o "$(2$*(B") (?| "$(2$)(B"))
132;;; Z
133 ("$(2!~(B" (?e "$(2!y(B") (?u "$(2!z(B") (?i "$(2!{(B") (?a "$(2!|(B") (?E "$(2!}(B") (?o "$(2"!(B") (?| "$(2!~(B")
134 (?W "$(2""(B" (?a "$(2""(B")
135 (?e "$(2!~#L(B") (?u "$(2!~#M(B") (?i "$(2!~#N(B") (?E "$(2!~#P(B") (?' "$(2!~#M(B")))
136;;; [ \ ] ^ _
137 nil nil nil nil nil
138;;; `
139 ("`"
140 (?e "$(2#_(B") (?u "$(2#`(B") (?U "$(2#`(B") (?i "$(2#a(B") (?a "$(2#b(B") (?A "$(2#b(B")
141 (?E "$(2#c(B") (?I "$(2#d(B") (?o "$(2#e(B") (?O "$(2#e(B")
142 (?s "$(2#V(B"
143 (?e "$(2#Q(B") (?u "$(2#R(B") (?i "$(2#S(B") (?a "$(2#T(B") (?E "$(2#U(B") (?o "$(2#W(B") (?| "$(2#V(B")
144 (?W "$(2"J(B" (?a "$(2"J(B")
145 (?e "$(2#V#L(B") (?u "$(2#V#M(B") (?i "$(2#V#N(B") (?E "$(2#V#P(B") (?' "$(2#V#M(B")))
146 (?S "$(2#](B"
147 (?e "$(2#X(B") (?u "$(2#Y(B") (?i "$(2#Z(B") (?a "$(2#[(B") (?E "$(2#\(B") (?o "$(2#^(B") (?| "$(2#](B")
148 (?W "$(2"R(B" (?a "$(2"R(B")
149 (?e "$(2#]#L(B") (?u "$(2#]#M(B") (?i "$(2#]#N(B") (?E "$(2#]#P(B") (?' "$(2#]#M(B")))
150 (?h "$(2#k(B"
151 (?e "$(2#f(B") (?u "$(2#g(B") (?i "$(2#h(B") (?a "$(2#i(B") (?E "$(2#j(B") (?o "$(2#l(B") (?| "$(2#k(B")
152 (?W "$(2"c(B" (?e "$(2"b(B") (?u "$(2"c(B") (?i "$(2"d(B") (?a "$(2"e(B") (?E "$(2"f(B")))
153 (?k "$(2#r(B"
154 (?e "$(2#m(B") (?u "$(2#n(B") (?i "$(2#o(B") (?a "$(2#p(B") (?E "$(2#q(B") (?o "$(2#s(B") (?| "$(2#r(B")))
155;;; a
156 ("$(2"S(B" (?2 "$(2#b(B"))
157
158;;; b
159 ("$(2!F(B" (?e "$(2!A(B") (?u "$(2!B(B") (?i "$(2!C(B") (?a "$(2!D(B") (?E "$(2!E(B") (?o "$(2!G(B") (?| "$(2!F(B")
160 (?W "$(2!H(B" (?a "$(2!H(B")
161 (?e "$(2!F#L(B") (?u "$(2!F#M(B") (?i "$(2!F#N(B") (?E "$(2!F#P(B") (?' "$(2!F#M(B")))
162;;; c
163 ("$(2!^(B" (?e "$(2!Y(B") (?u "$(2!Z(B") (?i "$(2![(B") (?a "$(2!\(B") (?E "$(2!](B") (?o "$(2!_(B") (?| "$(2!^(B")
164 (?W "$(2!`(B" (?a "$(2!`(B")
165 (?e "$(2!^#L(B") (?u "$(2!^#M(B") (?i "$(2!^#N(B") (?E "$(2!^#P(B") (?' "$(2!^#M(B")))
166;;; d
167 ("$(2"((B" (?e "$(2"#(B") (?u "$(2"$(B") (?i "$(2"%(B") (?a "$(2"&(B") (?E "$(2"'(B") (?o "$(2")(B") (?| "$(2"((B")
168 (?W "$(2"*(B" (?a "$(2"*(B")
169 (?e "$(2"(#L(B") (?u "$(2"(#M(B") (?i "$(2"(#N(B") (?E "$(2"(#P(B") (?' "$(2"(#M(B")))
170;;; e
171 ("$(2"S(B" (?2 "$(2#_(B") (?3 "$(2"Z(B"))
172;;; f
173 ("$(2"@(B" (?e "$(2";(B") (?u "$(2"<(B") (?i "$(2"=(B") (?a "$(2">(B") (?E "$(2"?(B") (?o "$(2"A(B") (?| "$(2"@(B")
174 (?W "$(2"B(B" (?a "$(2"B(B")
175 (?e "$(2"@#L(B") (?u "$(2"@#M(B") (?i "$(2"@#N(B") (?E "$(2"@#P(B") (?' "$(2"@#M(B")))
176;;; g
177 ("$(2#>(B" (?e "$(2#9(B") (?u "$(2#:(B") (?i "$(2#;(B") (?a "$(2#<(B") (?E "$(2#=(B") (?o "$(2#?(B") (?| "$(2#>(B")
178 (?W "$(2#A(B" (?e "$(2#@(B") (?u "$(2#A(B") (?i "$(2#B(B") (?a "$(2#C(B") (?E "$(2#D(B")))
179;;; h
180 ("$(2"`(B" (?e "$(2"[(B") (?u "$(2"\(B") (?i "$(2"](B") (?a "$(2"^(B") (?E "$(2"_(B") (?o "$(2"a(B") (?| "$(2"`(B")
181 (?W "$(2"c(B" (?e "$(2"b(B") (?u "$(2"c(B") (?i "$(2"d(B") (?a "$(2"e(B") (?E "$(2"f(B"))
182 (?2 "$(2#k(B" (?e "$(2#f(B") (?u "$(2#g(B") (?i "$(2#h(B") (?a "$(2#i(B") (?E "$(2#j(B") (?o "$(2#l(B")
183 (?| "$(2#k(B")
184 (?W "$(2"c(B" (?e "$(2"b(B") (?u "$(2"c(B") (?i "$(2"d(B") (?a "$(2"e(B") (?E "$(2"f(B"))))
185;;; i
186 ("$(2"U(B" (?2 "$(2#a(B"))
187;;; j
188 ("$(2$7(B" (?e "$(2$2(B") (?u "$(2$3(B") (?i "$(2$4(B") (?a "$(2$5(B") (?E "$(2$6(B") (?o "$(2$8(B") (?| "$(2$7(B"))
189;;; k
190 ("$(2"l(B" (?e "$(2"g(B") (?u "$(2"h(B") (?i "$(2"i(B") (?a "$(2"j(B") (?E "$(2"k(B") (?o "$(2"m(B") (?| "$(2"l(B")
191 (?W "$(2"o(B" (?e "$(2"n(B") (?u "$(2"o(B") (?i "$(2"p(B") (?a "$(2"q(B") (?E "$(2"r(B"))
192 (?2 "$(2#r(B" (?e "$(2#m(B") (?u "$(2#n(B") (?i "$(2#o(B") (?a "$(2#p(B") (?E "$(2#q(B") (?o "$(2#s(B")
193 (?| "$(2#r(B")))
194;;; l
195 ("$(2!&(B" (?e "$(2!!(B") (?u "$(2!"(B") (?i "$(2!#(B") (?a "$(2!$(B") (?E "$(2!%(B") (?o "$(2!'(B") (?| "$(2!&(B")
196 (?W "$(2!((B" (?a "$(2!((B")
197 (?e "$(2!&#L(B") (?u "$(2!&#M(B") (?i "$(2!&#N(B") (?E "$(2!&#P(B") (?' "$(2!&#M(B")))
198;;; m
199 ("$(2!.(B" (?e "$(2!)(B") (?u "$(2!*(B") (?i "$(2!+(B") (?a "$(2!,(B") (?E "$(2!-(B") (?o "$(2!/(B") (?| "$(2!.(B")
200 (?W "$(2!0(B" (?a "$(2!0(B")
201 (?e "$(2!.#L(B") (?u "$(2!.#M(B") (?i "$(2!.#N(B") (?E "$(2!.#P(B") (?' "$(2!.#M(B")))
202;;; n
203 ("$(2!f(B" (?e "$(2!a(B") (?u "$(2!b(B") (?i "$(2!c(B") (?a "$(2!d(B") (?E "$(2!e(B") (?o "$(2!g(B") (?| "$(2!f(B")
204 (?W "$(2!h(B" (?a "$(2!h(B")
205 (?e "$(2!f#L(B") (?u "$(2!f#M(B") (?i "$(2!f#N(B") (?E "$(2!f#P(B") (?' "$(2!f#M(B")))
206;;; o
207 ("$(2"Y(B" (?2 "$(2#e(B"))
208;;; p
209 ("$(2$L(B" (?e "$(2$G(B") (?u "$(2$H(B") (?i "$(2$I(B") (?a "$(2$J(B") (?E "$(2$K(B") (?o "$(2$M(B") (?| "$(2$L(B"))
210;;; q
211 ("$(2#&(B" (?e "$(2#!(B") (?u "$(2#"(B") (?i "$(2##(B") (?a "$(2#$(B") (?E "$(2#%(B") (?o "$(2#'(B") (?| "$(2#&(B")
212 (?W "$(2#)(B" (?e "$(2#((B") (?u "$(2#)(B") (?i "$(2#*(B") (?a "$(2#+(B") (?E "$(2#,(B")))
213;;; r
214 ("$(2!6(B" (?e "$(2!1(B") (?u "$(2!2(B") (?i "$(2!3(B") (?a "$(2!4(B") (?E "$(2!5(B") (?o "$(2!7(B") (?| "$(2!6(B")
215 (?W "$(2!8(B" (?a "$(2!8(B")
216 (?e "$(2!6#L(B") (?u "$(2!6#M(B") (?i "$(2!6#N(B") (?E "$(2!6#P(B") (?' "$(2!6#M(B")))
217;;; s
218 ("$(2"H(B" (?e "$(2"C(B") (?u "$(2"D(B") (?i "$(2"E(B") (?a "$(2"F(B") (?E "$(2"G(B") (?o "$(2"I(B") (?| "$(2"H(B")
219 (?W "$(2"J(B" (?a "$(2"J(B")
220 (?e "$(2"H#L(B") (?u "$(2"H#M(B") (?i "$(2"H#N(B") (?E "$(2"H#P(B") (?' "$(2"H#M(B"))
221 (?2 "$(2#V(B" (?e "$(2#Q(B") (?u "$(2#R(B") (?i "$(2#S(B") (?a "$(2#T(B") (?E "$(2#U(B") (?o "$(2#W(B")
222 (?| "$(2#V(B")
223 (?W "$(2"J(B" (?a "$(2"J(B")
224 (?e "$(2#V#L(B") (?u "$(2#V#M(B") (?i "$(2#V#N(B") (?E "$(2#V#P(B")
225 (?' "$(2#V#M(B"))))
226;;; t
227 ("$(2!V(B" (?e "$(2!Q(B") (?u "$(2!R(B") (?i "$(2!S(B") (?a "$(2!T(B") (?E "$(2!U(B") (?o "$(2!W(B") (?| "$(2!V(B")
228 (?W "$(2!X(B" (?a "$(2!X(B")
229 (?e "$(2!V#L(B") (?u "$(2!V#M(B") (?i "$(2!V#N(B") (?E "$(2!V#P(B") (?' "$(2!V#M(B")))
230;;; u
231 ("$(2"T(B" (?2 "$(2#`(B"))
232;;; v
233 ("$(2!N(B" (?e "$(2!I(B") (?u "$(2!J(B") (?i "$(2!K(B") (?a "$(2!L(B") (?E "$(2!M(B") (?o "$(2!O(B") (?| "$(2!N(B")
234 (?W "$(2!P(B" (?a "$(2!P(B")
235 (?e "$(2!N#L(B") (?u "$(2!N#M(B") (?i "$(2!N#N(B") (?E "$(2!N#P(B") (?' "$(2!N#M(B")))
236;;; w
237 ("$(2#J(B" (?e "$(2#E(B") (?u "$(2#F(B") (?i "$(2#G(B") (?a "$(2#H(B") (?E "$(2#I(B") (?o "$(2#K(B") (?| "$(2#J(B")
238 (?W "$(2#M(B" (?e "$(2#L(B") (?u "$(2#M(B") (?i "$(2#N(B") (?a "$(2#O(B") (?E "$(2#P(B")))
239;;; x
240 ("$(2!>(B" (?e "$(2!9(B") (?u "$(2!:(B") (?i "$(2!;(B") (?a "$(2!<(B") (?E "$(2!=(B") (?o "$(2!?(B") (?| "$(2!>(B")
241 (?W "$(2!@(B" (?a "$(2!@(B")
242 (?e "$(2!>#L(B") (?u "$(2!>#M(B") (?i "$(2!>#N(B") (?E "$(2!>#P(B") (?' "$(2!>#M(B")))
243;;; y
244 ("$(2$)(B" (?e "$(2$$(B") (?u "$(2$%(B") (?i "$(2$&(B") (?a "$(2$'(B") (?E "$(2$((B") (?o "$(2$*(B") (?| "$(2$)(B"))
245;;; z
246 ("$(2!v(B" (?e "$(2!q(B") (?u "$(2!r(B") (?i "$(2!s(B") (?a "$(2!t(B") (?E "$(2!u(B") (?o "$(2!w(B") (?| "$(2!v(B")
247 (?W "$(2!x(B" (?a "$(2!x(B")
248 (?e "$(2!v#L(B") (?u "$(2!v#M(B") (?i "$(2!v#N(B") (?E "$(2!v#P(B") (?' "$(2!v#M(B")))
249 ])
250
251;;;###autoload
252(defun sera-to-fidel-region (beg end &optional ascii-mode force)
253 "Translates the characters in region from SERA to FIDEL.
254
255If the 1st optional parameter ASCII-MODE is non-NIL, assumes that the
256region begins in ASCII script.
257
258If the 2nd optional parametr FORCE is non-NIL, translates even if the
259buffer is read-only."
260
261 (interactive "r\nP")
262 (save-excursion
263 (save-restriction
264 (narrow-to-region beg end)
265 (sera-to-fidel-buffer ascii-mode force))))
266
267;;;###autoload
268(defun sera-to-fidel-buffer (&optional ascii-mode force)
269 "Translates the current buffer from SERA to FIDEL.
270
271If the 1st optional parameter ASCII-MODE is non-NIL, assumes that the
272current buffer begins in ASCII script.
273
274If the 2nd optional panametr FORCE is non-NIL, translates even if the
275buffer is read-only."
276
277 (interactive "P")
278 (if (and buffer-read-only
279 (not force)
280 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
281 (error ""))
282 (let (start pre fol hard table table2 (buffer-read-only nil))
283 (goto-char (point-min))
284 (while (not (eobp))
285 (setq start (point))
286 (forward-char 1)
287 (setq pre (preceding-char)
288 fol (following-char))
289
290 (if ascii-mode
291 (cond
292
293 ;; ascii mode, pre != \
294 ((/= pre ?\\ ))
295
296 ;; ascii mode, pre = \, fol = !
297 ((= fol ?!)
298 (backward-delete-char 1)
299 (delete-char 1)
300 (setq ascii-mode nil
301 hard (not hard)))
302
303 ;; hard ascii mode, pre = \, fol != !
304 (hard)
305
306 ;; soft ascii mode, pre = \, fol = {\ _ * < > 0..9 ~}
307 ((or (backward-delete-char 1) ; always nil
308 (eobp)
309 (sera-to-fidel-backslash)))
310
311 ;; soft ascii mode, pre = \, fol = SPC
312 ((= fol 32)
313 (delete-char 1)
314 (setq ascii-mode nil))
315
316 ;; soft ascii mode, pre = \, fol = .
317 ((= fol ?.)
318 (delete-char 1)
319 (insert ?$(2$P(B))
320
321 ;; soft ascii mode, pre = \, fol = ,
322 ((= fol ?,)
323 (delete-char 1)
324 (insert ?$(2$Q(B))
325
326 ;; soft ascii mode, pre = \, fol = ;
327 ((= fol ?\;)
328 (delete-char 1)
329 (insert ?$(2$R(B))
330
331 ;; soft ascii mode, pre = \, fol = :
332 ((= fol ?:)
333 (delete-char 1)
334 (insert ?$(2$S(B))
335
336 ;; soft ascii mode, pre = \, fol = others
337 (t
338 (setq ascii-mode nil)))
339
340 (cond
341
342 ;; very special: skip "<" to ">" (or "&" to ";") if in w3-mode
343 ((and (boundp 'sera-being-called-by-w3)
344 sera-being-called-by-w3
345 (or (= pre ?<) (= pre ?&)))
346 (search-forward (if (= pre ?<) ">" ";")
347 nil 0))
348
349 ;; ethio mode, pre != sera
350 ((or (< pre ?') (> pre ?z)))
351
352 ;; ethio mode, pre != \
353 ((/= pre ?\\ )
354 (setq table (aref sera-to-fidel-table pre))
355 (while (setq table2 (cdr (assoc (following-char) table)))
356 (setq table table2)
357 (forward-char 1))
358 (if (car table)
359 (progn
360 (delete-region start (point))
361 (insert (car table)))))
362
363 ;; ethio mode, pre = \, fol = !
364 ((= fol ?!)
365 (backward-delete-char 1)
366 (delete-char 1)
367 (setq ascii-mode t
368 hard (not hard)))
369
370 ;; hard ethio mode, pre = \, fol != !
371 (hard)
372
373 ;; soft ethio mode, pre = \, fol = {\ _ * < > 0..9 ~}
374 ((or (backward-delete-char 1) ; always nil
375 (eobp)
376 (sera-to-fidel-backslash)))
377
378 ;; soft ethio mode, pre = \, fol = SPC
379 ((= fol 32)
380 (delete-char 1)
381 (setq ascii-mode t))
382
383 ;; soft ethio mode, pre = \, fol = {. , ; : | ' `}
384 ((memq fol '(?. ?, ?\; ?: ?| ?' ?`))
385 (forward-char 1))
386
387 ;; soft ethio mode, pre = \, fol = others
388 (t
389 (setq ascii-mode t))))))
390 (goto-char (point-min)))
391
392(defun sera-to-fidel-backslash ()
393 "Handle SERA backslash escapes common to ethio- and ascii-mode.
394Returns t if something has been processed."
395 (let ((ch (following-char))
396 (converted t))
397 (if (and (>= ch ?1) (<= ch ?9))
398 (ethio-convert-digit)
399 (delete-char 1)
400 (cond
401 ((= ch ?\\ )
402 (insert ?\\ ))
403 ((= ch ?_)
404 (insert ?$(2$O(B))
405 ((= ch ?*)
406 (insert ?$(2$T(B))
407 ((= ch ?<)
408 (insert ?$(2$U(B))
409 ((= ch ?>)
410 (insert ?$(2$V(B))
411 ((= ch ?~)
412 (setq ch (following-char))
413 (delete-char 1)
414 (cond
415 ((= ch ?e)
416 (insert "$(2$k(B"))
417 ((= ch ?E)
418 (insert "$(2$l(B"))
419 ((= ch ?a)
420 (insert "$(2$m(B"))
421 ((= ch ?A)
422 (insert "$(2$n(B"))))
423 (t
424 (insert ch)
425 (backward-char 1)
426 (setq converted nil))))
427 converted))
428
429(defun ethio-convert-digit ()
430 "Convert Arabic digits to Ethiopic digits."
431 (let (ch z)
432 (while (and (>= (setq ch (following-char)) ?1)
433 (<= ch ?9))
434 (delete-char 1)
435
436 ;; count up following zeros
437 (setq z 0)
438 (while (= (following-char) ?0)
439 (delete-char 1)
440 (setq z (1+ z)))
441
442 (cond
443
444 ;; first digit is 10, 20, ..., or 90
445 ((= (mod z 2) 1)
446 ;; (- ch 40) means ?1 -> 9, ?2 -> 10, etc.
447 (insert (aref [?$(2$`(B ?$(2$a(B ?$(2$b(B ?$(2$c(B ?$(2$d(B ?$(2$e(B ?$(2$f(B ?$(2$g(B ?$(2$h(B] (- ch ?1)))
448 (setq z (1- z)))
449
450 ;; first digit is 2, 3, ..., or 9
451 ((/= ch ?1)
452 (insert (aref [?$(2$X(B ?$(2$Y(B ?$(2$Z(B ?$(2$[(B ?$(2$\(B ?$(2$](B ?$(2$^(B ?$(2$_(B] (- ch ?2))))
453
454 ;; single 1
455 ((= z 0)
456 (insert "$(2$W(B")))
457
458 ;; 100
459 (if (= (mod z 4) 2)
460 (insert"$(2$i(B"))
461
462 ;; 10000
463 (insert-char ?$(2$j(B (/ z 4)))))
464
465;;;###autoload
466(defun sera-to-fidel-mail (&optional arg)
467 "Does SERA to FIDEL conversion for reading/writing mail and news.
468
469If the buffer contains the markers \"<sera>\" and \"</sera>\",
470converts the segment between the two markers in Ethio start mode and
471the subject field in ASCII start mode.
472
473If invoked interactively and there is no marker, converts both the
474whole body and the subject field in Ethio start mode.
475
476For backward compatibility, \"<ethiopic>\" and \"<>\" can be used instead of
477\"<sera>\" and \"</sera>\"."
478
479 (interactive "p")
480 (let* ((buffer-read-only nil) border)
481
482 (save-excursion
483 (goto-char (point-min))
484 (setq border
485 (search-forward
486 (if (eq major-mode 'rmail-mode)
487 "\n\n"
488 (concat "\n" mail-header-separator "\n"))))
489
490 (cond
491
492 ;; with markers
493 ((re-search-forward "^<sera>\n" nil t)
494 (goto-char (match-beginning 0))
495 (while (re-search-forward "^<sera>\n" nil t)
496 (replace-match "" nil t)
497 (sera-to-fidel-region
498 (point)
499 (progn
500 (if (re-search-forward "^</sera>\n" nil 0)
501 (replace-match "" nil t))
502 (point))))
503
504 (goto-char (point-min))
505 (if (re-search-forward "^Subject: " border t)
506 (sera-to-fidel-region
507 (point)
508 (progn (end-of-line) (point))
509 'ascii-start)))
510
511 ;; backward compatibility
512 ((re-search-forward "^<ethiopic>\n" nil t)
513 (goto-char (match-beginning 0))
514 (while (re-search-forward "^<ethiopic>\n" nil t)
515 (replace-match "" nil t)
516 (sera-to-fidel-region
517 (setq border (point))
518 (progn
519 (if (re-search-forward "^<>\n" nil 0)
520 (replace-match "" nil t))
521 (point))))
522
523 (goto-char (point-min))
524 (if (re-search-forward "^Subject: " border t)
525 (sera-to-fidel-region
526 (point)
527 (progn (end-of-line) (point))
528 'ascii-start)))
529
530 ;; interactive & no markers
531 (arg
532 (sera-to-fidel-region border (point-max))
533 (goto-char (point-min))
534 (if (re-search-forward "^Subject: " border t)
535 (sera-to-fidel-region
536 (point)
537 (progn (end-of-line) (point))))))
538
539 ;; adjust the rmail marker
540 (if (eq major-mode 'rmail-mode)
541 (set-marker
542 (aref rmail-message-vector (1+ rmail-current-message))
543 (point-max))))))
544
545;;;###autoload
546(defun sera-to-fidel-marker ()
547 "If the buffer contains the markers \"<sera>\" and \"</sera>\",
548converts the segment between the two markers from SERA to Fidel
549in Ethio start mode. The markers will not be removed."
550
551 (interactive)
552 (if (and buffer-read-only
553 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
554 (error ""))
555 (save-excursion
556 (goto-char (point-min))
557 (while (re-search-forward "<sera>" nil t)
558 (sera-to-fidel-region
559 (point)
560 (if (re-search-forward "</sera>" nil t)
561 (match-beginning 0)
562 (point-max))
563 nil
564 'force))))
565
566;;
567;; FIDEL to SERA
568;;
569
570(defconst fidel-to-sera-map
571 ["le" "lu" "li" "la" "lE" "l" "lo" "lWa"
572 "me" "mu" "mi" "ma" "mE" "m" "mo" "mWa"
573 "re" "ru" "ri" "ra" "rE" "r" "ro" "rWa"
574 "xe" "xu" "xi" "xa" "xE" "x" "xo" "xWa"
575 "be" "bu" "bi" "ba" "bE" "b" "bo" "bWa"
576 "ve" "vu" "vi" "va" "vE" "v" "vo" "vWa"
577 "te" "tu" "ti" "ta" "tE" "t" "to" "tWa"
578 "ce" "cu" "ci" "ca" "cE" "c" "co" "cWa"
579 "ne" "nu" "ni" "na" "nE" "n" "no" "nWa"
580 "Ne" "Nu" "Ni" "Na" "NE" "N" "No" "NWa"
581 "ze" "zu" "zi" "za" "zE" "z" "zo" "zWa"
582 "Ze" "Zu" "Zi" "Za" "ZE" "Z" "Zo" "ZWa"
583 "de" "du" "di" "da" "dE" "d" "do" "dWa"
584 "Te" "Tu" "Ti" "Ta" "TE" "T" "To" "TWa"
585 "Ce" "Cu" "Ci" "Ca" "CE" "C" "Co" "CWa"
586 "fe" "fu" "fi" "fa" "fE" "f" "fo" "fWa"
587 "se" "su" "si" "sa" "sE" "s" "so" "sWa"
588 "Se" "Su" "Si" "Sa" "SE" "S" "So" "SWa"
589 "a" "u" "i" "A" "E" "I" "o" "e3"
590 "he" "hu" "hi" "ha" "hE" "h" "ho" "hWe" "hWu" "hWi" "hWa" "hWE"
591 "ke" "ku" "ki" "ka" "kE" "k" "ko" "kWe" "kWu" "kWi" "kWa" "kWE"
592 "Ke" "Ku" "Ki" "Ka" "KE" "K" "Ko" "KWe" "KWu" "KWi" "KWa" "KWE"
593 "qe" "qu" "qi" "qa" "qE" "q" "qo" "qWe" "qWu" "qWi" "qWa" "qWE"
594 "Qe" "Qu" "Qi" "Qa" "QE" "Q" "Qo" "QWe" "QWu" "QWi" "QWa" "QWE"
595 "ge" "gu" "gi" "ga" "gE" "g" "go" "gWe" "gWu" "gWi" "gWa" "gWE"
596 "we" "wu" "wi" "wa" "wE" "w" "wo" "wWe" "wWu" "wWi" "wWa" "wWE"
597 "`se" "`su" "`si" "`sa" "`sE" "`s" "`so"
598 "`Se" "`Su" "`Si" "`Sa" "`SE" "`S" "`So"
599 "`e" "`u" "`i" "`a" "`E" "`I" "`o"
600 "`he" "`hu" "`hi" "`ha" "`hE" "`h" "`ho"
601 "`ke" "`ku" "`ki" "`ka" "`kE" "`k" "`ko"
602 "Xe" "Xu" "Xi" "Xa" "XE" "X" "Xo"
603 "He" "Hu" "Hi" "Ha" "HE" "H" "Ho"
604 "ye" "yu" "yi" "ya" "yE" "y" "yo"
605 "De" "Du" "Di" "Da" "DE" "D" "Do"
606 "je" "ju" "ji" "ja" "jE" "j" "jo"
607 "Ge" "Gu" "Gi" "Ga" "GE" "G" "Go"
608 "Pe" "Pu" "Pi" "Pa" "PE" "P" "Po"
609 "pe" "pu" "pi" "pa" "pE" "p" "po"
610 " " "\\_" "." "," ";" ":" "\\*" "\\<" "\\>"
611 "1" "2" "3" "4" "5" "6" "7" "8" "9"
612 "10" "20" "30" "40" "50" "60" "70" "80" "90"
613 "100" "10000"
614 "\\~e" "\\~E" "\\~a" "\\~A"])
615
616(defvar ethio-use-tigrigna-style nil
617 "*If non-NIL, use \"e\" instead of \"a\" for the first lone vowel
618translation in sera-to-fidel and fidel-to-sera conversions.")
619
620(defvar ethio-quote-vowel-always nil
621 "*If non-NIL, lone vowels are always transcribed by \"an apostrophe
622+ the vowel\" except at word initial. Otherwise, they are quoted by
623an apostrophe only if the preceding Ethiopic character is a lone
624consonant.")
625
626(defvar ethio-W-sixth-always nil
627 "*If non-NIL, the Wu-form of a 12-form consonant is transcribed by
628\"W'\" instead of \"Wu\".")
629
630(defvar ethio-numeric-reduction 0
631 "*Degree of reduction in transcribing Ethiopic digits by Arabic
632digits. For example, $(2$`$_$i$g$](B ({10}{9}{100}{80}{7}) will be
633transcribed by:
634 \10\9\100\80\7 if ETHIO-NUMERIC-REDUCTION is 0,
635 \109100807 is 1,
636 \10900807 is 2.")
637
638;;;###autoload
639(defun fidel-to-sera-region (begin end &optional ascii-mode force)
640 "Replaces all the FIDEL characters in the region to sera format.
641
642If the 1st optional parameter ASCII-MODE is non-NIL, converts the
643region so that it begins in ASCII script.
644
645If the 2nd optional parameter FORCE is non-NIL, converts even if the
646buffer is read-only."
647
648 (interactive "r\nP")
649 (save-excursion
650 (save-restriction
651 (narrow-to-region begin end)
652 (fidel-to-sera-buffer ascii-mode force))))
653
654;;;###autoload
655(defun fidel-to-sera-buffer (&optional ascii-mode force)
656 "Replace all the FIDEL characters in the current buffer to sera format.
657
658If the 1st optional parameter ASCII-MODE is non-NIL,
659convert the current buffer so that it begins in ASCII script.
660
661If the 2nd optional parameter FORCE is non-NIL, converts even if the
662buffer is read-only.
663
664See also the description of the variables ethio-use-tigrigna-style,
665ethio-quote-vowel-on-demand and ethio-numeric-reduction."
666
667 (interactive "P")
668 (if (and buffer-read-only
669 (not force)
670 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
671 (error ""))
672
673 ;; user's preference in transcription
674 (aset fidel-to-sera-map 144 (if ethio-use-tigrigna-style "e" "a"))
675 (let ((i 160)
676 (x (if ethio-W-sixth-always
677 '("hW'" "kW'" "KW'" "qW'" "QW'" "gW'" "wW'")
678 '("hWu" "kWu" "KWu" "qWu" "QWu" "gWu" "wWu"))))
679 (while x
680 (aset fidel-to-sera-map i (car x))
681 (setq i (+ i 12)
682 x (cdr x))))
683
684 ;; main conversion routine
685 (let ((lonec nil) ; if lonec = t, previous char was a lone consonant.
686 (fidel nil) ; if fidel = t, previous char was a fidel.
687 (digit nil) ; if digit = t, previous char was an Ethiopic digit.
688 (buffer-read-only nil)
689 ch)
690 (goto-char (point-min))
691 (while (not (eobp))
692 (setq ch (following-char))
693
694 ;; ethiopic charactes
695 (if (eq (char-charset ch) 'ethiopic)
696 (progn
697 (setq ch (char-to-ethiocode ch))
698 (delete-char 1)
699
700 (cond
701
702 ;; fidels
703 ((<= ch 326)
704 (if ascii-mode
705 (insert "\\ "))
706 (if (and (memq ch '(144 145 146 147 148 150 151)) ; (auiAEoe3)
707 (or lonec
708 (and ethio-quote-vowel-always
709 fidel)))
710 (insert "'"))
711 (insert (aref fidel-to-sera-map ch))
712 (setq ascii-mode nil
713 lonec (ethio-lone-consonant-p ch)
714 fidel t
715 digit nil))
716
717 ;; punctuations and symbols
718 ((or (< ch 336) (> ch 355))
719 (if (and ascii-mode
720 (memq ch '(329 330 331 332))) ; (.,;:)
721 (insert "\\"))
722 (insert (aref fidel-to-sera-map ch))
723 (setq lonec nil
724 fidel nil
725 digit nil))
726
727 ;; now CH must be an ethiopic digit
728
729 ;; reduction = 0 or leading digit
730 ((or (= ethio-numeric-reduction 0)
731 (not digit))
732 (insert "\\" (aref fidel-to-sera-map ch))
733 (setq lonec nil
734 fidel nil
735 digit t))
736
737 ;; reduction = 2 and following 10s, 100s, 10000s
738 ((and (= ethio-numeric-reduction 2)
739 (memq ch '(345 354 355)))
740 (insert (substring (aref fidel-to-sera-map ch) 1))
741 (setq lonec nil
742 fidel nil
743 digit t))
744
745 ;; ordinary following digits
746 (t
747 (insert (aref fidel-to-sera-map ch))
748 (setq lonec nil
749 fidel nil
750 digit t))))
751
752 ;; non-ethiopic characters
753 (cond
754
755 ;; backslash is always quoted
756 ((= ch ?\\ )
757 (insert "\\"))
758
759 ;; nothing to do if in ascii-mode
760 (ascii-mode)
761
762 ;; ethio-mode -> ascii-mode
763 ((or (and (>= ch ?a) (<= ch ?z))
764 (and (>= ch ?A) (<= ch ?Z))
765 (memq ch '(?| ?' ?`)))
766 (insert "\\ ")
767 (setq ascii-mode t))
768
769 ;; ascii punctuations in ethio-mode
770 ((memq ch '(?. ?, ?\; ?:))
771 (insert "\\")))
772
773 (forward-char 1)
774 (setq lonec nil
775 fidel nil
776 digit nil)))
777
778 ;; a few modifications for readability
779 (goto-char (point-min))
780 (while (re-search-forward "\\([]!\"#$%&()*+/<=>?@[^_-]+\\)\\\\ " nil t)
781 (replace-match "\\\\ \\1"))
782
783 (goto-char (point-min))
784 (while (re-search-forward "\n\\([ \t]*\\)\\\\ " nil t)
785 (replace-match "\\\\\n\\1")))
786
787 (goto-char (point-min)))
788
789(defun ethio-lone-consonant-p (code)
790 "If the ethiocode CODE is an Ethiopic lone consonant, return t."
791 (cond
792 ((< code 144)
793 (= (mod code 8) 5))
794 ((< code 153)
795 nil)
796 ((< code 236)
797 (= (mod code 12) 1))
798 ((< code 327)
799 (= (mod code 7) 3))))
800
801;;;###autoload
802(defun fidel-to-sera-mail ()
803 "Does FIDEL to SERA conversion for reading/writing mail and news.
804
805If the buffer contains at least one Ethiopic character,
806 1) inserts the string \"<sera>\" right after the header-body separator,
807 2) inserts \"</sera>\" at the end of the buffer,
808 3) converts the body into SERA in Ethiopic start mode, and
809 4) converts the subject field in ASCII start mode."
810
811 (interactive)
812 (save-excursion
813 (goto-char (point-min))
814 (if (re-search-forward "\\cE" nil t)
815 (let ((buffer-read-only nil) border)
816
817 (goto-char (point-min))
818 (setq border
819 (search-forward
820 (if (eq major-mode 'rmail-mode)
821 "\n\n"
822 (concat "\n" mail-header-separator "\n"))))
823 (insert "<sera>\n")
824
825 (fidel-to-sera-region (point) (point-max))
826
827 (goto-char (point-max))
828 (if (/= (preceding-char) ?\n)
829 (insert "\n"))
830 (insert "</sera>\n")
831
832 (goto-char (point-min))
833 (if (re-search-forward "^Subject: " border t)
834 (fidel-to-sera-region
835 (point)
836 (progn (end-of-line) (point))
837 'ascii-start))
838
839 ;; adjust the rmail marker
840 (if (eq major-mode 'rmail-mode)
841 (set-marker
842 (aref rmail-message-vector (1+ rmail-current-message))
843 (point-max))))
844
845 (message "No Ethiopic characters in this buffer."))))
846
847;;;###autoload
848(defun fidel-to-sera-marker ()
849 "If the buffer contains the markers \"<sera>\" and \"</sera>\",
850converts the segment between the two markers from Fidel to SERA
851in Ethio start mode. The markers will not be removed."
852
853 (interactive)
854 (if (and buffer-read-only
855 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
856 (error ""))
857 (save-excursion
858 (goto-char (point-min))
859 (while (re-search-forward "^<sera>\n" nil t)
860 (fidel-to-sera-region
861 (point)
862 (if (re-search-forward "^</sera>\n" nil t)
863 (match-beginning 0)
864 (point-max))
865 nil
866 'force))))
867
868;;
869;; file I/O hooks
870;;
871
872(if (not (assoc "\\.sera$" auto-mode-alist))
873 (setq auto-mode-alist
874 (cons '("\\.sera$" . sera-to-fidel-find-file) auto-mode-alist)))
875(add-hook 'write-file-hooks 'fidel-to-sera-write-file)
876(add-hook 'after-save-hook 'sera-to-fidel-after-save)
877
878;;;###autoload
879(defun sera-to-fidel-find-file ()
880 "Intended to be called when a file whose name ends in \".sera\" is read in."
881 (sera-to-fidel-buffer nil 'force)
882 (set-buffer-modified-p nil)
883 nil)
884
885;;;###autoload
886(defun fidel-to-sera-write-file ()
887 "Intended to be used as write-file-hooks for the files
888whose name ends in \".sera\"."
889 (if (string-match "\\.sera$" (buffer-file-name))
890 (save-excursion
891 (fidel-to-sera-buffer nil 'force)
892 (set-buffer-modified-p nil)))
893 nil)
894
895;;;###autoload
896(defun sera-to-fidel-after-save ()
897 "Intended to be used as after-save-hook for the files
898whose name ends in \".sera\"."
899 (if (string-match "\\.sera$" (buffer-file-name))
900 (save-excursion
901 (sera-to-fidel-buffer nil 'force)
902 (set-buffer-modified-p nil)))
903 nil)
904
905;;
906;; vowel modification
907;;
908
909;;;###autoload
910(defun ethio-modify-vowel ()
911 "Modify the vowel of the FIDEL that is under the cursor."
912 (interactive)
913 (let ((ch (following-char)) newch base vowel)
914 (if (eq (char-charset ch) 'ethiopic)
915 (setq ch (char-to-ethiocode ch))
916 (error "Not a valid character."))
917 (if (or (and (>= ch 144) (<= ch 151)) ; lone vowels
918 (and (>= ch 250) (<= ch 256)) ; secondary lone vowels
919 (>= ch 327)) ; not FIDEL
920 (error "Not a valid character."))
921 (message "Modify vowel to: ")
922 (if (null (setq vowel (memq (read-char) '(?e ?u ?i ?a ?E ?' ?o))))
923 (error "Not a valid vowel.")
924 ;; ?e -> 0, ?u -> 1, ?i -> 2, ?a -> 3, ?E -> 4, ?' -> 5, ?o -> 6
925 (setq vowel (- 7 (length vowel))))
926
927 (cond
928
929 ;; 8-form consonant
930 ((<= ch 143)
931 (setq base (* (/ ch 8) 8))
932 (cond
933 ((< (mod ch 8) 7) ; e-form <= ch <= o-form
934 (setq newch (+ base vowel)))
935 ((= vowel 3) ; 3 = a
936 (setq newch (+ base 7))) ; (+ base 7) = Wa-form
937 ((= vowel 5) ; 5 = '
938 (setq newch
939 (cons (+ base 5) ; (+ base 5) = lone consonant
940 232))) ; 232 = Wu
941 (t
942 (setq newch
943 (cons (+ base 5) ; (+ base 5) = lone consonant
944 (+ 231 vowel)))))) ; 231 = We
945
946 ;; 12-form consonant
947 ((<= ch 235)
948 (setq ch (- ch 152) ; 152 = 12-form consonant offset
949 base (* (/ ch 12) 12))
950 (cond
951 ((< (mod ch 12) 7) ; e-form <= ch <= o-form
952 (setq newch (+ base vowel 152)))
953 ((< vowel 5) ; We-form <= ch <= WE-form
954 (setq newch (+ base vowel 159))) ; 159 = 152 (offset) + 7 (We-form)
955 ((= vowel 5) ; 5 = ' (= u in this case)
956 (setq newch (+ base 160))) ; 160 = 152 (offset) + 8 (Wu-form)
957 (t
958 (error "Not a valid vowel."))))
959
960 ;; 7-form consonant
961 (t ; 236 = 7-form consonant offset
962 (setq newch (+ (* (/ (- ch 236) 7) 7) vowel 236))))
963
964 (delete-char 1)
965
966 (cond
967 ((consp newch)
968 (insert (ethiocode-to-char (car newch))
969 (ethiocode-to-char (cdr newch)))
970 (backward-char 2))
971 (t
972 (insert (ethiocode-to-char newch))
973 (backward-char 1)))))
974
975(defun ethiocode-to-char (code)
976 (make-char 'ethiopic (/ code 94) (mod code 94)))
977
978(defun char-to-ethiocode (ch)
979 (and (eq (char-charset ch) 'ethiopic)
980 (let ((char-components (split-char ch)))
981 (+ (* (- (nth char-components 1) 161) 94)
982 (- (nth char-components 2) 161)))))
983
984;;
985;; space replacement
986;;
987
988;;;###autoload
989(defun ethio-replace-space (ch begin end)
990 "In the specified region, replace spaces between two Ethiopic characters."
991 (interactive "*cReplace spaces to: 1 (sg col), 2 (dbl col), 3 (Ethiopic)\nr")
992 (if (not (memq ch '(?1 ?2 ?3)))
993 (error ""))
994 (save-excursion
995 (save-restriction
996 (narrow-to-region begin end)
997 (goto-char (point-min))
998
999 (cond
1000
1001 ((= ch ?1)
1002
1003 ;; A double column space or an Ethiopic word separator is always
1004 ;; converted to an ASCII space.
1005 (while (re-search-forward "[$(2$N$O(B]" nil t)
1006 (replace-match " " nil nil)))
1007
1008 ((= ch ?2)
1009
1010 ;; An Ethiopic word separator is always converted to
1011 ;; a double column space.
1012 (while (search-forward "$(2$O(B" nil t)
1013 (replace-match "$(2$N(B"))
1014
1015 (goto-char (point-min))
1016
1017 ;; ASCII spaces are converted only if they are placed
1018 ;; between two Ethiopic characters.
1019 (while (re-search-forward "\\(\\cE\\)\\( \\)\\( *\\cE\\)" nil t)
1020
1021 ;; Converting the first ASCII space
1022 (replace-match "\\1$(2$N(B\\3")
1023
1024 ;; A double column space is \cE, so going back to the just
1025 ;; converted double column space makes it possible to find
1026 ;; the following ASCII spaces.
1027 (goto-char (match-beginning 2))))
1028
1029 ((= ch ?3)
1030
1031 ;; If more than one consecutive space (either ASCII or double
1032 ;; width) is found between two Ethiopic characters, the first
1033 ;; space will be converted to an Ethiopic word separator.
1034 (let (pred succ)
1035 (while (re-search-forward "[ $(2$N(B]\\([ $(2$N(B]*\\)" nil t)
1036 (and (setq pred (char-before (match-beginning 0)))
1037 (eq (char-charset pred) 'ethiopic)
1038 (setq succ (char-after (match-end 0)))
1039 (eq (char-charset succ) 'ethiopic)
1040 (replace-match "$(2$O(B\\1" nil nil)))))))))
1041
1042;;
1043;; special characters
1044;;
1045
1046;;;###autoload
1047(defun ethio-input-special-character (arg)
1048 "Allow the user to input special characters."
1049 (interactive "*cInput number: 1.$(2$k(B 2.$(2$l(B 3.$(2$m(B 4.$(2$n(B")
1050 (cond
1051 ((= arg ?1)
1052 (insert ?$(2$k(B))
1053 ((= arg ?2)
1054 (insert ?$(2$l(B))
1055 ((= arg ?3)
1056 (insert ?$(2$m(B))
1057 ((= arg ?4)
1058 (insert ?$(2$n(B))
1059 (t
1060 (error ""))))
1061
1062;;
1063(provide 'language/ethio-util)
1064
1065;;; Local Variables:
1066;;; generated-autoload-file: "../loaddefs.el"
1067;;; End:
1068;;; ethio-util.el ends here
diff --git a/lisp/language/ethiopic.el b/lisp/language/ethiopic.el
new file mode 100644
index 00000000000..d08ca5b1a43
--- /dev/null
+++ b/lisp/language/ethiopic.el
@@ -0,0 +1,85 @@
1;;; ethiopic.el --- Support for Ethiopic
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Ethiopic
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26(define-ccl-program ccl-encode-ethio-font
27 '(0
28 ;; In: R0:ethio (not checked)
29 ;; R1:position code 1
30 ;; R2:position code 2
31 ;; Out: R1:font code point 1
32 ;; R2:font code point 2
33 ((r1 -= 33)
34 (r2 -= 33)
35 (r1 *= 94)
36 (r2 += r1)
37 (if (r2 < 256) (r1 = 0) ((r2 -= 256) (r1 = 1)))))
38 "CCL program to encode an Ehitopic code to code point of Ehitopic font.")
39
40(setq font-ccl-encoder-alist
41 (cons (cons "ethio" ccl-encode-ethio-font) font-ccl-encoder-alist))
42
43(register-input-method
44 "Ethiopic" '("quail-ethio" quail-use-package "quail/ethio"))
45
46(defun setup-ethio-environment ()
47 (setq primary-language "Ethiopic")
48
49 (setq default-input-method '("Ethiopic" . "quail-ethio"))
50
51 ;;
52 ;; key bindings
53 ;;
54 (define-key global-map [f4] 'sera-to-fidel-buffer)
55 (define-key global-map [S-f4] 'sera-to-fidel-region)
56 (define-key global-map [C-f4] 'sera-to-fidel-marker)
57 (define-key global-map [f5] 'fidel-to-sera-buffer)
58 (define-key global-map [S-f5] 'fidel-to-sera-region)
59 (define-key global-map [C-f5] 'fidel-to-sera-marker)
60 (define-key global-map [f6] 'ethio-modify-vowel)
61 (define-key global-map [f7] 'ethio-replace-space)
62 (define-key global-map [S-f2] 'ethio-replace-space) ; as requested
63 (define-key global-map [f8] 'ethio-input-special-character)
64
65 (add-hook
66 'rmail-mode-hook
67 '(lambda ()
68 (define-key rmail-mode-map [C-f4] 'sera-to-fidel-mail)
69 (define-key rmail-mode-map [C-f5] 'fidel-to-sera-mail)))
70
71 (add-hook
72 'mail-mode-hook
73 '(lambda ()
74 (define-key mail-mode-map [C-f4] 'sera-to-fidel-mail)
75 (define-key mail-mode-map [C-f5] 'fidel-to-sera-mail)))
76 )
77
78(set-language-info-alist
79 "Ethiopic" '((setup-function . setup-ethio-environment)
80 (charset . (ethiopic))
81 (documentation . t)
82 (sample-text
83 . "Amharic ($(2"S!,!6!l(B) $(2#Q!$!.(B, Tigrigna ($(2!V#>!6!l(B) $(2"C!$!,!V(B")))
84
85;;; ethiopic.el ends here
diff --git a/lisp/language/european.el b/lisp/language/european.el
new file mode 100644
index 00000000000..e10169d24a7
--- /dev/null
+++ b/lisp/language/european.el
@@ -0,0 +1,105 @@
1;;; european.el --- Support for European languages
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, European
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Europeans, five character sets ISO8859-1,2,3,4,9 are supported.
27
28;;; Code:
29
30(make-coding-system
31 'coding-system-iso-8859-1 2 ?X
32 "Coding-system used in X as Compound Text Encoding."
33 '((ascii t) (latin-iso8859-1 t) nil nil
34 nil ascii-eol ascii-cntl))
35
36;; CTEXT is an alias for ISO-8859-1
37(put 'coding-system-ctext 'coding-system 'coding-system-iso-8859-1)
38
39(make-coding-system
40 'coding-system-iso-8859-2 2 ?2 "MIME ISO-8859-2"
41 '((ascii t) (latin-iso8859-2 t) nil nil
42 nil ascii-eol ascii-cntl nil nil nil nil))
43
44(make-coding-system
45 'coding-system-iso-8859-3 2 ?3 "MIME ISO-8859-3"
46 '((ascii t) (latin-iso8859-3 t) nil nil
47 nil ascii-eol ascii-cntl nil nil nil nil))
48
49(make-coding-system
50 'coding-system-iso-8859-4 2 ?4 "MIME ISO-8859-4"
51 '((ascii t) (latin-iso8859-4 t) nil nil
52 nil ascii-eol ascii-cntl nil nil nil nil))
53
54(make-coding-system
55 'coding-system-iso-8859-9 2 ?9 "MIME ISO-8859-9"
56 '((ascii t) (latin-iso8859-9 t) nil nil
57 nil ascii-eol ascii-cntl nil nil nil nil))
58
59(register-input-method "European"
60 '("quail-latin-1" quail-use-package "quail/latin"))
61(register-input-method "European"
62 '("quail-latin-2" quail-use-package "quail/latin"))
63(register-input-method "European"
64 '("quail-latin-3" quail-use-package "quail/latin"))
65(register-input-method "European"
66 '("quail-latin-4" quail-use-package "quail/latin"))
67(register-input-method "European"
68 '("quail-latin-5" quail-use-package "quail/latin"))
69
70(defun setup-european-environment ()
71 (setq coding-category-iso-8-1 'coding-system-iso-8859-1)
72
73 (set-coding-priority
74 '(coding-category-iso-7
75 coding-category-iso-8-1
76 coding-category-iso-8-2))
77
78 (setq-default buffer-file-coding-system 'coding-system-iso-8859-1)
79 (set-terminal-coding-system 'coding-system-iso-8859-1)
80 (set-keyboard-coding-system 'coding-system-iso-8859-1)
81
82 (setq default-input-method '("European" . "quail-latin-1"))
83 )
84
85(set-language-info "English" 'tutorial "TUTORIAL")
86
87(register-input-method "French"
88 '("quail-latin-1" quail-use-package "quail/latin"))
89(register-input-method "French"
90 '("quail-latin-1" quail-use-package "quail/latin"))
91
92(set-language-info-alist
93 "European" '((setup-function . setup-european-environment)
94 (charset . (ascii latin-iso8859-1 latin-iso8859-2
95 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9))
96 (coding-system . (coding-system-iso-8859-1
97 coding-system-iso-8859-2
98 coding-system-iso-8859-3
99 coding-system-iso-8859-4
100 coding-system-iso-8859-9))
101 (documentation . t)
102 (sample-text
103 . "Hello, Hej, Tere, Hei, Bonjour, Gr,A|_(B Gott, Ciao, ,A!(BHola!")))
104
105;;; european.el ends here
diff --git a/lisp/language/greek.el b/lisp/language/greek.el
new file mode 100644
index 00000000000..114dc5b7fdc
--- /dev/null
+++ b/lisp/language/greek.el
@@ -0,0 +1,59 @@
1;;; greek.el --- Support for Greek
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Greek
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Greek, the character set ISO8859-7 is supported.
27
28;;; Code:
29
30(make-coding-system
31 'coding-system-iso-8859-7 2 ?7 "MIME ISO-8859-7"
32 '((ascii t) (greek-iso8859-7 t) nil nil
33 nil ascii-eol ascii-cntl nil nil nil nil))
34
35(register-input-method
36 "Greek" '("quail-greek" quail-use-package "quail/greek"))
37
38(defun setup-greek-environment ()
39 (setq coding-category-iso-8-1 'coding-system-iso-8859-7)
40
41 (set-coding-priority
42 '(coding-category-iso-7
43 coding-category-iso-8-1))
44
45 (setq-default buffer-file-coding-system 'coding-system-iso-8859-7)
46 (set-terminal-coding-system 'coding-system-iso-8859-7)
47 (set-keyboard-coding-system 'coding-system-iso-8859-7)
48
49 (setq default-input-method '("Greek" . "quail-greek"))
50 )
51
52(set-language-info-alist
53 "Greek" '((setup-function . setup-greek-environment)
54 (charset . (greek-iso8859-7))
55 (coding-system . (coding-system-iso-8859-7))
56 (documentation . t)
57 (sample-text . "Greek (,FGkk]mija(B) ,FCei\(B ,Fsar(B")))
58
59;;; greek.el ends here
diff --git a/lisp/language/hebrew.el b/lisp/language/hebrew.el
new file mode 100644
index 00000000000..bece50500e2
--- /dev/null
+++ b/lisp/language/hebrew.el
@@ -0,0 +1,60 @@
1;;; hebrew.el --- Support for Hebrew
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Hebrew
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Hebrew, the character sets ISO8859-8 is supported.
27
28;;; Code:
29
30(make-coding-system
31 'coding-system-iso-8859-8 2 ?8 "MIME ISO-8859-8"
32 '((ascii t) (hebrew-iso8859-8 t) nil nil
33 nil ascii-eol ascii-cntl nil nil nil nil nil t))
34
35(register-input-method
36 "Hebrew" '("quail-hebrew" quail-use-package "quail/hebrew"))
37
38(defun setup-hebrew-environment ()
39 (setq coding-category-iso-8-1 'coding-system-iso-8859-8)
40
41 (set-coding-priority
42 '(coding-category-iso-7
43 coding-category-iso-8-1
44 coding-category-iso-8-2))
45
46 (setq-default buffer-file-coding-system 'coding-category-iso-8-2)
47 (set-terminal-coding-system 'coding-category-iso-8-2)
48 (set-keyboard-coding-system 'coding-category-iso-8-2)
49
50 (setq default-input-method '("Hebrew" . "quail-hebrew"))
51 )
52
53(set-language-info-alist
54 "Hebrew" '((setup-function . setup-hebrew-environment)
55 (charset . (hebrew-iso8859-8))
56 (coding-system . (coding-system-iso-8859-8))
57 (documentation . "Right-to-left writing is Not yet supported")
58 (sample-text . "Hebrew ,Hylem(B")))
59
60;;; hebew.el ends here
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
new file mode 100644
index 00000000000..75ef6ffe55b
--- /dev/null
+++ b/lisp/language/indian.el
@@ -0,0 +1,328 @@
1;;; indian.el --- Support for Indian Languages
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4
5;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
6
7;; Keywords: multilingual, Indian
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to
23;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24
25;;; Commentary:
26
27;; History:
28;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
29
30;; For Indian, the character set IS 13194 is supported.
31;;
32;; IS 13194 does not specifically assign glyphs for each characters.
33;; Following code is not specific to each Indian language.
34;;
35;; Eventually, this code will support generic information about
36;; following scripts.
37;;
38;; Devanagari
39;; Bengali
40;; Gurmukhi
41;; Gujarati
42;; Oriya
43;; Tamil
44;; Telgu
45;; Kannada
46;; Malayalam
47;;
48;; In this file, charsets other than charset-ascii and charset-indian-is13194
49;; should not be used except in the comment.
50
51;;; Code:
52
53;; Followings are what you see when you refer to the Emacs
54;; representations of IS 13194 charcters. However, this is merely
55;; tentative apperance, and you must convert them by
56;; indian-to-xxxxxx(specific script) function to use them.
57;; Devanagari is not an exception of this rule.
58
59;; 0xa0 //(5!"#$%&'()*+,-./(B
60;; 0xb0 (50123456789:;<=>?(B
61;; 0xc0 (5@ABCDEFGHIJKLMNO(B
62;; 0xd0 (5PQRSTUVWXYZ[\]^_(B
63;; 0xe0 (5`abcdefghijklmno(B
64;; 0xf0 (5pqrstuvwxyz{|}~(B//
65
66;; Note - In IS 13194, several symbols are obtained by special
67;; combination of several characters and Nukta sign.
68;;
69;; Sanskrit Vowel R -> (5*(B + (5i(B
70;; Sanskrit Vowel L -> (5&(B + (5i(B
71;; Sanskrit Vowel LL -> (5'(B + (5i(B
72;; Sanskrit Avagrah -> (5j(B + (5i(B
73;; OM -> (5!(B + (5i(B
74;;
75;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are
76;; not used in Emacs.
77;;
78;; Note - the above characters DO NOT represent any script. For
79;; example, if you want to obtain Devanagari character, you must do
80;; something like the following.
81;;
82;; (char-to-string (indian-to-devanagari ?(5$(B))
83;; "$(5!$(B"
84
85(let ((deflist
86 '(;; chars syntax category
87 ("(5!"#(B" "w" ?7) ; vowel-modifying diacritical mark
88 ; chandrabindu, anuswar, visarga
89 ("(5$(B-(52(B" "w" ?5) ; independent vowel
90 ("(53(B-(5X(B" "w" ?0) ; consonant
91 ("(5Z(B-(5g(B" "w" ?8) ; matra
92 ("(5q(B-(5z(B" "w" ?6) ; digit
93 ))
94 elm chars len syntax category to ch i)
95 (while deflist
96 (setq elm (car deflist))
97 (setq chars (car elm)
98 len (length chars)
99 syntax (nth 1 elm)
100 category (nth 2 elm)
101 i 0)
102 (while (< i len)
103 (if (= (aref chars i) ?-)
104 (setq i (1+ i)
105 to (sref chars i))
106 (setq ch (sref chars i)
107 to ch))
108 (while (<= ch to)
109 (modify-syntax-entry ch syntax)
110 (modify-category-entry ch category)
111 (setq ch (1+ ch)))
112 (setq i (+ i (char-bytes to))))
113 (setq deflist (cdr deflist))))
114
115
116;;; ITRANS
117;;
118;; ITRANS is one of the most popular method to exchange indian scripts
119;; electronically. Here is the table to convert between ITRANS code and
120;; IS 13194 code.
121
122(defvar indian-itrans-consonant-alist
123 '(
124 ("k" . "(53(B")
125 ("kh" . "(54(B")
126 ("g" . "(55(B")
127 ("gh" . "(56(B")
128 ("N^" . "(57(B")
129 ("ch" . "(58(B")
130 ("chh" . "(59(B")
131 ("j" . "(5:(B")
132 ("jh" . "(5;(B")
133 ("JN" . "(5<(B")
134 ("T" . "(5=(B")
135 ("Th" . "(5>(B")
136 ("D" . "(5?(B")
137 ("Dh" . "(5@(B")
138 ("N" . "(5A(B")
139 ("t" . "(5B(B")
140 ("th" . "(5C(B")
141 ("d" . "(5D(B")
142 ("dh" . "(5E(B")
143 ("n" . "(5F(B")
144 ("nh" . "(5G(B") ; For transcription of non-Devanagari Languages.
145 ("p" . "(5H(B")
146 ("ph" . "(5I(B")
147 ("b" . "(5J(B")
148 ("bh" . "(5K(B")
149 ("m" . "(5L(B")
150 ("y" . "(5M(B")
151 ("yh" . "(5N(B") ; For transcription of non-Devanagari Languages.
152 ("r" . "(5O(B")
153 ("rh" . "(5P(B") ; For transcription of non-Devanagari Languages.
154 ("l" . "(5Q(B")
155 ("v" . "(5T(B")
156 ("sh" . "(5U(B")
157 ("shh" . "(5V(B")
158 ("s" . "(5W(B")
159 ("h" . "(5X(B")
160 ("ld" . "(5R(B")
161 ("L" . "(5R(B")
162 ("ksh" . "$(5!3!h!V(B")
163 ("GY" . "***GY***") ; Must check out later.
164 ;; special consonants
165 ("q" . "(53i(B")
166 ("K" . "(54i(B")
167 ("G" . "(55i(B")
168 ("z" . "(5:i(B")
169 ("f" . "(5Ii(B")
170 (".D" . "(5?i(B")
171 (".Dh" . "(5@i(B")
172 ))
173
174(defvar indian-itrans-vowel-sign-alist
175 '(
176 ;; Special treatment unique to IS 13194 Transliteration
177 ("" . "(5h(B")
178 ("a" . "")
179 ;; Matra (Vowel Sign)
180 ("aa" . "(5Z(B")
181 ("A" . "(5Z(B")
182 ("i" . "(5[(B")
183 ("ii" . "(5\(B")
184 ("I" . "(5\(B")
185 ("u" . "(5](B")
186 ("uu" . "(5^(B")
187 ("U" . "(5^(B")
188 ("R^i" . "(5_(B") ; These must be checked out later.
189 ("R^I" . "(5_i(B")
190 ("L^i" . "(5[i(B")
191 ("L^I" . "(5\i(B")
192 ("E" . "(5`(B") ; For transcription of non-Devanangri Languages.
193 ("e" . "(5a(B")
194 ("ai" . "(5b(B")
195 ;; ("e.c" . "(5c(B") ; Tentatively suppressed.
196 ("O" . "(5d(B") ; For transcription of non-Devanagari Languages.
197 ("o" . "(5e(B")
198 ("au" . "(5f(B")
199 ;; ("o.c" . "(5g(B") ; Tentatively suppressed.
200 ))
201
202;;
203;; Independent vowels and other signs.
204;;
205
206(defvar indian-itrans-other-letters-alist
207 '(
208 ("a" . "(5$(B")
209 ("aa" . "(5%(B")
210 ("A" . "(5%(B")
211 ("i" . "(5&(B")
212 ("ii" . "(5'(B")
213 ("I" . "(5'(B")
214 ("u" . "(5((B")
215 ("uu" . "(5)(B")
216 ("U" . "(5)(B")
217 ("R^i" . "(5*(B")
218 ("R^I" . "(5*i(B")
219 ("L^i" . "(5&i(B")
220 ("L^I" . "(5'i(B")
221 ("E" . "(5+(B") ; For transcription of non-Devanagari Languages.
222 ("e" . "(5,(B")
223 ("ai" . "(5-(B")
224 ;; ("e.c" . "(5.(B") ; Candra E
225 ("O" . "(5/(B") ; For transcription of non-Devanagari Languages.
226 ("o" . "(50(B")
227 ("au" . "(51(B")
228 ;; ("o.c" . "(52(B") ; Candra O
229 ("M" . "(5$(B")
230 ("H" . "(5#(B")
231 ("AUM" . "(5!i(B")
232 ("OM" . "(5!i(B")
233 (".r" . "(5Oh(B")
234 (".n" . "(5"(B")
235 (".N" . "(5!(B")
236 (".h" . "(5h(B") ; Halant
237 (".." . "(5j(B")
238 (".a" . "(5ji(B") ; Avagrah
239 ("0" . "(5q(B")
240 ("1" . "(5r(B")
241 ("2" . "(5s(B")
242 ("3" . "(5t(B")
243 ("4" . "(5u(B")
244 ("5" . "(5v(B")
245 ("6" . "(5w(B")
246 ("7" . "(5x(B")
247 ("8" . "(5y(B")
248 ("9" . "(5z(B")
249 ))
250
251;; Regular expression matching single Indian character represented
252;; by ITRANS.
253
254(defvar indian-itrans-regexp
255 (let ((consonant "\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|\\(jN\\)\\|[mvqKGzfs]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)")
256 (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
257 (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
258 (lpre "\\(") (rpre "\\)") (orre "\\|"))
259 (concat lpre misc rpre orre
260 lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
261 lpre consonant rpre )))
262
263;;
264;; Regular expression matching single ITRANS unit for IS 13194 characters.
265;;
266
267(defvar itrans-indian-regexp
268 (let ((vowel "[(5$(B-(52(B]")
269 (consonant "[(53(B-(5X(B]")
270 (vowel-sign "[(5Z(B-(5g(B]")
271 (misc "[(5q(B-(5z(B]")
272 (lpre "\\(") (rpre "\\)") (orre "\\|"))
273 nil)) ; not yet prepared.
274
275
276;;
277;; IS13194 - ITRANS conversion table for string matching above regexp.
278;;
279
280(defvar indian-itrans-alist
281 (let ((cl indian-itrans-consonant-alist)
282 (ml indian-itrans-other-letters-alist) rules)
283 (while cl
284 (let ((vl indian-itrans-vowel-sign-alist))
285 (while vl
286 (setq rules
287 (cons (cons (concat (car (car cl)) (car (car vl)))
288 (concat (cdr (car cl)) (cdr (car vl))))
289 rules))
290 (setq vl (cdr vl))))
291 (setq cl (cdr cl)))
292 (while ml
293 (setq rules (cons (cons (car (car ml))
294 (cdr (car ml)))
295 rules))
296 (setq ml (cdr ml)))
297 rules))
298
299;;
300;; Utility program to convert from ITRANS to IS 13194 in specified region.
301;;
302
303(defun indian-decode-itrans-region (from to)
304 "Convert `ITRANS' mnemonics of the current region to Indian characters.
305When called from a program, expects two arguments,
306positions (integers or markers) specifying the stretch of the region."
307 (interactive "r")
308 (save-restriction
309 (narrow-to-region from to)
310 (goto-char (point-min))
311 (while (re-search-forward indian-itrans-regexp nil t)
312 (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0)))
313 (ch (cdr (assoc itrans indian-itrans-alist))))
314 (if ch
315 (progn
316 (delete-region (match-beginning 0) (match-end 0))
317 (insert ch)))))
318 (goto-char (point-min))
319 (while (re-search-forward "\\((5h(B\\)[^\\c0]" nil t)
320 (delete-region (match-beginning 1) (match-end 1)))))
321
322;;
323;; Utility program to convert from IS 13194 to ITRANS in specified region.
324;;
325
326;;;;;; not yet prepared.
327
328;;; indian.el ends here
diff --git a/lisp/language/japan-util.el b/lisp/language/japan-util.el
new file mode 100644
index 00000000000..2dab876fd51
--- /dev/null
+++ b/lisp/language/japan-util.el
@@ -0,0 +1,272 @@
1;; japan-util.el -- utilities for Japanese
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Japanese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26(defconst japanese-kana-table
27 '((?$B$"(B ?$B%"(B ?(I1(B) (?$B$$(B ?$B%$(B ?(I2(B) (?$B$&(B ?$B%&(B ?(I3(B) (?$B$((B ?$B%((B ?(I4(B) (?$B$*(B ?$B%*(B ?(I5(B)
28 (?$B$+(B ?$B%+(B ?(I6(B) (?$B$-(B ?$B%-(B ?(I7(B) (?$B$/(B ?$B%/(B ?(I8(B) (?$B$1(B ?$B%1(B ?(I9(B) (?$B$3(B ?$B%3(B ?(I:(B)
29 (?$B$5(B ?$B%5(B ?(I;(B) (?$B$7(B ?$B%7(B ?(I<(B) (?$B$9(B ?$B%9(B ?(I=(B) (?$B$;(B ?$B%;(B ?(I>(B) (?$B$=(B ?$B%=(B ?(I?(B)
30 (?$B$?(B ?$B%?(B ?(I@(B) (?$B$A(B ?$B%A(B ?(IA(B) (?$B$D(B ?$B%D(B ?(IB(B) (?$B$F(B ?$B%F(B ?(IC(B) (?$B$H(B ?$B%H(B ?(ID(B)
31 (?$B$J(B ?$B%J(B ?(IE(B) (?$B$K(B ?$B%K(B ?(IF(B) (?$B$L(B ?$B%L(B ?(IG(B) (?$B$M(B ?$B%M(B ?(IH(B) (?$B$N(B ?$B%N(B ?(II(B)
32 (?$B$O(B ?$B%O(B ?(IJ(B) (?$B$R(B ?$B%R(B ?(IK(B) (?$B$U(B ?$B%U(B ?(IL(B) (?$B$X(B ?$B%X(B ?(IM(B) (?$B$[(B ?$B%[(B ?(IN(B)
33 (?$B$^(B ?$B%^(B ?(IO(B) (?$B$_(B ?$B%_(B ?(IP(B) (?$B$`(B ?$B%`(B ?(IQ(B) (?$B$a(B ?$B%a(B ?(IR(B) (?$B$b(B ?$B%b(B ?(IS(B)
34 (?$B$d(B ?$B%d(B ?(IT(B) (?$B$f(B ?$B%f(B ?(IU(B) (?$B$h(B ?$B%h(B ?(IV(B)
35 (?$B$i(B ?$B%i(B ?(IW(B) (?$B$j(B ?$B%j(B ?(IX(B) (?$B$k(B ?$B%k(B ?(IY(B) (?$B$l(B ?$B%l(B ?(IZ(B) (?$B$m(B ?$B%m(B ?(I[(B)
36 (?$B$o(B ?$B%o(B ?(I\(B) (?$B$p(B ?$B%p(B nil) (?$B$q(B ?$B%q(B nil) (?$B$r(B ?$B%r(B ?(I&(B)
37 (?$B$s(B ?$B%s(B ?(I](B)
38 (?$B$,(B ?$B%,(B "(I6^(B") (?$B$.(B ?$B%.(B "(I7^(B") (?$B$0(B ?$B%0(B "(I8^(B") (?$B$2(B ?$B%2(B "(I9^(B") (?$B$4(B ?$B%4(B "(I:^(B")
39 (?$B$6(B ?$B%6(B "(I;^(B") (?$B$8(B ?$B%8(B "(I<^(B") (?$B$:(B ?$B%:(B "(I=^(B") (?$B$<(B ?$B%<(B "(I>^(B") (?$B$>(B ?$B%>(B "(I?^(B")
40 (?$B$@(B ?$B%@(B "(I@^(B") (?$B$B(B ?$B%B(B "(IA^(B") (?$B$E(B ?$B%E(B "(IB^(B") (?$B$G(B ?$B%G(B "(IC^(B") (?$B$I(B ?$B%I(B "(ID^(B")
41 (?$B$P(B ?$B%P(B "(IJ^(B") (?$B$S(B ?$B%S(B "(IK^(B") (?$B$V(B ?$B%V(B "(IL^(B") (?$B$Y(B ?$B%Y(B "(IM^(B") (?$B$\(B ?$B%\(B "(IN^(B")
42 (?$B$Q(B ?$B%Q(B "(IJ_(B") (?$B$T(B ?$B%T(B "(IK_(B") (?$B$W(B ?$B%W(B "(IL_(B") (?$B$Z(B ?$B%Z(B "(IM_(B") (?$B$](B ?$B%](B "(IN_(B")
43 (?$B$!(B ?$B%!(B ?(I'(B) (?$B$#(B ?$B%#(B ?(I((B) (?$B$%(B ?$B%%(B ?(I)(B) (?$B$'(B ?$B%'(B ?(I*(B) (?$B$)(B ?$B%)(B ?(I+(B)
44 (?$B$C(B ?$B%C(B ?(I/(B)
45 (?$B$c(B ?$B%c(B ?(I,(B) (?$B$e(B ?$B%e(B ?(I-(B) (?$B$g(B ?$B%g(B ?(I.(B)
46 (?$B$n(B ?$B%n(B nil)
47 (nil ?$B%t(B "(I3^(B") (nil ?$B%u(B nil) (nil ?$B%v(B nil))
48 "Japanese JISX0208 Kana character table.
49Each element is of the form (HIRAGANA KATAKANA HANKAKU-KATAKANA), where
50HIRAGANA and KATAKANA belong to `japanese-jisx0208',
51HANKAKU-KATAKANA belongs to `japanese-jisx0201-kana'.")
52
53;; Put properties 'katakana, 'hiragana, and 'jix0201 to each Japanese
54;; kana characters for conversion among them.
55(let ((l japanese-kana-table)
56 slot hiragana katakana jisx0201)
57 (while l
58 (setq slot (car l)
59 hiragana (car slot) katakana (nth 1 slot) jisx0201 (nth 2 slot)
60 l (cdr l))
61 (if hiragana
62 (progn
63 (put-char-code-property hiragana 'katakana katakana)
64 (put-char-code-property katakana 'hiragana hiragana)
65 (if jisx0201
66 (progn
67 (put-char-code-property hiragana 'jisx0201 jisx0201)
68 (if (integerp jisx0201)
69 (put-char-code-property jisx0201 'hiragana hiragana))))))
70 (if jisx0201
71 (progn
72 (put-char-code-property katakana 'jisx0201 jisx0201)
73 (if (integerp jisx0201)
74 (put-char-code-property jisx0201 'katakana katakana))))))
75
76(defconst japanese-symbol-table
77 '((?\$B!!(B ?\ ) (?$B!"(B ?, ?(I$(B) (?$B!#(B ?. ?(I!(B) (?$B!$(B ?, ?(I$(B) (?$B!%(B ?. ?(I!(B) (?$B!&(B nil ?(I%(B)
78 (?$B!'(B ?:) (?$B!((B ?\;) (?$B!)(B ??) (?$B!*(B ?!) (?$B!+(B nil ?(I^(B) (?$B!,(B nil ?(I_(B)
79 (?$B!-(B ?') (?$B!.(B ?`) (?$B!0(B ?^) (?$B!2(B ?_) (?$B!<(B ?-) (?$B!=(B ?-) (?$B!>(B ?-)
80 (?$B!?(B ?/) (?$B!@(B ?\\) (?$B!A(B ?~) (?$B!C(B ?|) (?$B!F(B ?`) (?$B!G(B ?') (?$B!H(B ?\") (?$B!I(B ?\")
81 (?\$B!J(B ?\() (?\$B!K(B ?\)) (?\$B!N(B ?[) (?\$B!O(B ?]) (?\$B!P(B ?{) (?\$B!Q(B ?})
82 (?$B!R(B ?<) (?$B!S(B ?>) (?$B!\(B ?+) (?$B!](B ?-) (?$B!a(B ?=) (?$B!c(B ?<) (?$B!d(B ?>)
83 (?$B!l(B ?') (?$B!m(B ?\") (?$B!o(B ?\\) (?$B!p(B ?$) (?$B!s(B ?%) (?$B!t(B ?#) (?$B!u(B ?&) (?$B!v(B ?*)
84 (?$B!w(B ?@))
85 "Japanese JISX0208 symbol character table.
86 Each element is of the form (SYMBOL ASCII HANKAKU), where SYMBOL
87belongs to `japanese-jisx0208', ASCII belongs to `ascii', and HANKAKU
88belongs to `japanese-jisx0201-kana'.")
89
90;; Put properties 'jisx0208, 'jisx0201, and 'ascii to each Japanese
91;; symbol and ASCII characters for conversion among them.
92(let ((l japanese-symbol-table)
93 slot jisx0208 ascii jisx0201)
94 (while l
95 (setq slot (car l)
96 jisx0208 (car slot) ascii (nth 1 slot) jisx0201 (nth 2 slot)
97 l (cdr l))
98 (if ascii
99 (progn
100 (put-char-code-property jisx0208 'ascii ascii)
101 (put-char-code-property ascii 'jisx0208 jisx0208)))
102 (if jisx0201
103 (progn
104 (put-char-code-property jisx0208 'jisx0201 jisx0201)
105 (put-char-code-property jisx0201 'jisx0208 jisx0208)))))
106
107(defconst japanese-alpha-numeric-table
108 '((?$B#0(B . ?0) (?$B#1(B . ?1) (?$B#2(B . ?2) (?$B#3(B . ?3) (?$B#4(B . ?4)
109 (?$B#5(B . ?5) (?$B#6(B . ?6) (?$B#7(B . ?7) (?$B#8(B . ?8) (?$B#9(B . ?9)
110 (?$B#A(B . ?A) (?$B#B(B . ?B) (?$B#C(B . ?C) (?$B#D(B . ?D) (?$B#E(B . ?E)
111 (?$B#F(B . ?F) (?$B#G(B . ?G) (?$B#H(B . ?H) (?$B#I(B . ?I) (?$B#J(B . ?J)
112 (?$B#K(B . ?K) (?$B#L(B . ?L) (?$B#M(B . ?M) (?$B#N(B . ?N) (?$B#O(B . ?O)
113 (?$B#P(B . ?P) (?$B#Q(B . ?Q) (?$B#R(B . ?R) (?$B#S(B . ?S) (?$B#T(B . ?T)
114 (?$B#U(B . ?U) (?$B#V(B . ?V) (?$B#W(B . ?W) (?$B#X(B . ?X) (?$B#Y(B . ?Y) (?$B#Z(B . ?Z)
115 (?$B#a(B . ?a) (?$B#b(B . ?b) (?$B#c(B . ?c) (?$B#d(B . ?d) (?$B#e(B . ?e)
116 (?$B#f(B . ?f) (?$B#g(B . ?g) (?$B#h(B . ?h) (?$B#i(B . ?i) (?$B#j(B . ?j)
117 (?$B#k(B . ?k) (?$B#l(B . ?l) (?$B#m(B . ?m) (?$B#n(B . ?n) (?$B#o(B . ?o)
118 (?$B#p(B . ?p) (?$B#q(B . ?q) (?$B#r(B . ?r) (?$B#s(B . ?s) (?$B#t(B . ?t)
119 (?$B#u(B . ?u) (?$B#v(B . ?v) (?$B#w(B . ?w) (?$B#x(B . ?x) (?$B#y(B . ?y) (?$B#z(B . ?z))
120 "Japanese JISX0208 alpha numeric character table.
121Each element is of the form (ALPHA-NUMERIC ASCII), where ALPHA-NUMERIC
122belongs to `japanese-jisx0208', ASCII belongs to `ascii'.")
123
124;; Put properties 'jisx0208 and 'ascii to each Japanese alpha numeric
125;; and ASCII characters for conversion between them.
126(let ((l japanese-alpha-numeric-table)
127 slot jisx0208 ascii)
128 (while l
129 (setq slot (car l)
130 jisx0208 (car slot) ascii (cdr slot)
131 l (cdr l))
132 (put-char-code-property jisx0208 'ascii ascii)
133 (put-char-code-property ascii 'jisx0208 jisx0208)))
134
135;; Convert string STR by FUNC and return a resulting string.
136(defun japanese-string-conversion (str func &rest args)
137 (let ((buf (get-buffer-create " *Japanese work*")))
138 (save-excursion
139 (set-buffer buf)
140 (erase-buffer)
141 (insert str)
142 (apply func 1 (point) args)
143 (buffer-string))))
144
145;;;###autoload
146(defun japanese-katakana (obj &optional hankaku)
147 "Convert argument to Katakana and return that.
148The argument may be a character or string. The result has the same type.
149The argument object is not altered--the value is a copy.
150Optional argument HANKAKU t means to convert to `hankaku' Katakana
151 \(`japanese-jisx0201-kana'), in which case return value
152 may be a string even if OBJ is a character if two Katakanas are
153 necessary to represent OBJ."
154 (if (stringp obj)
155 (japanese-string-conversion obj 'japanese-katakana-region hankaku)
156 (or (get-char-code-property obj (if hankaku 'jisx0201 'katakana))
157 obj)))
158
159;;;###autoload
160(defun japanese-hiragana (obj)
161 "Convert argument to Hiragana and return that.
162The argument may be a character or string. The result has the same type.
163The argument object is not altered--the value is a copy."
164 (if (stringp obj)
165 (japanese-string-conversion obj 'japanese-hiragana-region)
166 (or (get-char-code-property obj 'hiragana)
167 obj)))
168
169;;;###autoload
170(defun japanese-hankaku (obj &optional ascii-only)
171 "Convert argument to `hankaku' and return that.
172The argument may be a character or string. The result has the same type.
173The argument object is not altered--the value is a copy.
174Optional argument ASCII-ONLY non-nil means to return only ASCII character."
175 (if (stringp obj)
176 (japanese-string-conversion obj 'japanese-hankaku-region ascii-only)
177 (or (get-char-code-property obj 'ascii)
178 (and (not ascii-only)
179 (get-char-code-property obj 'jisx0201))
180 obj)))
181
182;;;###autoload
183(defun japanese-zenkaku (obj)
184 "Convert argument to `zenkaku' and return that.
185The argument may be a character or string. The result has the same type.
186The argument object is not altered--the value is a copy."
187 (if (stringp obj)
188 (japanese-string-conversion obj 'japanese-zenkaku-region)
189 (or (get-char-code-property obj 'jisx0208)
190 obj)))
191
192;;;###autoload
193(defun japanese-katakana-region (from to &optional hankaku)
194 "Convert Japanese `hiragana' chars in the region to `katakana' chars.
195Optional argument HANKAKU t means to convert to `hankaku katakana' character
196of which charset is `japanese-jisx0201-kana'."
197 (interactive "r\nP")
198 (save-restriction
199 (narrow-to-region from to)
200 (goto-char (point-min))
201 (while (re-search-forward "\\cH\\|\\cK" nil t)
202 (let* ((hira (preceding-char))
203 (kata (japanese-katakana hira hankaku)))
204 (if kata
205 (progn
206 (delete-region (match-beginning 0) (match-end 0))
207 (insert kata)))))))
208
209;;;###autoload
210(defun japanese-hiragana-region (from to)
211 "Convert Japanese `katakana' chars in the region to `hiragana' chars."
212 (interactive "r")
213 (save-restriction
214 (narrow-to-region from to)
215 (goto-char (point-min))
216 (while (re-search-forward "\\cK\\|\\ck" nil t)
217 (let* ((kata (preceding-char))
218 (hira (japanese-hiragana kata)))
219 (if hira
220 (progn
221 (delete-region (match-beginning 0) (match-end 0))
222 (insert hira)))))))
223
224;;;###autoload
225(defun japanese-hankaku-region (from to &optional ascii-only)
226 "Convert Japanese `zenkaku' chars in the region to `hankaku' chars.
227`Zenkaku' chars belong to `japanese-jisx0208'
228`Hankaku' chars belong to `ascii' or `japanese-jisx0201-kana'.
229Optional argument ASCII-ONLY non-nil means to convert only to ASCII char."
230 (interactive "r\nP")
231 (save-restriction
232 (narrow-to-region from to)
233 (goto-char (point-min))
234 (while (re-search-forward "\\cj" nil t)
235 (let* ((zenkaku (preceding-char))
236 (hankaku (japanese-hankaku zenkaku ascii-only)))
237 (if hankaku
238 (progn
239 (delete-region (match-beginning 0) (match-end 0))
240 (insert hankaku)))))))
241
242;;;###autoload
243(defun japanese-zenkaku-region (from to)
244 "Convert hankaku' chars in the region to Japanese `zenkaku' chars.
245`Zenkaku' chars belong to `japanese-jisx0208'
246`Hankaku' chars belong to `ascii' or `japanese-jisx0201-kana'."
247 (interactive "r")
248 (save-restriction
249 (narrow-to-region from to)
250 (goto-char (point-min))
251 (while (re-search-forward "\\ca\\|\\ck" nil t)
252 (let* ((hankaku (preceding-char))
253 (zenkaku (japanese-zenkaku hankaku)))
254 (if zenkaku
255 (progn
256 (delete-region (match-beginning 0) (match-end 0))
257 (insert zenkaku)))))))
258
259;;;###autoload
260(defun read-hiragana-string (prompt &optional initial-input)
261 "Read a Hiragana string from the minibuffer, prompting with string PROMPT.
262If non-nil, second arg INITIAL-INPUT is a string to insert before reading."
263 (read-multilingual-string prompt initial-input
264 "Japanese" "quail-ja-hiragana"))
265
266;;
267(provide 'language/japan-util)
268
269;;; Local Variables:
270;;; generated-autoload-file: "../loaddefs.el"
271;;; End:
272;;; japan-util.el ends here
diff --git a/lisp/language/japanese.el b/lisp/language/japanese.el
new file mode 100644
index 00000000000..f0dd5eb7be8
--- /dev/null
+++ b/lisp/language/japanese.el
@@ -0,0 +1,96 @@
1;;; japanese.el --- Japanese support
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Japanese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Japanese, character sets JISX0201, JISX0208, JISX0212 are
27;; supported.
28
29;;; Code:
30
31(make-coding-system
32 'coding-system-sjis 1 ?S
33 "Coding-system of Shift-JIS used in Japan." t)
34
35;; ISO-2022-JP and JUNET are aliases for ISO-2022-7.
36(put 'coding-system-iso-2022-jp 'coding-system 'coding-system-iso-2022-7)
37(put 'coding-system-junet 'coding-system 'coding-system-iso-2022-7)
38
39(make-coding-system
40 'coding-system-old-jis 2 ?J
41 "Coding-system used for old jis terminal."
42 '((ascii t) nil nil nil
43 short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis))
44
45(make-coding-system
46 'coding-system-euc-japan 2 ?E
47 "Coding-system of Japanese EUC (Extended Unix Code)."
48 '(ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212
49 short ascii-eol ascii-cntl nil nil single-shift))
50
51(register-input-method
52 "Japanese" '("quail-ja" quail-use-package "quail/japanese"))
53
54(register-input-method
55 "Japanese" '("quail-ja-hiragana" quail-use-package "quail/japanese"))
56
57(defun setup-japanese-environment ()
58 (interactive)
59 (setq coding-category-iso-8-2 'coding-system-euc-japan)
60
61 (set-coding-priority
62 '(coding-category-iso-7
63 coding-category-iso-8-2
64 coding-category-sjis
65 coding-category-iso-8-1
66 coding-category-iso-else
67 coding-category-internal))
68
69 (if (eq system-type 'ms-dos)
70 (progn
71 (setq-default buffer-file-coding-system 'coding-system-sjis)
72 (set-terminal-coding-system 'coding-system-sjis)
73 (set-keyboard-coding-system 'coding-system-sjis)
74 (setq default-process-coding-system
75 '(coding-system-sjis-dos . coding-system-sjis-dos)))
76 (setq-default buffer-file-coding-system 'coding-system-iso-2022-jp)
77 (set-terminal-coding-system 'coding-system-iso-2022-jp)
78 (set-keyboard-coding-system 'coding-system-iso-2022-jp))
79
80 (set-default-input-method "Japanese" "quail-ja")
81 )
82
83(set-language-info-alist
84 "Japanese" '((setup-function . setup-japanese-environment)
85 (tutorial . "TUTORIAL.jp")
86 (charset . (japanese-jisx0208 japanese-jisx0208-1978
87 japanese-jisx0212 latin-jisx0201
88 katakana-jisx0201))
89 (coding-system . (coding-system-euc-japan
90 coding-system-sjis
91 coding-system-old-jis
92 coding-system-iso-2022-jp))
93 (documentation . t)
94 (sample-text . "Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B")))
95
96;;; japanese.el ends here
diff --git a/lisp/language/korean.el b/lisp/language/korean.el
new file mode 100644
index 00000000000..4fda60ac830
--- /dev/null
+++ b/lisp/language/korean.el
@@ -0,0 +1,78 @@
1;;; korean.el --- Support for Korean
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Korean
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Korean, the character set KSC5601 is supported.
27
28;;; Code:
29
30(make-coding-system
31 'coding-system-euc-korea 2 ?K
32 "Coding-system of Korean EUC (Extended Unix Code)."
33 '((ascii t) korean-ksc5601 nil nil
34 nil ascii-eol ascii-cntl))
35
36;; EUC-KR is an alias for EUC-KOREA.
37(put 'coding-system-euc-kr 'coding-system 'coding-system-euc-korea)
38
39(make-coding-system
40 'coding-system-iso-2022-kr 2 ?k
41 "Coding-System used for communication with mail in Korea."
42 '(ascii (nil korean-ksc5601) nil nil
43 nil ascii-eol ascii-cntl seven locking-shift))
44
45(register-input-method
46 "Korean" '("quail-hangul" quail-use-package "quail/hangul"))
47(register-input-method
48 "Korean" '("quail-hangul3" quail-use-package "quail/hangul3"))
49(register-input-method
50 "Korean" '("quail-hanja" quail-use-package "quail/hanja"))
51(register-input-method
52 "Korean" '("quail-symbol-ksc" quail-use-package "quail/symbol-ksc"))
53(register-input-method
54 "Korean" '("quail-hanja-jis" quail-use-package "quail/hanja-jis"))
55
56(defun setup-korean-environment ()
57 (setq coding-category-iso-8-2 'coding-system-euc-korea)
58
59 (set-coding-priority
60 '(coding-category-iso-7
61 coding-category-iso-8-2
62 coding-category-iso-8-1))
63
64 (setq-default buffer-file-coding-system 'coding-system-euc-korea)
65
66 (setq default-input-method '("Korean" . "quail-hangul"))
67 )
68
69(set-language-info-alist
70 "Korean" '((setup-function . setup-korean-environment)
71 (tutorial . "TUTORIAL.kr")
72 (charset . (korean-ksc5601))
73 (coding-system . (coding-system-euc-korea
74 coding-system-iso-2022-kr))
75 (documentation . t)
76 (sample-text . "Hangul ($(CGQ1[(B) $(C>H3gGO<<?d(B, $(C>H3gGO=J4O1n(B")))
77
78;;; korean.el ends here
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
new file mode 100644
index 00000000000..e0f6a969b4d
--- /dev/null
+++ b/lisp/language/misc-lang.el
@@ -0,0 +1,31 @@
1;;; misc-lang.el --- support for miscellaneous languages (characters)
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, character set, coding system
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25;;; IPA (International Phonetic Alphabet)
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27
28(register-input-method
29 "IPA" '("quail-ipa" quail-use-package "quail/ipa"))
30
31;;; misc-lang.el ends here
diff --git a/lisp/language/thai-util.el b/lisp/language/thai-util.el
new file mode 100644
index 00000000000..04685bf7553
--- /dev/null
+++ b/lisp/language/thai-util.el
@@ -0,0 +1,176 @@
1;; thai-util.el -- utilities for Thai
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, thai
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Code:
25
26;; Setting information of Thai characters.
27
28(let ((l '((?,T!(B consonant "LETTER KO KAI") ; 0xA1
29 (?,T"(B consonant "LETTER KHO KHAI") ; 0xA2
30 (?,T#(B consonant "LETTER KHO KHUAT") ; 0xA3
31 (?,T$(B consonant "LETTER KHO KHWAI") ; 0xA4
32 (?,T%(B consonant "LETTER KHO KHON") ; 0xA5
33 (?,T&(B consonant "LETTER KHO RAKHANG") ; 0xA6
34 (?,T'(B consonant "LETTER NGO NGU") ; 0xA7
35 (?,T((B consonant "LETTER CHO CHAN") ; 0xA8
36 (?,T)(B consonant "LETTER CHO CHING") ; 0xA9
37 (?,T*(B consonant "LETTER CHO CHANG") ; 0xAA
38 (?,T+(B consonant "LETTER SO SO") ; 0xAB
39 (?,T,(B consonant "LETTER CHO CHOE") ; 0xAC
40 (?,T-(B consonant "LETTER YO YING") ; 0xAD
41 (?,T.(B consonant "LETTER DO CHADA") ; 0xAE
42 (?,T/(B consonant "LETTER TO PATAK") ; 0xAF
43 (?,T0(B consonant "LETTER THO THAN") ; 0xB0
44 (?,T1(B consonant "LETTER THO NANGMONTHO") ; 0xB1
45 (?,T2(B consonant "LETTER THO PHUTHAO") ; 0xB2
46 (?,T3(B consonant "LETTER NO NEN") ; 0xB3
47 (?,T4(B consonant "LETTER DO DEK") ; 0xB4
48 (?,T5(B consonant "LETTER TO TAO") ; 0xB5
49 (?,T6(B consonant "LETTER THO THUNG") ; 0xB6
50 (?,T7(B consonant "LETTER THO THAHAN") ; 0xB7
51 (?,T8(B consonant "LETTER THO THONG") ; 0xB8
52 (?,T9(B consonant "LETTER NO NU") ; 0xB9
53 (?,T:(B consonant "LETTER BO BAIMAI") ; 0xBA
54 (?,T;(B consonant "LETTER PO PLA") ; 0xBB
55 (?,T<(B consonant "LETTER PHO PHUNG") ; 0xBC
56 (?,T=(B consonant "LETTER FO FA") ; 0xBD
57 (?,T>(B consonant "LETTER PHO PHAN") ; 0xBE
58 (?,T?(B consonant "LETTER FO FAN") ; 0xBF
59 (?,T@(B consonant "LETTER PHO SAMPHAO") ; 0xC0
60 (?,TA(B consonant "LETTER MO MA") ; 0xC1
61 (?,TB(B consonant "LETTER YO YAK") ; 0xC2
62 (?,TC(B consonant "LETTER RO RUA") ; 0xC3
63 (?,TD(B vowel-base "LETTER RU (Pali vowel letter)") ; 0xC4
64 (?,TE(B consonant "LETTER LO LING") ; 0xC5
65 (?,TF(B vowel-base "LETTER LU (Pali vowel letter)") ; 0xC6
66 (?,TG(B consonant "LETTER WO WAEN") ; 0xC7
67 (?,TH(B consonant "LETTER SO SALA") ; 0xC8
68 (?,TI(B consonant "LETTER SO RUSI") ; 0xC9
69 (?,TJ(B consonant "LETTER SO SUA") ; 0xCA
70 (?,TK(B consonant "LETTER HO HIP") ; 0xCB
71 (?,TL(B consonant "LETTER LO CHULA") ; 0xCC
72 (?,TM(B consonant "LETTER O ANG") ; 0xCD
73 (?,TN(B consonant "LETTER HO NOK HUK") ; 0xCE
74 (?,TO(B special "PAI YAN NOI (abbreviation)") ; 0xCF
75 (?,TP(B vowel-base "VOWEL SIGN SARA A") ; 0xD0
76 (?,TQ(B vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T") ; 0xD1
77 (?,TR(B vowel-base "VOWEL SIGN SARA AA") ; 0xD2
78 (?,TS(B vowel-base "VOWEL SIGN SARA AM") ; 0xD3
79 (?,TT(B vowel-upper "VOWEL SIGN SARA I N/S-T") ; 0xD4
80 (?,TU(B vowel-upper "VOWEL SIGN SARA II N/S-T") ; 0xD5
81 (?,TV(B vowel-upper "VOWEL SIGN SARA UE N/S-T") ; 0xD6
82 (?,TW(B vowel-upper "VOWEL SIGN SARA UEE N/S-T") ; 0xD7
83 (?,TX(B vowel-lower "VOWEL SIGN SARA U N/S-B") ; 0xD8
84 (?,TY(B vowel-lower "VOWEL SIGN SARA UU N/S-B") ; 0xD9
85 (?,TZ(B vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)") ; 0xDA
86 (?,T[(B invalid nil) ; 0xDA
87 (?,T\(B invalid nil) ; 0xDC
88 (?,T](B invalid nil) ; 0xDC
89 (?,T^(B invalid nil) ; 0xDC
90 (?,T_(B special "BAHT SIGN (currency symbol)") ; 0xDF
91 (?,T`(B vowel-base "VOWEL SIGN SARA E") ; 0xE0
92 (?,Ta(B vowel-base "VOWEL SIGN SARA AE") ; 0xE1
93 (?,Tb(B vowel-base "VOWEL SIGN SARA O") ; 0xE2
94 (?,Tc(B vowel-base "VOWEL SIGN SARA MAI MUAN") ; 0xE3
95 (?,Td(B vowel-base "VOWEL SIGN SARA MAI MALAI") ; 0xE4
96 (?,Te(B vowel-base "LAK KHANG YAO") ; 0xE5
97 (?,Tf(B special "MAI YAMOK (repetion)") ; 0xE6
98 (?,Tg(B vowel-upper "VOWEL SIGN MAI TAI KHU N/S-T") ; 0xE7
99 (?,Th(B tone "TONE MAI EK N/S-T") ; 0xE8
100 (?,Ti(B tone "TONE MAI THO N/S-T") ; 0xE9
101 (?,Tj(B tone "TONE MAI TRI N/S-T") ; 0xEA
102 (?,Tk(B tone "TONE MAI CHATTAWA N/S-T") ; 0xEB
103 (?,Tl(B tone "THANTHAKHAT N/S-T (cancellation mark)") ; 0xEC
104 (?,Tm(B tone "NIKKHAHIT N/S-T (final nasal)") ; 0xED
105 (?,Tn(B vowel-upper "YAMAKKAN N/S-T") ; 0xEE
106 (?,To(B special "FONRMAN") ; 0xEF
107 (?,Tp(B special "DIGIT ZERO") ; 0xF0
108 (?,Tq(B special "DIGIT ONE") ; 0xF1
109 (?,Tr(B special "DIGIT TWO") ; 0xF2
110 (?,Ts(B special "DIGIT THREE") ; 0xF3
111 (?,Tt(B special "DIGIT FOUR") ; 0xF4
112 (?,Tu(B special "DIGIT FIVE") ; 0xF5
113 (?,Tv(B special "DIGIT SIX") ; 0xF6
114 (?,Tw(B special "DIGIT SEVEN") ; 0xF7
115 (?,Tx(B special "DIGIT EIGHT") ; 0xF8
116 (?,Ty(B special "DIGIT NINE") ; 0xF9
117 (?,Tz(B special "ANGKHANKHU (ellipsis)") ; 0xFA
118 (?,T{(B special "KHOMUT (beginning of religious texts)") ; 0xFB
119 (?,T|(B invalid nil) ; 0xFC
120 (?,T}(B invalid nil) ; 0xFD
121 (?,T~(B invalid nil) ; 0xFE
122 ))
123 elm)
124 (while l
125 (setq elm (car l))
126 (put-char-code-property (car elm) 'phonetic-type (car (cdr elm)))
127 (put-char-code-property (car elm) 'name (nth 2 elm))
128 (setq l (cdr l))))
129
130;;;###autoload
131(defun thai-compose-region (beg end)
132 "Compose Thai characters in the region.
133When called from a program, expects two arguments,
134positions (integers or markers) specifying the region."
135 (interactive "r")
136 (save-restriction
137 (narrow-to-region beg end)
138 (decompose-region (point-min) (point-max))
139 (goto-char (point-min))
140 (while (re-search-forward "\\c0\\(\\c2\\|\\c3\\|\\c4\\)+" nil t)
141 (if (aref (char-category-set (char-after (match-beginning 0))) ?t)
142 (compose-region (match-beginning 0) (match-end 0))))))
143
144;;;###autoload
145(defun thai-compose-buffer ()
146 "Compose Thai characters in the current buffer."
147 (interactive)
148 (thai-compose-region (point-min) (point-max)))
149
150;;;###autoload
151(defun thai-post-read-conversion (len)
152 (save-excursion
153 (save-restriction
154 (let ((buffer-modified-p (buffer-modified-p)))
155 (narrow-to-region (point) (+ (point) len))
156 (thai-compose-region (point-min) (point-max))
157 (set-buffer-modified-p buffer-modified-p)
158 (point-max)))))
159
160;;;###autoload
161(defun thai-pre-write-conversion (from to)
162 (let ((old-buf (current-buffer))
163 (work-buf (get-buffer-create " *thai-work*")))
164 (set-buffer work-buf)
165 (erase-buffer)
166 (insert-buffer-substring old-buf from to)
167 (decompose-region (point-min) (point-max))))
168
169;;
170(provide 'language/thai-util)
171
172;;; Local Variables:
173;;; generated-autoload-file: "../loaddefs.el"
174;;; End:
175;;; thai-util.el ends here
176
diff --git a/lisp/language/thai.el b/lisp/language/thai.el
new file mode 100644
index 00000000000..848959674f1
--- /dev/null
+++ b/lisp/language/thai.el
@@ -0,0 +1,63 @@
1;;; thai.el --- Support for Thai
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Thai
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Thai, the character set TIS620 is supported.
27
28;;; Code:
29
30(make-coding-system
31 'coding-system-tis620 2 ?T
32 "Coding-system used for ASCII(MSB=0) & TIS620(MSB=1)."
33 '((ascii t) (thai-tis620 t) nil nil
34 nil ascii-eol))
35(put 'coding-system-tis620 'post-read-conversion
36 'thai-post-read-conversion)
37(put 'coding-system-tis620 'pre-write-conversion
38 'thai-pre-write-conversion)
39
40(register-input-method
41 "Thai" '("quail-thai" quail-use-package "quail/thai"))
42
43(defun setup-thai-environment ()
44 (setq coding-category-iso-8-1 'coding-system-tis620)
45
46 (set-coding-priority
47 '(coding-category-iso-7
48 coding-category-iso-8-1))
49
50 (setq-default buffer-file-coding-system 'coding-system-tis620)
51
52 (setq default-input-method '("Thai" . "quail-thai"))
53 )
54
55(set-language-info-alist
56 "Thai" '((tutorial . "TUTORIAL.th")
57 (setup-function . setup-thai-environment)
58 (charset . (thai-tis620))
59 (coding-systemm . (coding-system-tis620))
60 (documentation . t)
61 (sample-text . "Thai (,T@RIRd7B(B) ,TJ0GQ1J04U1$0CQ1:(B, ,TJ0GQ1J04U10$h1P(B")))
62
63;;; thai.el ends here
diff --git a/lisp/language/viet-util.el b/lisp/language/viet-util.el
new file mode 100644
index 00000000000..6303f34dc9b
--- /dev/null
+++ b/lisp/language/viet-util.el
@@ -0,0 +1,267 @@
1;; viet-util.el -- utilities for Vietnamese
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, Vietnamese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; Vietnamese uses ASCII characters and additional 134 unique
27;; characters (these are Latin alphabets with various diacritical and
28;; tone marks). As far as I know, Vietnamese now has 4 different ways
29;; for representing these characters: VISCII, VSCII, VIQR, and
30;; Unicode. VISCII and VSCII are simple 1-byte code which assigns 134
31;; unique characters in control-code area (0x00..0x1F) and right half
32;; area (0x80..0xFF). VIQR is a menmonic encoding specification
33;; representing diacritical marks by following ASCII characters.
34
35;;; Code:
36
37;; VIQR is a menmonic encoding specification for Vietnamese.
38;; It represents diacritical marks by ASCII characters as follows:
39;; ------------+----------+--------
40;; mark | mnemonic | example
41;; ------------+----------+---------
42;; breve | ( | a( -> ,1e(B
43;; circumflex | ^ | a^ -> ,1b(B
44;; horn | + | o+ -> ,1=(B
45;; ------------+----------+---------
46;; acute | ' | a' -> ,1a(B
47;; grave | ` | a` -> ,1`(B
48;; hook above | ? | a? -> ,1d(B
49;; tilde | ~ | a~ -> ,1c(B
50;; dot below | . | a. -> ,1U(B
51;; ------------+----------+---------
52;; d bar | dd | dd -> ,1p(B
53;; ------------+----------+---------
54
55(defvar viet-viqr-alist
56 '(;; lowercase
57 (?,1!(B . "a('") ; 161
58 (?,1"(B . "a(`") ; 162
59 (?,1#(B . "a(.") ; 163
60 (?,1$(B . "a^'") ; 164
61 (?,1%(B . "a^`") ; 165
62 (?,1&(B . "a^?") ; 166
63 (?,1'(B . "a^.") ; 167
64 (?,1((B . "e~") ; 168
65 (?,1)(B . "e.") ; 169
66 (?,1*(B . "e^'") ; 170
67 (?,1+(B . "e^`") ; 171
68 (?,1,(B . "e^?") ; 172
69 (?,1-(B . "e^~") ; 173
70 (?,1.(B . "e^.") ; 174
71 (?,1/(B . "o^'") ; 175
72 (?,10(B . "o^`") ; 176
73 (?,11(B . "o^?") ; 177
74 (?,12(B . "o^~") ; 178
75 (?,15(B . "o^.") ; 181
76 (?,16(B . "o+`") ; 182
77 (?,17(B . "o+?") ; 183
78 (?,18(B . "i.") ; 184
79 (?,1=(B . "o+") ; 189
80 (?,1>(B . "o+'") ; 190
81 (?,1F(B . "a(?") ; 198
82 (?,1G(B . "a(~") ; 199
83 (?,1O(B . "y`") ; 207
84 (?,1Q(B . "u+'") ; 209
85 (?,1U(B . "a.") ; 213
86 (?,1V(B . "y?") ; 214
87 (?,1W(B . "u+`") ; 215
88 (?,1X(B . "u+?") ; 216
89 (?,1[(B . "y~") ; 219
90 (?,1\(B . "y.") ; 220
91 (?,1^(B . "o+~") ; 222
92 (?,1_(B . "u+") ; 223
93 (?,1`(B . "a`") ; 224
94 (?,1a(B . "a'") ; 225
95 (?,1b(B . "a^") ; 226
96 (?,1c(B . "a~") ; 227
97 (?,1d(B . "a?") ; 228
98 (?,1e(B . "a(") ; 229
99 (?,1f(B . "u+~") ; 230
100 (?,1g(B . "a^~") ; 231
101 (?,1h(B . "e`") ; 232
102 (?,1i(B . "e'") ; 233
103 (?,1j(B . "e^") ; 234
104 (?,1k(B . "e?") ; 235
105 (?,1l(B . "i`") ; 236
106 (?,1m(B . "i'") ; 237
107 (?,1n(B . "i~") ; 238
108 (?,1o(B . "i?") ; 239
109 (?,1p(B . "dd") ; 240
110 (?,1q(B . "u+.") ; 241
111 (?,1r(B . "o`") ; 242
112 (?,1s(B . "o'") ; 243
113 (?,1t(B . "o^") ; 244
114 (?,1u(B . "o~") ; 245
115 (?,1v(B . "o?") ; 246
116 (?,1w(B . "o.") ; 247
117 (?,1x(B . "u.") ; 248
118 (?,1y(B . "u`") ; 249
119 (?,1z(B . "u'") ; 250
120 (?,1{(B . "u~") ; 251
121 (?,1|(B . "u?") ; 252
122 (?,1}(B . "y'") ; 253
123 (?,1~(B . "o+.") ; 254
124
125 ;; upper case
126 (?,2!(B . "A('") ; 161
127 (?,2"(B . "A(`") ; 162
128 (?,2#(B . "A(.") ; 163
129 (?,2$(B . "A^'") ; 164
130 (?,2%(B . "A^`") ; 165
131 (?,2&(B . "A^?") ; 166
132 (?,2'(B . "A^.") ; 167
133 (?,2((B . "E~") ; 168
134 (?,2)(B . "E.") ; 169
135 (?,2*(B . "E^'") ; 170
136 (?,2+(B . "E^`") ; 171
137 (?,2,(B . "E^?") ; 172
138 (?,2-(B . "E^~") ; 173
139 (?,2.(B . "E^.") ; 174
140 (?,2/(B . "O^'") ; 175
141 (?,20(B . "O^`") ; 176
142 (?,21(B . "O^?") ; 177
143 (?,22(B . "O^~") ; 178
144 (?,25(B . "O^.") ; 181
145 (?,26(B . "O+`") ; 182
146 (?,27(B . "O+?") ; 183
147 (?,28(B . "I.") ; 184
148 (?,2=(B . "O+") ; 189
149 (?,2>(B . "O+'") ; 190
150 (?,2F(B . "A(?") ; 198
151 (?,2G(B . "A(~") ; 199
152 (?,2O(B . "Y`") ; 207
153 (?,2Q(B . "U+'") ; 209
154 (?,2U(B . "A.") ; 213
155 (?,2V(B . "Y?") ; 214
156 (?,2W(B . "U+`") ; 215
157 (?,2X(B . "U+?") ; 216
158 (?,2[(B . "Y~") ; 219
159 (?,2\(B . "Y.") ; 220
160 (?,2^(B . "O+~") ; 222
161 (?,2_(B . "U+") ; 223
162 (?,2`(B . "A`") ; 224
163 (?,2a(B . "A'") ; 225
164 (?,2b(B . "A^") ; 226
165 (?,2c(B . "A~") ; 227
166 (?,2d(B . "A?") ; 228
167 (?,2e(B . "A(") ; 229
168 (?,2f(B . "U+~") ; 230
169 (?,2g(B . "A^~") ; 231
170 (?,2h(B . "E`") ; 232
171 (?,2i(B . "E'") ; 233
172 (?,2j(B . "E^") ; 234
173 (?,2k(B . "E?") ; 235
174 (?,2l(B . "I`") ; 236
175 (?,2m(B . "I'") ; 237
176 (?,2n(B . "I~") ; 238
177 (?,2o(B . "I?") ; 239
178 (?,2p(B . "DD") ; 240
179 (?,2p(B . "dD") ; 240
180 (?,2p(B . "Dd") ; 240
181 (?,2q(B . "U+.") ; 241
182 (?,2r(B . "O`") ; 242
183 (?,2s(B . "O'") ; 243
184 (?,2t(B . "O^") ; 244
185 (?,2u(B . "O~") ; 245
186 (?,2v(B . "O?") ; 246
187 (?,2w(B . "O.") ; 247
188 (?,2x(B . "U.") ; 248
189 (?,2y(B . "U`") ; 249
190 (?,2z(B . "U'") ; 250
191 (?,2{(B . "U~") ; 251
192 (?,2|(B . "U?") ; 252
193 (?,2}(B . "Y'") ; 253
194 (?,2~(B . "O+.") ; 254
195
196 ;; escape from composition
197 (?\( . "\\(") ; breve (left parenthesis)
198 (?^ . "\\^") ; circumflex (caret)
199 (?+ . "\\+") ; horn (plus sign)
200 (?' . "\\'") ; acute (apostrophe)
201 (?` . "\\`") ; grave (backquote)
202 (?? . "\\?") ; hook above (question mark)
203 (?~ . "\\~") ; tilde (tilde)
204 (?. . "\\.") ; dot below (period)
205 (?d . "\\d") ; d-bar (d)
206 (?\\ . "\\\\") ; literal backslash
207 )
208 "Alist of Vietnamese characters vs corresponding `VIQR' string.")
209
210;; Regular expression matching single Vietnamese character represented
211;; by VIQR.
212(defconst viqr-regexp
213 "[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]")
214
215;;;###autoload
216(defun viet-decode-viqr-region (from to)
217 "Convert `VIQR' mnemonics of the current region to Vietnamese characaters.
218When called from a program, expects two arguments,
219positions (integers or markers) specifying the stretch of the region."
220 (interactive "r")
221 (save-restriction
222 (narrow-to-region from to)
223 (goto-char (point-min))
224 (while (re-search-forward viqr-regexp nil t)
225 (let* ((viqr (buffer-substring (match-beginning 0) (match-end 0)))
226 (ch (car (rassoc viet-viqr-alist viqr))))
227 (if ch
228 (progn
229 (delete-region (match-beginning 0) (match-end 0))
230 (insert ch)))))))
231
232;;;###autoload
233(defun viet-decode-viqr-buffer ()
234 "Convert `VIQR' mnemonics of the current buffer to Vietnamese characaters."
235 (interactive)
236 (viet-decode-viqr-region (point-min) (point-max)))
237
238;;;###autoload
239(defun viet-encode-viqr-region (from to)
240 "Convert Vietnamese characaters of the current region to `VIQR' mnemonics.
241When called from a program, expects two arguments,
242positions (integers or markers) specifying the stretch of the region."
243 (interactive "r")
244 (save-restriction
245 (narrow-to-region from to)
246 (goto-char (point-min))
247 (while (re-search-forward "\\cv" nil t)
248 (let* ((ch (preceding-char))
249 (viqr (cdr (assoc viet-viqr-alist ch))))
250 (if viqr
251 (progn
252 (delete-char -1)
253 (insert viqr)))))))
254
255;;;###autoload
256(defun viet-encode-viqr-buffer ()
257 "Convert Vietnamese characaters of the current buffer to `VIQR' mnemonics."
258 (interactive)
259 (viet-encode-viqr-region (point-min) (point-max)))
260
261;;;
262(provide 'language/viet-util)
263
264;;; Local Variables:
265;;; generated-autoload-file: "../loaddefs.el"
266;;; End:
267;;; viet-util.el ends here
diff --git a/lisp/language/vietnamese.el b/lisp/language/vietnamese.el
new file mode 100644
index 00000000000..bb14a4f580c
--- /dev/null
+++ b/lisp/language/vietnamese.el
@@ -0,0 +1,254 @@
1;;; vietnamese.el --- Support for Vietnamese
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: multilingual, Vietnamese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24;;; Commentary:
25
26;; For Vietnames, the character sets VISCII and VSCII are supported.
27
28;;; Code:
29
30(eval-when-compile
31
32(defvar viet-viscii-decode-table
33 [;; VISCII is a full 8-bit code.
34 0 1 ?,2F(B 3 4 ?,2G(B ?,2g(B 7 8 9 10 11 12 13 14 15
35 16 17 18 19 ?,2V(B 21 22 23 24 ?,2[(B 26 27 28 29 ?,2\(B 31
36 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
37 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
38 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
39 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
40 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
41 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
42 ?,2U(B ?,2!(B ?,2"(B ?,2#(B ?,2$(B ?,2%(B ?,2&(B ?,2'(B ?,2((B ?,2)(B ?,2*(B ?,2+(B ?,2,(B ?,2-(B ?,2.(B ?,2/(B
43 ?,20(B ?,21(B ?,22(B ?,25(B ?,2~(B ?,2>(B ?,26(B ?,27(B ?,28(B ?,2v(B ?,2w(B ?,2o(B ?,2|(B ?,2{(B ?,2x(B ?,2O(B
44 ?,2u(B ?,1!(B ?,1"(B ?,1#(B ?,1$(B ?,1%(B ?,1&(B ?,1'(B ?,1((B ?,1)(B ?,1*(B ?,1+(B ?,1,(B ?,1-(B ?,1.(B ?,1/(B
45 ?,10(B ?,11(B ?,12(B ?,2^(B ?,2=(B ?,15(B ?,16(B ?,17(B ?,18(B ?,2q(B ?,2Q(B ?,2W(B ?,2X(B ?,1=(B ?,1>(B ?,2_(B
46 ?,2`(B ?,2a(B ?,2b(B ?,2c(B ?,2d(B ?,2e(B ?,1F(B ?,1G(B ?,2h(B ?,2i(B ?,2j(B ?,2k(B ?,2l(B ?,2m(B ?,2n(B ?,1O(B
47 ?,2p(B ?,1Q(B ?,2r(B ?,2s(B ?,2t(B ?,1U(B ?,1V(B ?,1W(B ?,1X(B ?,2y(B ?,2z(B ?,1[(B ?,1\(B ?,2}(B ?,1^(B ?,1_(B
48 ?,1`(B ?,1a(B ?,1b(B ?,1c(B ?,1d(B ?,1e(B ?,1f(B ?,1g(B ?,1h(B ?,1i(B ?,1j(B ?,1k(B ?,1l(B ?,1m(B ?,1n(B ?,1o(B
49 ?,1p(B ?,1q(B ?,1r(B ?,1s(B ?,1t(B ?,1u(B ?,1v(B ?,1w(B ?,1x(B ?,1y(B ?,1z(B ?,1{(B ?,1|(B ?,1}(B ?,1~(B ?,2f(B ]
50 "Vietnamese VISCII encoding table.")
51
52(defvar viet-viscii-encode-table
53 (let ((table-lower (make-vector 128 0))
54 (table-upper (make-vector 128 0))
55 (i 0)
56 char-component)
57 (while (< i 256)
58 (setq char-component (split-char (aref viet-viscii-decode-table i)))
59 (cond ((eq (car char-component) 'vietnamese-viscii-lower)
60 (aset table-lower (nth 1 char-component) i))
61 ((eq (car char-component) 'vietnamese-viscii-upper)
62 (aset table-upper (nth 1 char-component) i)))
63 (setq i (1+ i)))
64 (cons table-lower table-upper))
65 "Vietnamese VISCII decoding table.
66Cons of tables for decoding lower-case chars and upper-case characterss.
67Both tables are indexed by the position code of Vietnamese characters.")
68
69(defvar viet-vscii-decode-table
70 [;; VSCII is a full 8-bit code.
71 0 ?,2z(B ?,2x(B 3 ?,2W(B ?,2X(B ?,2f(B 7 8 9 10 11 12 13 14 15
72 16 ?,2Q(B ?,2_(B ?,2O(B ?,2V(B ?,2[(B ?,2}(B ?,2\(B 24 25 26 27 28 29 30 31
73 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
74 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
75 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
76 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
77 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
78 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
79 ?,2`(B ?,2d(B ?,2c(B ?,2a(B ?,2U(B ?,2#(B ?,2'(B ?,2h(B ?,2k(B ?,2((B ?,2i(B ?,2)(B ?,2.(B ?,2l(B ?,2o(B ?,2n(B
80 ?,2m(B ?,28(B ?,2r(B ?,2v(B ?,2u(B ?,2s(B ?,2w(B ?,25(B ?,26(B ?,27(B ?,2^(B ?,2>(B ?,2~(B ?,2y(B ?,2|(B ?,2{(B
81 160 ?,2e(B ?,2b(B ?,2j(B ?,2t(B ?,2=(B ?,2_(B ?,2p(B ?,1e(B ?,1b(B ?,1j(B ?,1t(B ?,1>(B ?,1y(B ?,1p(B ?,2"(B
82 192 193 194 195 196 ?,1`(B ?,1d(B ?,1c(B ?,1a(B ?,1U(B ?,2F(B ?,1"(B ?,1F(B ?,1G(B ?,1!(B ?,2G(B
83 ?,2!(B ?,2%(B ?,2&(B ?,2g(B ?,2%(B ?,2+(B ?,1#(B ?,1%(B ?,1&(B ?,1g(B ?,1$(B ?,1'(B ?,1h(B ?,2,(B ?,1k(B ?,1((B
84 ?,1i(B ?,1)(B ?,1+(B ?,1,(B ?,1-(B ?,1*(B ?,1.(B ?,1l(B ?,1o(B ?,2-(B ?,2*(B ?,20(B ?,1n(B ?,1m(B ?,18(B ?,1r(B
85 ?,21(B ?,1v(B ?,1u(B ?,1s(B ?,1w(B ?,10(B ?,11(B ?,12(B ?,1/(B ?,15(B ?,16(B ?,17(B ?,1^(B ?,1>(B ?,1~(B ?,1y(B
86 ?,22(B ?,1|(B ?,1{(B ?,1z(B ?,1x(B ?,1W(B ?,1X(B ?,1f(B ?,1Q(B ?,1q(B ?,1O(B ?,1V(B ?,1[(B ?,1}(B ?,1\(B ?,2/(B]
87 "Vietnamese VSCII code table.")
88
89(defvar viet-vscii-encode-table
90 (let ((table-lower (make-vector 128 0))
91 (table-upper (make-vector 128 0))
92 (i 0)
93 char-component)
94 (while (< i 256)
95 (setq char-component (split-char (aref viet-vscii-decode-table i)))
96 (cond ((eq (car char-component) 'vietnamese-viscii-lower)
97 (aset table-lower (nth 1 char-component) i))
98 ((eq (car char-component) 'vietnamese-viscii-upper)
99 (aset table-upper (nth 1 char-component) i)))
100 (setq i (1+ i)))
101 (cons table-lower table-upper))
102 "Vietnamese VSCII decoding table.
103Cons of tables for decoding lower-case chars and upper-case characterss.
104Both tables are indexed by the position code of Vietnamese characters.")
105
106)
107
108(define-ccl-program ccl-decode-viscii
109 `(3
110 ((read r0)
111 (loop
112 (write-read-repeat r0 ,viet-viscii-decode-table))
113 ))
114 "CCL program to decode VISCII 1.1")
115
116;; Multibyte form of a Vietnamese character is as follows (3-byte):
117;; LEADING-CODE-PRIVATE-11 LEADING-CODE-EXTENDED-11 POSITION-CODE
118;; where LEADING-CODE-EXTENDED-11 for Vietnamese is
119;; `vietnamese-viscii-lower' or `vietnamese-viscii-upper'.
120
121(define-ccl-program ccl-encode-viscii
122 `(1
123 ((read r0)
124 (loop
125 (if (r0 < 128)
126 ;; ASCII
127 (write-read-repeat r0)
128 ;; not ASCII
129 (if (r0 != ,leading-code-private-11)
130 ;; not Vietnamese
131 (write-read-repeat r0)
132 ((read-if (r0 == ,(charset-id 'vietnamese-viscii-lower))
133 (;; Vietnamese lower
134 (read r0)
135 (r0 -= 128)
136 (write-read-repeat r0 ,(car viet-viscii-encode-table)))
137 (if (r0 == ,(charset-id 'vietnamese-viscii-upper))
138 (;; Vietnamese upper
139 (read r0)
140 (r0 -= 128)
141 (write-read-repeat r0 ,(cdr viet-viscii-encode-table)))
142 ;; not Vietnamese
143 (write-read-repeat r0)))))))))
144 "CCL program to encode VISCII 1.1")
145
146(define-ccl-program ccl-encode-viscii-font
147 `(0
148 ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper
149 ;; R1:position code
150 ;; Out: R1:font code point
151 (if (r0 == ,(charset-id 'vietnamese-viscii-lower))
152 (r1 = r1 ,(car viet-viscii-encode-table))
153 (r1 = r1 ,(cdr viet-viscii-encode-table)))
154 )
155 "CCL program to encode Vietnamese chars to VISCII 1.1 font")
156
157(define-ccl-program ccl-decode-vscii
158 `(3
159 ((read r0)
160 (loop
161 (write-read-repeat r0 ,viet-vscii-decode-table))
162 ))
163 "CCL program to decode VSCII-1.")
164
165(define-ccl-program ccl-encode-vscii
166 `(1
167 ((read r0)
168 (loop
169 (if (r0 < 128)
170 ;; ASCII
171 (write-read-repeat r0)
172 ;; not ASCII
173 (if (r0 != ,leading-code-private-11)
174 ;; not Vietnamese
175 (write-read-repeat r0)
176 (read-if (r0 == ,(charset-id 'vietnamese-viscii-lower))
177 (;; Vietnamese lower
178 (read r0)
179 (r0 -= 128)
180 (write-read-repeat r0 ,(car viet-vscii-encode-table)))
181 (if (r0 == ,(charset-id 'vietnamese-viscii-upper))
182 (;; Vietnamese upper
183 (read r0)
184 (r0 -= 128)
185 (write-read-repeat r0 ,(cdr viet-viscii-encode-table)))
186 ;; not Vietnamese
187 (write-read-repeat r0))))))))
188 "CCL program to encode VSCII-1.")
189
190(define-ccl-program ccl-encode-vscii-font
191 `(0
192 ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper
193 ;; R1:position code
194 ;; Out: R1:font code point
195 (if (r0 == ,(charset-id 'vietnamese-viscii-lower))
196 (r1 = r1 ,(car viet-vscii-encode-table))
197 (r1 = r1 ,(cdr viet-vscii-encode-table)))
198 )
199 "CCL program to encode Vietnamese chars to VSCII-1 font.")
200
201
202(make-coding-system
203 'coding-system-viscii 4 ?V
204 "Coding-system used for VISCII 1.1."
205 (cons ccl-decode-viscii ccl-encode-viscii))
206
207(make-coding-system
208 'coding-system-vscii 4 ?V
209 "Coding-system used for VSCII-1."
210 (cons ccl-decode-vscii ccl-encode-vscii))
211
212(make-coding-system
213 'coding-system-viqr 0 ?v
214 "Codins-system used for VIQR."
215 nil)
216(put 'coding-system-viqr 'post-read-conversion 'viet-decode-viqr-region)
217(put 'coding-system-viqr 'pre-write-conversion 'viet-encode-viqr-region)
218
219(setq font-ccl-encoder-alist
220 (cons (cons "viscii" ccl-encode-viscii-font) font-ccl-encoder-alist))
221
222(setq font-ccl-encoder-alist
223 (cons (cons "vscii" ccl-encode-vscii-font) font-ccl-encoder-alist))
224
225(register-input-method
226 "Vietnamese" '("quail-viqr" quail-use-package "quail/viqr"))
227
228(defun setup-viet-environment ()
229 ;; for VISCII users
230 (setq coding-category-binary 'coding-system-viscii)
231
232 ;; for VSCII users
233 ;; (setq coding-category-binary 'coding-system-vscii)
234
235 (set-coding-priority
236 '(coding-category-iso-7
237 coding-category-binary))
238
239 (setq-default buffer-file-coding-system 'coding-system-viscii)
240
241 (setq default-input-method '("Vietnamese" . "quail-viqr"))
242 )
243
244(set-language-info-alist
245 "Vietnamese" '((setup-function . setup-viet-environment)
246 (charset . (vietnamese-viscii-lower
247 vietnamese-viscii-upper))
248 (coding-system . (coding-system-viscii
249 coding-system-vscii
250 coding-system-viqr))
251 (documentation . t)
252 (sample-text . "Vietnamese (Ti,1*(Bng Vi,1.(Bt) Ch,1`(Bo b,1U(Bn")))
253
254;;; vietnamese.el ends here
diff --git a/src/category.c b/src/category.c
new file mode 100644
index 00000000000..8bdaee9e5af
--- /dev/null
+++ b/src/category.c
@@ -0,0 +1,665 @@
1/* GNU Emacs routines to deal with category tables.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7This file is part of GNU Emacs.
8
9GNU Emacs is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GNU Emacs is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GNU Emacs; see the file COPYING. If not, write to
21the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
22
23
24/* Here we handle three objects: category, category set, and category
25 table. Read comments in the file category.h to understand them. */
26
27#include <config.h>
28#include <ctype.h>
29#include "lisp.h"
30#include "buffer.h"
31#include "charset.h"
32#include "category.h"
33
34/* The version number of the latest category table. Each category
35 table has a unique version number. It is assigned a new number
36 also when it is modified. When a regular expression is compiled
37 into the struct re_pattern_buffer, the version number of the
38 category table (of the current buffer) at that moment is also
39 embedded in the structure.
40
41 For the moment, we are not using this feature. */
42static int category_table_version;
43
44Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
45
46/* Variables to determine word boundary. */
47Lisp_Object Vword_combining_categories, Vword_separating_categories;
48
49/* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
50Lisp_Object _temp_category_set;
51
52
53/* Category set staff. */
54
55DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
56 "Return a newly created category-set which contains CATEGORIES.\n\
57CATEGORIES is a string of category mnemonics.")
58 (categories)
59 Lisp_Object categories;
60{
61 Lisp_Object val;
62 int len;
63
64 CHECK_STRING (categories, 0);
65 val = MAKE_CATEGORY_SET;
66
67 len = XSTRING (categories)->size;
68 while (--len >= 0)
69 {
70 Lisp_Object category = make_number (XSTRING (categories)->data[len]);
71
72 CHECK_CATEGORY (category, 0);
73 SET_CATEGORY_SET (val, category, Qt);
74 }
75 return val;
76}
77
78
79/* Category staff. */
80
81Lisp_Object check_category_table ();
82
83DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
84 "Define CHAR as a category which is described by DOCSTRING.\n\
85CHAR should be a visible letter of ` ' thru `~'.\n\
86DOCSTRING is a documentation string of the category.\n\
87The category is defined only in category table TABLE, which defaults to\n\
88 the current buffer's category table.")
89 (category, docstring, table)
90 Lisp_Object category, docstring, table;
91{
92 CHECK_CATEGORY (category, 0);
93 CHECK_STRING (docstring, 1);
94 table = check_category_table (table);
95
96 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
97 error ("Category `%c' is already defined", XFASTINT (category));
98 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
99
100 return Qnil;
101}
102
103DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
104 "Return a documentation string of CATEGORY.\n\
105Optional second arg specifies CATEGORY-TABLE,\n\
106 which defaults to the current buffer's category table.")
107 (category, table)
108 Lisp_Object category, table;
109{
110 Lisp_Object doc;
111
112 CHECK_CATEGORY (category, 0);
113 table = check_category_table (table);
114
115 return CATEGORY_DOCSTRING (table, XFASTINT (category));
116}
117
118DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
119 0, 1, 0,
120 "Return a category which is not yet defined.\n\
121If total number of categories has reached the limit (95), return nil.\n\
122Optional argument specifies CATEGORY-TABLE,\n\
123 which defaults to the current buffer's category table.")
124 (table)
125 Lisp_Object table;
126{
127 int i;
128 Lisp_Object docstring_vector;
129
130 table = check_category_table (table);
131
132 for (i = ' '; i <= '~'; i++)
133 if (NILP (CATEGORY_DOCSTRING (table, i)))
134 return make_number (i);
135
136 return Qnil;
137}
138
139
140/* Category-table staff. */
141
142DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
143 "Return t if ARG is a category table.")
144 (arg)
145 Lisp_Object arg;
146{
147 if (CHAR_TABLE_P (arg)
148 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table)
149 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (arg)) == 2)
150 return Qt;
151 return Qnil;
152}
153
154/* If TABLE is nil, return the current category table. If TABLE is
155 not nil, check the validity of TABLE as a category table. If
156 valid, return TABLE itself, but if not valid, signal an error of
157 wrong-type-argument. */
158
159Lisp_Object
160check_category_table (table)
161 Lisp_Object table;
162{
163 register Lisp_Object tem;
164 if (NILP (table))
165 return current_buffer->category_table;
166 while (tem = Fcategory_table_p (table), NILP (tem))
167 table = wrong_type_argument (Qcategory_table_p, table);
168 return table;
169}
170
171DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
172 "Return the current category table.\n\
173This is the one specified by the current buffer.")
174 ()
175{
176 return current_buffer->category_table;
177}
178
179DEFUN ("standard-category-table", Fstandard_category_table,
180 Sstandard_category_table, 0, 0, 0,
181 "Return the standard category table.\n\
182This is the one used for new buffers.")
183 ()
184{
185 return Vstandard_category_table;
186}
187
188/* Return a copy of category table TABLE. We can't simply use the
189 function copy-sequence because no contents should be shared between
190 the original and the copy.
191
192 If TOP is 1, we at first copy the tree structure of the table. */
193
194Lisp_Object
195copy_category_table (table, top)
196 Lisp_Object table;
197{
198 int i;
199
200 if (top)
201 table = Fcopy_sequence (table);
202 else if (!NILP (XCHAR_TABLE (table)->defalt))
203 XCHAR_TABLE (table)->defalt
204 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
205
206 for (i = 0; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
207 {
208 Lisp_Object idx = make_number (i);
209 Lisp_Object val = Faref (table, idx);
210
211 if (NILP (val)) /* Do nothing because we can share nil. */
212 ;
213 else if (CATEGORY_SET_P (val))
214 Faset (table, idx, Fcopy_sequence (val));
215 else if (CHAR_TABLE_P (val))
216 Faset (table, idx, copy_category_table (val, 0));
217 else /* Invalid contents. */
218 Faset (table, idx, Qnil);
219 }
220
221 return table;
222}
223
224DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
225 0, 1, 0,
226 "Construct a new category table and return it.\n\
227It is a copy of the TABLE, which defaults to the standard category table.")
228 (table)
229 Lisp_Object table;
230{
231 if (!NILP (table))
232 check_category_table (table);
233 else
234 table = Vstandard_category_table;
235
236 return copy_category_table (table, 1);
237}
238
239DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
240 "Select a new category table for the current buffer.\n\
241One argument, a category table.")
242 (table)
243 Lisp_Object table;
244{
245 table = check_category_table (table);
246 current_buffer->category_table = table;
247 /* Indicate that this buffer now has a specified category table. */
248 current_buffer->local_var_flags
249 |= XFASTINT (buffer_local_flags.category_table);
250 return table;
251}
252
253
254DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
255 "Return a category set of CHAR.")
256 (ch)
257 Lisp_Object ch;
258{
259 Lisp_Object val;
260 int charset;
261 unsigned char c1, c2;
262
263 CHECK_NUMBER (ch, 0);
264 return CATEGORY_SET (XFASTINT (ch));
265}
266
267DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
268 Scategory_set_mnemonics, 1, 1, 0,
269 "Return a string of mnemonics of all categories in CATEGORY-SET.")
270 (category_set)
271 Lisp_Object category_set;
272{
273 int i, j;
274 char str[96];
275
276 CHECK_CATEGORY_SET (category_set, 0);
277
278 j = 0;
279 for (i = 32; i < 127; i++)
280 if (CATEGORY_MEMBER (i, category_set))
281 str[j++] = i;
282 str[j] = '\0';
283
284 return build_string (str);
285}
286
287/* Modify all category sets stored under category table TABLE so that
288 they contain (SET_VALUE is t) or don't contain (SET_VALUE is nil)
289 CATEGORY. */
290
291void
292modify_lower_category_set (table, category, set_value)
293 Lisp_Object table, category, set_value;
294{
295 Lisp_Object val;
296 int i;
297
298 if (NILP (XCHAR_TABLE (table)->defalt))
299 {
300 val = MAKE_CATEGORY_SET;
301 SET_CATEGORY_SET (val, category, set_value);
302 XCHAR_TABLE (table)->defalt = val;
303 }
304
305 for (i = 32; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
306 {
307 val = XCHAR_TABLE (table)->contents[i];
308
309 if (CATEGORY_SET_P (val))
310 SET_CATEGORY_SET (val, category, set_value);
311 else if (CHAR_TABLE_P (val))
312 modify_lower_category_set (val, category, set_value);
313 }
314}
315
316void
317set_category_set (category_set, category, val)
318 Lisp_Object category_set, category, val;
319{
320 do {
321 int idx = XINT (category) / 8;
322 unsigned char bits = 1 << (XINT (category) % 8);
323
324 if (NILP (val))
325 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
326 else
327 XCATEGORY_SET (category_set)->data[idx] |= bits;
328 } while (0);
329}
330
331DEFUN ("modify-category-entry", Fmodify_category_entry,
332 Smodify_category_entry, 2, 4, 0,
333 "Modify the category set of CHAR by adding CATEGORY to it.\n\
334The category is changed only for table TABLE, which defaults to\n\
335 the current buffer's category table.\n\
336If optional forth argument RESET is non NIL,\n\
337 CATEGORY is deleted from the category set instead of being added.")
338 (ch, category, table, reset)
339 Lisp_Object ch, category, table, reset;
340{
341 int c, charset, c1, c2;
342 Lisp_Object set_value; /* Actual value to be set in category sets. */
343 Lisp_Object val, category_set;
344
345 CHECK_NUMBER (ch, 0);
346 c = XINT (ch);
347 CHECK_CATEGORY (category, 1);
348 table = check_category_table (table);
349
350 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
351 error ("Undefined category: %c", XFASTINT (category));
352
353 set_value = NILP (reset) ? Qt : Qnil;
354
355 if (SINGLE_BYTE_CHAR_P (c))
356 {
357 val = XCHAR_TABLE (table)->contents[c];
358 if (!CATEGORY_SET_P (val))
359 XCHAR_TABLE (table)->contents[c] = (val = MAKE_CATEGORY_SET);
360 SET_CATEGORY_SET (val, category, set_value);
361 return Qnil;
362 }
363
364 if (COMPOSITE_CHAR_P (c))
365 c = cmpchar_component (c, 0);
366 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
367
368 /* The top level table. */
369 val = XCHAR_TABLE (table)->contents[charset];
370 if (NILP (val))
371 {
372 category_set = MAKE_CATEGORY_SET;
373 XCHAR_TABLE (table)->contents[charset] = category_set;
374 }
375 else if (CATEGORY_SET_P (val))
376 category_set = val;
377
378 if (!c1)
379 {
380 /* Only a charset is specified. */
381 if (CHAR_TABLE_P (val))
382 /* All characters in CHARSET should be the same as for CATEGORY. */
383 modify_lower_category_set (val, category, set_value);
384 else
385 SET_CATEGORY_SET (category_set, category, set_value);
386 return Qnil;
387 }
388
389 /* The second level table. */
390 if (!CHAR_TABLE_P (val))
391 {
392 val = Fmake_char_table (Qnil, Qnil);
393 XCHAR_TABLE (table)->contents[charset] = val;
394 /* We must set default category set of CHARSET in `defalt' slot. */
395 XCHAR_TABLE (val)->defalt = category_set;
396 }
397 table = val;
398
399 val = XCHAR_TABLE (table)->contents[c1];
400 if (NILP (val))
401 {
402 category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
403 XCHAR_TABLE (table)->contents[c1] = category_set;
404 }
405 else if (CATEGORY_SET_P (val))
406 category_set = val;
407
408 if (!c2)
409 {
410 if (CHAR_TABLE_P (val))
411 /* All characters in C1 group of CHARSET should be the same as
412 for CATEGORY. */
413 modify_lower_category_set (val, category, set_value);
414 else
415 SET_CATEGORY_SET (category_set, category, set_value);
416 return Qnil;
417 }
418
419 /* The third (bottom) level table. */
420 if (!CHAR_TABLE_P (val))
421 {
422 val = Fmake_char_table (Qnil, Qnil);
423 XCHAR_TABLE (table)->contents[c1] = val;
424 /* We must set default category set of CHARSET and C1 in
425 `defalt' slot. */
426 XCHAR_TABLE (val)->defalt = category_set;
427 }
428 table = val;
429
430 val = XCHAR_TABLE (table)->contents[c2];
431 if (NILP (val))
432 {
433 category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
434 XCHAR_TABLE (table)->contents[c2] = category_set;
435 }
436 else if (CATEGORY_SET_P (val))
437 category_set = val;
438 else
439 /* This should never happen. */
440 error ("Invalid category table");
441
442 SET_CATEGORY_SET (category_set, category, set_value);
443
444 return Qnil;
445}
446
447/* Dump category table to buffer in human-readable format */
448
449static void
450describe_category (value)
451 Lisp_Object value;
452{
453 Lisp_Object mnemonics;
454
455 Findent_to (make_number (16), make_number (1));
456
457 if (NILP (value))
458 {
459 insert_string ("default\n");
460 return;
461 }
462
463 if (!CATEGORY_SET_P (value))
464 {
465 insert_string ("invalid\n");
466 return;
467 }
468
469 mnemonics = Fcategory_set_mnemonics (value);
470 insert_from_string (mnemonics, 0, XSTRING (mnemonics)->size, 0);
471 insert_string ("\n");
472 return;
473}
474
475static Lisp_Object
476describe_category_1 (vector)
477 Lisp_Object vector;
478{
479 struct buffer *old = current_buffer;
480 set_buffer_internal (XBUFFER (Vstandard_output));
481 describe_vector (vector, Qnil, describe_category, 0, Qnil, Qnil);
482 {
483 int i;
484 Lisp_Object docs = XCHAR_TABLE (vector)->extras[0];
485 Lisp_Object elt;
486
487 if (!VECTORP (docs) || XVECTOR (docs)->size != 95)
488 {
489 insert_string ("Invalid first extra slot in this char table\n");
490 return Qnil;
491 }
492
493 insert_string ("Meanings of mnemonice characters are:\n");
494 for (i = 0; i < 95; i++)
495 {
496 elt = XVECTOR (docs)->contents[i];
497 if (NILP (elt))
498 continue;
499
500 insert_char (i + 32);
501 insert (": ", 2);
502 insert_from_string (elt, 0, XSTRING (elt)->size, 0);
503 insert ("\n", 1);
504 }
505 }
506
507 while (! NILP (XCHAR_TABLE (vector)->parent))
508 {
509 vector = XCHAR_TABLE (vector)->parent;
510 insert_string ("\nThe parent category table is:");
511 describe_vector (vector, Qnil, describe_category, 0, Qnil, Qnil);
512 }
513
514 call0 (intern ("help-mode"));
515 set_buffer_internal (old);
516 return Qnil;
517}
518
519DEFUN ("describe-category", Fdescribe_category, Sdescribe_category, 0, 0, "",
520 "Describe the category specifications in the category table.\n\
521The descriptions are inserted in a buffer, which is then displayed.")
522 ()
523{
524 internal_with_output_to_temp_buffer
525 ("*Help*", describe_category_1, current_buffer->category_table);
526
527 return Qnil;
528}
529
530/* Return 1 if there is a word boundary between two word-constituent
531 characters C1 and C2 if they appear in this order, else return 0.
532 Use the macro WORD_BOUNDARY_P instead of calling this function
533 directly. */
534
535int
536word_boundary_p (c1, c2)
537 int c1, c2;
538{
539 Lisp_Object category_set1, category_set2;
540 Lisp_Object tail;
541 int default_result;
542
543 if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2))
544 {
545 tail = Vword_separating_categories;
546 default_result = 0;
547 }
548 else
549 {
550 tail = Vword_combining_categories;
551 default_result = 1;
552 }
553
554 category_set1 = CATEGORY_SET (c1);
555 if (NILP (category_set1))
556 return default_result;
557 category_set2 = CATEGORY_SET (c2);
558 if (NILP (category_set2))
559 return default_result;
560
561 for (; CONSP (tail); tail = XCONS (tail)->cdr)
562 {
563 Lisp_Object elt = XCONS(tail)->car;
564
565 if (CONSP (elt)
566 && CATEGORYP (XCONS (elt)->car)
567 && CATEGORYP (XCONS (elt)->cdr)
568 && CATEGORY_MEMBER (XCONS (elt)->car, category_set1)
569 && CATEGORY_MEMBER (XCONS (elt)->cdr, category_set2))
570 return !default_result;
571 }
572 return default_result;
573}
574
575
576init_category_once ()
577{
578 /* This has to be done here, before we call Fmake_char_table. */
579 Qcategory_table = intern ("category-table");
580 staticpro (&Qcategory_table);
581
582 /* Intern this now in case it isn't already done.
583 Setting this variable twice is harmless.
584 But don't staticpro it here--that is done in alloc.c. */
585 Qchar_table_extra_slots = intern ("char-table-extra-slots");
586
587 /* Now we are ready to set up this property, so we can
588 create category tables. */
589 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
590
591 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
592 /* Set a category set which contains nothing to the default. */
593 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
594 Fset_char_table_extra_slot (Vstandard_category_table, 0,
595 Fmake_vector (make_number (95), Qnil));
596}
597
598syms_of_category ()
599{
600 Qcategoryp = intern ("categoryp");
601 staticpro (&Qcategoryp);
602 Qcategorysetp = intern ("categorysetp");
603 staticpro (&Qcategorysetp);
604 Qcategory_table_p = intern ("category-table-p");
605 staticpro (&Qcategory_table_p);
606
607 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
608 "List of pair (cons) of categories to determine word boundary.\n\
609\n\
610Emacs treats a sequence of word constituent characters as a single\n\
611word (i.e. finds no word boundary between them) iff they belongs to\n\
612the same charset. But, exceptions are allowed in the following cases.\n\
613\n\
614(1) The case that characters are in different charsets is controlled\n\
615by the variable `word-combining-categories'.\n\
616\n\
617Emacs finds no word boundary between characters of different charsets\n\
618if they have categories matching some element of this list.\n\
619\n\
620More precisely, if an element of this list is a cons of category CAT1\n\
621and CAT2, and a multibyte character C1 which has CAT1 is followed by\n\
622C2 which has CAT2, there's no word boundary between C1 and C2.\n\
623\n\
624For instance, to tell that ASCII characters and Latin-1 characters can\n\
625form a single word, the element `(?l . ?l)' should be in this list\n\
626because both characters have the category `l' (Latin characters).\n\
627\n\
628(2) The case that character are in the same charset is controlled by\n\
629the variable `word-separating-categories'.\n\
630\n\
631Emacs find a word boundary between characters of the same charset\n\
632if they have categories matching some element of this list.\n\
633\n\
634More precisely, if an element of this list is a cons of category CAT1\n\
635and CAT2, and a multibyte character C1 which has CAT1 is followed by\n\
636C2 which has CAT2, there's a word boundary between C1 and C2.\n\
637\n\
638For instance, to tell that there's a word boundary between Japanese\n\
639Hiragana and Japanese Kanji (both are in the same charset), the\n\
640element `(?H . ?C) should be in this list.");
641
642 Vword_combining_categories = Qnil;
643
644 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
645 "List of pair (cons) of categories to determine word boundary.\n\
646See the documentation of the variable `word-combining-categories'.");
647
648 Vword_separating_categories = Qnil;
649
650 defsubr (&Smake_category_set);
651 defsubr (&Sdefine_category);
652 defsubr (&Scategory_docstring);
653 defsubr (&Sget_unused_category);
654 defsubr (&Scategory_table_p);
655 defsubr (&Scategory_table);
656 defsubr (&Sstandard_category_table);
657 defsubr (&Scopy_category_table);
658 defsubr (&Sset_category_table);
659 defsubr (&Schar_category_set);
660 defsubr (&Scategory_set_mnemonics);
661 defsubr (&Smodify_category_entry);
662 defsubr (&Sdescribe_category);
663
664 category_table_version = 0;
665}
diff --git a/src/category.h b/src/category.h
new file mode 100644
index 00000000000..975e82b52f2
--- /dev/null
+++ b/src/category.h
@@ -0,0 +1,130 @@
1/* Declarations having to do with Emacs category tables.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7This file is part of GNU Emacs.
8
9GNU Emacs is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GNU Emacs is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GNU Emacs; see the file COPYING. If not, write to
21the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
22
23
24/* We introduce here three types of object: category, category set,
25 and category table.
26
27 A category is like syntax but differs in the following points:
28
29 o A category is represented by a mnemonic character of the range
30 ` '(32)..`~'(126) (printable ASCII characters).
31
32 o A category is not exclusive, i.e. a character has multiple
33 categories (category set). Of course, there's a case that a
34 category set is empty, i.e. the character has no category.
35
36 o In addition to the predefined categories, a user can define new
37 categories. Total number of categories is limited to 95.
38
39 A category set is a set of categories represented by Lisp
40 bool-vector of length 128 (only elements of 31th through 125th
41 are used).
42
43 A category table is like syntax-table, represented by a Lisp
44 char-table. The contents are category sets or nil. It has two
45 extra slots. for a vector of doc string of each category and a
46 version number.
47
48 The first extra slot is a vector of doc strings of categories, the
49 length is 95. The Nth element corresponding to the category N+32.
50
51 The second extra slot is a version number of the category table.
52 But, for the moment, we are not using this slot. */
53
54#define CATEGORYP(x) \
55 (INTEGERP ((x)) && XFASTINT ((x)) >= 0x20 && XFASTINT ((x)) <= 0x7E)
56
57#define CHECK_CATEGORY(x, i) \
58 do { \
59 if (!CATEGORYP ((x))) x = wrong_type_argument (Qcategoryp, (x)); \
60 } while (0)
61
62#define XCATEGORY_SET XBOOL_VECTOR
63
64#define CATEGORY_SET_P(x) \
65 (BOOL_VECTOR_P ((x)) && (EMACS_INT) (XBOOL_VECTOR ((x))->size) == 128)
66
67/* Return a new empty category set. */
68#define MAKE_CATEGORY_SET (Fmake_bool_vector (make_number (128), Qnil))
69
70/* Make CATEGORY_SET includes (if VAL is t) or excludes (if VAL is
71 nil) CATEGORY. */
72#define SET_CATEGORY_SET(category_set, category, val) \
73 (Faset (category_set, category, val))
74
75#define CHECK_CATEGORY_SET(x, i) \
76 do { \
77 if (!CATEGORY_SET_P ((x))) x = wrong_type_argument (Qcategorysetp, (x)); \
78 } while (0)
79
80/* Return 1 if CATEGORY_SET contains CATEGORY, else return 0.
81 The faster version of `!NILP (Faref (category_set, category))'. */
82#define CATEGORY_MEMBER(category, category_set) \
83 (!NILP (category_set) \
84 && (XCATEGORY_SET (category_set)->data[XFASTINT (category) / 8] \
85 & (1 << (XFASTINT (category) % 8))))
86
87/* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
88extern Lisp_Object _temp_category_set;
89
90/* Return 1 if category set of CH contains CATEGORY, elt return 0. */
91#define CHAR_HAS_CATEGORY(ch, category) \
92 (_temp_category_set = CATEGORY_SET (ch), \
93 CATEGORY_MEMBER (category, _temp_category_set))
94
95/* The standard category table is stored where it will automatically
96 be used in all new buffers. */
97#define Vstandard_category_table buffer_defaults.category_table
98
99/* Return the category set of character C in the current category table. */
100#ifdef __GNUC__
101#define CATEGORY_SET(c) \
102 ({ Lisp_Object table = current_buffer->category_table; \
103 Lisp_Object temp; \
104 if (c < CHAR_TABLE_ORDINARY_SLOTS) \
105 while (NILP (temp = XCHAR_TABLE (table)->contents[c]) \
106 && NILP (temp = XCHAR_TABLE (table)->defalt)) \
107 table = XCHAR_TABLE (table)->parent; \
108 else \
109 temp = Faref (table, c); \
110 temp; })
111#else
112#define CATEGORY_SET(c) Faref (current_buffer->category_table, c)
113#endif
114
115/* Return the doc string of CATEGORY in category table TABLE. */
116#define CATEGORY_DOCSTRING(table, category) \
117 XVECTOR (Fchar_table_extra_slot (table, 0))->contents[(category) - ' ']
118
119/* Return the version number of category table TABLE. Not used for
120 the moment. */
121#define CATEGORY_TABLE_VERSION (table) \
122 Fchar_table_extra_slot (table, 1)
123
124/* Return 1 if there is a word boundary between two word-constituent
125 characters C1 and C2 if they appear in this order, else return 0.
126 There is no word boundary between two word-constituent ASCII
127 characters. */
128#define WORD_BOUNDARY_P(c1, c2) \
129 (!(SINGLE_BYTE_CHAR_P (c1) && SINGLE_BYTE_CHAR_P (c2)) \
130 && word_boundary_p (c1, c2))
diff --git a/src/ccl.c b/src/ccl.c
new file mode 100644
index 00000000000..11c1ae500d6
--- /dev/null
+++ b/src/ccl.c
@@ -0,0 +1,1140 @@
1/* CCL (Code Conversion Language) interpreter.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21#include <stdio.h>
22
23#ifdef emacs
24
25#include <config.h>
26#include "lisp.h"
27#include "charset.h"
28#include "ccl.h"
29#include "coding.h"
30
31#else /* not emacs */
32
33#include "mulelib.h"
34
35#endif /* not emacs */
36
37/* Alist of fontname patterns vs corresponding CCL program. */
38Lisp_Object Vfont_ccl_encoder_alist;
39
40/* Vector of CCL program names vs corresponding program data. */
41Lisp_Object Vccl_program_table;
42
43/* CCL (Code Conversion Language) is a simple language which has
44 operations on one input buffer, one output buffer, and 7 registers.
45 The syntax of CCL is described in `ccl.el'. Emacs Lisp function
46 `ccl-compile' compiles a CCL program and produces a CCL code which
47 is a vector of integers. The structure of this vector is as
48 follows: The 1st element: buffer-magnification, a factor for the
49 size of output buffer compared with the size of input buffer. The
50 2nd element: address of CCL code to be executed when encountered
51 with end of input stream. The 3rd and the remaining elements: CCL
52 codes. */
53
54/* Header of CCL compiled code */
55#define CCL_HEADER_BUF_MAG 0
56#define CCL_HEADER_EOF 1
57#define CCL_HEADER_MAIN 2
58
59/* CCL code is a sequence of 28-bit non-negative integers (i.e. the
60 MSB is always 0), each contains CCL command and/or arguments in the
61 following format:
62
63 |----------------- integer (28-bit) ------------------|
64 |------- 17-bit ------|- 3-bit --|- 3-bit --|- 5-bit -|
65 |--constant argument--|-register-|-register-|-command-|
66 ccccccccccccccccc RRR rrr XXXXX
67 or
68 |------- relative address -------|-register-|-command-|
69 cccccccccccccccccccc rrr XXXXX
70 or
71 |------------- constant or other args ----------------|
72 cccccccccccccccccccccccccccc
73
74 where, `cc...c' is a non-negative integer indicating constant value
75 (the left most `c' is always 0) or an absolute jump address, `RRR'
76 and `rrr' are CCL register number, `XXXXX' is one of the following
77 CCL commands. */
78
79/* CCL commands
80
81 Each comment fields shows one or more lines for command syntax and
82 the following lines for semantics of the command. In semantics, IC
83 stands for Instruction Counter. */
84
85#define CCL_SetRegister 0x00 /* Set register a register value:
86 1:00000000000000000RRRrrrXXXXX
87 ------------------------------
88 reg[rrr] = reg[RRR];
89 */
90
91#define CCL_SetShortConst 0x01 /* Set register a short constant value:
92 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
93 ------------------------------
94 reg[rrr] = CCCCCCCCCCCCCCCCCCC;
95 */
96
97#define CCL_SetConst 0x02 /* Set register a constant value:
98 1:00000000000000000000rrrXXXXX
99 2:CONSTANT
100 ------------------------------
101 reg[rrr] = CONSTANT;
102 IC++;
103 */
104
105#define CCL_SetArray 0x03 /* Set register an element of array:
106 1:CCCCCCCCCCCCCCCCCRRRrrrXXXXX
107 2:ELEMENT[0]
108 3:ELEMENT[1]
109 ...
110 ------------------------------
111 if (0 <= reg[RRR] < CC..C)
112 reg[rrr] = ELEMENT[reg[RRR]];
113 IC += CC..C;
114 */
115
116#define CCL_Jump 0x04 /* Jump:
117 1:A--D--D--R--E--S--S-000XXXXX
118 ------------------------------
119 IC += ADDRESS;
120 */
121
122/* Note: If CC..C is greater than 0, the second code is omitted. */
123
124#define CCL_JumpCond 0x05 /* Jump conditional:
125 1:A--D--D--R--E--S--S-rrrXXXXX
126 ------------------------------
127 if (!reg[rrr])
128 IC += ADDRESS;
129 */
130
131
132#define CCL_WriteRegisterJump 0x06 /* Write register and jump:
133 1:A--D--D--R--E--S--S-rrrXXXXX
134 ------------------------------
135 write (reg[rrr]);
136 IC += ADDRESS;
137 */
138
139#define CCL_WriteRegisterReadJump 0x07 /* Write register, read, and jump:
140 1:A--D--D--R--E--S--S-rrrXXXXX
141 2:A--D--D--R--E--S--S-rrrYYYYY
142 -----------------------------
143 write (reg[rrr]);
144 IC++;
145 read (reg[rrr]);
146 IC += ADDRESS;
147 */
148/* Note: If read is suspended, the resumed execution starts from the
149 second code (YYYYY == CCL_ReadJump). */
150
151#define CCL_WriteConstJump 0x08 /* Write constant and jump:
152 1:A--D--D--R--E--S--S-000XXXXX
153 2:CONST
154 ------------------------------
155 write (CONST);
156 IC += ADDRESS;
157 */
158
159#define CCL_WriteConstReadJump 0x09 /* Write constant, read, and jump:
160 1:A--D--D--R--E--S--S-rrrXXXXX
161 2:CONST
162 3:A--D--D--R--E--S--S-rrrYYYYY
163 -----------------------------
164 write (CONST);
165 IC += 2;
166 read (reg[rrr]);
167 IC += ADDRESS;
168 */
169/* Note: If read is suspended, the resumed execution starts from the
170 second code (YYYYY == CCL_ReadJump). */
171
172#define CCL_WriteStringJump 0x0A /* Write string and jump:
173 1:A--D--D--R--E--S--S-000XXXXX
174 2:LENGTH
175 3:0000STRIN[0]STRIN[1]STRIN[2]
176 ...
177 ------------------------------
178 write_string (STRING, LENGTH);
179 IC += ADDRESS;
180 */
181
182#define CCL_WriteArrayReadJump 0x0B /* Write an array element, read, and jump:
183 1:A--D--D--R--E--S--S-rrrXXXXX
184 2:LENGTH
185 3:ELEMENET[0]
186 4:ELEMENET[1]
187 ...
188 N:A--D--D--R--E--S--S-rrrYYYYY
189 ------------------------------
190 if (0 <= reg[rrr] < LENGTH)
191 write (ELEMENT[reg[rrr]]);
192 IC += LENGTH + 2; (... pointing at N+1)
193 read (reg[rrr]);
194 IC += ADDRESS;
195 */
196/* Note: If read is suspended, the resumed execution starts from the
197 Mth code (YYYYY == CCL_ReadJump). */
198
199#define CCL_ReadJump 0x0C /* Read and jump:
200 1:A--D--D--R--E--S--S-rrrYYYYY
201 -----------------------------
202 read (reg[rrr]);
203 IC += ADDRESS;
204 */
205
206#define CCL_Branch 0x0D /* Jump by branch table:
207 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
208 2:A--D--D--R--E-S-S[0]000XXXXX
209 3:A--D--D--R--E-S-S[1]000XXXXX
210 ...
211 ------------------------------
212 if (0 <= reg[rrr] < CC..C)
213 IC += ADDRESS[reg[rrr]];
214 else
215 IC += ADDRESS[CC..C];
216 */
217
218#define CCL_ReadRegister 0x0E /* Read bytes into registers:
219 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
220 2:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
221 ...
222 ------------------------------
223 while (CCC--)
224 read (reg[rrr]);
225 */
226
227#define CCL_WriteExprConst 0x0F /* write result of expression:
228 1:00000OPERATION000RRR000XXXXX
229 2:CONSTANT
230 ------------------------------
231 write (reg[RRR] OPERATION CONSTANT);
232 IC++;
233 */
234
235/* Note: If the Nth read is suspended, the resumed execution starts
236 from the Nth code. */
237
238#define CCL_ReadBranch 0x10 /* Read one byte into a register,
239 and jump by branch table:
240 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
241 2:A--D--D--R--E-S-S[0]000XXXXX
242 3:A--D--D--R--E-S-S[1]000XXXXX
243 ...
244 ------------------------------
245 read (read[rrr]);
246 if (0 <= reg[rrr] < CC..C)
247 IC += ADDRESS[reg[rrr]];
248 else
249 IC += ADDRESS[CC..C];
250 */
251
252#define CCL_WriteRegister 0x11 /* Write registers:
253 1:CCCCCCCCCCCCCCCCCCCrrrXXXXX
254 2:CCCCCCCCCCCCCCCCCCCrrrXXXXX
255 ...
256 ------------------------------
257 while (CCC--)
258 write (reg[rrr]);
259 ...
260 */
261
262/* Note: If the Nth write is suspended, the resumed execution
263 starts from the Nth code. */
264
265#define CCL_WriteExprRegister 0x12 /* Write result of expression
266 1:00000OPERATIONRrrRRR000XXXXX
267 ------------------------------
268 write (reg[RRR] OPERATION reg[Rrr]);
269 */
270
271#define CCL_Call 0x13 /* Write a constant:
272 1:CCCCCCCCCCCCCCCCCCCC000XXXXX
273 ------------------------------
274 call (CC..C)
275 */
276
277#define CCL_WriteConstString 0x14 /* Write a constant or a string:
278 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
279 [2:0000STRIN[0]STRIN[1]STRIN[2]]
280 [...]
281 -----------------------------
282 if (!rrr)
283 write (CC..C)
284 else
285 write_string (STRING, CC..C);
286 IC += (CC..C + 2) / 3;
287 */
288
289#define CCL_WriteArray 0x15 /* Write an element of array:
290 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
291 2:ELEMENT[0]
292 3:ELEMENT[1]
293 ...
294 ------------------------------
295 if (0 <= reg[rrr] < CC..C)
296 write (ELEMENT[reg[rrr]]);
297 IC += CC..C;
298 */
299
300#define CCL_End 0x16 /* Terminate:
301 1:00000000000000000000000XXXXX
302 ------------------------------
303 terminate ();
304 */
305
306/* The following two codes execute an assignment arithmetic/logical
307 operation. The form of the operation is like REG OP= OPERAND. */
308
309#define CCL_ExprSelfConst 0x17 /* REG OP= constant:
310 1:00000OPERATION000000rrrXXXXX
311 2:CONSTANT
312 ------------------------------
313 reg[rrr] OPERATION= CONSTANT;
314 */
315
316#define CCL_ExprSelfReg 0x18 /* REG1 OP= REG2:
317 1:00000OPERATION000RRRrrrXXXXX
318 ------------------------------
319 reg[rrr] OPERATION= reg[RRR];
320 */
321
322/* The following codes execute an arithmetic/logical operation. The
323 form of the operation is like REG_X = REG_Y OP OPERAND2. */
324
325#define CCL_SetExprConst 0x19 /* REG_X = REG_Y OP constant:
326 1:00000OPERATION000RRRrrrXXXXX
327 2:CONSTANT
328 ------------------------------
329 reg[rrr] = reg[RRR] OPERATION CONSTANT;
330 IC++;
331 */
332
333#define CCL_SetExprReg 0x1A /* REG1 = REG2 OP REG3:
334 1:00000OPERATIONRrrRRRrrrXXXXX
335 ------------------------------
336 reg[rrr] = reg[RRR] OPERATION reg[Rrr];
337 */
338
339#define CCL_JumpCondExprConst 0x1B /* Jump conditional according to
340 an operation on constant:
341 1:A--D--D--R--E--S--S-rrrXXXXX
342 2:OPERATION
343 3:CONSTANT
344 -----------------------------
345 reg[7] = reg[rrr] OPERATION CONSTANT;
346 if (!(reg[7]))
347 IC += ADDRESS;
348 else
349 IC += 2
350 */
351
352#define CCL_JumpCondExprReg 0x1C /* Jump conditional according to
353 an operation on register:
354 1:A--D--D--R--E--S--S-rrrXXXXX
355 2:OPERATION
356 3:RRR
357 -----------------------------
358 reg[7] = reg[rrr] OPERATION reg[RRR];
359 if (!reg[7])
360 IC += ADDRESS;
361 else
362 IC += 2;
363 */
364
365#define CCL_ReadJumpCondExprConst 0x1D /* Read and jump conditional according
366 to an operation on constant:
367 1:A--D--D--R--E--S--S-rrrXXXXX
368 2:OPERATION
369 3:CONSTANT
370 -----------------------------
371 read (reg[rrr]);
372 reg[7] = reg[rrr] OPERATION CONSTANT;
373 if (!reg[7])
374 IC += ADDRESS;
375 else
376 IC += 2;
377 */
378
379#define CCL_ReadJumpCondExprReg 0x1E /* Read and jump conditional according
380 to an operation on register:
381 1:A--D--D--R--E--S--S-rrrXXXXX
382 2:OPERATION
383 3:RRR
384 -----------------------------
385 read (reg[rrr]);
386 reg[7] = reg[rrr] OPERATION reg[RRR];
387 if (!reg[7])
388 IC += ADDRESS;
389 else
390 IC += 2;
391 */
392
393#define CCL_Extention 0x1F /* Extended CCL code
394 1:ExtendedCOMMNDRrrRRRrrrXXXXX
395 2:ARGUEMENT
396 3:...
397 ------------------------------
398 extended_command (rrr,RRR,Rrr,ARGS)
399 */
400
401
402/* CCL arithmetic/logical operators. */
403#define CCL_PLUS 0x00 /* X = Y + Z */
404#define CCL_MINUS 0x01 /* X = Y - Z */
405#define CCL_MUL 0x02 /* X = Y * Z */
406#define CCL_DIV 0x03 /* X = Y / Z */
407#define CCL_MOD 0x04 /* X = Y % Z */
408#define CCL_AND 0x05 /* X = Y & Z */
409#define CCL_OR 0x06 /* X = Y | Z */
410#define CCL_XOR 0x07 /* X = Y ^ Z */
411#define CCL_LSH 0x08 /* X = Y << Z */
412#define CCL_RSH 0x09 /* X = Y >> Z */
413#define CCL_LSH8 0x0A /* X = (Y << 8) | Z */
414#define CCL_RSH8 0x0B /* X = Y >> 8, r[7] = Y & 0xFF */
415#define CCL_DIVMOD 0x0C /* X = Y / Z, r[7] = Y % Z */
416#define CCL_LS 0x10 /* X = (X < Y) */
417#define CCL_GT 0x11 /* X = (X > Y) */
418#define CCL_EQ 0x12 /* X = (X == Y) */
419#define CCL_LE 0x13 /* X = (X <= Y) */
420#define CCL_GE 0x14 /* X = (X >= Y) */
421#define CCL_NE 0x15 /* X = (X != Y) */
422
423#define CCL_ENCODE_SJIS 0x16 /* X = HIGHER_BYTE (SJIS (Y, Z))
424 r[7] = LOWER_BYTE (SJIS (Y, Z) */
425#define CCL_DECODE_SJIS 0x17 /* X = HIGHER_BYTE (DE-SJIS (Y, Z))
426 r[7] = LOWER_BYTE (DE-SJIS (Y, Z)) */
427
428/* Macros for exit status of CCL program. */
429#define CCL_STAT_SUCCESS 0 /* Terminated successfully. */
430#define CCL_STAT_SUSPEND 1 /* Terminated because of empty input
431 buffer or full output buffer. */
432#define CCL_STAT_INVALID_CMD 2 /* Terminated because of invalid
433 command. */
434#define CCL_STAT_QUIT 3 /* Terminated because of quit. */
435
436/* Terminate CCL program successfully. */
437#define CCL_SUCCESS \
438 do { \
439 ccl->status = CCL_STAT_SUCCESS; \
440 ccl->ic = CCL_HEADER_MAIN; \
441 goto ccl_finish; \
442 } while (0)
443
444/* Suspend CCL program because of reading from empty input buffer or
445 writing to full output buffer. When this program is resumed, the
446 same I/O command is executed. */
447#define CCL_SUSPEND \
448 do { \
449 ic--; \
450 ccl->status = CCL_STAT_SUSPEND; \
451 goto ccl_finish; \
452 } while (0)
453
454/* Terminate CCL program because of invalid command. Should not occur
455 in the normal case. */
456#define CCL_INVALID_CMD \
457 do { \
458 ccl->status = CCL_STAT_INVALID_CMD; \
459 goto ccl_error_handler; \
460 } while (0)
461
462/* Encode one character CH to multibyte form and write to the current
463 output buffer. If CH is negative, write one byte -CH. */
464#define CCL_WRITE_CHAR(ch) \
465 do { \
466 if (!dst) \
467 CCL_INVALID_CMD; \
468 else \
469 { \
470 unsigned char work[4], *str; \
471 int len = CHAR_STRING (ch, work, str); \
472 if (dst + len <= dst_end) \
473 { \
474 bcopy (str, dst, len); \
475 dst += len; \
476 } \
477 else \
478 CCL_SUSPEND; \
479 } \
480 } while (0)
481
482/* Write a string at ccl_prog[IC] of length LEN to the current output
483 buffer. */
484#define CCL_WRITE_STRING(len) \
485 do { \
486 if (!dst) \
487 CCL_INVALID_CMD; \
488 else if (dst + len <= dst_end) \
489 for (i = 0; i < len; i++) \
490 *dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \
491 >> ((2 - (i % 3)) * 8)) & 0xFF; \
492 else \
493 CCL_SUSPEND; \
494 } while (0)
495
496/* Read one byte from the current input buffer into Rth register. */
497#define CCL_READ_CHAR(r) \
498 do { \
499 if (!src) \
500 CCL_INVALID_CMD; \
501 else if (src < src_end) \
502 r = *src++; \
503 else if (ccl->last_block) \
504 { \
505 ic = ccl->eof_ic; \
506 goto ccl_finish; \
507 } \
508 else \
509 CCL_SUSPEND; \
510 } while (0)
511
512
513/* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting
514 text goes to a place pointed by DESTINATION, the length of which
515 should not exceed DST_BYTES. The bytes actually processed is
516 returned as *CONSUMED. The return value is the length of the
517 resulting text. As a side effect, the contents of CCL registers
518 are updated. If SOURCE or DESTINATION is NULL, only operations on
519 registers are permitted. */
520
521#ifdef CCL_DEBUG
522#define CCL_DEBUG_BACKTRACE_LEN 256
523int ccl_backtrace_table[CCL_BACKTRACE_TABLE];
524int ccl_backtrace_idx;
525#endif
526
527struct ccl_prog_stack
528 {
529 int *ccl_prog; /* Pointer to an array of CCL code. */
530 int ic; /* Instruction Counter. */
531 };
532
533ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
534 struct ccl_program *ccl;
535 unsigned char *source, *destination;
536 int src_bytes, dst_bytes;
537 int *consumed;
538{
539 register int *reg = ccl->reg;
540 register int ic = ccl->ic;
541 register int code, field1, field2;
542 register int *ccl_prog = ccl->prog;
543 unsigned char *src = source, *src_end = src + src_bytes;
544 unsigned char *dst = destination, *dst_end = dst + dst_bytes;
545 int jump_address;
546 int i, j, op;
547 int stack_idx = 0;
548 /* For the moment, we only support depth 256 of stack. */
549 struct ccl_prog_stack ccl_prog_stack_struct[256];
550
551 if (ic >= ccl->eof_ic)
552 ic = CCL_HEADER_MAIN;
553
554#ifdef CCL_DEBUG
555 ccl_backtrace_idx = 0;
556#endif
557
558 for (;;)
559 {
560#ifdef CCL_DEBUG
561 ccl_backtrace_table[ccl_backtrace_idx++] = ic;
562 if (ccl_backtrace_idx >= CCL_DEBUG_BACKTRACE_LEN)
563 ccl_backtrace_idx = 0;
564 ccl_backtrace_table[ccl_backtrace_idx] = 0;
565#endif
566
567 if (!NILP (Vquit_flag) && NILP (Vinhibit_quit))
568 {
569 /* We can't just signal Qquit, instead break the loop as if
570 the whole data is processed. Don't reset Vquit_flag, it
571 must be handled later at a safer place. */
572 if (consumed)
573 src = source + src_bytes;
574 ccl->status = CCL_STAT_QUIT;
575 break;
576 }
577
578 code = XINT (ccl_prog[ic]); ic++;
579 field1 = code >> 8;
580 field2 = (code & 0xFF) >> 5;
581
582#define rrr field2
583#define RRR (field1 & 7)
584#define Rrr ((field1 >> 3) & 7)
585#define ADDR field1
586
587 switch (code & 0x1F)
588 {
589 case CCL_SetRegister: /* 00000000000000000RRRrrrXXXXX */
590 reg[rrr] = reg[RRR];
591 break;
592
593 case CCL_SetShortConst: /* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */
594 reg[rrr] = field1;
595 break;
596
597 case CCL_SetConst: /* 00000000000000000000rrrXXXXX */
598 reg[rrr] = XINT (ccl_prog[ic]);
599 ic++;
600 break;
601
602 case CCL_SetArray: /* CCCCCCCCCCCCCCCCCCCCRRRrrrXXXXX */
603 i = reg[RRR];
604 j = field1 >> 3;
605 if ((unsigned int) i < j)
606 reg[rrr] = XINT (ccl_prog[ic + i]);
607 ic += j;
608 break;
609
610 case CCL_Jump: /* A--D--D--R--E--S--S-000XXXXX */
611 ic += ADDR;
612 break;
613
614 case CCL_JumpCond: /* A--D--D--R--E--S--S-rrrXXXXX */
615 if (!reg[rrr])
616 ic += ADDR;
617 break;
618
619 case CCL_WriteRegisterJump: /* A--D--D--R--E--S--S-rrrXXXXX */
620 i = reg[rrr];
621 CCL_WRITE_CHAR (i);
622 ic += ADDR;
623 break;
624
625 case CCL_WriteRegisterReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */
626 i = reg[rrr];
627 CCL_WRITE_CHAR (i);
628 ic++;
629 CCL_READ_CHAR (reg[rrr]);
630 ic += ADDR - 1;
631 break;
632
633 case CCL_WriteConstJump: /* A--D--D--R--E--S--S-000XXXXX */
634 i = XINT (ccl_prog[ic]);
635 CCL_WRITE_CHAR (i);
636 ic += ADDR;
637 break;
638
639 case CCL_WriteConstReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */
640 i = XINT (ccl_prog[ic]);
641 CCL_WRITE_CHAR (i);
642 ic++;
643 CCL_READ_CHAR (reg[rrr]);
644 ic += ADDR - 1;
645 break;
646
647 case CCL_WriteStringJump: /* A--D--D--R--E--S--S-000XXXXX */
648 j = XINT (ccl_prog[ic]);
649 ic++;
650 CCL_WRITE_STRING (j);
651 ic += ADDR - 1;
652 break;
653
654 case CCL_WriteArrayReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */
655 i = reg[rrr];
656 j = ccl_prog[ic++];
657 if ((unsigned int) i < j)
658 {
659 i = XINT (ccl_prog[ic + i]);
660 CCL_WRITE_CHAR (i);
661 }
662 ic += j + 1;
663 CCL_READ_CHAR (reg[rrr]);
664 ic += ADDR - (j + 2);
665 break;
666
667 case CCL_ReadJump: /* A--D--D--R--E--S--S-rrrYYYYY */
668 CCL_READ_CHAR (reg[rrr]);
669 ic += ADDR;
670 break;
671
672 case CCL_ReadBranch: /* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */
673 CCL_READ_CHAR (reg[rrr]);
674 /* fall through ... */
675 case CCL_Branch: /* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */
676 if ((unsigned int) reg[rrr] < field1)
677 ic += XINT (ccl_prog[ic + reg[rrr]]);
678 else
679 ic += XINT (ccl_prog[ic + field1]);
680 break;
681
682 case CCL_ReadRegister: /* CCCCCCCCCCCCCCCCCCCCrrXXXXX */
683 while (1)
684 {
685 CCL_READ_CHAR (reg[rrr]);
686 if (!field1) break;
687 code = XINT (ccl_prog[ic]); ic++;
688 field1 = code >> 8;
689 field2 = (code & 0xFF) >> 5;
690 }
691 break;
692
693 case CCL_WriteExprConst: /* 1:00000OPERATION000RRR000XXXXX */
694 rrr = 7;
695 i = reg[RRR];
696 j = XINT (ccl_prog[ic]);
697 op = field1 >> 6;
698 ic++;
699 goto ccl_set_expr;
700
701 case CCL_WriteRegister: /* CCCCCCCCCCCCCCCCCCCrrrXXXXX */
702 while (1)
703 {
704 i = reg[rrr];
705 CCL_WRITE_CHAR (i);
706 if (!field1) break;
707 code = XINT (ccl_prog[ic]); ic++;
708 field1 = code >> 8;
709 field2 = (code & 0xFF) >> 5;
710 }
711 break;
712
713 case CCL_WriteExprRegister: /* 1:00000OPERATIONRrrRRR000XXXXX */
714 rrr = 7;
715 i = reg[RRR];
716 j = reg[Rrr];
717 op = field1 >> 6;
718 goto ccl_set_expr;
719
720 case CCL_Call: /* CCCCCCCCCCCCCCCCCCCC000XXXXX */
721 {
722 Lisp_Object slot;
723
724 if (stack_idx >= 256
725 || field1 < 0
726 || field1 >= XVECTOR (Vccl_program_table)->size
727 || (slot = XVECTOR (Vccl_program_table)->contents[field1],
728 !CONSP (slot))
729 || !VECTORP (XCONS (slot)->cdr))
730 {
731 if (stack_idx > 0)
732 {
733 ccl_prog = ccl_prog_stack_struct[0].ccl_prog;
734 ic = ccl_prog_stack_struct[0].ic;
735 }
736 CCL_INVALID_CMD;
737 }
738
739 ccl_prog_stack_struct[stack_idx].ccl_prog = ccl_prog;
740 ccl_prog_stack_struct[stack_idx].ic = ic;
741 stack_idx++;
742 ccl_prog = XVECTOR (XCONS (slot)->cdr)->contents;
743 ic = CCL_HEADER_MAIN;
744 }
745 break;
746
747 case CCL_WriteConstString: /* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */
748 if (!rrr)
749 CCL_WRITE_CHAR (field1);
750 else
751 {
752 CCL_WRITE_STRING (field1);
753 ic += (field1 + 2) / 3;
754 }
755 break;
756
757 case CCL_WriteArray: /* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */
758 i = reg[rrr];
759 if ((unsigned int) i < field1)
760 {
761 j = XINT (ccl_prog[ic + i]);
762 CCL_WRITE_CHAR (j);
763 }
764 ic += field1;
765 break;
766
767 case CCL_End: /* 0000000000000000000000XXXXX */
768 if (stack_idx-- > 0)
769 {
770 ccl_prog = ccl_prog_stack_struct[stack_idx].ccl_prog;
771 ic = ccl_prog_stack_struct[stack_idx].ic;
772 break;
773 }
774 CCL_SUCCESS;
775
776 case CCL_ExprSelfConst: /* 00000OPERATION000000rrrXXXXX */
777 i = XINT (ccl_prog[ic]);
778 ic++;
779 op = field1 >> 6;
780 goto ccl_expr_self;
781
782 case CCL_ExprSelfReg: /* 00000OPERATION000RRRrrrXXXXX */
783 i = reg[RRR];
784 op = field1 >> 6;
785
786 ccl_expr_self:
787 switch (op)
788 {
789 case CCL_PLUS: reg[rrr] += i; break;
790 case CCL_MINUS: reg[rrr] -= i; break;
791 case CCL_MUL: reg[rrr] *= i; break;
792 case CCL_DIV: reg[rrr] /= i; break;
793 case CCL_MOD: reg[rrr] %= i; break;
794 case CCL_AND: reg[rrr] &= i; break;
795 case CCL_OR: reg[rrr] |= i; break;
796 case CCL_XOR: reg[rrr] ^= i; break;
797 case CCL_LSH: reg[rrr] <<= i; break;
798 case CCL_RSH: reg[rrr] >>= i; break;
799 case CCL_LSH8: reg[rrr] <<= 8; reg[rrr] |= i; break;
800 case CCL_RSH8: reg[7] = reg[rrr] & 0xFF; reg[rrr] >>= 8; break;
801 case CCL_DIVMOD: reg[7] = reg[rrr] % i; reg[rrr] /= i; break;
802 case CCL_LS: reg[rrr] = reg[rrr] < i; break;
803 case CCL_GT: reg[rrr] = reg[rrr] > i; break;
804 case CCL_EQ: reg[rrr] = reg[rrr] == i; break;
805 case CCL_LE: reg[rrr] = reg[rrr] <= i; break;
806 case CCL_GE: reg[rrr] = reg[rrr] >= i; break;
807 case CCL_NE: reg[rrr] = reg[rrr] != i; break;
808 default: CCL_INVALID_CMD;
809 }
810 break;
811
812 case CCL_SetExprConst: /* 00000OPERATION000RRRrrrXXXXX */
813 i = reg[RRR];
814 j = XINT (ccl_prog[ic]);
815 op = field1 >> 6;
816 jump_address = ++ic;
817 goto ccl_set_expr;
818
819 case CCL_SetExprReg: /* 00000OPERATIONRrrRRRrrrXXXXX */
820 i = reg[RRR];
821 j = reg[Rrr];
822 op = field1 >> 6;
823 jump_address = ic;
824 goto ccl_set_expr;
825
826 case CCL_ReadJumpCondExprConst: /* A--D--D--R--E--S--S-rrrXXXXX */
827 CCL_READ_CHAR (reg[rrr]);
828 case CCL_JumpCondExprConst: /* A--D--D--R--E--S--S-rrrXXXXX */
829 i = reg[rrr];
830 op = XINT (ccl_prog[ic]);
831 jump_address = ic++ + ADDR;
832 j = XINT (ccl_prog[ic]);
833 ic++;
834 rrr = 7;
835 goto ccl_set_expr;
836
837 case CCL_ReadJumpCondExprReg: /* A--D--D--R--E--S--S-rrrXXXXX */
838 CCL_READ_CHAR (reg[rrr]);
839 case CCL_JumpCondExprReg:
840 i = reg[rrr];
841 op = XINT (ccl_prog[ic]);
842 jump_address = ic++ + ADDR;
843 j = reg[XINT (ccl_prog[ic])];
844 ic++;
845 rrr = 7;
846
847 ccl_set_expr:
848 switch (op)
849 {
850 case CCL_PLUS: reg[rrr] = i + j; break;
851 case CCL_MINUS: reg[rrr] = i - j; break;
852 case CCL_MUL: reg[rrr] = i * j; break;
853 case CCL_DIV: reg[rrr] = i / j; break;
854 case CCL_MOD: reg[rrr] = i % j; break;
855 case CCL_AND: reg[rrr] = i & j; break;
856 case CCL_OR: reg[rrr] = i | j; break;
857 case CCL_XOR: reg[rrr] = i ^ j;; break;
858 case CCL_LSH: reg[rrr] = i << j; break;
859 case CCL_RSH: reg[rrr] = i >> j; break;
860 case CCL_LSH8: reg[rrr] = (i << 8) | j; break;
861 case CCL_RSH8: reg[rrr] = i >> 8; reg[7] = i & 0xFF; break;
862 case CCL_DIVMOD: reg[rrr] = i / j; reg[7] = i % j; break;
863 case CCL_LS: reg[rrr] = i < j; break;
864 case CCL_GT: reg[rrr] = i > j; break;
865 case CCL_EQ: reg[rrr] = i == j; break;
866 case CCL_LE: reg[rrr] = i <= j; break;
867 case CCL_GE: reg[rrr] = i >= j; break;
868 case CCL_NE: reg[rrr] = i != j; break;
869 case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break;
870 case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break;
871 default: CCL_INVALID_CMD;
872 }
873 code &= 0x1F;
874 if (code == CCL_WriteExprConst || code == CCL_WriteExprRegister)
875 {
876 i = reg[rrr];
877 CCL_WRITE_CHAR (i);
878 }
879 else if (!reg[rrr])
880 ic = jump_address;
881 break;
882
883 default:
884 CCL_INVALID_CMD;
885 }
886 }
887
888 ccl_error_handler:
889 if (destination)
890 {
891 /* We can insert an error message only if DESTINATION is
892 specified and we still have a room to store the message
893 there. */
894 char msg[256];
895 int msglen;
896
897 switch (ccl->status)
898 {
899 case CCL_STAT_INVALID_CMD:
900 sprintf(msg, "\nCCL: Invalid command %x (ccl_code = %x) at %d.",
901 code & 0x1F, code, ic);
902#ifdef CCL_DEBUG
903 {
904 int i = ccl_backtrace_idx - 1;
905 int j;
906
907 msglen = strlen (msg);
908 if (dst + msglen <= dst_end)
909 {
910 bcopy (msg, dst, msglen);
911 dst += msglen;
912 }
913
914 for (j = 0; j < CCL_DEBUG_BACKTRACE_LEN; j++, i--)
915 {
916 if (i < 0) i = CCL_DEBUG_BACKTRACE_LEN - 1;
917 if (ccl_backtrace_table[i] == 0)
918 break;
919 sprintf(msg, " %d", ccl_backtrace_table[i]);
920 msglen = strlen (msg);
921 if (dst + msglen > dst_end)
922 break;
923 bcopy (msg, dst, msglen);
924 dst += msglen;
925 }
926 }
927 goto ccl_finish;
928#endif
929
930 case CCL_STAT_QUIT:
931 sprintf(msg, "\nCCL: Quited.");
932 break;
933
934 default:
935 sprintf(msg, "\nCCL: Unknown error type (%d).", ccl->status);
936 }
937
938 msglen = strlen (msg);
939 if (dst + msglen <= dst_end)
940 {
941 bcopy (msg, dst, msglen);
942 dst += msglen;
943 }
944 }
945
946 ccl_finish:
947 ccl->ic = ic;
948 if (consumed) *consumed = src - source;
949 return dst - destination;
950}
951
952/* Setup fields of the structure pointed by CCL appropriately for the
953 execution of compiled CCL code in VEC (vector of integer). */
954setup_ccl_program (ccl, vec)
955 struct ccl_program *ccl;
956 Lisp_Object vec;
957{
958 int i;
959
960 ccl->size = XVECTOR (vec)->size;
961 ccl->prog = XVECTOR (vec)->contents;
962 ccl->ic = CCL_HEADER_MAIN;
963 ccl->eof_ic = XINT (XVECTOR (vec)->contents[CCL_HEADER_EOF]);
964 ccl->buf_magnification = XINT (XVECTOR (vec)->contents[CCL_HEADER_BUF_MAG]);
965 for (i = 0; i < 8; i++)
966 ccl->reg[i] = 0;
967 ccl->last_block = 0;
968 ccl->status = 0;
969}
970
971#ifdef emacs
972
973DEFUN ("ccl-execute", Fccl_execute, Sccl_execute, 2, 2, 0,
974 "Execute CCL-PROGRAM with registers initialized by REGISTERS.\n\
975CCL-PROGRAM is a compiled code generated by `ccl-compile',\n\
976 no I/O commands should appear in the CCL program.\n\
977REGISTERS is a vector of [R0 R1 ... R7] where RN is an initial value\n\
978 of Nth register.\n\
979As side effect, each element of REGISTER holds the value of\n\
980 corresponding register after the execution.")
981 (ccl_prog, reg)
982 Lisp_Object ccl_prog, reg;
983{
984 struct ccl_program ccl;
985 int i;
986
987 CHECK_VECTOR (ccl_prog, 0);
988 CHECK_VECTOR (reg, 1);
989 if (XVECTOR (reg)->size != 8)
990 error ("Invalid length of vector REGISTERS");
991
992 setup_ccl_program (&ccl, ccl_prog);
993 for (i = 0; i < 8; i++)
994 ccl.reg[i] = (INTEGERP (XVECTOR (reg)->contents[i])
995 ? XINT (XVECTOR (reg)->contents[i])
996 : 0);
997
998 ccl_driver (&ccl, (char *)0, (char *)0, 0, 0, (int *)0);
999 QUIT;
1000 if (ccl.status != CCL_STAT_SUCCESS)
1001 error ("Error in CCL program at %dth code", ccl.ic);
1002
1003 for (i = 0; i < 8; i++)
1004 XSETINT (XVECTOR (reg)->contents[i], ccl.reg[i]);
1005 return Qnil;
1006}
1007
1008DEFUN ("ccl-execute-on-string", Fccl_execute_on_string, Sccl_execute_on_string,
1009 3, 3, 0,
1010 "Execute CCL-PROGRAM with initial STATUS on STRING.\n\
1011CCL-PROGRAM is a compiled code generated by `ccl-compile'.\n\
1012Read buffer is set to STRING, and write buffer is allocated automatically.\n\
1013STATUS is a vector of [R0 R1 ... R7 IC], where\n\
1014 R0..R7 are initial values of corresponding registers,\n\
1015 IC is the instruction counter specifying from where to start the program.\n\
1016If R0..R7 are nil, they are initialized to 0.\n\
1017If IC is nil, it is initialized to head of the CCL program.\n\
1018Returns the contents of write buffer as a string,\n\
1019 and as side effect, STATUS is updated.")
1020 (ccl_prog, status, str)
1021 Lisp_Object ccl_prog, status, str;
1022{
1023 Lisp_Object val;
1024 struct ccl_program ccl;
1025 int i, produced;
1026 int outbufsize;
1027 char *outbuf;
1028 struct gcpro gcpro1, gcpro2, gcpro3;
1029
1030 CHECK_VECTOR (ccl_prog, 0);
1031 CHECK_VECTOR (status, 1);
1032 if (XVECTOR (status)->size != 9)
1033 error ("Invalid length of vector STATUS");
1034 CHECK_STRING (str, 2);
1035 GCPRO3 (ccl_prog, status, str);
1036
1037 setup_ccl_program (&ccl, ccl_prog);
1038 for (i = 0; i < 8; i++)
1039 {
1040 if (NILP (XVECTOR (status)->contents[i]))
1041 XSETINT (XVECTOR (status)->contents[i], 0);
1042 if (INTEGERP (XVECTOR (status)->contents[i]))
1043 ccl.reg[i] = XINT (XVECTOR (status)->contents[i]);
1044 }
1045 if (INTEGERP (XVECTOR (status)->contents[i]))
1046 {
1047 i = XFASTINT (XVECTOR (status)->contents[8]);
1048 if (ccl.ic < i && i < ccl.size)
1049 ccl.ic = i;
1050 }
1051 outbufsize = XSTRING (str)->size * ccl.buf_magnification + 256;
1052 outbuf = (char *) xmalloc (outbufsize);
1053 if (!outbuf)
1054 error ("Not enough memory");
1055 ccl.last_block = 1;
1056 produced = ccl_driver (&ccl, XSTRING (str)->data, outbuf,
1057 XSTRING (str)->size, outbufsize, (int *)0);
1058 for (i = 0; i < 8; i++)
1059 XSET (XVECTOR (status)->contents[i], Lisp_Int, ccl.reg[i]);
1060 XSETINT (XVECTOR (status)->contents[8], ccl.ic);
1061 UNGCPRO;
1062
1063 val = make_string (outbuf, produced);
1064 free (outbuf);
1065 QUIT;
1066 if (ccl.status != CCL_STAT_SUCCESS
1067 && ccl.status != CCL_STAT_SUSPEND)
1068 error ("Error in CCL program at %dth code", ccl.ic);
1069
1070 return val;
1071}
1072
1073DEFUN ("register-ccl-program", Fregister_ccl_program, Sregister_ccl_program,
1074 2, 2, 0,
1075 "Register CCL program PROGRAM of NAME in `ccl-program-table'.
1076PROGRAM should be a compiled code of CCL program, or nil.
1077Return index number of the registered CCL program.")
1078 (name, ccl_prog)
1079 Lisp_Object name, ccl_prog;
1080{
1081 int len = XVECTOR (Vccl_program_table)->size;
1082 int i, idx;
1083
1084 CHECK_SYMBOL (name, 0);
1085 if (!NILP (ccl_prog))
1086 CHECK_VECTOR (ccl_prog, 1);
1087
1088 for (i = 0; i < len; i++)
1089 {
1090 Lisp_Object slot = XVECTOR (Vccl_program_table)->contents[i];
1091
1092 if (!CONSP (slot))
1093 break;
1094
1095 if (EQ (name, XCONS (slot)->car))
1096 {
1097 XCONS (slot)->cdr = ccl_prog;
1098 return make_number (i);
1099 }
1100 }
1101
1102 if (i == len)
1103 {
1104 Lisp_Object new_table = Fmake_vector (len * 2, Qnil);
1105 int j;
1106
1107 for (j = 0; j < len; j++)
1108 XVECTOR (new_table)->contents[j]
1109 = XVECTOR (Vccl_program_table)->contents[j];
1110 Vccl_program_table = new_table;
1111 }
1112
1113 XVECTOR (Vccl_program_table)->contents[i] = Fcons (name, ccl_prog);
1114 return make_number (i);
1115}
1116
1117syms_of_ccl ()
1118{
1119 staticpro (&Vccl_program_table);
1120 Vccl_program_table = Fmake_vector (32, Qnil);
1121
1122 DEFVAR_LISP ("font-ccl-encoder-alist", &Vfont_ccl_encoder_alist,
1123 "Alist of fontname patterns vs corresponding CCL program.\n\
1124Each element looks like (REGEXP . CCL-CODE),\n\
1125 where CCL-CODE is a compiled CCL program.\n\
1126When a font whose name matches REGEXP is used for displaying a character,\n\
1127 CCL-CODE is executed to calculate the code point in the font\n\
1128 from the charset number and position code(s) of the character which are set\n\
1129 in CCL registers R0, R1, and R2 before the execution.\n\
1130The code point in the font is set in CCL registers R1 and R2\n\
1131 when the execution terminated.\n\
1132If the font is single-byte font, the register R2 is not used.");
1133 Vfont_ccl_encoder_alist = Qnil;
1134
1135 defsubr (&Sccl_execute);
1136 defsubr (&Sccl_execute_on_string);
1137 defsubr (&Sregister_ccl_program);
1138}
1139
1140#endif /* emacs */
diff --git a/src/ccl.h b/src/ccl.h
new file mode 100644
index 00000000000..ebda0cc1595
--- /dev/null
+++ b/src/ccl.h
@@ -0,0 +1,53 @@
1/* Header for CCL (Code Conversion Language) interpreter.
2
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20#ifndef _CCL_H
21#define _CCL_H
22
23/* Structure to hold information about running CCL code. Read
24 comments in the file ccl.c for the detail of each field. */
25struct ccl_program {
26 int size; /* Size of the compiled code. */
27 Lisp_Object *prog; /* Pointer into the compiled code. */
28 int ic; /* Instruction Counter (index for PROG). */
29 int eof_ic; /* Instruction Counter for end-of-file
30 processing code. */
31 int reg[8]; /* CCL registers, reg[7] is used for
32 condition flag of relational
33 operations. */
34 int last_block; /* Set to 1 while processing the last
35 block. */
36 int status; /* Exit status of the CCL program. */
37 int buf_magnification; /* Output buffer magnification. How
38 many times bigger the output buffer
39 should be than the input buffer. */
40};
41
42/* This data type is used for the spec field of the structure
43 coding_system. */
44
45struct ccl_spec {
46 struct ccl_program decoder;
47 struct ccl_program encoder;
48};
49
50/* Alist of fontname patterns vs corresponding CCL program. */
51extern Lisp_Object Vfont_ccl_encoder_alist;
52
53#endif /* _CCL_H */
diff --git a/src/charset.c b/src/charset.c
new file mode 100644
index 00000000000..b962f346f22
--- /dev/null
+++ b/src/charset.c
@@ -0,0 +1,1452 @@
1/* Multilingual characters handler.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21/* At first, see the document in `charset.h' to understand the code in
22 this file. */
23
24#include <stdio.h>
25
26#ifdef emacs
27
28#include <sys/types.h>
29#include <config.h>
30#include "lisp.h"
31#include "buffer.h"
32#include "charset.h"
33#include "coding.h"
34
35#else /* not emacs */
36
37#include "mulelib.h"
38
39#endif /* emacs */
40
41Lisp_Object Qcharset, Qascii, Qcomposition;
42
43/* Declaration of special leading-codes. */
44int leading_code_composition; /* for composite characters */
45int leading_code_private_11; /* for private DIMENSION1 of 1-column */
46int leading_code_private_12; /* for private DIMENSION1 of 2-column */
47int leading_code_private_21; /* for private DIMENSION2 of 1-column */
48int leading_code_private_22; /* for private DIMENSION2 of 2-column */
49
50/* Declaration of special charsets. */
51int charset_ascii; /* ASCII */
52int charset_composition; /* for a composite character */
53int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
54int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
55int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
56int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
57int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
58int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
59int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
60
61Lisp_Object Qcharset_table;
62
63/* A char-table containing information of each character set. */
64Lisp_Object Vcharset_table;
65
66/* A vector of charset symbol indexed by charset-id. This is used
67 only for returning charset symbol from C functions. */
68Lisp_Object Vcharset_symbol_table;
69
70/* A list of charset symbols ever defined. */
71Lisp_Object Vcharset_list;
72
73/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
74int bytes_by_char_head[256];
75int width_by_char_head[256];
76
77/* Mapping table from ISO2022's charset (specified by DIMENSION,
78 CHARS, and FINAL-CHAR) to Emacs' charset. */
79int iso_charset_table[2][2][128];
80
81/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
82unsigned char *_fetch_multibyte_char_p;
83int _fetch_multibyte_char_len;
84
85/* Set STR a pointer to the multi-byte form of the character C. If C
86 is not a composite character, the multi-byte form is set in WORKBUF
87 and STR points WORKBUF. The caller should allocate at least 4-byte
88 area at WORKBUF in advance. Returns the length of the multi-byte
89 form.
90
91 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
92 function directly if C can be an ASCII character. */
93
94int
95non_ascii_char_to_string (c, workbuf, str)
96 int c;
97 unsigned char *workbuf, **str;
98{
99 int charset;
100 unsigned char c1, c2;
101
102 if (COMPOSITE_CHAR_P (c))
103 {
104 int cmpchar_id = COMPOSITE_CHAR_ID (c);
105
106 if (cmpchar_id < n_cmpchars)
107 {
108 *str = cmpchar_table[cmpchar_id]->data;
109 return cmpchar_table[cmpchar_id]->len;
110 }
111 else
112 {
113 *str = workbuf;
114 return 0;
115 }
116 }
117
118 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
119
120 *str = workbuf;
121 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
122 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
123 workbuf++;
124 *workbuf++ = c1 | 0x80;
125 if (c2)
126 *workbuf++ = c2 | 0x80;
127
128 return (workbuf - *str);
129}
130
131/* Return a non-ASCII character of which multi-byte form is at STR of
132 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
133 character is set to the address ACTUAL_LEN.
134
135 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
136 directly if STR can hold an ASCII character. */
137
138string_to_non_ascii_char (str, len, actual_len)
139 unsigned char *str;
140 int len, *actual_len;
141{
142 int charset;
143 unsigned char c1, c2;
144 register int c;
145
146 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
147 {
148 if (actual_len)
149 *actual_len = 1;
150 return (int) *str;
151 }
152
153 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
154
155 if (actual_len)
156 *actual_len = (charset == CHARSET_COMPOSITION
157 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
158 : BYTES_BY_CHAR_HEAD (*str));
159 return c;
160}
161
162/* Return the length of the multi-byte form at string STR of length LEN. */
163int
164multibyte_form_length (str, len)
165 unsigned char *str;
166 int len;
167{
168 int charset;
169 unsigned char c1, c2;
170 register int c;
171
172 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
173 return 1;
174
175 return (charset == CHARSET_COMPOSITION
176 ? cmpchar_table[(c1 << 7) | c2]->len
177 : BYTES_BY_CHAR_HEAD (*str));
178}
179
180/* Check if string STR of length LEN contains valid multi-byte form of
181 a character. If valid, charset and position codes of the character
182 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
183 return -1. This should be used only in the macro SPLIT_STRING
184 which checks range of STR in advance. */
185
186split_non_ascii_string (str, len, charset, c1, c2)
187 register unsigned char *str, *c1, *c2;
188 register int len, *charset;
189{
190 register unsigned int cs = *str++;
191
192 if (cs == LEADING_CODE_COMPOSITION)
193 {
194 int cmpchar_id = str_cmpchar_id (str - 1, len);
195
196 if (cmpchar_id < 0)
197 return -1;
198 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
199 }
200 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
201 && CHARSET_DEFINED_P (cs))
202 {
203 *charset = cs;
204 if (*str < 0xA0)
205 return -1;
206 *c1 = (*str++) & 0x7F;
207 if (CHARSET_DIMENSION (cs) == 2)
208 {
209 if (*str < 0xA0)
210 return -1;
211 *c2 = (*str++) & 0x7F;
212 }
213 }
214 else
215 return -1;
216 return 0;
217}
218
219/* Update the table Vcharset_table with the given arguments (see the
220 document of `define-charset' for the meaning of each argument).
221 Several other table contents are also updated. The caller should
222 check the validity of CHARSET-ID and the remaining arguments in
223 advance. */
224
225void
226update_charset_table (charset_id, dimension, chars, width, direction,
227 iso_final_char, iso_graphic_plane,
228 short_name, long_name, description)
229 Lisp_Object charset_id, dimension, chars, width, direction;
230 Lisp_Object iso_final_char, iso_graphic_plane;
231 Lisp_Object short_name, long_name, description;
232{
233 int charset = XINT (charset_id);
234 int bytes;
235 unsigned char leading_code_base, leading_code_ext;
236
237 if (NILP (Faref (Vcharset_table, charset_id)))
238 Faset (Vcharset_table, charset_id,
239 Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil));
240
241 /* Get byte length of multibyte form, base leading-code, and
242 extended leading-code of the charset. See the comment under the
243 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
244 bytes = XINT (dimension);
245 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
246 {
247 /* Official charset, it doesn't have an extended leading-code. */
248 if (charset != CHARSET_ASCII)
249 bytes += 1; /* For a base leading-code. */
250 leading_code_base = charset;
251 leading_code_ext = 0;
252 }
253 else
254 {
255 /* Private charset. */
256 bytes += 2; /* For base and extended leading-codes. */
257 leading_code_base
258 = (charset < LEADING_CODE_EXT_12
259 ? LEADING_CODE_PRIVATE_11
260 : (charset < LEADING_CODE_EXT_21
261 ? LEADING_CODE_PRIVATE_12
262 : (charset < LEADING_CODE_EXT_22
263 ? LEADING_CODE_PRIVATE_21
264 : LEADING_CODE_PRIVATE_22)));
265 leading_code_ext = charset;
266 }
267
268 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
269 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
270 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
271 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
272 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
273 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
274 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
275 = make_number (leading_code_base);
276 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
277 = make_number (leading_code_ext);
278 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
279 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
280 = iso_graphic_plane;
281 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
282 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
283 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
284 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
285
286 {
287 /* If we have already defined a charset which has the same
288 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
289 DIRECTION, we must update the entry REVERSE-CHARSET of both
290 charsets. If there's no such charset, the value of the entry
291 is set to nil. */
292 int i;
293
294 for (i = 0; i < MAX_CHARSET; i++)
295 if (!NILP (CHARSET_TABLE_ENTRY (i)))
296 {
297 if (CHARSET_DIMENSION (i) == XINT (dimension)
298 && CHARSET_CHARS (i) == XINT (chars)
299 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
300 && CHARSET_DIRECTION (i) != XINT (direction))
301 {
302 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
303 = make_number (i);
304 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
305 break;
306 }
307 }
308 if (i >= MAX_CHARSET)
309 /* No such a charset. */
310 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
311 = make_number (-1);
312 }
313
314 if (charset != CHARSET_ASCII
315 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
316 {
317 /* Update tables bytes_by_char_head and width_by_char_head. */
318 bytes_by_char_head[leading_code_base] = bytes;
319 width_by_char_head[leading_code_base] = XINT (width);
320
321 /* Update table emacs_code_class. */
322 emacs_code_class[charset] = (bytes == 2
323 ? EMACS_leading_code_2
324 : (bytes == 3
325 ? EMACS_leading_code_3
326 : EMACS_leading_code_4));
327 }
328
329 /* Update table iso_charset_table. */
330 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
331 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
332}
333
334#ifdef emacs
335
336/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
337 is invalid. */
338int
339get_charset_id (charset_symbol)
340 Lisp_Object charset_symbol;
341{
342 Lisp_Object val;
343 int charset;
344
345 return ((SYMBOLP (charset_symbol)
346 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
347 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
348 CHARSET_VALID_P (charset)))
349 ? charset : -1);
350}
351
352/* Return an identification number for a new private charset of
353 DIMENSION and WIDTH. If there's no more room for the new charset,
354 return 0. */
355Lisp_Object
356get_new_private_charset_id (dimension, width)
357 int dimension, width;
358{
359 int charset, from, to;
360
361 if (dimension == 1)
362 {
363 if (width == 1)
364 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
365 else
366 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
367 }
368 else
369 {
370 if (width == 1)
371 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
372 else
373 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
374 }
375
376 for (charset = from; charset < to; charset++)
377 if (!CHARSET_DEFINED_P (charset)) break;
378
379 return make_number (charset < to ? charset : 0);
380}
381
382DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
383 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
384If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
385 treated as a private charset.\n\
386INFO-VECTOR is a vector of the format:\n\
387 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
388 SHORT-NAME LONG-NAME DESCRIPTION]\n\
389The meanings of each elements is as follows:\n\
390DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
391CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
392WIDTH (integer) is the number of columns a character in the charset\n\
393occupies on the screen: one of 0, 1, and 2.\n\
394\n\
395DIRECTION (integer) is the rendering direction of characters in the\n\
396charset when rendering. If 0, render from right to left, else\n\
397render from left to right.\n\
398\n\
399ISO-FINAL-CHAR (character) is the final character of the\n\
400corresponding ISO 2022 charset.\n\
401\n\
402ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
403while encoding to variants of ISO 2022 coding system, one of the\n\
404following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
405\n\
406SHORT-NAME (string) is the short name to refer to the charset.\n\
407\n\
408LONG-NAME (string) is the long name to refer to the charset.\n\
409\n\
410DESCRIPTION (string) is the description string of the charset.")
411 (charset_id, charset_symbol, info_vector)
412 Lisp_Object charset_id, charset_symbol, info_vector;
413{
414 Lisp_Object *vec;
415
416 if (!NILP (charset_id))
417 CHECK_NUMBER (charset_id, 0);
418 CHECK_SYMBOL (charset_symbol, 1);
419 CHECK_VECTOR (info_vector, 2);
420
421 if (! NILP (charset_id))
422 {
423 if (! CHARSET_VALID_P (XINT (charset_id)))
424 error ("Invalid CHARSET: %d", XINT (charset_id));
425 else if (CHARSET_DEFINED_P (XINT (charset_id)))
426 error ("Already defined charset: %d", XINT (charset_id));
427 }
428
429 vec = XVECTOR (info_vector)->contents;
430 if (XVECTOR (info_vector)->size != 9
431 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
432 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
433 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
434 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
435 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
436 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
437 || !STRINGP (vec[6])
438 || !STRINGP (vec[7])
439 || !STRINGP (vec[8]))
440 error ("Invalid info-vector argument for defining charset %s",
441 XSYMBOL (charset_symbol)->name->data);
442
443 if (NILP (charset_id))
444 {
445 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
446 if (XINT (charset_id) == 0)
447 error ("There's no room for a new private charset %s",
448 XSYMBOL (charset_symbol)->name->data);
449 }
450
451 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
452 vec[4], vec[5], vec[6], vec[7], vec[8]);
453 Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id));
454 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
455 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
456 return Qnil;
457}
458
459DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
460 4, 4, 0,
461 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
462CHARSET should be defined by `defined-charset' in advance.")
463 (dimension, chars, final_char, charset_symbol)
464 Lisp_Object dimension, chars, final_char, charset_symbol;
465{
466 int charset;
467
468 CHECK_NUMBER (dimension, 0);
469 CHECK_NUMBER (chars, 1);
470 CHECK_NUMBER (final_char, 2);
471 CHECK_SYMBOL (charset_symbol, 3);
472
473 if (XINT (dimension) != 1 && XINT (dimension) != 2)
474 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
475 if (XINT (chars) != 94 && XINT (chars) != 96)
476 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
477 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
478 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
479 if ((charset = get_charset_id (charset_symbol)) < 0)
480 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
481
482 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
483 return Qnil;
484}
485
486/* Return number of different charsets in STR of length LEN. In
487 addition, for each found charset N, CHARSETS[N] is set 1. The
488 caller should allocate CHARSETS (MAX_CHARSET bytes) in advance. */
489
490int
491find_charset_in_str (str, len, charsets)
492 unsigned char *str, *charsets;
493 int len;
494{
495 int num = 0;
496
497 while (len > 0)
498 {
499 int bytes = BYTES_BY_CHAR_HEAD (*str);
500 int charset = CHARSET_AT (str);
501
502 if (!charsets[charset])
503 {
504 charsets[charset] = 1;
505 num += 1;
506 }
507 str += bytes;
508 len -= bytes;
509 }
510 return num;
511}
512
513DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
514 2, 2, 0,
515 "Return a list of charsets in the region between BEG and END.\n\
516BEG and END are buffer positions.")
517 (beg, end)
518 Lisp_Object beg, end;
519{
520 char charsets[MAX_CHARSET];
521 int from, to, stop, i;
522 Lisp_Object val;
523
524 validate_region (&beg, &end);
525 from = XFASTINT (beg);
526 stop = to = XFASTINT (end);
527 if (from < GPT && GPT < to)
528 stop = GPT;
529 bzero (charsets, MAX_CHARSET);
530 while (1)
531 {
532 find_charset_in_str (POS_ADDR (from), stop - from, charsets);
533 if (stop < to)
534 from = stop, stop = to;
535 else
536 break;
537 }
538 val = Qnil;
539 for (i = MAX_CHARSET - 1; i >= 0; i--)
540 if (charsets[i])
541 val = Fcons (CHARSET_SYMBOL (i), val);
542 return val;
543}
544
545DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
546 1, 1, 0,
547 "Return a list of charsets in STR.")
548 (str)
549 Lisp_Object str;
550{
551 char charsets[MAX_CHARSET];
552 int i;
553 Lisp_Object val;
554
555 CHECK_STRING (str, 0);
556 bzero (charsets, MAX_CHARSET);
557 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
558 val = Qnil;
559 for (i = MAX_CHARSET - 1; i >= 0; i--)
560 if (charsets[i])
561 val = Fcons (CHARSET_SYMBOL (i), val);
562 return val;
563}
564
565DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
566 "Return a character of CHARSET and position-codes CODE1 and CODE2.\n\
567CODE1 and CODE2 are optional, but if you don't supply\n\
568 sufficient position-codes, return a generic character which stands for\n\
569all characters or group of characters in the character sets.\n\
570A generic character can be an argument of `modify-syntax-entry' and\n\
571`modify-category-entry'.")
572 (charset, code1, code2)
573 Lisp_Object charset, code1, code2;
574{
575 CHECK_NUMBER (charset, 0);
576
577 if (NILP (code1))
578 XSETFASTINT (code1, 0);
579 else
580 CHECK_NUMBER (code1, 1);
581 if (NILP (code2))
582 XSETFASTINT (code2, 0);
583 else
584 CHECK_NUMBER (code2, 2);
585
586 if (!CHARSET_DEFINED_P (XINT (charset)))
587 error ("Invalid charset: %d", XINT (charset));
588
589 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
590}
591
592DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
593 "Return list of charset and one or two position-codes of CHAR.")
594 (ch)
595 Lisp_Object ch;
596{
597 Lisp_Object val;
598 int charset;
599 unsigned char c1, c2;
600
601 CHECK_NUMBER (ch, 0);
602 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
603 return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2)
604 ? Fcons (CHARSET_SYMBOL (charset),
605 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
606 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
607}
608
609DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
610 "Return charset of CHAR.")
611 (ch)
612 Lisp_Object ch;
613{
614 CHECK_NUMBER (ch, 0);
615
616 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
617}
618
619DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
620 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
621 (dimension, chars, final_char)
622 Lisp_Object dimension, chars, final_char;
623{
624 int charset;
625
626 CHECK_NUMBER (dimension, 0);
627 CHECK_NUMBER (chars, 1);
628 CHECK_NUMBER (final_char, 2);
629
630 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
631 return Qnil;
632 return CHARSET_SYMBOL (charset);
633}
634
635DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
636 "Return byte length of multi-byte form of CHAR.")
637 (ch)
638 Lisp_Object ch;
639{
640 Lisp_Object val;
641 int bytes;
642
643 CHECK_NUMBER (ch, 0);
644 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
645 {
646 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
647
648 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
649 }
650 else
651 {
652 int charset = CHAR_CHARSET (XFASTINT (ch));
653
654 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
655 }
656
657 XSETFASTINT (val, bytes);
658 return val;
659}
660
661/* Return the width of character of which multi-byte form starts with
662 C. The width is measured by how many columns occupied on the
663 screen when displayed in the current buffer. */
664
665#define ONE_BYTE_CHAR_WIDTH(c) \
666 (c < 0x20 \
667 ? (c == '\t' \
668 ? current_buffer->tab_width \
669 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
670 : (c < 0x7f \
671 ? 1 \
672 : (c == 0x7F \
673 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
674 : ((! NILP (current_buffer->enable_multibyte_characters) \
675 && BASE_LEADING_CODE_P (c)) \
676 ? WIDTH_BY_CHAR_HEAD (c) \
677 : 4)))) \
678
679
680DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
681 "Return width of CHAR when displayed in the current buffer.\n\
682The width is measured by how many columns it occupies on the screen.")
683 (ch)
684 Lisp_Object ch;
685{
686 Lisp_Object val;
687 int c;
688
689 CHECK_NUMBER (ch, 0);
690
691 c = XFASTINT (ch);
692 if (SINGLE_BYTE_CHAR_P (c))
693 XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
694 else if (COMPOSITE_CHAR_P (c))
695 {
696 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
697 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
698 }
699 else
700 {
701 int charset = CHAR_CHARSET (c);
702
703 XSETFASTINT (val, CHARSET_WIDTH (charset));
704 }
705 return val;
706}
707
708/* Return width of string STR of length LEN when displayed in the
709 current buffer. The width is measured by how many columns it
710 occupies on the screen. */
711int
712strwidth (str, len)
713 unsigned char *str;
714 int len;
715{
716 unsigned char *endp = str + len;
717 int width = 0;
718
719 while (str < endp) {
720 if (*str == LEADING_CODE_COMPOSITION)
721 {
722 int id = str_cmpchar_id (str, endp - str);
723
724 if (id < 0)
725 {
726 width += 4;
727 str++;
728 }
729 else
730 {
731 width += cmpchar_table[id]->width;
732 str += cmpchar_table[id]->len;
733 }
734 }
735 else
736 {
737 width += ONE_BYTE_CHAR_WIDTH (*str);
738 str += BYTES_BY_CHAR_HEAD (*str);
739 }
740 }
741 return width;
742}
743
744DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
745 "Return width of STRING when displayed in the current buffer.\n\
746Width is measured by how many columns it occupies on the screen.\n\
747When calculating width of a multi-byte character in STRING,\n\
748 only the base leading-code is considered and the validity of\n\
749 the following bytes are not checked.")
750 (str)
751 Lisp_Object str;
752{
753 Lisp_Object val;
754
755 CHECK_STRING (str, 0);
756 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
757 return val;
758}
759
760DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
761 "Return the direction of CHAR.\n\
762The returned value is 0 for left-to-right and 1 for right-to-left.")
763 (ch)
764 Lisp_Object ch;
765{
766 int charset;
767
768 CHECK_NUMBER (ch, 0);
769 charset = CHAR_CHARSET (XFASTINT (ch));
770 if (!CHARSET_DEFINED_P (charset))
771 error ("Invalid character: %d", XINT (ch));
772 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
773}
774
775DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
776 "Return number of characters in STRING.")
777 (str)
778 Lisp_Object str;
779{
780 Lisp_Object val;
781 unsigned char *p, *endp;
782 int chars;
783
784 CHECK_STRING (str, 0);
785
786 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
787 chars = 0;
788 while (p < endp)
789 {
790 if (*p == LEADING_CODE_COMPOSITION)
791 {
792 p++;
793 while (p < endp && ! CHAR_HEAD_P (p)) p++;
794 }
795 else
796 p += BYTES_BY_CHAR_HEAD (*p);
797 chars++;
798 }
799
800 XSETFASTINT (val, chars);
801 return val;
802}
803
804DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
805 "Return non-nil value if POS is at character boundary of multibyte form.\n\
806The return value is:\n\
807 0 if POS is at an ASCII character or at the end of range,\n\
808 1 if POS is at a head of 2-byte length multi-byte form,\n\
809 2 if POS is at a head of 3-byte length multi-byte form,\n\
810 3 if POS is at a head of 4-byte length multi-byte form,\n\
811 4 if POS is at a head of multi-byte form of a composite character.\n\
812If POS is out of range or not at character boundary, return NIL.")
813 (pos)
814 Lisp_Object pos;
815{
816 Lisp_Object val;
817 int n;
818
819 CHECK_NUMBER_COERCE_MARKER (pos, 0);
820
821 n = XINT (pos);
822 if (n < BEGV || n > ZV)
823 return Qnil;
824
825 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
826 XSETFASTINT (val, 0);
827 else
828 {
829 unsigned char *p = POS_ADDR (n);
830
831 if (SINGLE_BYTE_CHAR_P (*p))
832 XSETFASTINT (val, 0);
833 else if (*p == LEADING_CODE_COMPOSITION)
834 XSETFASTINT (val, 4);
835 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
836 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
837 else
838 val = Qnil;
839 }
840 return val;
841}
842
843DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
844 "Concatenate all the argument characters and make the result a string.")
845 (nargs, args)
846 int nargs;
847 Lisp_Object *args;
848{
849 int i, n = XINT (nargs);
850 unsigned char *buf
851 = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
852 unsigned char *p = buf;
853 Lisp_Object val;
854
855 for (i = 0; i < n; i++)
856 {
857 int c, len;
858 unsigned char *str;
859
860 if (!INTEGERP (args[i]))
861 {
862 free (buf);
863 CHECK_NUMBER (args[i], 0);
864 }
865 c = XINT (args[i]);
866 len = CHAR_STRING (c, p, str);
867 if (p != str)
868 /* C is a composite character. */
869 bcopy (str, p, len);
870 p += len;
871 }
872
873 val = make_string (buf, p - buf);
874 free (buf);
875 return val;
876}
877
878#endif /* emacs */
879
880/*** Composite characters staffs ***/
881
882/* Each composite character is identified by CMPCHAR-ID which is
883 assigned when Emacs needs the character code of the composite
884 character (e.g. when displaying it on the screen). See the
885 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
886 composite character is represented in Emacs. */
887
888/* If `static' is defined, it means that it is defined to null string. */
889#ifndef static
890/* The following function is copied from lread.c. */
891static int
892hash_string (ptr, len)
893 unsigned char *ptr;
894 int len;
895{
896 register unsigned char *p = ptr;
897 register unsigned char *end = p + len;
898 register unsigned char c;
899 register int hash = 0;
900
901 while (p != end)
902 {
903 c = *p++;
904 if (c >= 0140) c -= 40;
905 hash = ((hash<<3) + (hash>>28) + c);
906 }
907 return hash & 07777777777;
908}
909#endif
910
911/* Table of pointers to the structure `cmpchar_info' indexed by
912 CMPCHAR-ID. */
913struct cmpchar_info **cmpchar_table;
914/* The current size of `cmpchar_table'. */
915static int cmpchar_table_size;
916/* Number of the current composite characters. */
917int n_cmpchars;
918
919#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
920
921static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
922
923/* Each element of `cmpchar_hash_table' is a pointer to an array of
924 integer, where the 1st element is the size of the array, the 2nd
925 element is how many elements are actually used in the array, and
926 the remaining elements are CMPCHAR-IDs of composite characters of
927 the same hash value. */
928#define CMPCHAR_HASH_SIZE(table) table[0]
929#define CMPCHAR_HASH_USED(table) table[1]
930#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
931
932/* Return CMPCHAR-ID of the composite character in STR of the length
933 LEN. If the composite character has not yet been registered,
934 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
935 is the sole function for assigning CMPCHAR-ID. */
936int
937str_cmpchar_id (str, len)
938 unsigned char *str;
939 int len;
940{
941 int hash_idx, *hashp;
942 unsigned char *buf;
943 int embedded_rule; /* 1 if composition rule is embedded. */
944 int chars; /* number of components. */
945 int i;
946 struct cmpchar_info *cmpcharp;
947
948 if (len < 5)
949 /* Any composite char have at least 3-byte length. */
950 return -1;
951
952 /* The second byte 0xFF means compostion rule is embedded. */
953 embedded_rule = (str[1] == 0xFF);
954
955 /* At first, get the actual length of the composite character. */
956 {
957 unsigned char *p, *endp = str + 1, *lastp = str + len;
958 int bytes;
959
960 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
961 chars = 0;
962 p = str + 1 + embedded_rule;
963 while (p < endp)
964 {
965 /* No need of checking if *P is 0xA0 because
966 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
967 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
968 chars++;
969 }
970 len = (p -= embedded_rule) - str;
971 if (p > endp)
972 len -= - bytes, chars--;
973
974 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
975 /* Invalid number of components. */
976 return -1;
977 }
978 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
979 hashp = cmpchar_hash_table[hash_idx];
980
981 /* Then, look into the hash table. */
982 if (hashp != NULL)
983 /* Find the correct one among composite characters of the same
984 hash value. */
985 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
986 {
987 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
988 if (len == cmpcharp->len
989 && ! bcmp (str, cmpcharp->data, len))
990 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
991 }
992
993 /* We have to register the composite character in cmpchar_table. */
994 /* Make the entry in hash table. */
995 if (hashp == NULL)
996 {
997 /* Make a table for 8 composite characters initially. */
998 hashp = (cmpchar_hash_table[hash_idx]
999 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1000 CMPCHAR_HASH_SIZE (hashp) = 10;
1001 CMPCHAR_HASH_USED (hashp) = 2;
1002 }
1003 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1004 {
1005 CMPCHAR_HASH_SIZE (hashp) += 8;
1006 hashp = (cmpchar_hash_table[hash_idx]
1007 = (int *) xrealloc (hashp,
1008 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1009 }
1010 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1011 CMPCHAR_HASH_USED (hashp)++;
1012
1013 /* Set information of the composite character in cmpchar_table. */
1014 if (cmpchar_table_size == 0)
1015 {
1016 /* This is the first composite character to be registered. */
1017 cmpchar_table_size = 256;
1018 cmpchar_table
1019 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1020 * cmpchar_table_size);
1021 }
1022 else if (cmpchar_table_size <= n_cmpchars)
1023 {
1024 cmpchar_table_size += 256;
1025 cmpchar_table
1026 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1027 sizeof (cmpchar_table[0])
1028 * cmpchar_table_size);
1029 }
1030
1031 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1032
1033 cmpcharp->len = len;
1034 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1035 bcopy (str, cmpcharp->data, len);
1036 cmpcharp->data[len] = 0;
1037 cmpcharp->glyph_len = chars;
1038 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1039 if (embedded_rule)
1040 {
1041 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1042 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1043 }
1044 else
1045 {
1046 cmpcharp->cmp_rule = NULL;
1047 cmpcharp->col_offset = NULL;
1048 }
1049
1050 /* Setup GLYPH data and composition rules (if any) so as not to make
1051 them every time on displaying. */
1052 {
1053 unsigned char *bufp;
1054 int width;
1055 float leftmost = 0.0, rightmost = 1.0;
1056
1057 if (embedded_rule)
1058 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1059 cmpcharp->col_offset[0] = 0;
1060
1061 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1062 {
1063 if (embedded_rule)
1064 cmpcharp->cmp_rule[i] = *bufp++;
1065
1066 if (*bufp == 0xA0) /* This is an ASCII character. */
1067 {
1068 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1069 width = 1;
1070 bufp++;
1071 }
1072 else /* Multibyte character. */
1073 {
1074 /* Make `bufp' point normal multi-byte form temporally. */
1075 *bufp -= 0x20;
1076 cmpcharp->glyph[i]
1077 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1078 width = WIDTH_BY_CHAR_HEAD (*bufp);
1079 *bufp += 0x20;
1080 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1081 }
1082
1083 if (embedded_rule && i > 0)
1084 {
1085 /* Reference points (global_ref and new_ref) are
1086 encoded as below:
1087
1088 0--1--2 -- ascent
1089 | |
1090 | |
1091 | 4 -+--- center
1092 -- 3 5 -- baseline
1093 | |
1094 6--7--8 -- descent
1095
1096 Now, we calculate the column offset of the new glyph
1097 from the left edge of the first glyph. This can avoid
1098 the same calculation everytime displaying this
1099 composite character. */
1100
1101 /* Reference points of global glyph and new glyph. */
1102 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1103 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1104 /* Column offset relative to the first glyph. */
1105 float left = (leftmost
1106 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1107 - (new_ref % 3) * width / 2.0);
1108
1109 cmpcharp->col_offset[i] = left;
1110 if (left < leftmost)
1111 leftmost = left;
1112 if (left + width > rightmost)
1113 rightmost = left + width;
1114 }
1115 else
1116 {
1117 if (width > rightmost)
1118 rightmost = width;
1119 }
1120 }
1121 if (embedded_rule)
1122 {
1123 /* Now col_offset[N] are relative to the left edge of the
1124 first component. Make them relative to the left edge of
1125 overall glyph. */
1126 for (i = 0; i < chars; i++)
1127 cmpcharp->col_offset[i] -= leftmost;
1128 /* Make rightmost holds width of overall glyph. */
1129 rightmost -= leftmost;
1130 }
1131
1132 cmpcharp->width = rightmost;
1133 if (cmpcharp->width < rightmost)
1134 /* To get a ceiling integer value. */
1135 cmpcharp->width++;
1136 }
1137
1138 cmpchar_table[n_cmpchars] = cmpcharp;
1139
1140 return n_cmpchars++;
1141}
1142
1143/* Return the Nth element of the composite character C. */
1144int
1145cmpchar_component (c, n)
1146 unsigned int c, n;
1147{
1148 int id = COMPOSITE_CHAR_ID (c);
1149
1150 if (id >= n_cmpchars /* C is not a valid composite character. */
1151 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1152 return -1;
1153 /* No face data is stored in glyph code. */
1154 return ((int) (cmpchar_table[id]->glyph[n]));
1155}
1156
1157DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1158 "T if CHAR is a composite character.")
1159 (ch)
1160 Lisp_Object ch;
1161{
1162 CHECK_NUMBER (ch, 0);
1163 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1164}
1165
1166DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1167 2, 2, 0,
1168 "Return the IDXth component character of composite character CHARACTER.")
1169 (character, idx)
1170 Lisp_Object character, idx;
1171{
1172 int c;
1173
1174 CHECK_NUMBER (character, 0);
1175 CHECK_NUMBER (idx, 1);
1176
1177 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1178 args_out_of_range (character, idx);
1179
1180 return make_number (c);
1181}
1182
1183DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1184 2, 2, 0,
1185 "Return the IDXth composition rule embedded in composite character CHARACTER.
1186The returned rule is for composing the IDXth component
1187on the (IDX-1)th component. If IDX is 0, the returned value is always 255.")
1188 (character, idx)
1189 Lisp_Object character, idx;
1190{
1191 int id, i;
1192
1193 CHECK_NUMBER (character, 0);
1194 CHECK_NUMBER (idx, 1);
1195
1196 id = COMPOSITE_CHAR_ID (XINT (character));
1197 if (id < 0 || id >= n_cmpchars)
1198 error ("Invalid composite character: %d", XINT (character));
1199 i = XINT (idx);
1200 if (i > cmpchar_table[id]->glyph_len)
1201 args_out_of_range (character, idx);
1202
1203 return make_number (cmpchar_table[id]->cmp_rule[i]);
1204}
1205
1206DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1207 Scmpchar_cmp_rule_p, 1, 1, 0,
1208 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1209 (character)
1210 Lisp_Object character;
1211{
1212 int id;
1213
1214 CHECK_NUMBER (character, 0);
1215 id = COMPOSITE_CHAR_ID (XINT (character));
1216 if (id < 0 || id >= n_cmpchars)
1217 error ("Invalid composite character: %d", XINT (character));
1218
1219 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1220}
1221
1222DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1223 Scmpchar_cmp_count, 1, 1, 0,
1224 "Return number of compoents of composite character CHARACTER.")
1225 (character)
1226 Lisp_Object character;
1227{
1228 int id;
1229
1230 CHECK_NUMBER (character, 0);
1231 id = COMPOSITE_CHAR_ID (XINT (character));
1232 if (id < 0 || id >= n_cmpchars)
1233 error ("Invalid composite character: %d", XINT (character));
1234
1235 return (make_number (cmpchar_table[id]->glyph_len));
1236}
1237
1238DEFUN ("compose-string", Fcompose_string, Scompose_string,
1239 1, 1, 0,
1240 "Return one char string composed from all characters in STRING.")
1241 (str)
1242 Lisp_Object str;
1243{
1244 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1245 int len, i;
1246
1247 CHECK_STRING (str, 0);
1248
1249 buf[0] = LEADING_CODE_COMPOSITION;
1250 p = XSTRING (str)->data;
1251 pend = p + XSTRING (str)->size;
1252 i = 1;
1253 while (p < pend)
1254 {
1255 if (*p < 0x20 || *p == 127) /* control code */
1256 error ("Invalid component character: %d", *p);
1257 else if (*p < 0x80) /* ASCII */
1258 {
1259 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1260 error ("Too long string to be composed: %s", XSTRING (str)->data);
1261 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1262 code itself. */
1263 buf[i++] = 0xA0;
1264 buf[i++] = *p++ + 0x80;
1265 }
1266 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1267 {
1268 /* Already composed. Eliminate the heading
1269 LEADING_CODE_COMPOSITION, keep the remaining bytes
1270 unchanged. */
1271 p++;
1272 ptemp = p;
1273 while (! CHAR_HEAD_P (p)) p++;
1274 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1275 error ("Too long string to be composed: %s", XSTRING (str)->data);
1276 bcopy (ptemp, buf + i, p - ptemp);
1277 i += p - ptemp;
1278 }
1279 else /* multibyte char */
1280 {
1281 /* Add 0x20 to the base leading-code, keep the remaining
1282 bytes unchanged. */
1283 len = BYTES_BY_CHAR_HEAD (*p);
1284 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1285 error ("Too long string to be composed: %s", XSTRING (str)->data);
1286 bcopy (p, buf + i, len);
1287 buf[i] += 0x20;
1288 p += len, i += len;
1289 }
1290 }
1291
1292 if (i < 5)
1293 /* STR contains only one character, which can't be composed. */
1294 error ("Too short string to be composed: %s", XSTRING (str)->data);
1295
1296 return make_string (buf, i);
1297}
1298
1299
1300charset_id_internal (charset_name)
1301 char *charset_name;
1302{
1303 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1304
1305 if (!VECTORP (val))
1306 error ("Charset %s is not defined", charset_name);
1307
1308 return (XINT (XVECTOR (val)->contents[0]));
1309}
1310
1311DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1312 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1313 ()
1314{
1315 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1316 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1317 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1318 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1319 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1320 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1321 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1322 return Qnil;
1323}
1324
1325init_charset_once ()
1326{
1327 int i, j, k;
1328
1329 staticpro (&Vcharset_table);
1330 staticpro (&Vcharset_symbol_table);
1331
1332 /* This has to be done here, before we call Fmake_char_table. */
1333 Qcharset_table = intern ("charset-table");
1334 staticpro (&Qcharset_table);
1335
1336 /* Intern this now in case it isn't already done.
1337 Setting this variable twice is harmless.
1338 But don't staticpro it here--that is done in alloc.c. */
1339 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1340
1341 /* Now we are ready to set up this property, so we can
1342 create the charset table. */
1343 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1344 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1345
1346 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET), Qnil);
1347
1348 /* Setup tables. */
1349 for (i = 0; i < 2; i++)
1350 for (j = 0; j < 2; j++)
1351 for (k = 0; k < 128; k++)
1352 iso_charset_table [i][j][k] = -1;
1353
1354 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1355 cmpchar_table_size = n_cmpchars = 0;
1356
1357 for (i = 0; i < 256; i++)
1358 BYTES_BY_CHAR_HEAD (i) = 1;
1359 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1360 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1361 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1362 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1363 /* The following doesn't reflect the actual bytes, but just to tell
1364 that it is a start of a multibyte character. */
1365 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1366
1367 for (i = 0; i < 128; i++)
1368 WIDTH_BY_CHAR_HEAD (i) = 1;
1369 for (; i < 256; i++)
1370 WIDTH_BY_CHAR_HEAD (i) = 4;
1371 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1372 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1373 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1374 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1375}
1376
1377#ifdef emacs
1378
1379syms_of_charset ()
1380{
1381 Qascii = intern ("ascii");
1382 staticpro (&Qascii);
1383
1384 Qcharset = intern ("charset");
1385 staticpro (&Qcharset);
1386
1387 /* Define ASCII charset now. */
1388 update_charset_table (make_number (CHARSET_ASCII),
1389 make_number (1), make_number (94),
1390 make_number (1),
1391 make_number (0),
1392 make_number ('B'),
1393 make_number (0),
1394 build_string ("ASCII"),
1395 build_string ("ASCII"),
1396 build_string ("ASCII (ISO646 IRV)"));
1397 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1398 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1399
1400 Qcomposition = intern ("composition");
1401 staticpro (&Qcomposition);
1402 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1403
1404 defsubr (&Sdefine_charset);
1405 defsubr (&Sdeclare_equiv_charset);
1406 defsubr (&Sfind_charset_region);
1407 defsubr (&Sfind_charset_string);
1408 defsubr (&Smake_char_internal);
1409 defsubr (&Ssplit_char);
1410 defsubr (&Schar_charset);
1411 defsubr (&Siso_charset);
1412 defsubr (&Schar_bytes);
1413 defsubr (&Schar_width);
1414 defsubr (&Sstring_width);
1415 defsubr (&Schar_direction);
1416 defsubr (&Schars_in_string);
1417 defsubr (&Schar_boundary_p);
1418 defsubr (&Sconcat_chars);
1419 defsubr (&Scmpcharp);
1420 defsubr (&Scmpchar_component);
1421 defsubr (&Scmpchar_cmp_rule);
1422 defsubr (&Scmpchar_cmp_rule_p);
1423 defsubr (&Scmpchar_cmp_count);
1424 defsubr (&Scompose_string);
1425 defsubr (&Ssetup_special_charsets);
1426
1427 DEFVAR_LISP ("charset-list", &Vcharset_list,
1428 "List of charsets ever defined.");
1429 Vcharset_list = Fcons (Qascii, Qnil);
1430
1431 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1432 "Leading-code of composite characters.");
1433 leading_code_composition = LEADING_CODE_COMPOSITION;
1434
1435 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1436 "Leading-code of private TYPE9N charset of column-width 1.");
1437 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1438
1439 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1440 "Leading-code of private TYPE9N charset of column-width 2.");
1441 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1442
1443 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1444 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1445 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1446
1447 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1448 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1449 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1450}
1451
1452#endif /* emacs */
diff --git a/src/charset.h b/src/charset.h
new file mode 100644
index 00000000000..677a5755adf
--- /dev/null
+++ b/src/charset.h
@@ -0,0 +1,649 @@
1/* Header for multilingual character handler.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21#ifndef _CHARSET_H
22#define _CHARSET_H
23
24/*** GENERAL NOTE on CHARACTER SET (CHARSET) ***
25
26 A character set ("charset" hereafter) is a meaningful collection
27 (i.e. language, culture, functionality, etc) of characters. Emacs
28 handles multiple charsets at once. Each charset corresponds to one
29 of ISO charsets (except for a special charset for composition
30 characters). Emacs identifies a charset by a unique identification
31 number, whereas ISO identifies a charset by a triplet of DIMENSION,
32 CHARS and FINAL-CHAR. So, hereafter, just saying "charset" means an
33 identification number (integer value).
34
35 The value range of charset is 0x00, 0x80..0xFE. There are four
36 kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
37 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94
38
39
40 Within Emacs Lisp, a charset is treated as a symbol which has a
41 property `charset'. The property value is a vector containing
42 various information about the charset. For readability of C codes,
43 we use the following convention on C variable names:
44 charset_symbol: Emacs Lisp symbol of a charset
45 charset_id: Emacs Lisp integer of an identification number of a charset
46 charset: C integer of an identification number of a charset
47
48 Each charset (except for ASCII) is assigned a base leading-code
49 (range 0x80..0x9D). In addition, a charset of greater than 0xA0
50 (whose base leading-code is 0x9A..0x9D) is assigned an extended
51 leading-code (range 0xA0..0xFE). In this case, each base
52 leading-code specify the allowable range of extended leading-code as
53 shown in the table below. A leading-code is used to represent a
54 character in Emacs' buffer and string.
55
56 We call a charset which has extended leading-code as "private
57 charset" because those are mainly for a charset which is not
58 registered by ISO. On the contrary, we call a charset which does
59 not have extended leading-code as "official charset".
60
61 ---------------------------------------------------------------------------
62 charset dimension base leading-code extended leading-code
63 ---------------------------------------------------------------------------
64 0x00 official dim1 -- none -- -- none --
65 (ASCII)
66 0x01..0x7F --never used--
67 0x80 COMPOSITION same as charset -- none --
68 0x81..0x8F official dim1 same as charset -- none --
69 0x90..0x99 official dim2 same as charset -- none --
70 0x9A..0x9F --never used--
71 0xA0..0xDF private dim1 0x9A same as charset
72 of 1-column width
73 0xE0..0xEF private dim1 0x9B same as charset
74 of 2-column width
75 0xF0..0xF4 private dim2 0x9C same as charset
76 of 1-column width
77 0xF5..0xFE private dim2 0x9D same as charset
78 of 2-column width
79 0xFF --never used--
80 ---------------------------------------------------------------------------
81
82 In the table, "COMPOSITION" means a charset for a composite
83 character which is a character composed from several (up to 16)
84 non-composite characters (components). Although a composite
85 character can contain components of many charsets, a composite
86 character itself belongs to the charset CHARSET-COMPOSITION. See
87 the document "GENERAL NOTE on COMPOSITE CHARACTER" below for more
88 detail.
89
90*/
91
92/* Definition of special leading-codes. */
93/* Base leading-code. */
94/* Special leading-code followed by components of a composite character. */
95#define LEADING_CODE_COMPOSITION 0x80
96/* Leading-code followed by extended leading-code. */
97#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
98#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
99#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
100#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2o f 2-column */
101
102/* Extended leading-code. */
103/* Start of each extended leading-codes. */
104#define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */
105#define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */
106#define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */
107#define LEADING_CODE_EXT_22 0xF5 /* follows LEADING_CODE_PRIVATE_22 */
108/* Maximum value of extended leading-codes. */
109#define LEADING_CODE_EXT_MAX 0xFE
110
111/* Definition of minimum/maximum charset of each DIMENSION. */
112#define MIN_CHARSET_OFFICIAL_DIMENSION1 0x81
113#define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F
114#define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90
115#define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99
116#define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
117#define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21
118
119/* Definition of special charsets. */
120#define CHARSET_ASCII 0
121#define CHARSET_COMPOSITION 0x80
122
123extern int charset_ascii; /* ASCII */
124extern int charset_composition; /* for a composite character */
125extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
126extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
127extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
128extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
129extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
130extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
131extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
132
133/* Check if STR points the head of multi-byte form, i.e. *STR is an
134 ASCII character or a base leading-code. */
135#define CHAR_HEAD_P(str) ((unsigned char) *(str) < 0xA0)
136
137/*** GENERAL NOTE on CHARACTER REPRESENTATION ***
138
139 At first, the term "character" or "char" is used for a multilingual
140 character (of course, including ASCII character), not for a byte in
141 computer memory. We use the term "code" or "byte" for the latter
142 case.
143
144 A character is identified by charset and one or two POSITION-CODEs.
145 POSITION-CODE is the position of the character in the charset. A
146 character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1.
147 A character of DIMENSION2 charset has two POSITION-CODE:
148 POSITION-CODE-1 and POSITION-CODE-2. The code range of
149 POSITION-CODE is 0x20..0x7F.
150
151 Emacs has two kinds of representation of a character: multi-byte
152 form (for buffer and string) and single-word form (for character
153 object in Emacs Lisp). The latter is called "character code" here
154 after. Both representation encode the information of charset and
155 POSITION-CODE but in a different way (for instance, MSB of
156 POSITION-CODE is set in multi-byte form).
157
158 For details of multi-byte form, see the section "2. Emacs internal
159 format handlers" of `coding.c'.
160
161 Emacs uses 19 bits for a character code. The bits are divided into
162 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits).
163
164 A character code of DIMENSION1 character uses FIELD2 to hold charset
165 and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2
166 character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold
167 POSITION-CODE-1 and POSITION-CODE-2 respectively.
168
169 More precisely...
170
171 FIELD2 of DIMENSION1 character (except for ASCII) is "charset - 0x70".
172 This is to make all character codes except for ASCII greater than
173 256 (ASCII's FIELD2 is 0). So, the range of FIELD2 of DIMENSION1
174 character is 0 or 0x11..0x7F.
175
176 FIELD1 of DIMENSION2 character is "charset - 0x8F" for official
177 charset and "charset - 0xE0" for private charset. So, the range of
178 FIELD1 of DIMENSION2 character is 0x01..0x1E.
179
180 -----------------------------------------------------------------------
181 charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit)
182 -----------------------------------------------------------------------
183 ASCII 0 0 POSITION-CODE-1
184 DIMENSION1 0 charset - 0x70 POSITION-CODE-1
185 DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2
186 DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2
187 -----------------------------------------------------------------------
188 "(o)": official, "(p)": private
189 -----------------------------------------------------------------------
190
191*/
192
193/*** GENERAL NOTE on COMPOSITE CHARACTER ***
194
195 A composite character is a character composed from several (up to
196 16) non-composite characters (components). Although each components
197 can belong to any charset, a composite character itself belongs to
198 the charset `charset-composition' and is assigned a special
199 leading-code `LEADING_CODE_COMPOSITION' for multi-byte form. See
200 the document "2. Emacs internal format handlers" in `coding.c' for
201 more detail about multi-byte form.
202
203 A character code of composite character has special format. In the
204 above document, FIELD1 of a composite character is 0x1F. Each
205 composite character is assigned a sequential number CMPCHAR-ID.
206 FIELD2 and FIELD3 are combined to make 14bits field for holding
207 CMPCHAR-ID, which means that Emacs can handle at most 2^14 (= 16384)
208 composite characters at once.
209
210 -----------------------------------------------------------------------
211 charset FIELD1 (5-bit) FIELD2&3 (14-bit)
212 -----------------------------------------------------------------------
213 CHARSET-COMPOSITION 0x1F CMPCHAR-ID
214 -----------------------------------------------------------------------
215
216 Emacs assigns CMPCHAR-ID to a composite character only when it
217 requires the character code of the composite character (e.g. while
218 displaying the composite character).
219
220*/
221
222/* Masks of each field of character code. */
223#define CHAR_FIELD1_MASK (0x1F << 14)
224#define CHAR_FIELD2_MASK (0x7F << 7)
225#define CHAR_FIELD3_MASK 0x7F
226
227/* Macros to access each field of character C. */
228#define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14)
229#define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7)
230#define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK)
231
232/* Minimum character code of character of each DIMENSION. */
233#define MIN_CHAR_OFFICIAL_DIMENSION1 \
234 ((MIN_CHARSET_OFFICIAL_DIMENSION1 - 0x70) << 7)
235#define MIN_CHAR_PRIVATE_DIMENSION1 \
236 ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7)
237#define MIN_CHAR_OFFICIAL_DIMENSION2 \
238 ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
239#define MIN_CHAR_PRIVATE_DIMENSION2 \
240 ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
241#define MIN_CHAR_COMPOSITION \
242 (0x1F << 14)
243
244/* 1 if C is an ASCII character, else 0. */
245#define SINGLE_BYTE_CHAR_P(c) ((c) < 0x100)
246/* 1 if C is an composite character, else 0. */
247#define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION)
248
249/* A char-table containing information of each character set.
250
251 Unlike ordinary char-tables, this doesn't contain any nested table.
252 Only the top level elements are used. Each element is a vector of
253 the following information:
254 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
255 LEADING-CODE-BASE, LEADING-CODE-EXT,
256 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
257 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
258 PLIST.
259
260 CHARSET-ID (integer) is the identification number of the charset.
261
262 BYTE (integer) is the length of multi-byte form of a character in
263 the charset: one of 1, 2, 3, and 4.
264
265 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
266
267 CHARS (integer) is the number of characters in a dimension: 94 or 96.
268
269 WIDTH (integer) is the number of columns a character in the charset
270 occupies on the screen: one of 0, 1, and 2.
271
272 DIRECTION (integer) is the rendering direction of characters in the
273 charset when rendering. If 0, render from right to left, else
274 render from left to right.
275
276 LEADING-CODE-BASE (integer) is the base leading-code for the
277 charset.
278
279 LEADING-CODE-EXT (integer) is the extended leading-code for the
280 charset. All charsets of less than 0xA0 has the value 0.
281
282 ISO-FINAL-CHAR (character) is the final character of the
283 corresponding ISO 2022 charset.
284
285 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
286 while encoding to variants of ISO 2022 coding system, one of the
287 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
288
289 REVERSE-CHARSET (integer) is the charset which differs only in
290 LEFT-TO-RIGHT value from the charset. If there's no such a
291 charset, the value is -1.
292
293 SHORT-NAME (string) is the short name to refer to the charset.
294
295 LONG-NAME (string) is the long name to refer to the charset.
296
297 DESCRIPTION (string) is the description string of the charset.
298
299 PLIST (property list) may contain any type of information a user
300 want to put and get by functions `put-charset-property' and
301 `get-charset-property' respectively. */
302extern Lisp_Object Vcharset_table;
303
304/* Macros to access various information of CHARSET in Vcharset_table.
305 We provide these macros for efficiency. No range check of CHARSET. */
306
307/* Return entry of CHARSET (lisp integer) in Vcharset_table. */
308#define CHARSET_TABLE_ENTRY(charset) \
309 XCHAR_TABLE (Vcharset_table)->contents[charset]
310
311/* Return information INFO-IDX of CHARSET. */
312#define CHARSET_TABLE_INFO(charset, info_idx) \
313 XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx]
314
315#define CHARSET_ID_IDX (0)
316#define CHARSET_BYTES_IDX (1)
317#define CHARSET_DIMENSION_IDX (2)
318#define CHARSET_CHARS_IDX (3)
319#define CHARSET_WIDTH_IDX (4)
320#define CHARSET_DIRECTION_IDX (5)
321#define CHARSET_LEADING_CODE_BASE_IDX (6)
322#define CHARSET_LEADING_CODE_EXT_IDX (7)
323#define CHARSET_ISO_FINAL_CHAR_IDX (8)
324#define CHARSET_ISO_GRAPHIC_PLANE_IDX (9)
325#define CHARSET_REVERSE_CHARSET_IDX (10)
326#define CHARSET_SHORT_NAME_IDX (11)
327#define CHARSET_LONG_NAME_IDX (12)
328#define CHARSET_DESCRIPTION_IDX (13)
329#define CHARSET_PLIST_IDX (14)
330/* Size of a vector of each entry of Vcharset_table. */
331#define CHARSET_MAX_IDX (15)
332
333/* And several more macros to be used frequently. */
334#define CHARSET_BYTES(charset) \
335 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX))
336#define CHARSET_DIMENSION(charset) \
337 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX))
338#define CHARSET_CHARS(charset) \
339 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX))
340#define CHARSET_WIDTH(charset) \
341 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX))
342#define CHARSET_DIRECTION(charset) \
343 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX))
344#define CHARSET_LEADING_CODE_BASE(charset) \
345 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX))
346#define CHARSET_LEADING_CODE_EXT(charset) \
347 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX))
348#define CHARSET_ISO_FINAL_CHAR(charset) \
349 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX))
350#define CHARSET_ISO_GRAPHIC_PLANE(charset) \
351 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX))
352#define CHARSET_REVERSE_CHARSET(charset) \
353 XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX))
354
355/* Macros to specify direction of a charset. */
356#define CHARSET_DIRECTION_LEFT_TO_RIGHT 0
357#define CHARSET_DIRECTION_RIGHT_TO_LEFT 1
358
359/* A vector of charset symbol indexed by charset-id. This is used
360 only for returning charset symbol from C functions. */
361extern Lisp_Object Vcharset_symbol_table;
362
363/* Return symbol of CHARSET. */
364#define CHARSET_SYMBOL(charset) \
365 XVECTOR (Vcharset_symbol_table)->contents[charset]
366
367/* 1 if CHARSET is valid, else 0. */
368#define CHARSET_VALID_P(charset) \
369 ((charset) == 0 \
370 || ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
371 || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) < MAX_CHARSET))
372
373/* 1 if CHARSET is already defined, else 0. */
374#define CHARSET_DEFINED_P(charset) \
375 (((charset) >= 0) && ((charset) < MAX_CHARSET) \
376 && !NILP (CHARSET_TABLE_ENTRY (charset)))
377
378/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
379 Vcharset_table can be retrieved only from the first byte of
380 multi-byte form (an ASCII code or a base leading-code), we provide
381 here tables to be used by macros BYTES_BY_CHAR_HEAD and
382 WIDTH_BY_CHAR_HEAD for faster information retrieval. */
383extern int bytes_by_char_head[256];
384extern int width_by_char_head[256];
385
386#define BYTES_BY_CHAR_HEAD(char_head) bytes_by_char_head[char_head]
387#define WIDTH_BY_CHAR_HEAD(char_head) width_by_char_head[char_head]
388
389/* Charset of the character C. */
390#define CHAR_CHARSET(c) \
391 (SINGLE_BYTE_CHAR_P (c) \
392 ? CHARSET_ASCII \
393 : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
394 ? CHAR_FIELD2 (c) + 0x70 \
395 : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \
396 ? CHAR_FIELD1 (c) + 0x8F \
397 : ((c) < MIN_CHAR_COMPOSITION \
398 ? CHAR_FIELD1 (c) + 0xE0 \
399 : CHARSET_COMPOSITION))))
400
401/* Return charset at the place pointed by P. */
402#define CHARSET_AT(p) \
403 (*(p) < 0x80 \
404 ? CHARSET_ASCII \
405 : (*(p) == LEADING_CODE_COMPOSITION \
406 ? CHARSET_COMPOSITION \
407 : (*(p) < LEADING_CODE_PRIVATE_11 \
408 ? (int)*(p) \
409 : (*(p) <= LEADING_CODE_PRIVATE_22 \
410 ? (int)*((p) + 1) \
411 : -1))))
412
413/* Same as `CHARSET_AT ()' but perhaps runs faster because of an
414 additional argument C which is the code (byte) at P. */
415#define FIRST_CHARSET_AT(p, c) \
416 ((c) < 0x80 \
417 ? CHARSET_ASCII \
418 : ((c) == LEADING_CODE_COMPOSITION \
419 ? CHARSET_COMPOSITION \
420 : ((c) < LEADING_CODE_PRIVATE_11 \
421 ? (int)(c) \
422 : ((c) <= LEADING_CODE_PRIVATE_22 \
423 ? (int)*((p) + 1) \
424 : -1))))
425
426/* Check if two characters C1 and C2 belong to the same charset.
427 Always return 0 for composite characters. */
428#define SAME_CHARSET_P(c1, c2) \
429 (c1 < MIN_CHAR_COMPOSITION \
430 && (SINGLE_BYTE_CHAR_P (c1) \
431 ? SINGLE_BYTE_CHAR_P (c2) \
432 : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \
433 ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \
434 : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))))
435
436/* Return a non-ASCII character of which charset is CHARSET and
437 position-codes are C1 and C2. DIMENSION1 character ignores C2. */
438#define MAKE_NON_ASCII_CHAR(charset, c1, c2) \
439 ((charset) == CHARSET_COMPOSITION \
440 ? MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2)) \
441 : (CHARSET_DIMENSION (charset) == 1 \
442 ? (((charset) - 0x70) << 7) | (c1) \
443 : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \
444 ? (((charset) - 0x8F) << 14) | ((c1) << 7) | (c2) \
445 : (((charset) - 0xE0) << 14) | ((c1) << 7) | (c2))))
446
447/* Return a composite character of which CMPCHAR-ID is ID. */
448#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))
449
450/* Return CMPCHAR-ID of a composite character C. */
451#define COMPOSITE_CHAR_ID(c) ((c) - MIN_CHAR_COMPOSITION)
452
453/* Return a character of which charset is CHARSET and position-codes
454 are C1 and C2. DIMENSION1 character ignores C2. */
455#define MAKE_CHAR(charset, c1, c2) \
456 ((charset) == CHARSET_ASCII \
457 ? (c1) \
458 : MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))
459
460/* The charset of non-ASCII character C is set to CHARSET, and the
461 position-codes of C are set to C1 and C2. C2 of DIMENSION1 character
462 is 0. */
463#define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \
464 ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
465 ? (charset = CHAR_FIELD2 (c) + 0x70, \
466 c1 = CHAR_FIELD3 (c), \
467 c2 = 0) \
468 : (charset = ((c) < MIN_CHAR_COMPOSITION \
469 ? (CHAR_FIELD1 (c) \
470 + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
471 : CHARSET_COMPOSITION), \
472 c1 = CHAR_FIELD2 (c), \
473 c2 = CHAR_FIELD3 (c)))
474
475/* The charset of character C is set to CHARSET, and the
476 position-codes of C are set to C1 and C2. C2 of DIMENSION1 character
477 is 0. */
478#define SPLIT_CHAR(c, charset, c1, c2) \
479 (SINGLE_BYTE_CHAR_P (c) \
480 ? charset = CHARSET_ASCII, c1 = (c), c2 = 0 \
481 : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
482
483/* The charset of the character at STR is set to CHARSET, and the
484 position-codes are set to C1 and C2. C2 of DIMENSION1 character is 0.
485 If the character is a composite character, the upper 7-bit and
486 lower 7-bit of CMPCHAR-ID are set in C1 and C2 respectively. No
487 range checking. */
488#define SPLIT_STRING(str, len, charset, c1, c2) \
489 ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
490 || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \
491 || split_non_ascii_string (str, len, &charset, &c1, &c2, 0) < 0) \
492 ? c1 = *(str), charset = CHARSET_ASCII \
493 : charset)
494
495#define MAX_CHARSET 0xFF
496
497/* Mapping table from ISO2022's charset (specified by DIMENSION,
498 CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by
499 macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
500extern int iso_charset_table[2][2][128];
501
502#define ISO_CHARSET_TABLE(dimension, chars, final_char) \
503 iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)]
504
505#define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1)
506
507/* The following two macros CHAR_STRING and STRING_CHAR are the main
508 entry points to convert between Emacs two types of character
509 representations: multi-byte form and single-word form (character
510 code). */
511
512/* Set STR a pointer to the multi-byte form of the character C. If C
513 is not a composite character, the multi-byte form is set in WORKBUF
514 and STR points WORKBUF. The caller should allocate at least 4-byte
515 area at WORKBUF in advance. Returns the length of the multi-byte
516 form. */
517
518#define CHAR_STRING(c, workbuf, str) \
519 (SINGLE_BYTE_CHAR_P (c) \
520 ? *(str = workbuf) = (unsigned char)(c), 1 \
521 : non_ascii_char_to_string (c, workbuf, &str))
522
523/* Return a character code of the character of which multi-byte form
524 is at STR and the length is LEN. If STR doesn't contain valid
525 multi-byte form, only the first byte in STR is returned. */
526
527#define STRING_CHAR(str, len) \
528 ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
529 || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > (len)) \
530 ? (unsigned char) *(str) \
531 : string_to_non_ascii_char (str, len, 0))
532
533/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to
534 the length of the multi-byte form. Just to know the length, use
535 MULTIBYTE_FORM_LENGTH. */
536
537#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
538 ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
539 || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > (len)) \
540 ? (actual_len = 1), (unsigned char) *(str) \
541 : string_to_non_ascii_char (str, len, &actual_len))
542
543/* Return the length of the multi-byte form at string STR of length LEN. */
544
545#define MULTIBYTE_FORM_LENGTH(str, len) \
546 ((BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) == 1 \
547 || BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) > (len)) \
548 ? 1 \
549 : multibyte_form_length (str, len))
550
551/* Set C a (possibly multibyte) character at P. P points into a
552 string which is the virtual concatenation of STR1 (which ends at
553 END1) or STR2 (which ends at END2). */
554
555#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
556 do { \
557 const char *dtemp = (p) == (end1) ? (str2) : (p); \
558 const char *dlimit = ((p) >= (str1) && (p) < (end1)) ? (end1) : (end2); \
559 c = STRING_CHAR (dtemp, dlimit - dtemp); \
560 } while (0)
561
562/* Set C a (possibly multibyte) character before P. P points into a
563 string which is the virtual concatenation of STR1 (which ends at
564 END1) or STR2 (which ends at END2). */
565
566#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
567 do { \
568 const char *dtemp = (p); \
569 const char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
570 while (dtemp-- > dlimit && (unsigned char) *dtemp >= 0xA0); \
571 c = STRING_CHAR (dtemp, p - dtemp); \
572 } while (0)
573
574#ifdef emacs
575
576/* Increase the buffer point POS of the current buffer to the next
577 character boundary. This macro relies on the fact that *GPT_ADDR
578 and *Z_ADDR are always accessible and the values are '\0'. No
579 range checking of POS. */
580#define INC_POS(pos) \
581 do { \
582 unsigned char *p = POS_ADDR (pos) + 1; \
583 pos++; \
584 while (!CHAR_HEAD_P (p)) p++, pos++; \
585 } while (0)
586
587/* Decrease the buffer point POS of the current buffer to the previous
588 character boundary. No range checking of POS. */
589#define DEC_POS(pos) \
590 do { \
591 unsigned char *p, *p_min; \
592 if (--pos < GPT) \
593 p = BEG_ADDR + pos - 1, p_min = BEG_ADDR; \
594 else \
595 p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \
596 while (p > p_min && !CHAR_HEAD_P (p)) p--, pos--; \
597 } while (0)
598
599#endif /* emacs */
600
601/* Maximum counts of components in one composite character. */
602#define MAX_COMPONENT_COUNT 16
603
604/* Structure to hold information of a composite character. */
605struct cmpchar_info {
606 /* Byte length of the composite character. */
607 int len;
608
609 /* Multi-byte form of the composite character. */
610 unsigned char *data;
611
612 /* Length of glyph codes. */
613 int glyph_len;
614
615 /* Width of the overall glyph of the composite character. */
616 int width;
617
618 /* Pointer to an array of glyph codes of the composite character.
619 This actually contains only character code, no face. */
620 GLYPH *glyph;
621
622 /* Pointer to an array of composition rules. The value has the form:
623 (0xA0 + ((GLOBAL-REF-POINT << 2) | NEW-REF-POINT))
624 where each XXX-REF-POINT is 0..8. */
625 unsigned char *cmp_rule;
626
627 /* Pointer to an array of x-axis offset of left edge of glyphs
628 relative to the left of of glyph[0] except for the first element
629 which is the absolute offset from the left edge of overall glyph.
630 The actual pixel offset should be calculated by multiplying each
631 frame's one column width by this value:
632 (i.e. FONT_WIDTH (f->output_data.x->font) * col_offset[N]). */
633 float *col_offset;
634
635 /* Work slot used by `dumpglyphs' (xterm.c). */
636 int face_work;
637};
638
639/* Table of pointers to the structure `cmpchar_info' indexed by
640 CMPCHAR-ID. */
641extern struct cmpchar_info **cmpchar_table;
642/* Number of the current composite characters. */
643extern int n_cmpchars;
644
645/* This is the maximum length of multi-byte form. */
646#define MAX_LENGTH_OF_MULTI_BYTE_FORM (MAX_COMPONENT_COUNT * 6)
647
648#endif /* _CHARSET_H */
649
diff --git a/src/coding.c b/src/coding.c
new file mode 100644
index 00000000000..95bbd26fef9
--- /dev/null
+++ b/src/coding.c
@@ -0,0 +1,3520 @@
1/* Coding system handler (conversion, detection, and etc).
2 Ver.1.0.
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21/*** TABLE OF CONTENTS ***
22
23 1. Preamble
24 2. Emacs' internal format handlers
25 3. ISO2022 handlers
26 4. Shift-JIS and BIG5 handlers
27 5. End-of-line handlers
28 6. C library functions
29 7. Emacs Lisp library functions
30 8. Post-amble
31
32*/
33
34/*** GENERAL NOTE on CODING SYSTEM ***
35
36 Coding system is an encoding mechanism of one or more character
37 sets. Here's a list of coding systems which Emacs can handle. When
38 we say "decode", it means converting some other coding system to
39 Emacs' internal format, and when we say "encode", it means
40 converting Emacs' internal format to some other coding system.
41
42 0. Emacs' internal format
43
44 Emacs itself holds a multi-lingual character in a buffer and a string
45 in a special format. Details are described in the section 2.
46
47 1. ISO2022
48
49 The most famous coding system for multiple character sets. X's
50 Compound Text, various EUCs (Extended Unix Code), and such coding
51 systems used in Internet communication as ISO-2022-JP are all
52 variants of ISO2022. Details are described in the section 3.
53
54 2. SJIS (or Shift-JIS or MS-Kanji-Code)
55
56 A coding system to encode character sets: ASCII, JISX0201, and
57 JISX0208. Widely used for PC's in Japan. Details are described in
58 the section 4.
59
60 3. BIG5
61
62 A coding system to encode character sets: ASCII and Big5. Widely
63 used by Chinese (mainly in Taiwan and Hong Kong). Details are
64 described in the section 4. In this file, when written as "BIG5"
65 (all uppercase), it means the coding system, and when written as
66 "Big5" (capitalized), it means the character set.
67
68 4. Else
69
70 If a user want to read/write a text encoded in a coding system not
71 listed above, he can supply a decoder and an encoder for it in CCL
72 (Code Conversion Language) programs. Emacs executes the CCL program
73 while reading/writing.
74
75 Emacs represent a coding-system by a Lisp symbol that has a property
76 `coding-system'. But, before actually using the coding-system, the
77 information about it is set in a structure of type `struct
78 coding_system' for rapid processing. See the section 6 for more
79 detail.
80
81*/
82
83/*** GENERAL NOTES on END-OF-LINE FORMAT ***
84
85 How end-of-line of a text is encoded depends on a system. For
86 instance, Unix's format is just one byte of `line-feed' code,
87 whereas DOS's format is two bytes sequence of `carriage-return' and
88 `line-feed' codes. MacOS's format is one byte of `carriage-return'.
89
90 Since how characters in a text is encoded and how end-of-line is
91 encoded is independent, any coding system described above can take
92 any format of end-of-line. So, Emacs has information of format of
93 end-of-line in each coding-system. See the section 6 for more
94 detail.
95
96*/
97
98/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
99
100 These functions check if a text between SRC and SRC_END is encoded
101 in the coding system category XXX. Each returns an integer value in
102 which appropriate flag bits for the category XXX is set. The flag
103 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
104 template of these functions. */
105#if 0
106int
107detect_coding_internal (src, src_end)
108 unsigned char *src, *src_end;
109{
110 ...
111}
112#endif
113
114/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
115
116 These functions decode SRC_BYTES length text at SOURCE encoded in
117 CODING to Emacs' internal format. The resulting text goes to a
118 place pointed by DESTINATION, the length of which should not exceed
119 DST_BYTES. The bytes actually processed is returned as *CONSUMED.
120 The return value is the length of the decoded text. Below is a
121 template of these functions. */
122#if 0
123decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
124 struct coding_system *coding;
125 unsigned char *source, *destination;
126 int src_bytes, dst_bytes;
127 int *consumed;
128{
129 ...
130}
131#endif
132
133/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
134
135 These functions encode SRC_BYTES length text at SOURCE of Emacs
136 internal format to CODING. The resulting text goes to a place
137 pointed by DESTINATION, the length of which should not exceed
138 DST_BYTES. The bytes actually processed is returned as *CONSUMED.
139 The return value is the length of the encoded text. Below is a
140 template of these functions. */
141#if 0
142encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
143 struct coding_system *coding;
144 unsigned char *source, *destination;
145 int src_bytes, dst_bytes;
146 int *consumed;
147{
148 ...
149}
150#endif
151
152/*** COMMONLY USED MACROS ***/
153
154/* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
155 THREE_MORE_BYTES safely get one, two, and three bytes from the
156 source text respectively. If there are not enough bytes in the
157 source, they jump to `label_end_of_loop'. The caller should set
158 variables `src' and `src_end' to appropriate areas in advance. */
159
160#define ONE_MORE_BYTE(c1) \
161 do { \
162 if (src < src_end) \
163 c1 = *src++; \
164 else \
165 goto label_end_of_loop; \
166 } while (0)
167
168#define TWO_MORE_BYTES(c1, c2) \
169 do { \
170 if (src + 1 < src_end) \
171 c1 = *src++, c2 = *src++; \
172 else \
173 goto label_end_of_loop; \
174 } while (0)
175
176#define THREE_MORE_BYTES(c1, c2, c3) \
177 do { \
178 if (src + 2 < src_end) \
179 c1 = *src++, c2 = *src++, c3 = *src++; \
180 else \
181 goto label_end_of_loop; \
182 } while (0)
183
184/* The following three macros DECODE_CHARACTER_ASCII,
185 DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
186 the multi-byte form of a character of each class at the place
187 pointed by `dst'. The caller should set the variable `dst' to
188 point to an appropriate area and the variable `coding' to point to
189 the coding-system of the currently decoding text in advance. */
190
191/* Decode one ASCII character C. */
192
193#define DECODE_CHARACTER_ASCII(c) \
194 do { \
195 if (COMPOSING_P (coding->composing)) \
196 *dst++ = 0xA0, *dst++ = (c) | 0x80; \
197 else \
198 *dst++ = (c); \
199 } while (0)
200
201/* Decode one DIMENSION1 character of which charset is CHARSET and
202 position-code is C. */
203
204#define DECODE_CHARACTER_DIMENSION1(charset, c) \
205 do { \
206 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
207 if (COMPOSING_P (coding->composing)) \
208 *dst++ = leading_code + 0x20; \
209 else \
210 *dst++ = leading_code; \
211 if (leading_code = CHARSET_LEADING_CODE_EXT (charset)) \
212 *dst++ = leading_code; \
213 *dst++ = (c) | 0x80; \
214 } while (0)
215
216/* Decode one DIMENSION2 character of which charset is CHARSET and
217 position-codes are C1 and C2. */
218
219#define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \
220 do { \
221 DECODE_CHARACTER_DIMENSION1 (charset, c1); \
222 *dst++ = (c2) | 0x80; \
223 } while (0)
224
225
226/*** 1. Preamble ***/
227
228#include <stdio.h>
229
230#ifdef emacs
231
232#include <config.h>
233#include "lisp.h"
234#include "buffer.h"
235#include "charset.h"
236#include "ccl.h"
237#include "coding.h"
238#include "window.h"
239
240#else /* not emacs */
241
242#include "mulelib.h"
243
244#endif /* not emacs */
245
246Lisp_Object Qcoding_system, Qeol_type;
247Lisp_Object Qbuffer_file_coding_system;
248Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
249
250extern Lisp_Object Qinsert_file_contents, Qwrite_region;
251Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
252Lisp_Object Qstart_process, Qopen_network_stream;
253Lisp_Object Qtarget_idx;
254
255/* Mnemonic character of each format of end-of-line. */
256int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
257/* Mnemonic character to indicate format of end-of-line is not yet
258 decided. */
259int eol_mnemonic_undecided;
260
261#ifdef emacs
262
263Lisp_Object Qcoding_system_vector, Qcoding_system_p, Qcoding_system_error;
264
265/* Coding-systems are handed between Emacs Lisp programs and C internal
266 routines by the following three variables. */
267/* Coding-system for reading files and receiving data from process. */
268Lisp_Object Vcoding_system_for_read;
269/* Coding-system for writing files and sending data to process. */
270Lisp_Object Vcoding_system_for_write;
271/* Coding-system actually used in the latest I/O. */
272Lisp_Object Vlast_coding_system_used;
273
274/* Coding-system of what terminal accept for displaying. */
275struct coding_system terminal_coding;
276
277/* Coding-system of what is sent from terminal keyboard. */
278struct coding_system keyboard_coding;
279
280Lisp_Object Vcoding_system_alist;
281
282#endif /* emacs */
283
284Lisp_Object Qcoding_category_index;
285
286/* List of symbols `coding-category-xxx' ordered by priority. */
287Lisp_Object Vcoding_category_list;
288
289/* Table of coding-systems currently assigned to each coding-category. */
290Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
291
292/* Table of names of symbol for each coding-category. */
293char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
294 "coding-category-internal",
295 "coding-category-sjis",
296 "coding-category-iso-7",
297 "coding-category-iso-8-1",
298 "coding-category-iso-8-2",
299 "coding-category-iso-else",
300 "coding-category-big5",
301 "coding-category-binary"
302};
303
304/* Alist of charsets vs the alternate charsets. */
305Lisp_Object Valternate_charset_table;
306
307/* Alist of charsets vs revision number. */
308Lisp_Object Vcharset_revision_alist;
309
310
311/*** 2. Emacs internal format handlers ***/
312
313/* Emacs' internal format for encoding multiple character sets is a
314 kind of multi-byte encoding, i.e. encoding a character by a sequence
315 of one-byte codes of variable length. ASCII characters and control
316 characters (e.g. `tab', `newline') are represented by one-byte as
317 is. It takes the range 0x00 through 0x7F. The other characters
318 are represented by a sequence of `base leading-code', optional
319 `extended leading-code', and one or two `position-code's. Length
320 of the sequence is decided by the base leading-code. Leading-code
321 takes the range 0x80 through 0x9F, whereas extended leading-code
322 and position-code take the range 0xA0 through 0xFF. See the
323 document of `charset.h' for more detail about leading-code and
324 position-code.
325
326 There's one exception in this rule. Special leading-code
327 `leading-code-composition' denotes that the following several
328 characters should be composed into one character. Leading-codes of
329 components (except for ASCII) are added 0x20. An ASCII character
330 component is represented by a 2-byte sequence of `0xA0' and
331 `ASCII-code + 0x80'. See also the document in `charset.h' for the
332 detail of composite character. Hence, we can summarize the code
333 range as follows:
334
335 --- CODE RANGE of Emacs' internal format ---
336 (character set) (range)
337 ASCII 0x00 .. 0x7F
338 ELSE (1st byte) 0x80 .. 0x9F
339 (rest bytes) 0xA0 .. 0xFF
340 ---------------------------------------------
341
342 */
343
344enum emacs_code_class_type emacs_code_class[256];
345
346/* Go to the next statement only if *SRC is accessible and the code is
347 greater than 0xA0. */
348#define CHECK_CODE_RANGE_A0_FF \
349 do { \
350 if (src >= src_end) \
351 goto label_end_of_switch; \
352 else if (*src++ < 0xA0) \
353 return 0; \
354 } while (0)
355
356/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
357 Check if a text is encoded in Emacs' internal format. If it is,
358 return CODING_CATEGORY_MASK_INTERNAL, else return 0. */
359
360int
361detect_coding_internal (src, src_end)
362 unsigned char *src, *src_end;
363{
364 unsigned char c;
365 int composing = 0;
366
367 while (src < src_end)
368 {
369 c = *src++;
370
371 if (composing)
372 {
373 if (c < 0xA0)
374 composing = 0;
375 else
376 c -= 0x20;
377 }
378
379 switch (emacs_code_class[c])
380 {
381 case EMACS_ascii_code:
382 case EMACS_linefeed_code:
383 break;
384
385 case EMACS_control_code:
386 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
387 return 0;
388 break;
389
390 case EMACS_invalid_code:
391 return 0;
392
393 case EMACS_leading_code_composition: /* c == 0x80 */
394 if (composing)
395 CHECK_CODE_RANGE_A0_FF;
396 else
397 composing = 1;
398 break;
399
400 case EMACS_leading_code_4:
401 CHECK_CODE_RANGE_A0_FF;
402 /* fall down to check it two more times ... */
403
404 case EMACS_leading_code_3:
405 CHECK_CODE_RANGE_A0_FF;
406 /* fall down to check it one more time ... */
407
408 case EMACS_leading_code_2:
409 CHECK_CODE_RANGE_A0_FF;
410 break;
411
412 default:
413 label_end_of_switch:
414 break;
415 }
416 }
417 return CODING_CATEGORY_MASK_INTERNAL;
418}
419
420
421/*** 3. ISO2022 handlers ***/
422
423/* The following note describes the coding system ISO2022 briefly.
424 Since the intension of this note is to help understanding of the
425 programs in this file, some parts are NOT ACCURATE or OVERLY
426 SIMPLIFIED. For the thorough understanding, please refer to the
427 original document of ISO2022.
428
429 ISO2022 provides many mechanisms to encode several character sets
430 in 7-bit and 8-bit environment. If one choose 7-bite environment,
431 all text is encoded by codes of less than 128. This may make the
432 encoded text a little bit longer, but the text get more stability
433 to pass through several gateways (some of them split MSB off).
434
435 There are two kind of character set: control character set and
436 graphic character set. The former contains control characters such
437 as `newline' and `escape' to provide control functions (control
438 functions are provided also by escape sequence). The latter
439 contains graphic characters such as ' A' and '-'. Emacs recognizes
440 two control character sets and many graphic character sets.
441
442 Graphic character sets are classified into one of the following
443 four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
444 DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
445 bytes (DIMENSION) and the number of characters in one dimension
446 (CHARS) of the set. In addition, each character set is assigned an
447 identification tag (called "final character" and denoted as <F>
448 here after) which is unique in each class. <F> of each character
449 set is decided by ECMA(*) when it is registered in ISO. Code range
450 of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
451
452 Note (*): ECMA = European Computer Manufacturers Association
453
454 Here are examples of graphic character set [NAME(<F>)]:
455 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
456 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
457 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
458 o DIMENSION2_CHARS96 -- none for the moment
459
460 A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
461 C0 [0x00..0x1F] -- control character plane 0
462 GL [0x20..0x7F] -- graphic character plane 0
463 C1 [0x80..0x9F] -- control character plane 1
464 GR [0xA0..0xFF] -- graphic character plane 1
465
466 A control character set is directly designated and invoked to C0 or
467 C1 by an escape sequence. The most common case is that ISO646's
468 control character set is designated/invoked to C0 and ISO6429's
469 control character set is designated/invoked to C1, and usually
470 these designations/invocations are omitted in a coded text. With
471 7-bit environment, only C0 can be used, and a control character for
472 C1 is encoded by an appropriate escape sequence to fit in the
473 environment. All control characters for C1 are defined the
474 corresponding escape sequences.
475
476 A graphic character set is at first designated to one of four
477 graphic registers (G0 through G3), then these graphic registers are
478 invoked to GL or GR. These designations and invocations can be
479 done independently. The most common case is that G0 is invoked to
480 GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
481 these invocations and designations are omitted in a coded text.
482 With 7-bit environment, only GL can be used.
483
484 When a graphic character set of CHARS94 is invoked to GL, code 0x20
485 and 0x7F of GL area work as control characters SPACE and DEL
486 respectively, and code 0xA0 and 0xFF of GR area should not be used.
487
488 There are two ways of invocation: locking-shift and single-shift.
489 With locking-shift, the invocation lasts until the next different
490 invocation, whereas with single-shift, the invocation works only
491 for the following character and doesn't affect locking-shift.
492 Invocations are done by the following control characters or escape
493 sequences.
494
495 ----------------------------------------------------------------------
496 function control char escape sequence description
497 ----------------------------------------------------------------------
498 SI (shift-in) 0x0F none invoke G0 to GL
499 SI (shift-out) 0x0E none invoke G1 to GL
500 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
501 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
502 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 into GL
503 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 into GL
504 ----------------------------------------------------------------------
505 The first four are for locking-shift. Control characters for these
506 functions are defined by macros ISO_CODE_XXX in `coding.h'.
507
508 Designations are done by the following escape sequences.
509 ----------------------------------------------------------------------
510 escape sequence description
511 ----------------------------------------------------------------------
512 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
513 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
514 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
515 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
516 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
517 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
518 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
519 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
520 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
521 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
522 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
523 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
524 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
525 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
526 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
527 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
528 ----------------------------------------------------------------------
529
530 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
531 of dimension 1, chars 94, and final character <F>, and etc.
532
533 Note (*): Although these designations are not allowed in ISO2022,
534 Emacs accepts them on decoding, and produces them on encoding
535 CHARS96 character set in a coding system which is characterized as
536 7-bit environment, non-locking-shift, and non-single-shift.
537
538 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
539 '(' can be omitted. We call this as "short-form" here after.
540
541 Now you may notice that there are a lot of ways for encoding the
542 same multilingual text in ISO2022. Actually, there exist many
543 coding systems such as Compound Text (used in X's inter client
544 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
545 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
546 localized platforms), and all of these are variants of ISO2022.
547
548 In addition to the above, Emacs handles two more kinds of escape
549 sequences: ISO6429's direction specification and Emacs' private
550 sequence for specifying character composition.
551
552 ISO6429's direction specification takes the following format:
553 o CSI ']' -- end of the current direction
554 o CSI '0' ']' -- end of the current direction
555 o CSI '1' ']' -- start of left-to-right text
556 o CSI '2' ']' -- start of right-to-left text
557 The control character CSI (0x9B: control sequence introducer) is
558 abbreviated to the escape sequence ESC '[' in 7-bit environment.
559
560 Character composition specification takes the following format:
561 o ESC '0' -- start character composition
562 o ESC '1' -- end character composition
563 Since these are not standard escape sequences of any ISO, the use
564 of them for these meaning is restricted to Emacs only. */
565
566enum iso_code_class_type iso_code_class[256];
567
568/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
569 Check if a text is encoded in ISO2022. If it is, returns an
570 integer in which appropriate flag bits any of:
571 CODING_CATEGORY_MASK_ISO_7
572 CODING_CATEGORY_MASK_ISO_8_1
573 CODING_CATEGORY_MASK_ISO_8_2
574 CODING_CATEGORY_MASK_ISO_ELSE
575 are set. If a code which should never appear in ISO2022 is found,
576 returns 0. */
577
578int
579detect_coding_iso2022 (src, src_end)
580 unsigned char *src, *src_end;
581{
582 unsigned char graphic_register[4];
583 unsigned char c, esc_cntl;
584 int mask = (CODING_CATEGORY_MASK_ISO_7
585 | CODING_CATEGORY_MASK_ISO_8_1
586 | CODING_CATEGORY_MASK_ISO_8_2);
587 /* We may look ahead maximum 3 bytes. */
588 unsigned char *adjusted_src_end = src_end - 3;
589 int i;
590
591 for (i = 0; i < 4; i++)
592 graphic_register[i] = CHARSET_ASCII;
593
594 while (src < adjusted_src_end)
595 {
596 c = *src++;
597 switch (c)
598 {
599 case ISO_CODE_ESC:
600 if (src >= adjusted_src_end)
601 break;
602 c = *src++;
603 if (c == '$')
604 {
605 /* Designation of 2-byte character set. */
606 if (src >= adjusted_src_end)
607 break;
608 c = *src++;
609 }
610 if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/'))
611 /* Designation to graphic register 1, 2, or 3. */
612 mask &= ~CODING_CATEGORY_MASK_ISO_7;
613 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
614 return CODING_CATEGORY_MASK_ISO_ELSE;
615 break;
616
617 case ISO_CODE_SI:
618 case ISO_CODE_SO:
619 return CODING_CATEGORY_MASK_ISO_ELSE;
620
621 case ISO_CODE_CSI:
622 case ISO_CODE_SS2:
623 case ISO_CODE_SS3:
624 mask &= ~CODING_CATEGORY_MASK_ISO_7;
625 break;
626
627 default:
628 if (c < 0x80)
629 break;
630 else if (c < 0xA0)
631 return 0;
632 else
633 {
634 int count = 1;
635
636 mask &= ~CODING_CATEGORY_MASK_ISO_7;
637 while (src < adjusted_src_end && *src >= 0xA0)
638 count++, src++;
639 if (count & 1 && src < adjusted_src_end)
640 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
641 }
642 break;
643 }
644 }
645
646 return mask;
647}
648
649/* Decode a character of which charset is CHARSET and the 1st position
650 code is C1. If dimension of CHARSET 2, the 2nd position code is
651 fetched from SRC and set to C2. If CHARSET is negative, it means
652 that we are decoding ill formed text, and what we can do is just to
653 read C1 as is. */
654
655#define DECODE_ISO_CHARACTER(charset, c1) \
656 do { \
657 if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2) \
658 ONE_MORE_BYTE (c2); \
659 if (COMPOSING_HEAD_P (coding->composing)) \
660 { \
661 *dst++ = LEADING_CODE_COMPOSITION; \
662 if (COMPOSING_WITH_RULE_P (coding->composing)) \
663 /* To tell composition rules are embeded. */ \
664 *dst++ = 0xFF; \
665 coding->composing += 2; \
666 } \
667 if ((charset) < 0) \
668 *dst++ = c1; \
669 else if ((charset) == CHARSET_ASCII) \
670 DECODE_CHARACTER_ASCII (c1); \
671 else if (CHARSET_DIMENSION (charset) == 1) \
672 DECODE_CHARACTER_DIMENSION1 (charset, c1); \
673 else \
674 DECODE_CHARACTER_DIMENSION2 (charset, c1, c2); \
675 if (COMPOSING_WITH_RULE_P (coding->composing)) \
676 /* To tell a composition rule follows. */ \
677 coding->composing = COMPOSING_WITH_RULE_RULE; \
678 } while (0)
679
680/* Set designation state into CODING. */
681#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
682 do { \
683 int charset = ISO_CHARSET_TABLE (dimension, chars, final_char); \
684 Lisp_Object temp \
685 = Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table); \
686 if (! NILP (temp)) \
687 charset = get_charset_id (XCONS (temp)->cdr); \
688 if (charset >= 0) \
689 { \
690 if (coding->direction == 1 \
691 && CHARSET_REVERSE_CHARSET (charset) >= 0) \
692 charset = CHARSET_REVERSE_CHARSET (charset); \
693 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
694 } \
695 } while (0)
696
697/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
698
699int
700decode_coding_iso2022 (coding, source, destination,
701 src_bytes, dst_bytes, consumed)
702 struct coding_system *coding;
703 unsigned char *source, *destination;
704 int src_bytes, dst_bytes;
705 int *consumed;
706{
707 unsigned char *src = source;
708 unsigned char *src_end = source + src_bytes;
709 unsigned char *dst = destination;
710 unsigned char *dst_end = destination + dst_bytes;
711 /* Since the maximum bytes produced by each loop is 7, we subtract 6
712 from DST_END to assure that overflow checking is necessary only
713 at the head of loop. */
714 unsigned char *adjusted_dst_end = dst_end - 6;
715 int charset;
716 /* Charsets invoked to graphic plane 0 and 1 respectively. */
717 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
718 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
719
720 while (src < src_end && dst < adjusted_dst_end)
721 {
722 /* SRC_BASE remembers the start position in source in each loop.
723 The loop will be exited when there's not enough source text
724 to analyze long escape sequence or 2-byte code (within macros
725 ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
726 to SRC_BASE before exiting. */
727 unsigned char *src_base = src;
728 unsigned char c1 = *src++, c2, cmprule;
729
730 switch (iso_code_class [c1])
731 {
732 case ISO_0x20_or_0x7F:
733 if (!coding->composing
734 && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
735 {
736 /* This is SPACE or DEL. */
737 *dst++ = c1;
738 break;
739 }
740 /* This is a graphic character, we fall down ... */
741
742 case ISO_graphic_plane_0:
743 if (coding->composing == COMPOSING_WITH_RULE_RULE)
744 {
745 /* This is a composition rule. */
746 *dst++ = c1 | 0x80;
747 coding->composing = COMPOSING_WITH_RULE_TAIL;
748 }
749 else
750 DECODE_ISO_CHARACTER (charset0, c1);
751 break;
752
753 case ISO_0xA0_or_0xFF:
754 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94)
755 {
756 /* Invalid code. */
757 *dst++ = c1;
758 break;
759 }
760 /* This is a graphic character, we fall down ... */
761
762 case ISO_graphic_plane_1:
763 DECODE_ISO_CHARACTER (charset1, c1);
764 break;
765
766 case ISO_control_code:
767 /* All ISO2022 control characters in this class have the
768 same representation in Emacs internal format. */
769 *dst++ = c1;
770 break;
771
772 case ISO_carriage_return:
773 if (coding->eol_type == CODING_EOL_CR)
774 {
775 *dst++ = '\n';
776 }
777 else if (coding->eol_type == CODING_EOL_CRLF)
778 {
779 ONE_MORE_BYTE (c1);
780 if (c1 == ISO_CODE_LF)
781 *dst++ = '\n';
782 else
783 {
784 src--;
785 *dst++ = c1;
786 }
787 }
788 else
789 {
790 *dst++ = c1;
791 }
792 break;
793
794 case ISO_shift_out:
795 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
796 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
797 break;
798
799 case ISO_shift_in:
800 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
801 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
802 break;
803
804 case ISO_single_shift_2_7:
805 case ISO_single_shift_2:
806 /* SS2 is handled as an escape sequence of ESC 'N' */
807 c1 = 'N';
808 goto label_escape_sequence;
809
810 case ISO_single_shift_3:
811 /* SS2 is handled as an escape sequence of ESC 'O' */
812 c1 = 'O';
813 goto label_escape_sequence;
814
815 case ISO_control_sequence_introducer:
816 /* CSI is handled as an escape sequence of ESC '[' ... */
817 c1 = '[';
818 goto label_escape_sequence;
819
820 case ISO_escape:
821 ONE_MORE_BYTE (c1);
822 label_escape_sequence:
823 /* Escape sequences handled by Emacs are invocation,
824 designation, direction specification, and character
825 composition specification. */
826 switch (c1)
827 {
828 case '&': /* revision of following character set */
829 ONE_MORE_BYTE (c1);
830 if (!(c1 >= '@' && c1 <= '~'))
831 {
832 goto label_invalid_escape_sequence;
833 }
834 ONE_MORE_BYTE (c1);
835 if (c1 != ISO_CODE_ESC)
836 {
837 goto label_invalid_escape_sequence;
838 }
839 ONE_MORE_BYTE (c1);
840 goto label_escape_sequence;
841
842 case '$': /* designation of 2-byte character set */
843 ONE_MORE_BYTE (c1);
844 if (c1 >= '@' && c1 <= 'B')
845 { /* designation of JISX0208.1978, GB2312.1980,
846 or JISX0208.1980 */
847 DECODE_DESIGNATION (0, 2, 94, c1);
848 }
849 else if (c1 >= 0x28 && c1 <= 0x2B)
850 { /* designation of DIMENSION2_CHARS94 character set */
851 ONE_MORE_BYTE (c2);
852 DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
853 }
854 else if (c1 >= 0x2C && c1 <= 0x2F)
855 { /* designation of DIMENSION2_CHARS96 character set */
856 ONE_MORE_BYTE (c2);
857 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
858 }
859 else
860 {
861 goto label_invalid_escape_sequence;
862 }
863 break;
864
865 case 'n': /* invocation of locking-shift-2 */
866 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
867 break;
868
869 case 'o': /* invocation of locking-shift-3 */
870 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
871 break;
872
873 case 'N': /* invocation of single-shift-2 */
874 ONE_MORE_BYTE (c1);
875 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
876 DECODE_ISO_CHARACTER (charset, c1);
877 break;
878
879 case 'O': /* invocation of single-shift-3 */
880 ONE_MORE_BYTE (c1);
881 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
882 DECODE_ISO_CHARACTER (charset, c1);
883 break;
884
885 case '0': /* start composing without embeded rules */
886 coding->composing = COMPOSING_NO_RULE_HEAD;
887 break;
888
889 case '1': /* end composing */
890 coding->composing = COMPOSING_NO;
891 break;
892
893 case '2': /* start composing with embeded rules */
894 coding->composing = COMPOSING_WITH_RULE_HEAD;
895 break;
896
897 case '[': /* specification of direction */
898 /* For the moment, nested direction is not supported.
899 So, the value of `coding->direction' is 0 or 1: 0
900 means left-to-right, 1 means right-to-left. */
901 ONE_MORE_BYTE (c1);
902 switch (c1)
903 {
904 case ']': /* end of the current direction */
905 coding->direction = 0;
906
907 case '0': /* end of the current direction */
908 case '1': /* start of left-to-right direction */
909 ONE_MORE_BYTE (c1);
910 if (c1 == ']')
911 coding->direction = 0;
912 else
913 goto label_invalid_escape_sequence;
914 break;
915
916 case '2': /* start of right-to-left direction */
917 ONE_MORE_BYTE (c1);
918 if (c1 == ']')
919 coding->direction= 1;
920 else
921 goto label_invalid_escape_sequence;
922 break;
923
924 default:
925 goto label_invalid_escape_sequence;
926 }
927 break;
928
929 default:
930 if (c1 >= 0x28 && c1 <= 0x2B)
931 { /* designation of DIMENSION1_CHARS94 character set */
932 ONE_MORE_BYTE (c2);
933 DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
934 }
935 else if (c1 >= 0x2C && c1 <= 0x2F)
936 { /* designation of DIMENSION1_CHARS96 character set */
937 ONE_MORE_BYTE (c2);
938 DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
939 }
940 else
941 {
942 goto label_invalid_escape_sequence;
943 }
944 }
945 /* We must update these variables now. */
946 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
947 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
948 break;
949
950 label_invalid_escape_sequence:
951 {
952 int length = src - src_base;
953
954 bcopy (src_base, dst, length);
955 dst += length;
956 }
957 }
958 continue;
959
960 label_end_of_loop:
961 coding->carryover_size = src - src_base;
962 bcopy (src_base, coding->carryover, coding->carryover_size);
963 src = src_base;
964 break;
965 }
966
967 /* If this is the last block of the text to be decoded, we had
968 better just flush out all remaining codes in the text although
969 they are not valid characters. */
970 if (coding->last_block)
971 {
972 bcopy (src, dst, src_end - src);
973 dst += (src_end - src);
974 src = src_end;
975 }
976 *consumed = src - source;
977 return dst - destination;
978}
979
980/* ISO2022 encoding staffs. */
981
982/*
983 It is not enough to say just "ISO2022" on encoding, but we have to
984 specify more details. In Emacs, each coding-system of ISO2022
985 variant has the following specifications:
986 1. Initial designation to G0 thru G3.
987 2. Allows short-form designation?
988 3. ASCII should be designated to G0 before control characters?
989 4. ASCII should be designated to G0 at end of line?
990 5. 7-bit environment or 8-bit environment?
991 6. Use locking-shift?
992 7. Use Single-shift?
993 And the following two are only for Japanese:
994 8. Use ASCII in place of JIS0201-1976-Roman?
995 9. Use JISX0208-1983 in place of JISX0208-1978?
996 These specifications are encoded in `coding->flags' as flag bits
997 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
998 detail.
999*/
1000
1001/* Produce codes (escape sequence) for designating CHARSET to graphic
1002 register REG. If <final-char> of CHARSET is '@', 'A', or 'B' and
1003 the coding system CODING allows, produce designation sequence of
1004 short-form. */
1005
1006#define ENCODE_DESIGNATION(charset, reg, coding) \
1007 do { \
1008 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1009 char *intermediate_char_94 = "()*+"; \
1010 char *intermediate_char_96 = ",-./"; \
1011 Lisp_Object temp \
1012 = Fassq (make_number (charset), Vcharset_revision_alist); \
1013 if (! NILP (temp)) \
1014 { \
1015 *dst++ = ISO_CODE_ESC; \
1016 *dst++ = '&'; \
1017 *dst++ = XINT (XCONS (temp)->cdr) + '@'; \
1018 } \
1019 *dst++ = ISO_CODE_ESC; \
1020 if (CHARSET_DIMENSION (charset) == 1) \
1021 { \
1022 if (CHARSET_CHARS (charset) == 94) \
1023 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1024 else \
1025 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1026 } \
1027 else \
1028 { \
1029 *dst++ = '$'; \
1030 if (CHARSET_CHARS (charset) == 94) \
1031 { \
1032 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
1033 || reg != 0 \
1034 || final_char < '@' || final_char > 'B') \
1035 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1036 } \
1037 else \
1038 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1039 } \
1040 *dst++ = final_char; \
1041 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1042 } while (0)
1043
1044/* The following two macros produce codes (control character or escape
1045 sequence) for ISO2022 single-shift functions (single-shift-2 and
1046 single-shift-3). */
1047
1048#define ENCODE_SINGLE_SHIFT_2 \
1049 do { \
1050 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1051 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1052 else \
1053 *dst++ = ISO_CODE_SS2; \
1054 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1055 } while (0)
1056
1057#define ENCODE_SINGLE_SHIFT_3 \
1058 do { \
1059 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1060 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1061 else \
1062 *dst++ = ISO_CODE_SS3; \
1063 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1064 } while (0)
1065
1066/* The following four macros produce codes (control character or
1067 escape sequence) for ISO2022 locking-shift functions (shift-in,
1068 shift-out, locking-shift-2, and locking-shift-3). */
1069
1070#define ENCODE_SHIFT_IN \
1071 do { \
1072 *dst++ = ISO_CODE_SI; \
1073 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1074 } while (0)
1075
1076#define ENCODE_SHIFT_OUT \
1077 do { \
1078 *dst++ = ISO_CODE_SO; \
1079 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1080 } while (0)
1081
1082#define ENCODE_LOCKING_SHIFT_2 \
1083 do { \
1084 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
1085 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1086 } while (0)
1087
1088#define ENCODE_LOCKING_SHIFT_3 \
1089 do { \
1090 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
1091 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1092 } while (0)
1093
1094/* Produce codes for a DIMENSION1 character of which character set is
1095 CHARSET and position-code is C1. Designation and invocation
1096 sequences are also produced in advance if necessary. */
1097
1098
1099#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
1100 do { \
1101 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1102 { \
1103 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1104 *dst++ = c1 & 0x7F; \
1105 else \
1106 *dst++ = c1 | 0x80; \
1107 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1108 break; \
1109 } \
1110 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1111 { \
1112 *dst++ = c1 & 0x7F; \
1113 break; \
1114 } \
1115 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1116 { \
1117 *dst++ = c1 | 0x80; \
1118 break; \
1119 } \
1120 else \
1121 /* Since CHARSET is not yet invoked to any graphic planes, we \
1122 must invoke it, or, at first, designate it to some graphic \
1123 register. Then repeat the loop to actually produce the \
1124 character. */ \
1125 dst = encode_invocation_designation (charset, coding, dst); \
1126 } while (1)
1127
1128/* Produce codes for a DIMENSION2 character of which character set is
1129 CHARSET and position-codes are C1 and C2. Designation and
1130 invocation codes are also produced in advance if necessary. */
1131
1132#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
1133 do { \
1134 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1135 { \
1136 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1137 *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
1138 else \
1139 *dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
1140 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1141 break; \
1142 } \
1143 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1144 { \
1145 *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
1146 break; \
1147 } \
1148 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1149 { \
1150 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1151 break; \
1152 } \
1153 else \
1154 /* Since CHARSET is not yet invoked to any graphic planes, we \
1155 must invoke it, or, at first, designate it to some graphic \
1156 register. Then repeat the loop to actually produce the \
1157 character. */ \
1158 dst = encode_invocation_designation (charset, coding, dst); \
1159 } while (1)
1160
1161/* Produce designation and invocation codes at a place pointed by DST
1162 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1163 Return new DST. */
1164
1165unsigned char *
1166encode_invocation_designation (charset, coding, dst)
1167 int charset;
1168 struct coding_system *coding;
1169 unsigned char *dst;
1170{
1171 int reg; /* graphic register number */
1172
1173 /* At first, check designations. */
1174 for (reg = 0; reg < 4; reg++)
1175 if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1176 break;
1177
1178 if (reg >= 4)
1179 {
1180 /* CHARSET is not yet designated to any graphic registers. */
1181 /* At first check the requested designation. */
1182 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1183 if (reg < 0)
1184 /* Since CHARSET requests no special designation, designate to
1185 graphic register 0. */
1186 reg = 0;
1187
1188 ENCODE_DESIGNATION (charset, reg, coding);
1189 }
1190
1191 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1192 && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1193 {
1194 /* Since the graphic register REG is not invoked to any graphic
1195 planes, invoke it to graphic plane 0. */
1196 switch (reg)
1197 {
1198 case 0: /* graphic register 0 */
1199 ENCODE_SHIFT_IN;
1200 break;
1201
1202 case 1: /* graphic register 1 */
1203 ENCODE_SHIFT_OUT;
1204 break;
1205
1206 case 2: /* graphic register 2 */
1207 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208 ENCODE_SINGLE_SHIFT_2;
1209 else
1210 ENCODE_LOCKING_SHIFT_2;
1211 break;
1212
1213 case 3: /* graphic register 3 */
1214 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1215 ENCODE_SINGLE_SHIFT_3;
1216 else
1217 ENCODE_LOCKING_SHIFT_3;
1218 break;
1219 }
1220 }
1221 return dst;
1222}
1223
1224/* The following two macros produce codes for indicating composition. */
1225#define ENCODE_COMPOSITION_NO_RULE_START *dst++ = ISO_CODE_ESC, *dst++ = '0'
1226#define ENCODE_COMPOSITION_WITH_RULE_START *dst++ = ISO_CODE_ESC, *dst++ = '2'
1227#define ENCODE_COMPOSITION_END *dst++ = ISO_CODE_ESC, *dst++ = '1'
1228
1229/* The following three macros produce codes for indicating direction
1230 of text. */
1231#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
1232 do { \
1233 if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \
1234 *dst++ = ISO_CODE_ESC, *dst++ = '['; \
1235 else \
1236 *dst++ = ISO_CODE_CSI; \
1237 } while (0)
1238
1239#define ENCODE_DIRECTION_R2L \
1240 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']'
1241
1242#define ENCODE_DIRECTION_L2R \
1243 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']'
1244
1245/* Produce codes for designation and invocation to reset the graphic
1246 planes and registers to initial state. */
1247#define ENCODE_RESET_PLANE_AND_REGISTER(eol) \
1248 do { \
1249 int reg; \
1250 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
1251 ENCODE_SHIFT_IN; \
1252 for (reg = 0; reg < 4; reg++) \
1253 { \
1254 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0) \
1255 { \
1256 if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1; \
1257 } \
1258 else if (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
1259 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)) \
1260 ENCODE_DESIGNATION \
1261 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1262 } \
1263 } while (0)
1264
1265/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1266
1267int
1268encode_coding_iso2022 (coding, source, destination,
1269 src_bytes, dst_bytes, consumed)
1270 struct coding_system *coding;
1271 unsigned char *source, *destination;
1272 int src_bytes, dst_bytes;
1273 int *consumed;
1274{
1275 unsigned char *src = source;
1276 unsigned char *src_end = source + src_bytes;
1277 unsigned char *dst = destination;
1278 unsigned char *dst_end = destination + dst_bytes;
1279 /* Since the maximum bytes produced by each loop is 6, we subtract 5
1280 from DST_END to assure overflow checking is necessary only at the
1281 head of loop. */
1282 unsigned char *adjusted_dst_end = dst_end - 5;
1283
1284 while (src < src_end && dst < adjusted_dst_end)
1285 {
1286 /* SRC_BASE remembers the start position in source in each loop.
1287 The loop will be exited when there's not enough source text
1288 to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1289 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
1290 reset to SRC_BASE before exiting. */
1291 unsigned char *src_base = src;
1292 unsigned char c1 = *src++, c2, c3, c4;
1293 int charset;
1294
1295 /* If we are seeing a component of a composite character, we are
1296 seeing a leading-code specially encoded for composition, or a
1297 composition rule if composing with rule. We must set C1
1298 to a normal leading-code or an ASCII code. If we are not at
1299 a composed character, we must reset the composition state. */
1300 if (COMPOSING_P (coding->composing))
1301 {
1302 if (c1 < 0xA0)
1303 {
1304 /* We are not in a composite character any longer. */
1305 coding->composing = COMPOSING_NO;
1306 ENCODE_COMPOSITION_END;
1307 }
1308 else
1309 {
1310 if (coding->composing == COMPOSING_WITH_RULE_RULE)
1311 {
1312 *dst++ = c1 & 0x7F;
1313 coding->composing = COMPOSING_WITH_RULE_HEAD;
1314 continue;
1315 }
1316 else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1317 coding->composing = COMPOSING_WITH_RULE_RULE;
1318 if (c1 == 0xA0)
1319 {
1320 /* This is an ASCII component. */
1321 ONE_MORE_BYTE (c1);
1322 c1 &= 0x7F;
1323 }
1324 else
1325 /* This is a leading-code of non ASCII component. */
1326 c1 -= 0x20;
1327 }
1328 }
1329
1330 /* Now encode one character. C1 is a control character, an
1331 ASCII character, or a leading-code of multi-byte character. */
1332 switch (emacs_code_class[c1])
1333 {
1334 case EMACS_ascii_code:
1335 ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
1336 break;
1337
1338 case EMACS_control_code:
1339 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1340 ENCODE_RESET_PLANE_AND_REGISTER (0);
1341 *dst++ = c1;
1342 break;
1343
1344 case EMACS_carriage_return_code:
1345 if (!coding->selective)
1346 {
1347 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1348 ENCODE_RESET_PLANE_AND_REGISTER (0);
1349 *dst++ = c1;
1350 break;
1351 }
1352 /* fall down to treat '\r' as '\n' ... */
1353
1354 case EMACS_linefeed_code:
1355 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1356 ENCODE_RESET_PLANE_AND_REGISTER (1);
1357 if (coding->eol_type == CODING_EOL_LF
1358 || coding->eol_type == CODING_EOL_AUTOMATIC)
1359 *dst++ = ISO_CODE_LF;
1360 else if (coding->eol_type == CODING_EOL_CRLF)
1361 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1362 else
1363 *dst++ = ISO_CODE_CR;
1364 break;
1365
1366 case EMACS_leading_code_2:
1367 ONE_MORE_BYTE (c2);
1368 ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
1369 break;
1370
1371 case EMACS_leading_code_3:
1372 TWO_MORE_BYTES (c2, c3);
1373 if (c1 < LEADING_CODE_PRIVATE_11)
1374 ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
1375 else
1376 ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
1377 break;
1378
1379 case EMACS_leading_code_4:
1380 THREE_MORE_BYTES (c2, c3, c4);
1381 ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
1382 break;
1383
1384 case EMACS_leading_code_composition:
1385 ONE_MORE_BYTE (c1);
1386 if (c1 == 0xFF)
1387 {
1388 coding->composing = COMPOSING_WITH_RULE_HEAD;
1389 ENCODE_COMPOSITION_WITH_RULE_START;
1390 }
1391 else
1392 {
1393 /* Rewind one byte because it is a character code of
1394 composition elements. */
1395 src--;
1396 coding->composing = COMPOSING_NO_RULE_HEAD;
1397 ENCODE_COMPOSITION_NO_RULE_START;
1398 }
1399 break;
1400
1401 case EMACS_invalid_code:
1402 *dst++ = c1;
1403 break;
1404 }
1405 continue;
1406 label_end_of_loop:
1407 coding->carryover_size = src - src_base;
1408 bcopy (src_base, coding->carryover, coding->carryover_size);
1409 src = src_base;
1410 break;
1411 }
1412
1413 /* If this is the last block of the text to be encoded, we must
1414 reset the state of graphic planes and registers to initial one.
1415 In addition, we had better just flush out all remaining codes in
1416 the text although they are not valid characters. */
1417 if (coding->last_block)
1418 {
1419 ENCODE_RESET_PLANE_AND_REGISTER (1);
1420 bcopy(src, dst, src_end - src);
1421 dst += (src_end - src);
1422 src = src_end;
1423 }
1424 *consumed = src - source;
1425 return dst - destination;
1426}
1427
1428
1429/*** 4. SJIS and BIG5 handlers ***/
1430
1431/* Although SJIS and BIG5 are not ISO's coding system, They are used
1432 quite widely. So, for the moment, Emacs supports them in the bare
1433 C code. But, in the future, they may be supported only by CCL. */
1434
1435/* SJIS is a coding system encoding three character sets: ASCII, right
1436 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
1437 as is. A character of charset katakana-jisx0201 is encoded by
1438 "position-code + 0x80". A character of charset japanese-jisx0208
1439 is encoded in 2-byte but two position-codes are divided and shifted
1440 so that it fit in the range below.
1441
1442 --- CODE RANGE of SJIS ---
1443 (character set) (range)
1444 ASCII 0x00 .. 0x7F
1445 KATAKANA-JISX0201 0xA0 .. 0xDF
1446 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF
1447 (2nd byte) 0x40 .. 0xFF
1448 -------------------------------
1449
1450*/
1451
1452/* BIG5 is a coding system encoding two character sets: ASCII and
1453 Big5. An ASCII character is encoded as is. Big5 is a two-byte
1454 character set and is encoded in two-byte.
1455
1456 --- CODE RANGE of BIG5 ---
1457 (character set) (range)
1458 ASCII 0x00 .. 0x7F
1459 Big5 (1st byte) 0xA1 .. 0xFE
1460 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
1461 --------------------------
1462
1463 Since the number of characters in Big5 is larger than maximum
1464 characters in Emacs' charset (96x96), it can't be handled as one
1465 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
1466 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
1467 contains frequently used characters and the latter contains less
1468 frequently used characters. */
1469
1470/* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
1471 are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
1472 C1 and C2 are the 1st and 2nd position-codes of of Emacs' internal
1473 format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
1474
1475/* Number of Big5 characters which have the same code in 1st byte. */
1476#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
1477
1478#define DECODE_BIG5(b1, b2, charset, c1, c2) \
1479 do { \
1480 unsigned int temp \
1481 = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62); \
1482 if (b1 < 0xC9) \
1483 charset = charset_big5_1; \
1484 else \
1485 { \
1486 charset = charset_big5_2; \
1487 temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; \
1488 } \
1489 c1 = temp / (0xFF - 0xA1) + 0x21; \
1490 c2 = temp % (0xFF - 0xA1) + 0x21; \
1491 } while (0)
1492
1493#define ENCODE_BIG5(charset, c1, c2, b1, b2) \
1494 do { \
1495 unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21); \
1496 if (charset == charset_big5_2) \
1497 temp += BIG5_SAME_ROW * (0xC9 - 0xA1); \
1498 b1 = temp / BIG5_SAME_ROW + 0xA1; \
1499 b2 = temp % BIG5_SAME_ROW; \
1500 b2 += b2 < 0x3F ? 0x40 : 0x62; \
1501 } while (0)
1502
1503/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1504 Check if a text is encoded in SJIS. If it is, return
1505 CODING_CATEGORY_MASK_SJIS, else return 0. */
1506
1507int
1508detect_coding_sjis (src, src_end)
1509 unsigned char *src, *src_end;
1510{
1511 unsigned char c;
1512
1513 while (src < src_end)
1514 {
1515 c = *src++;
1516 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
1517 return 0;
1518 if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
1519 {
1520 if (src < src_end && *src++ < 0x40)
1521 return 0;
1522 }
1523 }
1524 return CODING_CATEGORY_MASK_SJIS;
1525}
1526
1527/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1528 Check if a text is encoded in BIG5. If it is, return
1529 CODING_CATEGORY_MASK_BIG5, else return 0. */
1530
1531int
1532detect_coding_big5 (src, src_end)
1533 unsigned char *src, *src_end;
1534{
1535 unsigned char c;
1536
1537 while (src < src_end)
1538 {
1539 c = *src++;
1540 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
1541 return 0;
1542 if (c >= 0xA1)
1543 {
1544 if (src >= src_end)
1545 break;
1546 c = *src++;
1547 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
1548 return 0;
1549 }
1550 }
1551 return CODING_CATEGORY_MASK_BIG5;
1552}
1553
1554/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
1555 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
1556
1557int
1558decode_coding_sjis_big5 (coding, source, destination,
1559 src_bytes, dst_bytes, consumed, sjis_p)
1560 struct coding_system *coding;
1561 unsigned char *source, *destination;
1562 int src_bytes, dst_bytes;
1563 int *consumed;
1564 int sjis_p;
1565{
1566 unsigned char *src = source;
1567 unsigned char *src_end = source + src_bytes;
1568 unsigned char *dst = destination;
1569 unsigned char *dst_end = destination + dst_bytes;
1570 /* Since the maximum bytes produced by each loop is 4, we subtract 3
1571 from DST_END to assure overflow checking is necessary only at the
1572 head of loop. */
1573 unsigned char *adjusted_dst_end = dst_end - 3;
1574
1575 while (src < src_end && dst < adjusted_dst_end)
1576 {
1577 /* SRC_BASE remembers the start position in source in each loop.
1578 The loop will be exited when there's not enough source text
1579 to analyze two-byte character (within macro ONE_MORE_BYTE).
1580 In that case, SRC is reset to SRC_BASE before exiting. */
1581 unsigned char *src_base = src;
1582 unsigned char c1 = *src++, c2, c3, c4;
1583
1584 if (c1 == '\r')
1585 {
1586 if (coding->eol_type == CODING_EOL_CRLF)
1587 {
1588 ONE_MORE_BYTE (c2);
1589 if (c2 == '\n')
1590 *dst++ = c2;
1591 else
1592 /* To process C2 again, SRC is subtracted by 1. */
1593 *dst++ = c1, src--;
1594 }
1595 else
1596 *dst++ = c1;
1597 }
1598 else if (c1 < 0x80)
1599 *dst++ = c1;
1600 else if (c1 < 0xA0 || c1 >= 0xE0)
1601 {
1602 /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
1603 if (sjis_p)
1604 {
1605 ONE_MORE_BYTE (c2);
1606 DECODE_SJIS (c1, c2, c3, c4);
1607 DECODE_CHARACTER_DIMENSION2 (charset_jisx0208, c3, c4);
1608 }
1609 else if (c1 >= 0xE0 && c1 < 0xFF)
1610 {
1611 int charset;
1612
1613 ONE_MORE_BYTE (c2);
1614 DECODE_BIG5 (c1, c2, charset, c3, c4);
1615 DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
1616 }
1617 else /* Invalid code */
1618 *dst++ = c1;
1619 }
1620 else
1621 {
1622 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
1623 if (sjis_p)
1624 DECODE_CHARACTER_DIMENSION1 (charset_katakana_jisx0201, c1);
1625 else
1626 {
1627 int charset;
1628
1629 ONE_MORE_BYTE (c2);
1630 DECODE_BIG5 (c1, c2, charset, c3, c4);
1631 DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
1632 }
1633 }
1634 continue;
1635
1636 label_end_of_loop:
1637 coding->carryover_size = src - src_base;
1638 bcopy (src_base, coding->carryover, coding->carryover_size);
1639 src = src_base;
1640 break;
1641 }
1642
1643 *consumed = src - source;
1644 return dst - destination;
1645}
1646
1647/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
1648 This function can encode `charset_ascii', `charset_katakana_jisx0201',
1649 `charset_jisx0208', `charset_big5_1', and `charset_big5-2'. We are
1650 sure that all these charsets are registered as official charset
1651 (i.e. do not have extended leading-codes). Characters of other
1652 charsets are produced without any encoding. If SJIS_P is 1, encode
1653 SJIS text, else encode BIG5 text. */
1654
1655int
1656encode_coding_sjis_big5 (coding, source, destination,
1657 src_bytes, dst_bytes, consumed, sjis_p)
1658 struct coding_system *coding;
1659 unsigned char *source, *destination;
1660 int src_bytes, dst_bytes;
1661 int *consumed;
1662 int sjis_p;
1663{
1664 unsigned char *src = source;
1665 unsigned char *src_end = source + src_bytes;
1666 unsigned char *dst = destination;
1667 unsigned char *dst_end = destination + dst_bytes;
1668 /* Since the maximum bytes produced by each loop is 2, we subtract 1
1669 from DST_END to assure overflow checking is necessary only at the
1670 head of loop. */
1671 unsigned char *adjusted_dst_end = dst_end - 1;
1672
1673 while (src < src_end && dst < adjusted_dst_end)
1674 {
1675 /* SRC_BASE remembers the start position in source in each loop.
1676 The loop will be exited when there's not enough source text
1677 to analyze multi-byte codes (within macros ONE_MORE_BYTE and
1678 TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE
1679 before exiting. */
1680 unsigned char *src_base = src;
1681 unsigned char c1 = *src++, c2, c3, c4;
1682
1683 if (coding->composing)
1684 {
1685 if (c1 == 0xA0)
1686 {
1687 ONE_MORE_BYTE (c1);
1688 c1 &= 0x7F;
1689 }
1690 else if (c1 >= 0xA0)
1691 c1 -= 0x20;
1692 else
1693 coding->composing = 0;
1694 }
1695
1696 switch (emacs_code_class[c1])
1697 {
1698 case EMACS_ascii_code:
1699 case EMACS_control_code:
1700 *dst++ = c1;
1701 break;
1702
1703 case EMACS_carriage_return_code:
1704 if (!coding->selective)
1705 {
1706 *dst++ = c1;
1707 break;
1708 }
1709 /* fall down to treat '\r' as '\n' ... */
1710
1711 case EMACS_linefeed_code:
1712 if (coding->eol_type == CODING_EOL_LF
1713 || coding->eol_type == CODING_EOL_AUTOMATIC)
1714 *dst++ = '\n';
1715 else if (coding->eol_type == CODING_EOL_CRLF)
1716 *dst++ = '\r', *dst++ = '\n';
1717 else
1718 *dst++ = '\r';
1719 break;
1720
1721 case EMACS_leading_code_2:
1722 ONE_MORE_BYTE (c2);
1723 if (sjis_p && c1 == charset_katakana_jisx0201)
1724 *dst++ = c2;
1725 else
1726 *dst++ = c1, *dst++ = c2;
1727 break;
1728
1729 case EMACS_leading_code_3:
1730 TWO_MORE_BYTES (c2, c3);
1731 c2 &= 0x7F, c3 &= 0x7F;
1732 if (sjis_p && c1 == charset_jisx0208)
1733 {
1734 unsigned char s1, s2;
1735
1736 ENCODE_SJIS (c2, c3, s1, s2);
1737 *dst++ = s1, *dst++ = s2;
1738 }
1739 else if (!sjis_p && (c1 == charset_big5_1 || c1 == charset_big5_2))
1740 {
1741 unsigned char b1, b2;
1742
1743 ENCODE_BIG5 (c1, c2, c3, b1, b2);
1744 *dst++ = b1, *dst++ = b2;
1745 }
1746 else
1747 *dst++ = c1, *dst++ = c2, *dst++ = c3;
1748 break;
1749
1750 case EMACS_leading_code_4:
1751 THREE_MORE_BYTES (c2, c3, c4);
1752 *dst++ = c1, *dst++ = c2, *dst++ = c3, *dst++ = c4;
1753 break;
1754
1755 case EMACS_leading_code_composition:
1756 coding->composing = 1;
1757 break;
1758
1759 default: /* i.e. case EMACS_invalid_code: */
1760 *dst++ = c1;
1761 }
1762 continue;
1763
1764 label_end_of_loop:
1765 coding->carryover_size = src - src_base;
1766 bcopy (src_base, coding->carryover, coding->carryover_size);
1767 src = src_base;
1768 break;
1769 }
1770
1771 *consumed = src - source;
1772 return dst - destination;
1773}
1774
1775
1776/*** 5. End-of-line handlers ***/
1777
1778/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
1779 This function is called only when `coding->eol_type' is
1780 CODING_EOL_CRLF or CODING_EOL_CR. */
1781
1782decode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
1783 struct coding_system *coding;
1784 unsigned char *source, *destination;
1785 int src_bytes, dst_bytes;
1786 int *consumed;
1787{
1788 unsigned char *src = source;
1789 unsigned char *src_end = source + src_bytes;
1790 unsigned char *dst = destination;
1791 unsigned char *dst_end = destination + dst_bytes;
1792 int produced;
1793
1794 switch (coding->eol_type)
1795 {
1796 case CODING_EOL_CRLF:
1797 {
1798 /* Since the maximum bytes produced by each loop is 2, we
1799 subtract 1 from DST_END to assure overflow checking is
1800 necessary only at the head of loop. */
1801 unsigned char *adjusted_dst_end = dst_end - 1;
1802
1803 while (src < src_end && dst < adjusted_dst_end)
1804 {
1805 unsigned char *src_base = src;
1806 unsigned char c = *src++;
1807 if (c == '\r')
1808 {
1809 ONE_MORE_BYTE (c);
1810 if (c != '\n')
1811 *dst++ = '\r';
1812
1813 }
1814 else
1815 *dst++ = c;
1816 continue;
1817
1818 label_end_of_loop:
1819 coding->carryover_size = src - src_base;
1820 bcopy (src_base, coding->carryover, coding->carryover_size);
1821 src = src_base;
1822 break;
1823 }
1824 *consumed = src - source;
1825 produced = dst - destination;
1826 break;
1827 }
1828
1829 case CODING_EOL_CR:
1830 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1831 bcopy (source, destination, produced);
1832 dst_end = destination + produced;
1833 while (dst < dst_end)
1834 if (*dst++ == '\r') dst[-1] = '\n';
1835 *consumed = produced;
1836 break;
1837
1838 default: /* i.e. case: CODING_EOL_LF */
1839 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1840 bcopy (source, destination, produced);
1841 *consumed = produced;
1842 break;
1843 }
1844
1845 return produced;
1846}
1847
1848/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
1849 format of end-of-line according to `coding->eol_type'. If
1850 `coding->selective' is 1, code '\r' in source text also means
1851 end-of-line. */
1852
1853encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
1854 struct coding_system *coding;
1855 unsigned char *source, *destination;
1856 int src_bytes, dst_bytes;
1857 int *consumed;
1858{
1859 unsigned char *src = source;
1860 unsigned char *dst = destination;
1861 int produced;
1862
1863 if (src_bytes <= 0)
1864 return 0;
1865
1866 switch (coding->eol_type)
1867 {
1868 case CODING_EOL_LF:
1869 case CODING_EOL_AUTOMATIC:
1870 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1871 bcopy (source, destination, produced);
1872 if (coding->selective)
1873 {
1874 int i = produced;
1875 while (i--)
1876 if (*dst++ == '\r') dst[-1] = '\n';
1877 }
1878 *consumed = produced;
1879
1880 case CODING_EOL_CRLF:
1881 {
1882 unsigned char c;
1883 unsigned char *src_end = source + src_bytes;
1884 unsigned char *dst_end = destination + dst_bytes;
1885 /* Since the maximum bytes produced by each loop is 2, we
1886 subtract 1 from DST_END to assure overflow checking is
1887 necessary only at the head of loop. */
1888 unsigned char *adjusted_dst_end = dst_end - 1;
1889
1890 while (src < src_end && dst < adjusted_dst_end)
1891 {
1892 c = *src++;
1893 if (c == '\n' || (c == '\r' && coding->selective))
1894 *dst++ = '\r', *dst++ = '\n';
1895 else
1896 *dst++ = c;
1897 }
1898 produced = dst - destination;
1899 *consumed = src - source;
1900 break;
1901 }
1902
1903 default: /* i.e. case CODING_EOL_CR: */
1904 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1905 bcopy (source, destination, produced);
1906 {
1907 int i = produced;
1908 while (i--)
1909 if (*dst++ == '\n') dst[-1] = '\r';
1910 }
1911 *consumed = produced;
1912 }
1913
1914 return produced;
1915}
1916
1917
1918/*** 6. C library functions ***/
1919
1920/* In Emacs Lisp, coding system is represented by a Lisp symbol which
1921 has a property `coding-system'. The value of this property is a
1922 vector of length 5 (called as coding-vector). Among elements of
1923 this vector, the first (element[0]) and the fifth (element[4])
1924 carry important information for decoding/encoding. Before
1925 decoding/encoding, this information should be set in fields of a
1926 structure of type `coding_system'.
1927
1928 A value of property `coding-system' can be a symbol of another
1929 subsidiary coding-system. In that case, Emacs gets coding-vector
1930 from that symbol.
1931
1932 `element[0]' contains information to be set in `coding->type'. The
1933 value and its meaning is as follows:
1934
1935 0 -- coding_system_internal
1936 1 -- coding_system_sjis
1937 2 -- coding_system_iso2022
1938 3 -- coding_system_big5
1939 4 -- coding_system_ccl
1940 nil -- coding_system_no_conversion
1941 t -- coding_system_automatic
1942
1943 `element[4]' contains information to be set in `coding->flags' and
1944 `coding->spec'. The meaning varies by `coding->type'.
1945
1946 If `coding->type' is `coding_type_iso2022', element[4] is a vector
1947 of length 32 (of which the first 13 sub-elements are used now).
1948 Meanings of these sub-elements are:
1949
1950 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
1951 If the value is an integer of valid charset, the charset is
1952 assumed to be designated to graphic register N initially.
1953
1954 If the value is minus, it is a minus value of charset which
1955 reserves graphic register N, which means that the charset is
1956 not designated initially but should be designated to graphic
1957 register N just before encoding a character in that charset.
1958
1959 If the value is nil, graphic register N is never used on
1960 encoding.
1961
1962 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
1963 Each value takes t or nil. See the section ISO2022 of
1964 `coding.h' for more information.
1965
1966 If `coding->type' is `coding_type_big5', element[4] is t to denote
1967 BIG5-ETen or nil to denote BIG5-HKU.
1968
1969 If `coding->type' takes the other value, element[4] is ignored.
1970
1971 Emacs Lisp's coding system also carries information about format of
1972 end-of-line in a value of property `eol-type'. If the value is
1973 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
1974 means CODING_EOL_CR. If it is not integer, it should be a vector
1975 of subsidiary coding systems of which property `eol-type' has one
1976 of above values.
1977
1978*/
1979
1980/* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
1981 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
1982 is setup so that no conversion is necessary and return -1, else
1983 return 0. */
1984
1985int
1986setup_coding_system (coding_system_symbol, coding)
1987 Lisp_Object coding_system_symbol;
1988 struct coding_system *coding;
1989{
1990 Lisp_Object coding_system_vector = Qnil;
1991 Lisp_Object type, eol_type;
1992
1993 /* At first, set several fields default values. */
1994 coding->require_flushing = 0;
1995 coding->last_block = 0;
1996 coding->selective = 0;
1997 coding->composing = 0;
1998 coding->direction = 0;
1999 coding->carryover_size = 0;
2000 coding->symbol = Qnil;
2001 coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2002
2003 /* Get value of property `coding-system'. If it is a Lisp symbol
2004 pointing another coding system, fetch its property until we get a
2005 vector. */
2006 while (!NILP (coding_system_symbol))
2007 {
2008 coding->symbol = coding_system_symbol;
2009 if (NILP (coding->post_read_conversion))
2010 coding->post_read_conversion = Fget (coding_system_symbol,
2011 Qpost_read_conversion);
2012 if (NILP (coding->pre_write_conversion))
2013 coding->pre_write_conversion = Fget (coding_system_symbol,
2014 Qpre_write_conversion);
2015
2016 coding_system_vector = Fget (coding_system_symbol, Qcoding_system);
2017 if (VECTORP (coding_system_vector))
2018 break;
2019 coding_system_symbol = coding_system_vector;
2020 }
2021 Vlast_coding_system_used = coding->symbol;
2022
2023 if (!VECTORP (coding_system_vector)
2024 || XVECTOR (coding_system_vector)->size != 5)
2025 goto label_invalid_coding_system;
2026
2027 /* Get value of property `eol-type' by searching from the root
2028 coding-system. */
2029 coding_system_symbol = coding->symbol;
2030 eol_type = Qnil;
2031 while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol))
2032 {
2033 eol_type = Fget (coding_system_symbol, Qeol_type);
2034 if (!NILP (eol_type))
2035 break;
2036 coding_system_symbol = Fget (coding_system_symbol, Qcoding_system);
2037 }
2038
2039 if (VECTORP (eol_type))
2040 coding->eol_type = CODING_EOL_AUTOMATIC;
2041 else if (XFASTINT (eol_type) == 1)
2042 coding->eol_type = CODING_EOL_CRLF;
2043 else if (XFASTINT (eol_type) == 2)
2044 coding->eol_type = CODING_EOL_CR;
2045 else
2046 coding->eol_type = CODING_EOL_LF;
2047
2048 type = XVECTOR (coding_system_vector)->contents[0];
2049 switch (XFASTINT (type))
2050 {
2051 case 0:
2052 coding->type = coding_type_internal;
2053 break;
2054
2055 case 1:
2056 coding->type = coding_type_sjis;
2057 break;
2058
2059 case 2:
2060 coding->type = coding_type_iso2022;
2061 {
2062 Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
2063 Lisp_Object *flags;
2064 int i, charset, default_reg_bits = 0;
2065
2066 if (!VECTORP (val) || XVECTOR (val)->size != 32)
2067 goto label_invalid_coding_system;
2068
2069 flags = XVECTOR (val)->contents;
2070 coding->flags
2071 = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2072 | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2073 | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2074 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2075 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2076 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2077 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2078 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2079 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION));
2080
2081 /* Invoke graphic register 0 to plane 0. */
2082 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2083 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
2084 CODING_SPEC_ISO_INVOCATION (coding, 1)
2085 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2086 /* Not single shifting at first. */
2087 CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
2088
2089 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2090 FLAGS[REG] can be one of below:
2091 integer CHARSET: CHARSET occupies register I,
2092 t: designate nothing to REG initially, but can be used
2093 by any charsets,
2094 list of integer, nil, or t: designate the first
2095 element (if integer) to REG initially, the remaining
2096 elements (if integer) is designated to REG on request,
2097 if an element is t, REG can be used by any charset,
2098 nil: REG is never used. */
2099 for (charset = 0; charset < MAX_CHARSET; charset++)
2100 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1;
2101 for (i = 0; i < 4; i++)
2102 {
2103 if (INTEGERP (flags[i])
2104 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
2105 {
2106 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2107 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2108 }
2109 else if (EQ (flags[i], Qt))
2110 {
2111 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2112 default_reg_bits |= 1 << i;
2113 }
2114 else if (CONSP (flags[i]))
2115 {
2116 Lisp_Object tail = flags[i];
2117
2118 if (INTEGERP (XCONS (tail)->car)
2119 && (charset = XINT (XCONS (tail)->car),
2120 CHARSET_VALID_P (charset)))
2121 {
2122 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2123 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2124 }
2125 else
2126 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2127 tail = XCONS (tail)->cdr;
2128 while (CONSP (tail))
2129 {
2130 if (INTEGERP (XCONS (tail)->car)
2131 && (charset = XINT (XCONS (tail)->car),
2132 CHARSET_VALID_P (charset)))
2133 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2134 = i;
2135 else if (EQ (XCONS (tail)->car, Qt))
2136 default_reg_bits |= 1 << i;
2137 tail = XCONS (tail)->cdr;
2138 }
2139 }
2140 else
2141 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2142
2143 CODING_SPEC_ISO_DESIGNATION (coding, i)
2144 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2145 }
2146
2147 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2148 {
2149 /* REG 1 can be used only by locking shift in 7-bit env. */
2150 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2151 default_reg_bits &= ~2;
2152 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
2153 /* Without any shifting, only REG 0 and 1 can be used. */
2154 default_reg_bits &= 3;
2155 }
2156
2157 for (charset = 0; charset < MAX_CHARSET; charset++)
2158 if (CHARSET_VALID_P (charset)
2159 && CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) < 0)
2160 {
2161 /* We have not yet decided where to designate CHARSET. */
2162 int reg_bits = default_reg_bits;
2163
2164 if (CHARSET_CHARS (charset) == 96)
2165 /* A charset of CHARS96 can't be designated to REG 0. */
2166 reg_bits &= ~1;
2167
2168 if (reg_bits)
2169 /* There exist some default graphic register. */
2170 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2171 = (reg_bits & 1
2172 ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
2173 else
2174 /* We anyway have to designate CHARSET to somewhere. */
2175 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2176 = (CHARSET_CHARS (charset) == 94
2177 ? 0
2178 : ((coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT
2179 || ! coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2180 ? 1
2181 : (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT
2182 ? 2 : 0)));
2183 }
2184 }
2185 coding->require_flushing = 1;
2186 break;
2187
2188 case 3:
2189 coding->type = coding_type_big5;
2190 coding->flags
2191 = (NILP (XVECTOR (coding_system_vector)->contents[4])
2192 ? CODING_FLAG_BIG5_HKU
2193 : CODING_FLAG_BIG5_ETEN);
2194 break;
2195
2196 case 4:
2197 coding->type = coding_type_ccl;
2198 {
2199 Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
2200 if (CONSP (val)
2201 && VECTORP (XCONS (val)->car)
2202 && VECTORP (XCONS (val)->cdr))
2203 {
2204 setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
2205 setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
2206 }
2207 else
2208 goto label_invalid_coding_system;
2209 }
2210 coding->require_flushing = 1;
2211 break;
2212
2213 default:
2214 if (EQ (type, Qt))
2215 coding->type = coding_type_automatic;
2216 else
2217 coding->type = coding_type_no_conversion;
2218 break;
2219 }
2220 return 0;
2221
2222 label_invalid_coding_system:
2223 coding->type = coding_type_no_conversion;
2224 return -1;
2225}
2226
2227/* Emacs has a mechanism to automatically detect a coding system if it
2228 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
2229 it's impossible to distinguish some coding systems accurately
2230 because they use the same range of codes. So, at first, coding
2231 systems are categorized into 7, those are:
2232
2233 o coding-category-internal
2234
2235 The category for a coding system which has the same code range
2236 as Emacs' internal format. Assigned the coding-system (Lisp
2237 symbol) `coding-system-internal' by default.
2238
2239 o coding-category-sjis
2240
2241 The category for a coding system which has the same code range
2242 as SJIS. Assigned the coding-system (Lisp
2243 symbol) `coding-system-sjis' by default.
2244
2245 o coding-category-iso-7
2246
2247 The category for a coding system which has the same code range
2248 as ISO2022 of 7-bit environment. Assigned the coding-system
2249 (Lisp symbol) `coding-system-junet' by default.
2250
2251 o coding-category-iso-8-1
2252
2253 The category for a coding system which has the same code range
2254 as ISO2022 of 8-bit environment and graphic plane 1 used only
2255 for DIMENSION1 charset. Assigned the coding-system (Lisp
2256 symbol) `coding-system-ctext' by default.
2257
2258 o coding-category-iso-8-2
2259
2260 The category for a coding system which has the same code range
2261 as ISO2022 of 8-bit environment and graphic plane 1 used only
2262 for DIMENSION2 charset. Assigned the coding-system (Lisp
2263 symbol) `coding-system-euc-japan' by default.
2264
2265 o coding-category-iso-else
2266
2267 The category for a coding system which has the same code range
2268 as ISO2022 but not belongs to any of the above three
2269 categories. Assigned the coding-system (Lisp symbol)
2270 `coding-system-iso-2022-ss2-7' by default.
2271
2272 o coding-category-big5
2273
2274 The category for a coding system which has the same code range
2275 as BIG5. Assigned the coding-system (Lisp symbol)
2276 `coding-system-big5' by default.
2277
2278 o coding-category-binary
2279
2280 The category for a coding system not categorized in any of the
2281 above. Assigned the coding-system (Lisp symbol)
2282 `coding-system-noconv' by default.
2283
2284 Each of them is a Lisp symbol and the value is an actual
2285 `coding-system's (this is also a Lisp symbol) assigned by a user.
2286 What Emacs does actually is to detect a category of coding system.
2287 Then, it uses a `coding-system' assigned to it. If Emacs can't
2288 decide only one possible category, it selects a category of the
2289 highest priority. Priorities of categories are also specified by a
2290 user in a Lisp variable `coding-category-list'.
2291
2292*/
2293
2294/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
2295 If it detects possible coding systems, return an integer in which
2296 appropriate flag bits are set. Flag bits are defined by macros
2297 CODING_CATEGORY_MASK_XXX in `coding.h'. */
2298
2299int
2300detect_coding_mask (src, src_bytes)
2301 unsigned char *src;
2302 int src_bytes;
2303{
2304 register unsigned char c;
2305 unsigned char *src_end = src + src_bytes;
2306 int mask;
2307
2308 /* At first, skip all ASCII characters and control characters except
2309 for three ISO2022 specific control characters. */
2310 while (src < src_end)
2311 {
2312 c = *src;
2313 if (c >= 0x80
2314 || (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2315 break;
2316 src++;
2317 }
2318
2319 if (src >= src_end)
2320 /* We found nothing other than ASCII. There's nothing to do. */
2321 return CODING_CATEGORY_MASK_ANY;
2322
2323 /* The text seems to be encoded in some multilingual coding system.
2324 Now, try to find in which coding system the text is encoded. */
2325 if (c < 0x80)
2326 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
2327 /* C is an ISO2022 specific control code of C0. */
2328 mask = detect_coding_iso2022 (src, src_end);
2329
2330 else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
2331 /* C is an ISO2022 specific control code of C1,
2332 or the first byte of SJIS's 2-byte character code,
2333 or a leading code of Emacs. */
2334 mask = (detect_coding_iso2022 (src, src_end)
2335 | detect_coding_sjis (src, src_end)
2336 | detect_coding_internal (src, src_end));
2337
2338 else if (c < 0xA0)
2339 /* C is the first byte of SJIS character code,
2340 or a leading-code of Emacs. */
2341 mask = (detect_coding_sjis (src, src_end)
2342 | detect_coding_internal (src, src_end));
2343
2344 else
2345 /* C is a character of ISO2022 in graphic plane right,
2346 or a SJIS's 1-byte character code (i.e. JISX0201),
2347 or the first byte of BIG5's 2-byte code. */
2348 mask = (detect_coding_iso2022 (src, src_end)
2349 | detect_coding_sjis (src, src_end)
2350 | detect_coding_big5 (src, src_end));
2351
2352 return mask;
2353}
2354
2355/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
2356 The information of the detected coding system is set in CODING. */
2357
2358void
2359detect_coding (coding, src, src_bytes)
2360 struct coding_system *coding;
2361 unsigned char *src;
2362 int src_bytes;
2363{
2364 int mask = detect_coding_mask (src, src_bytes);
2365 int idx;
2366
2367 if (mask == CODING_CATEGORY_MASK_ANY)
2368 /* We found nothing other than ASCII. There's nothing to do. */
2369 return;
2370
2371 if (!mask)
2372 /* The source text seems to be encoded in unknown coding system.
2373 Emacs regards the category of such a kind of coding system as
2374 `coding-category-binary'. We assume that a user has assigned
2375 an appropriate coding system for a `coding-category-binary'. */
2376 idx = CODING_CATEGORY_IDX_BINARY;
2377 else
2378 {
2379 /* We found some plausible coding systems. Let's use a coding
2380 system of the highest priority. */
2381 Lisp_Object val = Vcoding_category_list;
2382
2383 if (CONSP (val))
2384 while (!NILP (val))
2385 {
2386 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
2387 if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
2388 break;
2389 val = XCONS (val)->cdr;
2390 }
2391 else
2392 val = Qnil;
2393
2394 if (NILP (val))
2395 {
2396 /* For unknown reason, `Vcoding_category_list' contains none
2397 of found categories. Let's use any of them. */
2398 for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
2399 if (mask & (1 << idx))
2400 break;
2401 }
2402 }
2403 setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding);
2404}
2405
2406/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
2407 is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
2408 CODING_EOL_CR, and CODING_EOL_AUTOMATIC. */
2409
2410int
2411detect_eol_type (src, src_bytes)
2412 unsigned char *src;
2413 int src_bytes;
2414{
2415 unsigned char *src_end = src + src_bytes;
2416 unsigned char c;
2417
2418 while (src < src_end)
2419 {
2420 c = *src++;
2421 if (c == '\n')
2422 return CODING_EOL_LF;
2423 else if (c == '\r')
2424 {
2425 if (src < src_end && *src == '\n')
2426 return CODING_EOL_CRLF;
2427 else
2428 return CODING_EOL_CR;
2429 }
2430 }
2431 return CODING_EOL_AUTOMATIC;
2432}
2433
2434/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
2435 is encoded. If it detects an appropriate format of end-of-line, it
2436 sets the information in *CODING. */
2437
2438void
2439detect_eol (coding, src, src_bytes)
2440 struct coding_system *coding;
2441 unsigned char *src;
2442 int src_bytes;
2443{
2444 Lisp_Object val;
2445 int eol_type = detect_eol_type (src, src_bytes);
2446
2447 if (eol_type == CODING_EOL_AUTOMATIC)
2448 /* We found no end-of-line in the source text. */
2449 return;
2450
2451 val = Fget (coding->symbol, Qeol_type);
2452 if (VECTORP (val) && XVECTOR (val)->size == 3)
2453 setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
2454}
2455
2456/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
2457 decoding, it may detect coding system and format of end-of-line if
2458 those are not yet decided. */
2459
2460int
2461decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
2462 struct coding_system *coding;
2463 unsigned char *source, *destination;
2464 int src_bytes, dst_bytes;
2465 int *consumed;
2466{
2467 int produced;
2468
2469 if (src_bytes <= 0)
2470 {
2471 *consumed = 0;
2472 return 0;
2473 }
2474
2475 if (coding->type == coding_type_automatic)
2476 detect_coding (coding, source, src_bytes);
2477
2478 if (coding->eol_type == CODING_EOL_AUTOMATIC)
2479 detect_eol (coding, source, src_bytes);
2480
2481 coding->carryover_size = 0;
2482 switch (coding->type)
2483 {
2484 case coding_type_no_conversion:
2485 label_no_conversion:
2486 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
2487 bcopy (source, destination, produced);
2488 *consumed = produced;
2489 break;
2490
2491 case coding_type_internal:
2492 case coding_type_automatic:
2493 if (coding->eol_type == CODING_EOL_LF
2494 || coding->eol_type == CODING_EOL_AUTOMATIC)
2495 goto label_no_conversion;
2496 produced = decode_eol (coding, source, destination,
2497 src_bytes, dst_bytes, consumed);
2498 break;
2499
2500 case coding_type_sjis:
2501 produced = decode_coding_sjis_big5 (coding, source, destination,
2502 src_bytes, dst_bytes, consumed,
2503 1);
2504 break;
2505
2506 case coding_type_iso2022:
2507 produced = decode_coding_iso2022 (coding, source, destination,
2508 src_bytes, dst_bytes, consumed);
2509 break;
2510
2511 case coding_type_big5:
2512 produced = decode_coding_sjis_big5 (coding, source, destination,
2513 src_bytes, dst_bytes, consumed,
2514 0);
2515 break;
2516
2517 case coding_type_ccl:
2518 produced = ccl_driver (&coding->spec.ccl.decoder, source, destination,
2519 src_bytes, dst_bytes, consumed);
2520 break;
2521 }
2522
2523 return produced;
2524}
2525
2526/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". */
2527
2528int
2529encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
2530 struct coding_system *coding;
2531 unsigned char *source, *destination;
2532 int src_bytes, dst_bytes;
2533 int *consumed;
2534{
2535 int produced;
2536
2537 coding->carryover_size = 0;
2538 switch (coding->type)
2539 {
2540 case coding_type_no_conversion:
2541 label_no_conversion:
2542 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
2543 if (produced > 0)
2544 {
2545 bcopy (source, destination, produced);
2546 if (coding->selective)
2547 {
2548 unsigned char *p = destination, *pend = destination + produced;
2549 while (p < pend)
2550 if (*p++ = '\015') p[-1] = '\n';
2551 }
2552 }
2553 *consumed = produced;
2554 break;
2555
2556 case coding_type_internal:
2557 case coding_type_automatic:
2558 if (coding->eol_type == CODING_EOL_LF
2559 || coding->eol_type == CODING_EOL_AUTOMATIC)
2560 goto label_no_conversion;
2561 produced = encode_eol (coding, source, destination,
2562 src_bytes, dst_bytes, consumed);
2563 break;
2564
2565 case coding_type_sjis:
2566 produced = encode_coding_sjis_big5 (coding, source, destination,
2567 src_bytes, dst_bytes, consumed,
2568 1);
2569 break;
2570
2571 case coding_type_iso2022:
2572 produced = encode_coding_iso2022 (coding, source, destination,
2573 src_bytes, dst_bytes, consumed);
2574 break;
2575
2576 case coding_type_big5:
2577 produced = encode_coding_sjis_big5 (coding, source, destination,
2578 src_bytes, dst_bytes, consumed,
2579 0);
2580 break;
2581
2582 case coding_type_ccl:
2583 produced = ccl_driver (&coding->spec.ccl.encoder, source, destination,
2584 src_bytes, dst_bytes, consumed);
2585 break;
2586 }
2587
2588 return produced;
2589}
2590
2591#define CONVERSION_BUFFER_EXTRA_ROOM 256
2592
2593/* Return maximum size (bytes) of a buffer enough for decoding
2594 SRC_BYTES of text encoded in CODING. */
2595
2596int
2597decoding_buffer_size (coding, src_bytes)
2598 struct coding_system *coding;
2599 int src_bytes;
2600{
2601 int magnification;
2602
2603 if (coding->type == coding_type_iso2022)
2604 magnification = 3;
2605 else if (coding->type == coding_type_ccl)
2606 magnification = coding->spec.ccl.decoder.buf_magnification;
2607 else
2608 magnification = 2;
2609
2610 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
2611}
2612
2613/* Return maximum size (bytes) of a buffer enough for encoding
2614 SRC_BYTES of text to CODING. */
2615
2616int
2617encoding_buffer_size (coding, src_bytes)
2618 struct coding_system *coding;
2619 int src_bytes;
2620{
2621 int magnification;
2622
2623 if (coding->type == coding_type_ccl)
2624 magnification = coding->spec.ccl.encoder.buf_magnification;
2625 else
2626 magnification = 3;
2627
2628 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
2629}
2630
2631#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
2632#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
2633#endif
2634
2635char *conversion_buffer;
2636int conversion_buffer_size;
2637
2638/* Return a pointer to a SIZE bytes of buffer to be used for encoding
2639 or decoding. Sufficient memory is allocated automatically. If we
2640 run out of memory, return NULL. */
2641
2642char *
2643get_conversion_buffer (size)
2644 int size;
2645{
2646 if (size > conversion_buffer_size)
2647 {
2648 char *buf;
2649 int real_size = conversion_buffer_size * 2;
2650
2651 while (real_size < size) real_size *= 2;
2652 buf = (char *) xmalloc (real_size);
2653 xfree (conversion_buffer);
2654 conversion_buffer = buf;
2655 conversion_buffer_size = real_size;
2656 }
2657 return conversion_buffer;
2658}
2659
2660
2661#ifdef emacs
2662/*** 7. Emacs Lisp library functions ***/
2663
2664DEFUN ("coding-system-vector", Fcoding_system_vector, Scoding_system_vector,
2665 1, 1, 0,
2666 "Return coding-vector of CODING-SYSTEM.\n\
2667If CODING-SYSTEM is not a valid coding-system, return nil.")
2668 (obj)
2669 Lisp_Object obj;
2670{
2671 while (SYMBOLP (obj) && !NILP (obj))
2672 obj = Fget (obj, Qcoding_system);
2673 return ((NILP (obj) || !VECTORP (obj) || XVECTOR (obj)->size != 5)
2674 ? Qnil : obj);
2675}
2676
2677DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
2678 "Return t if OBJECT is nil or a coding-system.\n\
2679See document of make-coding-system for coding-system object.")
2680 (obj)
2681 Lisp_Object obj;
2682{
2683 return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil);
2684}
2685
2686DEFUN ("read-non-nil-coding-system",
2687 Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
2688 "Read a coding-system from the minibuffer, prompting with string PROMPT.")
2689 (prompt)
2690 Lisp_Object prompt;
2691{
2692 return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
2693 Qt, Qnil, Qnil),
2694 Qnil);
2695}
2696
2697DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
2698 "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.")
2699 (prompt)
2700 Lisp_Object prompt;
2701{
2702 return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
2703 Qt, Qnil, Qnil),
2704 Qnil);
2705}
2706
2707DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
2708 1, 1, 0,
2709 "Check validity of CODING-SYSTEM.\n\
2710If valid, return CODING-SYSTEM, else `coding-system-error' is signaled.\n\
2711CODING-SYSTEM is valid if it is a symbol and has \"coding-system\" property.\n\
2712The value of property should be a vector of length 5.")
2713 (coding_system)
2714 Lisp_Object coding_system;
2715{
2716 CHECK_SYMBOL (coding_system, 0);
2717 if (!NILP (Fcoding_system_p (coding_system)))
2718 return coding_system;
2719 while (1)
2720 Fsignal (Qcoding_system_error, coding_system);
2721}
2722
2723DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2724 2, 2, 0,
2725 "Detect coding-system of the text in the region between START and END.\n\
2726Return a list of possible coding-systems ordered by priority.\n\
2727If only ASCII characters are found, it returns `coding-system-automatic'\n\
2728 or its subsidiary coding-system according to a detected end-of-line format.")
2729 (b, e)
2730 Lisp_Object b, e;
2731{
2732 int coding_mask, eol_type;
2733 Lisp_Object val;
2734 int beg, end;
2735
2736 validate_region (&b, &e);
2737 beg = XINT (b), end = XINT (e);
2738 if (beg < GPT && end >= GPT) move_gap (end);
2739
2740 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg);
2741 eol_type = detect_eol_type (POS_ADDR (beg), end - beg);
2742
2743 if (coding_mask == CODING_CATEGORY_MASK_ANY)
2744 {
2745 val = intern ("coding-system-automatic");
2746 if (eol_type != CODING_EOL_AUTOMATIC)
2747 {
2748 Lisp_Object val2 = Fget (val, Qeol_type);
2749 if (VECTORP (val2))
2750 val = XVECTOR (val2)->contents[eol_type];
2751 }
2752 }
2753 else
2754 {
2755 Lisp_Object val2;
2756
2757 /* At first, gather possible coding-systems in VAL in a reverse
2758 order. */
2759 val = Qnil;
2760 for (val2 = Vcoding_category_list;
2761 !NILP (val2);
2762 val2 = XCONS (val2)->cdr)
2763 {
2764 int idx
2765 = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index));
2766 if (coding_mask & (1 << idx))
2767 val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
2768 }
2769
2770 /* Then, change the order of the list, while getting subsidiary
2771 coding-systems. */
2772 val2 = val;
2773 val = Qnil;
2774 for (; !NILP (val2); val2 = XCONS (val2)->cdr)
2775 {
2776 if (eol_type == CODING_EOL_AUTOMATIC)
2777 val = Fcons (XCONS (val2)->car, val);
2778 else
2779 {
2780 Lisp_Object val3 = Fget (XCONS (val2)->car, Qeol_type);
2781 if (VECTORP (val3))
2782 val = Fcons (XVECTOR (val3)->contents[eol_type], val);
2783 else
2784 val = Fcons (XCONS (val2)->car, val);
2785 }
2786 }
2787 }
2788
2789 return val;
2790}
2791
2792/* Scan text in the region between *BEGP and *ENDP, skip characters
2793 which we never have to encode to (iff ENCODEP is 1) or decode from
2794 coding system CODING at the head and tail, then set BEGP and ENDP
2795 to the addresses of start and end of the text we actually convert. */
2796
2797void
2798shrink_conversion_area (begp, endp, coding, encodep)
2799 unsigned char **begp, **endp;
2800 struct coding_system *coding;
2801 int encodep;
2802{
2803 register unsigned char *beg_addr = *begp, *end_addr = *endp;
2804
2805 if (coding->eol_type != CODING_EOL_LF
2806 && coding->eol_type != CODING_EOL_AUTOMATIC)
2807 /* Since we anyway have to convert end-of-line format, it is not
2808 worth skipping at most 100 bytes or so. */
2809 return;
2810
2811 if (encodep) /* for encoding */
2812 {
2813 switch (coding->type)
2814 {
2815 case coding_type_no_conversion:
2816 case coding_type_internal:
2817 case coding_type_automatic:
2818 /* We need no conversion. */
2819 *begp = *endp;
2820 return;
2821 case coding_type_ccl:
2822 /* We can't skip any data. */
2823 return;
2824 default:
2825 /* We can skip all ASCII characters at the head and tail. */
2826 while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
2827 while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
2828 break;
2829 }
2830 }
2831 else /* for decoding */
2832 {
2833 switch (coding->type)
2834 {
2835 case coding_type_no_conversion:
2836 /* We need no conversion. */
2837 *begp = *endp;
2838 return;
2839 case coding_type_internal:
2840 if (coding->eol_type == CODING_EOL_LF)
2841 {
2842 /* We need no conversion. */
2843 *begp = *endp;
2844 return;
2845 }
2846 /* We can skip all but carriage-return. */
2847 while (beg_addr < end_addr && *beg_addr != '\r') beg_addr++;
2848 while (beg_addr < end_addr && *(end_addr - 1) != '\r') end_addr--;
2849 break;
2850 case coding_type_sjis:
2851 case coding_type_big5:
2852 /* We can skip all ASCII characters at the head. */
2853 while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
2854 /* We can skip all ASCII characters at the tail except for
2855 the second byte of SJIS or BIG5 code. */
2856 while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
2857 if (end_addr != *endp)
2858 end_addr++;
2859 break;
2860 case coding_type_ccl:
2861 /* We can't skip any data. */
2862 return;
2863 default: /* i.e. case coding_type_iso2022: */
2864 {
2865 unsigned char c;
2866
2867 /* We can skip all ASCII characters except for a few
2868 control codes at the head. */
2869 while (beg_addr < end_addr && (c = *beg_addr) < 0x80
2870 && c != ISO_CODE_CR && c != ISO_CODE_SO
2871 && c != ISO_CODE_SI && c != ISO_CODE_ESC)
2872 beg_addr++;
2873 }
2874 break;
2875 }
2876 }
2877 *begp = beg_addr;
2878 *endp = end_addr;
2879 return;
2880}
2881
2882/* Encode to (iff ENCODEP is 1) or decode form coding system CODING a
2883 text between B and E. B and E are buffer position. */
2884
2885Lisp_Object
2886code_convert_region (b, e, coding, encodep)
2887 Lisp_Object b, e;
2888 struct coding_system *coding;
2889 int encodep;
2890{
2891 int beg, end, len, consumed, produced;
2892 char *buf;
2893 unsigned char *begp, *endp;
2894 int pos = PT;
2895
2896 validate_region (&b, &e);
2897 beg = XINT (b), end = XINT (e);
2898 if (beg < GPT && end >= GPT)
2899 move_gap (end);
2900
2901 if (encodep && !NILP (coding->pre_write_conversion))
2902 {
2903 /* We must call a pre-conversion function which may put a new
2904 text to be converted in a new buffer. */
2905 struct buffer *old = current_buffer, *new;
2906
2907 TEMP_SET_PT (beg);
2908 call2 (coding->pre_write_conversion, b, e);
2909 if (old != current_buffer)
2910 {
2911 /* Replace the original text by the text just generated. */
2912 len = ZV - BEGV;
2913 new = current_buffer;
2914 set_buffer_internal (old);
2915 del_range (beg, end);
2916 insert_from_buffer (new, 1, len, 0);
2917 end = beg + len;
2918 }
2919 }
2920
2921 /* We may be able to shrink the conversion region. */
2922 begp = POS_ADDR (beg); endp = begp + (end - beg);
2923 shrink_conversion_area (&begp, &endp, coding, encodep);
2924
2925 if (begp == endp)
2926 /* We need no conversion. */
2927 len = end - beg;
2928 else
2929 {
2930 beg += begp - POS_ADDR (beg);
2931 end = beg + (endp - begp);
2932
2933 if (encodep)
2934 len = encoding_buffer_size (coding, end - beg);
2935 else
2936 len = decoding_buffer_size (coding, end - beg);
2937 buf = get_conversion_buffer (len);
2938
2939 coding->last_block = 1;
2940 produced = (encodep
2941 ? encode_coding (coding, POS_ADDR (beg), buf, end - beg, len,
2942 &consumed)
2943 : decode_coding (coding, POS_ADDR (beg), buf, end - beg, len,
2944 &consumed));
2945
2946 len = produced + (beg - XINT (b)) + (XINT (e) - end);
2947
2948 TEMP_SET_PT (beg);
2949 insert (buf, produced);
2950 del_range (PT, PT + end - beg);
2951 if (pos >= end)
2952 pos = PT + (pos - end);
2953 else if (pos > beg)
2954 pos = beg;
2955 TEMP_SET_PT (pos);
2956 }
2957
2958 if (!encodep && !NILP (coding->post_read_conversion))
2959 {
2960 /* We must call a post-conversion function which may alter
2961 the text just converted. */
2962 Lisp_Object insval;
2963
2964 beg = XINT (b);
2965 TEMP_SET_PT (beg);
2966 insval = call1 (coding->post_read_conversion, make_number (len));
2967 CHECK_NUMBER (insval, 0);
2968 len = XINT (insval);
2969 }
2970
2971 return make_number (len);
2972}
2973
2974Lisp_Object
2975code_convert_string (str, coding, encodep)
2976 Lisp_Object str;
2977 struct coding_system *coding;
2978 int encodep;
2979{
2980 int len, consumed, produced;
2981 char *buf;
2982 unsigned char *begp, *endp;
2983 int head_skip, tail_skip;
2984 struct gcpro gcpro1;
2985
2986 if (encodep && !NILP (coding->pre_write_conversion)
2987 || !encodep && !NILP (coding->post_read_conversion))
2988 {
2989 /* Since we have to call Lisp functions which assume target text
2990 is in a buffer, after setting a temporary buffer, call
2991 code_convert_region. */
2992 int count = specpdl_ptr - specpdl;
2993 int len = XSTRING (str)->size;
2994 Lisp_Object result;
2995 struct buffer *old = current_buffer;
2996
2997 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
2998 temp_output_buffer_setup (" *code-converting-work*");
2999 set_buffer_internal (XBUFFER (Vstandard_output));
3000 insert_from_string (str, 0, len, 0);
3001 code_convert_region (make_number (BEGV), make_number (ZV),
3002 coding, encodep);
3003 result = make_buffer_string (BEGV, ZV, 0);
3004 set_buffer_internal (old);
3005 return unbind_to (count, result);
3006 }
3007
3008 /* We may be able to shrink the conversion region. */
3009 begp = XSTRING (str)->data;
3010 endp = begp + XSTRING (str)->size;
3011 shrink_conversion_area (&begp, &endp, coding, encodep);
3012
3013 if (begp == endp)
3014 /* We need no conversion. */
3015 return str;
3016
3017 head_skip = begp - XSTRING (str)->data;
3018 tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
3019
3020 GCPRO1 (str);
3021
3022 if (encodep)
3023 len = encoding_buffer_size (coding, endp - begp);
3024 else
3025 len = decoding_buffer_size (coding, endp - begp);
3026 buf = get_conversion_buffer (len + head_skip + tail_skip);
3027
3028 bcopy (XSTRING (str)->data, buf, head_skip);
3029 coding->last_block = 1;
3030 produced = (encodep
3031 ? encode_coding (coding, XSTRING (str)->data + head_skip,
3032 buf + head_skip, endp - begp, len, &consumed)
3033 : decode_coding (coding, XSTRING (str)->data + head_skip,
3034 buf + head_skip, endp - begp, len, &consumed));
3035 bcopy (XSTRING (str)->data + head_skip + (endp - begp),
3036 buf + head_skip + produced,
3037 tail_skip);
3038
3039 UNGCPRO;
3040
3041 return make_string (buf, head_skip + produced + tail_skip);
3042}
3043
3044DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
3045 3, 3, 0,
3046 "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\
3047Return length of decoded text.")
3048 (b, e, coding_system)
3049 Lisp_Object b, e, coding_system;
3050{
3051 struct coding_system coding;
3052
3053 CHECK_NUMBER_COERCE_MARKER (b, 0);
3054 CHECK_NUMBER_COERCE_MARKER (e, 1);
3055 CHECK_SYMBOL (coding_system, 2);
3056
3057 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3058 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3059
3060 return code_convert_region (b, e, &coding, 0);
3061}
3062
3063DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
3064 3, 3, 0,
3065 "Encode the text between START and END to CODING-SYSTEM.\n\
3066Return length of encoded text.")
3067 (b, e, coding_system)
3068 Lisp_Object b, e, coding_system;
3069{
3070 struct coding_system coding;
3071
3072 CHECK_NUMBER_COERCE_MARKER (b, 0);
3073 CHECK_NUMBER_COERCE_MARKER (e, 1);
3074 CHECK_SYMBOL (coding_system, 2);
3075
3076 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3077 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3078
3079 return code_convert_region (b, e, &coding, 1);
3080}
3081
3082DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
3083 2, 2, 0,
3084 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.")
3085 (string, coding_system)
3086 Lisp_Object string, coding_system;
3087{
3088 struct coding_system coding;
3089
3090 CHECK_STRING (string, 0);
3091 CHECK_SYMBOL (coding_system, 1);
3092
3093 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3094 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3095
3096 return code_convert_string (string, &coding, 0);
3097}
3098
3099DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
3100 2, 2, 0,
3101 "Encode STRING to CODING-SYSTEM, and return the result.")
3102 (string, coding_system)
3103 Lisp_Object string, coding_system;
3104{
3105 struct coding_system coding;
3106
3107 CHECK_STRING (string, 0);
3108 CHECK_SYMBOL (coding_system, 1);
3109
3110 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3111 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3112
3113 return code_convert_string (string, &coding, 1);
3114}
3115
3116DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
3117 "Decode a JISX0208 character of SJIS coding-system-sjis.\n\
3118CODE is the character code in SJIS.\n\
3119Return the corresponding character.")
3120 (code)
3121 Lisp_Object code;
3122{
3123 unsigned char c1, c2, s1, s2;
3124 Lisp_Object val;
3125
3126 CHECK_NUMBER (code, 0);
3127 s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
3128 DECODE_SJIS (s1, s2, c1, c2);
3129 XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
3130 return val;
3131}
3132
3133DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
3134 "Encode a JISX0208 character CHAR to SJIS coding-system.\n\
3135Return the corresponding character code in SJIS.")
3136 (ch)
3137 Lisp_Object ch;
3138{
3139 int charset;
3140 unsigned char c1, c2, s1, s2;
3141 Lisp_Object val;
3142
3143 CHECK_NUMBER (ch, 0);
3144 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
3145 if (charset == charset_jisx0208)
3146 {
3147 ENCODE_SJIS (c1, c2, s1, s2);
3148 XSETFASTINT (val, ((int)s1 << 8) | s2);
3149 }
3150 else
3151 XSETFASTINT (val, 0);
3152 return val;
3153}
3154
3155DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
3156 "Decode a Big5 character CODE of BIG5 coding-system.\n\
3157CODE is the character code in BIG5.\n\
3158Return the corresponding character.")
3159 (code)
3160 Lisp_Object code;
3161{
3162 int charset;
3163 unsigned char b1, b2, c1, c2;
3164 Lisp_Object val;
3165
3166 CHECK_NUMBER (code, 0);
3167 b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
3168 DECODE_BIG5 (b1, b2, charset, c1, c2);
3169 XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
3170 return val;
3171}
3172
3173DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
3174 "Encode the Big5 character CHAR to BIG5 coding-system.\n\
3175Return the corresponding character code in Big5.")
3176 (ch)
3177 Lisp_Object ch;
3178{
3179 int charset;
3180 unsigned char c1, c2, b1, b2;
3181 Lisp_Object val;
3182
3183 CHECK_NUMBER (ch, 0);
3184 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
3185 if (charset == charset_big5_1 || charset == charset_big5_2)
3186 {
3187 ENCODE_BIG5 (charset, c1, c2, b1, b2);
3188 XSETFASTINT (val, ((int)b1 << 8) | b2);
3189 }
3190 else
3191 XSETFASTINT (val, 0);
3192 return val;
3193}
3194
3195DEFUN ("set-terminal-coding-system",
3196 Fset_terminal_coding_system, Sset_terminal_coding_system, 1, 1,
3197 "zCoding-system for terminal display: ",
3198 "Set coding-system of your terminal to CODING-SYSTEM.\n\
3199All outputs to terminal are encoded to this coding-system.")
3200 (coding_system)
3201 Lisp_Object coding_system;
3202{
3203 CHECK_SYMBOL (coding_system, 0);
3204 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
3205 update_mode_lines++;
3206 if (!NILP (Finteractive_p ()))
3207 Fredraw_display ();
3208 return Qnil;
3209}
3210
3211DEFUN ("terminal-coding-system",
3212 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
3213 "Return coding-system of your terminal.")
3214 ()
3215{
3216 return terminal_coding.symbol;
3217}
3218
3219DEFUN ("set-keyboard-coding-system",
3220 Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
3221 "zCoding-system for keyboard input: ",
3222 "Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
3223All inputs from terminal are decoded from this coding-system.")
3224 (coding_system)
3225 Lisp_Object coding_system;
3226{
3227 CHECK_SYMBOL (coding_system, 0);
3228 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
3229 return Qnil;
3230}
3231
3232DEFUN ("keyboard-coding-system",
3233 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
3234 "Return coding-system of what is sent from terminal keyboard.")
3235 ()
3236{
3237 return keyboard_coding.symbol;
3238}
3239
3240
3241DEFUN ("find-coding-system", Ffind_coding_system, Sfind_coding_system,
3242 1, MANY, 0,
3243 "Return a cons of coding systems for I/O primitive OPERATION.\n\
3244Remaining arguments are for OPERATION.\n\
3245OPERATION is one of the following Emacs I/O primitives:\n\
3246 For file I/O, insert-file-contents or write-region.\n\
3247 For process I/O, call-process, call-process-region, or start-process.\n\
3248 For network I/O, open-network-stream.\n\
3249For each OPERATION, TARGET is selected from the arguments as below:\n\
3250 For file I/O, TARGET is a file name.\n\
3251 For process I/O, TARGET is a process name.\n\
3252 For network I/O, TARGET is a service name or a port number\n\
3253\n\
3254The return value is a cons of coding systems for decoding and encoding\n\
3255registered in nested alist `coding-system-alist' (which see) at a slot\n\
3256corresponding to OPERATION and TARGET.
3257If a function symbol is at the slot, return a result of the function call.\n\
3258The function is called with one argument, a list of all the arguments.")
3259 (nargs, args)
3260 int nargs;
3261 Lisp_Object *args;
3262{
3263 Lisp_Object operation, target_idx, target, val;
3264 register Lisp_Object chain;
3265
3266 if (nargs < 2)
3267 error ("Too few arguments");
3268 operation = args[0];
3269 if (!SYMBOLP (operation)
3270 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3271 error ("Invalid first arguement");
3272 if (nargs < 1 + XINT (target_idx))
3273 error ("Too few arguments for operation: %s",
3274 XSYMBOL (operation)->name->data);
3275 target = args[XINT (target_idx) + 1];
3276 if (!(STRINGP (target)
3277 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
3278 error ("Invalid %dth argument", XINT (target_idx) + 1);
3279
3280 chain = Fassq (operation, Vcoding_system_alist);
3281 if (NILP (chain))
3282 return Qnil;
3283
3284 for (chain = XCONS (chain)->cdr; CONSP (chain); chain = XCONS (chain)->cdr)
3285 {
3286 Lisp_Object elt = XCONS (chain)->car;
3287
3288 if (CONSP (elt)
3289 && ((STRINGP (target)
3290 && STRINGP (XCONS (elt)->car)
3291 && fast_string_match (XCONS (elt)->car, target) >= 0)
3292 || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
3293 return (CONSP (val = XCONS (elt)->cdr)
3294 ? val
3295 : ((SYMBOLP (val) && Fboundp (val)
3296 ? call2 (val, Flist (nargs, args))
3297 : Qnil)));
3298 }
3299 return Qnil;
3300}
3301
3302#endif /* emacs */
3303
3304
3305/*** 8. Post-amble ***/
3306
3307init_coding_once ()
3308{
3309 int i;
3310
3311 /* Emacs internal format specific initialize routine. */
3312 for (i = 0; i <= 0x20; i++)
3313 emacs_code_class[i] = EMACS_control_code;
3314 emacs_code_class[0x0A] = EMACS_linefeed_code;
3315 emacs_code_class[0x0D] = EMACS_carriage_return_code;
3316 for (i = 0x21 ; i < 0x7F; i++)
3317 emacs_code_class[i] = EMACS_ascii_code;
3318 emacs_code_class[0x7F] = EMACS_control_code;
3319 emacs_code_class[0x80] = EMACS_leading_code_composition;
3320 for (i = 0x81; i < 0xFF; i++)
3321 emacs_code_class[i] = EMACS_invalid_code;
3322 emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
3323 emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
3324 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
3325 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
3326
3327 /* ISO2022 specific initialize routine. */
3328 for (i = 0; i < 0x20; i++)
3329 iso_code_class[i] = ISO_control_code;
3330 for (i = 0x21; i < 0x7F; i++)
3331 iso_code_class[i] = ISO_graphic_plane_0;
3332 for (i = 0x80; i < 0xA0; i++)
3333 iso_code_class[i] = ISO_control_code;
3334 for (i = 0xA1; i < 0xFF; i++)
3335 iso_code_class[i] = ISO_graphic_plane_1;
3336 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
3337 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
3338 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
3339 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
3340 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
3341 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
3342 iso_code_class[ISO_CODE_ESC] = ISO_escape;
3343 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
3344 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
3345 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
3346
3347 Qcoding_system = intern ("coding-system");
3348 staticpro (&Qcoding_system);
3349
3350 Qeol_type = intern ("eol-type");
3351 staticpro (&Qeol_type);
3352
3353 Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
3354 staticpro (&Qbuffer_file_coding_system);
3355
3356 Qpost_read_conversion = intern ("post-read-conversion");
3357 staticpro (&Qpost_read_conversion);
3358
3359 Qpre_write_conversion = intern ("pre-write-conversion");
3360 staticpro (&Qpre_write_conversion);
3361
3362 Qcoding_system_vector = intern ("coding-system-vector");
3363 staticpro (&Qcoding_system_vector);
3364
3365 Qcoding_system_p = intern ("coding-system-p");
3366 staticpro (&Qcoding_system_p);
3367
3368 Qcoding_system_error = intern ("coding-system-error");
3369 staticpro (&Qcoding_system_error);
3370
3371 Fput (Qcoding_system_error, Qerror_conditions,
3372 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
3373 Fput (Qcoding_system_error, Qerror_message,
3374 build_string ("Coding-system error"));
3375
3376 Qcoding_category_index = intern ("coding-category-index");
3377 staticpro (&Qcoding_category_index);
3378
3379 {
3380 int i;
3381 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3382 {
3383 coding_category_table[i] = intern (coding_category_name[i]);
3384 staticpro (&coding_category_table[i]);
3385 Fput (coding_category_table[i], Qcoding_category_index,
3386 make_number (i));
3387 }
3388 }
3389
3390 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
3391 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
3392
3393 setup_coding_system (Qnil, &keyboard_coding);
3394 setup_coding_system (Qnil, &terminal_coding);
3395}
3396
3397#ifdef emacs
3398
3399syms_of_coding ()
3400{
3401 Qtarget_idx = intern ("target-idx");
3402 staticpro (&Qtarget_idx);
3403
3404 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
3405 Fput (Qwrite_region, Qtarget_idx, make_number (2));
3406
3407 Qcall_process = intern ("call-process");
3408 staticpro (&Qcall_process);
3409 Fput (Qcall_process, Qtarget_idx, make_number (0));
3410
3411 Qcall_process_region = intern ("call-process-region");
3412 staticpro (&Qcall_process_region);
3413 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
3414
3415 Qstart_process = intern ("start-process");
3416 staticpro (&Qstart_process);
3417 Fput (Qstart_process, Qtarget_idx, make_number (2));
3418
3419 Qopen_network_stream = intern ("open-network-stream");
3420 staticpro (&Qopen_network_stream);
3421 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
3422
3423 defsubr (&Scoding_system_vector);
3424 defsubr (&Scoding_system_p);
3425 defsubr (&Sread_coding_system);
3426 defsubr (&Sread_non_nil_coding_system);
3427 defsubr (&Scheck_coding_system);
3428 defsubr (&Sdetect_coding_region);
3429 defsubr (&Sdecode_coding_region);
3430 defsubr (&Sencode_coding_region);
3431 defsubr (&Sdecode_coding_string);
3432 defsubr (&Sencode_coding_string);
3433 defsubr (&Sdecode_sjis_char);
3434 defsubr (&Sencode_sjis_char);
3435 defsubr (&Sdecode_big5_char);
3436 defsubr (&Sencode_big5_char);
3437 defsubr (&Sset_terminal_coding_system);
3438 defsubr (&Sterminal_coding_system);
3439 defsubr (&Sset_keyboard_coding_system);
3440 defsubr (&Skeyboard_coding_system);
3441 defsubr (&Sfind_coding_system);
3442
3443 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
3444 "List of coding-categories (symbols) ordered by priority.");
3445 {
3446 int i;
3447
3448 Vcoding_category_list = Qnil;
3449 for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
3450 Vcoding_category_list
3451 = Fcons (coding_category_table[i], Vcoding_category_list);
3452 }
3453
3454 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
3455 "A variable of internal use only.\n\
3456If the value is a coding system, it is used for decoding on read operation.\n\
3457If not, an appropriate element in `coding-system-alist' (which see) is used.");
3458 Vcoding_system_for_read = Qnil;
3459
3460 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
3461 "A variable of internal use only.\n\
3462If the value is a coding system, it is used for encoding on write operation.\n\
3463If not, an appropriate element in `coding-system-alist' (which see) is used.");
3464 Vcoding_system_for_write = Qnil;
3465
3466 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
3467 "Coding-system used in the latest file or process I/O.");
3468 Vlast_coding_system_used = Qnil;
3469
3470 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
3471 "Nested alist to decide a coding system for a specific I/O operation.\n\
3472The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
3473
3474OPERATION is one of the following Emacs I/O primitives:\n\
3475 For file I/O, insert-file-contents and write-region.\n\
3476 For process I/O, call-process, call-process-region, and start-process.\n\
3477 For network I/O, open-network-stream.\n\
3478In addition, for process I/O, `process-argument' can be specified for\n\
3479encoding arguments of the process.\n\
3480\n\
3481REGEXP is a regular expression matching a target of OPERATION, where\n\
3482target is a file name for file I/O operations, a process name for\n\
3483process I/O operations, or a service name for network I/O\n\
3484operations. REGEXP might be a port number for network I/O operation.\n\
3485\n\
3486CODING-SYSTEMS is a cons of coding systems to encode and decode\n\
3487character code on OPERATION, or a function symbol returning the cons.\n\
3488See the documentation of `find-coding-system' for more detail.");
3489 Vcoding_system_alist = Qnil;
3490
3491 DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
3492 "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
3493 eol_mnemonic_unix = '.';
3494
3495 DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
3496 "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
3497 eol_mnemonic_dos = ':';
3498
3499 DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
3500 "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
3501 eol_mnemonic_mac = '\'';
3502
3503 DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
3504 "Mnemonic character indicating end-of-line format is not yet decided.");
3505 eol_mnemonic_undecided = '-';
3506
3507 DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
3508 "Alist of charsets vs the alternate charsets.\n\
3509While decoding, if a charset (car part of an element) is found,\n\
3510decode it as the alternate charset (cdr part of the element).");
3511 Valternate_charset_table = Qnil;
3512
3513 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
3514 "Alist of charsets vs revision numbers.\n\
3515While encoding, if a charset (car part of an element) is found,\n\
3516designate it with the escape sequence identifing revision (cdr part of the element).");
3517 Vcharset_revision_alist = Qnil;
3518}
3519
3520#endif /* emacs */
diff --git a/src/coding.h b/src/coding.h
new file mode 100644
index 00000000000..3ec2fcc32f3
--- /dev/null
+++ b/src/coding.h
@@ -0,0 +1,409 @@
1/* Header for coding system handler.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21#ifndef _CODING_H
22#define _CODING_H
23
24#ifndef _CCL_H
25#include "../src/ccl.h"
26#endif
27
28/*** EMACS' INTERNAL FORMAT section ***/
29
30/* All code (1-byte) of Emacs' internal format is classified into one
31 of the followings. See also `charset.h'. */
32enum emacs_code_class_type
33 {
34 EMACS_control_code, /* Control codes in the range
35 0x00..0x1F and 0x7F except for the
36 following two codes. */
37 EMACS_linefeed_code, /* 0x0A (linefeed) to denote
38 end-of-line. */
39 EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
40 in selective display mode. */
41 EMACS_ascii_code, /* ASCII characters. */
42 EMACS_leading_code_composition, /* Leading code of a composite
43 character. */
44 EMACS_leading_code_2, /* Base leading code of official
45 TYPE9N character. */
46 EMACS_leading_code_3, /* Base leading code of private TYPE9N
47 or official TYPE9Nx9N character. */
48 EMACS_leading_code_4, /* Base leading code of private
49 TYPE9Nx9N character. */
50 EMACS_invalid_code /* Invalid code, i.e. a base leading
51 code not yet assigned to any
52 charset, or a code of the range
53 0xA0..0xFF. */
54 };
55
56extern enum emacs_code_class_type emacs_code_class[256];
57
58/*** ISO2022 section ***/
59
60/* Macros to define code of control characters for ISO2022's functions. */
61 /* code */ /* function */
62#define ISO_CODE_LF 0x0A /* line-feed */
63#define ISO_CODE_CR 0x0D /* carriage-return */
64#define ISO_CODE_SO 0x0E /* shift-out */
65#define ISO_CODE_SI 0x0F /* shift-in */
66#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
67#define ISO_CODE_ESC 0x1B /* escape */
68#define ISO_CODE_SS2 0x8E /* single-shift-2 */
69#define ISO_CODE_SS3 0x8F /* single-shift-3 */
70#define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
71
72/* All code (1-byte) of ISO2022 is classified into one of the
73 followings. */
74enum iso_code_class_type
75 {
76 ISO_control_code, /* Control codes in the range
77 0x00..0x1F, 0x7F, and 0x80..0x9F,
78 except for the following seven
79 codes. */
80 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
81 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
82 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
83 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
84 ISO_escape, /* ISO_CODE_SO (0x1B) */
85 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
86 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
87 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
88 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
89 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
90 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
91 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
92 };
93
94/** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
95 element in the structure `coding_system'. This information is used
96 while encoding a text to ISO2022. **/
97
98/* If set, produce short-form designation sequence (e.g. ESC $ A)
99 instead of long-form sequence (e.g. ESC $ ( A). */
100#define CODING_FLAG_ISO_SHORT_FORM 0x0001
101
102/* If set, reset graphic planes and registers at end-of-line to the
103 initial state. */
104#define CODING_FLAG_ISO_RESET_AT_EOL 0x0002
105
106/* If set, reset graphic planes and registers before any control
107 characters to the initial state. */
108#define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004
109
110/* If set, encode by 7-bit environment. */
111#define CODING_FLAG_ISO_SEVEN_BITS 0x0008
112
113/* If set, use locking-shift function. */
114#define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010
115
116/* If set, use single-shift function. Overwrite
117 CODING_FLAG_ISO_LOCKING_SHIFT. */
118#define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020
119
120/* If set, designate JISX0201-Roman instead of ASCII. */
121#define CODING_FLAG_ISO_USE_ROMAN 0x0040
122
123/* If set, designate JISX0208-1978 instead of JISX0208-1983. */
124#define CODING_FLAG_ISO_USE_OLDJIS 0x0080
125
126/* If set, do not produce ISO6429's direction specifying sequence. */
127#define CODING_FLAG_ISO_NO_DIRECTION 0x0100
128
129/* Structure of the field `spec.iso2022' in the structure `coding_system'. */
130struct iso2022_spec
131{
132 /* The current graphic register invoked to each graphic plane. */
133 int current_invocation[2];
134
135 /* The current charset designated to each graphic register. */
136 int current_designation[4];
137
138 /* A charset initially designated to each graphic register. */
139 int initial_designation[4];
140
141 /* A graphic register to which each charset should be designated. */
142 int requested_designation[MAX_CHARSET];
143
144 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
145 by single-shift while encoding. */
146 int single_shifting;
147};
148
149/* Macros to access each field in the structure `spec.iso2022'. */
150#define CODING_SPEC_ISO_INVOCATION(coding, plane) \
151 coding->spec.iso2022.current_invocation[plane]
152#define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
153 coding->spec.iso2022.current_designation[reg]
154#define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
155 coding->spec.iso2022.initial_designation[reg]
156#define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
157 coding->spec.iso2022.requested_designation[charset]
158
159/* Set to 1 temporarily only when encoding a character with
160 single-shift function. */
161#define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
162 coding->spec.iso2022.single_shifting
163
164/* Return a charset which is currently designated to the graphic plane
165 PLANE in the coding-system CODING. */
166#define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
167 CODING_SPEC_ISO_DESIGNATION \
168 (coding, CODING_SPEC_ISO_INVOCATION (coding, plane))
169
170/*** BIG5 section ***/
171
172/* Macros to denote each type of BIG5 coding system. */
173#define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
174 BIG5 developed by Hong Kong
175 University. */
176#define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
177 of BIG5 developed by the
178 company ETen in Taiwan. */
179
180/*** GENERAL section ***/
181
182/* Types of coding system. */
183enum coding_type
184 {
185 coding_type_no_conversion, /* A coding system which requires no
186 conversion for reading and writing
187 including end-of-line format. */
188 coding_type_internal, /* A coding system used in Emacs'
189 buffer and string. Requires no
190 conversion for reading and writing
191 except for end-of-line format. */
192 coding_type_automatic, /* A coding system which requires
193 automatic detection of a real
194 coding system. */
195 coding_type_sjis, /* SJIS coding system for Japanese. */
196 coding_type_iso2022, /* Any coding system of ISO2022
197 variants. */
198 coding_type_big5, /* BIG5 coding system for Chinese. */
199 coding_type_ccl /* The coding system of which decoder
200 and encoder are written in CCL. */
201 };
202
203/* Formats of end-of-line. */
204#define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
205 internal format. */
206#define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
207 line-feed. */
208#define CODING_EOL_CR 2 /* Carriage-return only. */
209#define CODING_EOL_AUTOMATIC 3 /* This value is used to denote the
210 eol-type is not yet decided. */
211
212/* Character composition status while encoding/decoding. */
213#define COMPOSING_NO 0 /* not composing */
214#define COMPOSING_WITH_RULE_HEAD 1 /* 1st char of with-rule composing follow */
215#define COMPOSING_NO_RULE_HEAD 2 /* 1st char of no-rule composing follow */
216#define COMPOSING_WITH_RULE_TAIL 3 /* Nth char of with-rule composing follow */
217#define COMPOSING_NO_RULE_TAIL 4 /* Nth char of no-rule composing follow */
218#define COMPOSING_WITH_RULE_RULE 5 /* composition rule follow */
219
220/* 1 iff composing. */
221#define COMPOSING_P(composing) (composing)
222/* 1 iff 1st char of composing element follows. */
223#define COMPOSING_HEAD_P(composing) \
224 ((composing) && (composing) <= COMPOSING_NO_RULE_HEAD)
225/* 1 iff composing with embeded composition rule. */
226#define COMPOSING_WITH_RULE_P(composing) ((composing) & 1)
227
228struct coding_system
229{
230 /* Type of the coding system. */
231 enum coding_type type;
232
233 /* If the coding system requires specific code to be attached at the
234 tail of converted text, this value should be set to `1'. */
235 int require_flushing;
236
237 /* Flag bits of the coding system. The meaning of each bit depends
238 on the type of the coding system. */
239 unsigned int flags;
240
241 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
242 int eol_type;
243
244 /* Non-zero means that the current source text is the last block of the
245 whole text to be converted. */
246 int last_block;
247
248 /* Non-zero means that characters are being composed currently while
249 decoding or encoding. See macros COMPOSING_XXXX above for the
250 meaing of each non-zero value. */
251 int composing;
252
253 /* 0 (left-to-right) or 1 (right-to-left): the direction of the text
254 being processed currently. */
255 int direction;
256
257 /* Non-zero means that the current source text is in a buffer which
258 enables selective display. */
259 int selective;
260
261 /* Detailed information specific to each type of coding system. */
262 union spec
263 {
264 struct iso2022_spec iso2022;
265 struct ccl_spec ccl; /* Defined in ccl.h. */
266 } spec;
267
268 /* Backward pointer to the Lisp symbol of the coding system. */
269 Lisp_Object symbol;
270
271 /* Lisp function (symbol) to be called after decoding to do
272 additional conversion. */
273 Lisp_Object post_read_conversion;
274
275 /* Lisp function (symbol) to be called before encoding to do
276 additional conversion. */
277 Lisp_Object pre_write_conversion;
278
279 /* Carryover yielded by decoding/encoding incomplete source. No
280 coding-system yields more than 7-byte of carryover. This does
281 not include a text which is not processed because of short of
282 output buffer. */
283 char carryover[8];
284
285 /* Actual data length in the above array. */
286 int carryover_size;
287};
288
289/* Return 1 if the coding-system CODING requires conversion of
290 representation of a visible character (text). */
291#define CODING_REQUIRE_TEXT_CONVERSION(coding) \
292 ((coding)->type != coding_type_no_conversion \
293 && (coding)->type != coding_type_internal)
294
295/* Return 1 if the coding-system CODING requires conversion of the
296 format of end-of-line. */
297#define CODING_REQUIRE_EOL_CONVERSION(coding) \
298 ((coding)->eol_type != CODING_EOL_AUTOMATIC \
299 && (coding)->eol_type != CODING_EOL_LF)
300
301/* Return 1 if the coding-system CODING requires some conversion. */
302#define CODING_REQUIRE_CONVERSION(coding) \
303 (CODING_REQUIRE_TEXT_CONVERSION (coding) \
304 || CODING_REQUIRE_EOL_CONVERSION (coding))
305
306/* Index for each coding category in `coding_category_table' */
307#define CODING_CATEGORY_IDX_INTERNAL 0
308#define CODING_CATEGORY_IDX_SJIS 1
309#define CODING_CATEGORY_IDX_ISO_7 2
310#define CODING_CATEGORY_IDX_ISO_8_1 3
311#define CODING_CATEGORY_IDX_ISO_8_2 4
312#define CODING_CATEGORY_IDX_ISO_ELSE 5
313#define CODING_CATEGORY_IDX_BIG5 6
314#define CODING_CATEGORY_IDX_BINARY 7
315#define CODING_CATEGORY_IDX_MAX 8
316
317/* Definitions of flag bits returned by the function
318 detect_coding_mask (). */
319#define CODING_CATEGORY_MASK_INTERNAL (1 << CODING_CATEGORY_IDX_INTERNAL)
320#define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
321#define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
322#define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
323#define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
324#define CODING_CATEGORY_MASK_ISO_ELSE (1 << CODING_CATEGORY_IDX_ISO_ELSE)
325#define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
326
327/* This value is returned if detect_coding_mask () find nothing other
328 than ASCII characters. */
329#define CODING_CATEGORY_MASK_ANY \
330 ( CODING_CATEGORY_MASK_INTERNAL \
331 | CODING_CATEGORY_MASK_SJIS \
332 | CODING_CATEGORY_MASK_ISO_7 \
333 | CODING_CATEGORY_MASK_ISO_8_1 \
334 | CODING_CATEGORY_MASK_ISO_8_2 \
335 | CODING_CATEGORY_MASK_ISO_ELSE \
336 | CODING_CATEGORY_MASK_BIG5)
337
338/* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
339 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
340 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
341 internal format. */
342
343#define DECODE_SJIS(s1, s2, c1, c2) \
344 do { \
345 if (s2 >= 0x9F) \
346 c1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
347 c2 = s2 - 0x7E; \
348 else \
349 c1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
350 c2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F); \
351 } while (0)
352
353#define ENCODE_SJIS(c1, c2, s1, s2) \
354 do { \
355 if (c1 & 1) \
356 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1), \
357 s2 = c2 + ((c2 >= 0x60) ? 0x20 : 0x1F); \
358 else \
359 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0), \
360 s2 = c2 + 0x7E; \
361 } while (0)
362
363/* Extern declarations. */
364extern int decode_coding (), encode_coding ();
365extern int decoding_buffer_size (), encoding_buffer_size ();
366extern int conversion_buffer_size;
367extern char *conversion_buffer, *get_conversion_buffer ();
368extern Lisp_Object Fcheck_coding_system ();
369extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
370extern Lisp_Object Qbuffer_file_coding_system;
371extern Lisp_Object Vcoding_category_list;
372
373/* Mnemonic character to indicate each type of end-of-line. */
374extern int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
375/* Mnemonic character to indicate type of end-of-line is not yet decided. */
376extern int eol_mnemonic_undecided;
377
378/* Table of coding-systems currently assigned to each coding-category. */
379extern Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
380/* Table of names of symbol for each coding-category. */
381extern char *coding_category_name[CODING_CATEGORY_IDX_MAX];
382
383#ifdef emacs
384extern Lisp_Object Qfile_coding_system;
385extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
386extern Lisp_Object Qstart_process, Qopen_network_stream;
387
388/* Coding-system for reading files and receiving data from process. */
389extern Lisp_Object Vcoding_system_for_read;
390/* Coding-system for writing files and sending data to process. */
391extern Lisp_Object Vcoding_system_for_write;
392/* Coding-system actually used in the latest I/O. */
393extern Lisp_Object Vlast_coding_system_used;
394
395/* Coding-system to be used for encoding terminal output. This
396 structure contains information of a coding-system specified by the
397 function `set-terminal-coding-system'. */
398extern struct coding_system terminal_coding;
399
400/* Coding-system of what is sent from terminal keyboard. This
401 structure contains information of a coding-system specified by the
402 function `set-keyboard-coding-system'. */
403extern struct coding_system keyboard_coding;
404
405extern Lisp_Object Vcoding_system_alist;
406
407#endif
408
409#endif /* _CODING_H */
diff --git a/src/fontset.c b/src/fontset.c
new file mode 100644
index 00000000000..7d88e90ae89
--- /dev/null
+++ b/src/fontset.c
@@ -0,0 +1,819 @@
1/* Fontset handler.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21#include <config.h>
22#if HAVE_ALLOCA_H
23#include <alloca.h>
24#endif /* HAVE_ALLOCA_H */
25#include "lisp.h"
26#include "charset.h"
27#include "ccl.h"
28#include "fontset.h"
29#include "frame.h"
30
31Lisp_Object Vglobal_fontset_alist;
32
33Lisp_Object Vfont_encoding_alist;
34
35/* We had better have our own strcasecmp function because some system
36 doesn't have it. */
37static char my_strcasetbl[256];
38
39/* Compare two strings S0 and S1 while ignoring differences in case.
40 Return 1 if they differ, else return 0. */
41static int
42my_strcasecmp (s0, s1)
43 unsigned char *s0, *s1;
44{
45 while (*s0)
46 if (my_strcasetbl[*s0++] != my_strcasetbl[*s1++]) return 1;
47 return (int) *s1;
48}
49
50/* The following six are window system dependent functions. See
51 the comments in src/fontset.h for more detail. */
52
53/* Return a pointer to struct font_info of font FONT_IDX of frame F. */
54struct font_info *(*get_font_info_func) (/* FRAME_PTR f; int font_idx */);
55
56/* Return a list of font names which matches PATTERN. See the document of
57 `x-list-fonts' for more detail. */
58Lisp_Object (*list_fonts_func) (/* Lisp_Object pattern, face, frame, width */);
59
60/* Load a font named NAME for frame F and return a pointer to the
61 information of the loaded font. If loading is failed, return 0. */
62struct font_info *(*load_font_func) (/* FRAME_PTR f; char *name */);
63
64/* Return a pointer to struct font_info of a font named NAME for frame F. */
65struct font_info *(*query_font_func) (/* FRAME_PTR f; char *name */);
66
67/* Additional function for setting fontset or changing fontset
68 contents of frame F. */
69void (*set_frame_fontset_func) (/* FRAME_PTR f; Lisp_Object arg, oldval */);
70
71/* Check if any window system is used now. */
72void (*check_window_system_func) ();
73
74struct fontset_data *
75alloc_fontset_data ()
76{
77 struct fontset_data *fontset_data
78 = (struct fontset_data *) xmalloc (sizeof (struct fontset_data));
79
80 bzero (fontset_data, sizeof (struct fontset_data));
81
82 return fontset_data;
83}
84
85void
86free_fontset_data (fontset_data)
87 struct fontset_data *fontset_data;
88{
89 int i;
90
91 for (i = 0; i < fontset_data->n_fontsets; i++)
92 {
93 int j;
94
95 xfree (fontset_data->fontset_table[i]->name);
96 for (j = 0; j < MAX_CHARSET; j++)
97 if (fontset_data->fontset_table[i]->fontname[j])
98 xfree (fontset_data->fontset_table[i]->fontname[j]);
99 xfree (fontset_data->fontset_table[i]);
100 }
101 xfree (fontset_data->fontset_table);
102
103 xfree (fontset_data);
104}
105
106/* Load a font named FONTNAME for displaying CHARSET on frame F.
107 All fonts for frame F is stored in a table pointed by FONT_TABLE.
108 Return a pointer to the struct font_info of the loaded font.
109 If loading fails, return 0;
110 If FONTNAME is NULL, the name is taken from the information of FONTSET.
111 If FONTSET is given, try to load a font whose size matches that of
112 FONTSET, and, the font index is stored in the table for FONTSET. */
113
114struct font_info *
115fs_load_font (f, font_table, charset, fontname, fontset)
116 FRAME_PTR f;
117 struct font_info *font_table;
118 int charset, fontset;
119 char *fontname;
120{
121 Lisp_Object font_list;
122 Lisp_Object list, elt;
123 int font_idx;
124 int size = 0;
125 struct fontset_info *fontsetp = 0;
126 struct font_info *fontp;
127
128 if (fontset >= 0 && fontset < FRAME_FONTSET_DATA (f)->n_fontsets)
129 {
130 fontsetp = FRAME_FONTSET_DATA (f)->fontset_table[fontset];
131 font_idx = fontsetp->font_indexes[charset];
132 if (font_idx >= 0)
133 /* We have already loaded a font. */
134 return font_table + font_idx;
135 else if (font_idx == FONT_NOT_FOUND)
136 /* We have already tried loading a font and failed. */
137 return 0;
138 if (!fontname)
139 fontname = fontsetp->fontname[charset];
140 }
141
142 if (!fontname)
143 /* No way to get fontname. */
144 return 0;
145
146 /* If a fontset is specified and we have already loaded some fonts
147 in the fontset, we need a font of appropriate size to be used
148 with the fonts. */
149 if (fontsetp && fontsetp->size)
150 size = fontsetp->size * CHARSET_WIDTH (charset);
151
152 fontp = (*load_font_func) (f, fontname, size);
153
154 if (!fontp)
155 {
156 if (fontsetp)
157 fontsetp->font_indexes[charset] = FONT_NOT_FOUND;
158 return 0;
159 }
160
161 /* Fill in fields (CHARSET, ENCODING, and FONT_ENCODER) which are
162 not set by (*load_font_func). */
163 fontp->charset = charset;
164
165 if (fontp->encoding[1] >= 0)
166 {
167 /* The font itself tells which code points to be used. Use this
168 encoding for all other charsets. */
169 int i;
170
171 fontp->encoding[0] = fontp->encoding[1];
172 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; i < MAX_CHARSET; i++)
173 fontp->encoding[i] = fontp->encoding[1];
174 }
175 else
176 {
177 /* The font itself doesn't tell which code points to be used. */
178 int i;
179
180 /* At first, set 1 (means 0xA0..0xFF) as the default. */
181 fontp->encoding[0] = 1;
182 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; i < MAX_CHARSET; i++)
183 fontp->encoding[i] = 1;
184 /* Then override them by a specification in Vfont_encoding_alist. */
185 for (list = Vfont_encoding_alist; CONSP (list); list = XCONS (list)->cdr)
186 {
187 elt = XCONS (list)->car;
188 if (CONSP (elt)
189 && STRINGP (XCONS (elt)->car) && CONSP (XCONS (elt)->cdr)
190 && (fast_string_match_ignore_case (XCONS (elt)->car, fontname)
191 >= 0))
192 {
193 Lisp_Object tmp;
194
195 for (tmp = XCONS (elt)->cdr; CONSP (tmp); tmp = XCONS (tmp)->cdr)
196 if (CONSP (XCONS (tmp)->car)
197 && INTEGERP (XCONS (XCONS (tmp)->car)->car)
198 && ((i = get_charset_id (XCONS (XCONS (tmp)->car)->car))
199 >= 0)
200 && INTEGERP (XCONS (XCONS (tmp)->car)->cdr)
201 && XFASTINT (XCONS (XCONS (tmp)->car)->cdr) < 4)
202 fontp->encoding[i]
203 = XFASTINT (XCONS (XCONS (tmp)->car)->cdr);
204 }
205 }
206 }
207
208 fontp->font_encoder = (struct ccl_program *) 0;
209 for (list = Vfont_ccl_encoder_alist; CONSP (list); list = XCONS (list)->cdr)
210 {
211 elt = XCONS (list)->car;
212 if (CONSP (elt)
213 && STRINGP (XCONS (elt)->car) && VECTORP (XCONS (elt)->cdr)
214 && fast_string_match_ignore_case (XCONS (elt)->car, fontname) >= 0)
215 {
216 fontp->font_encoder
217 = (struct ccl_program *) xmalloc (sizeof (struct ccl_program));
218 setup_ccl_program (fontp->font_encoder, XCONS (elt)->cdr);
219 break;
220 }
221 }
222
223 if (fontsetp)
224 {
225 fontsetp->font_indexes[charset] = fontp->font_idx;
226 if (fontsetp->size == 0)
227 fontsetp->size = fontp->size / CHARSET_WIDTH (charset);
228
229 if (charset == CHARSET_ASCII
230 && fontsetp->size != fontp->size)
231 {
232 /* When loading ASCII font of the different size from the
233 size of FONTSET, we have to update the size of FONTSET.
234 Since changing the size of FONTSET may make some fonts
235 already loaded inappropriate to be used in FONTSET, we
236 must delete the record of such fonts. In that case, we
237 also have to calculate the height of FONTSET from the
238 remaining fonts. */
239 int i;
240
241 fontsetp->size = fontp->size;
242 fontsetp->height = fontp->height;
243 for (i = CHARSET_ASCII + 1; i < MAX_CHARSET; i++)
244 {
245 font_idx = fontsetp->font_indexes[i];
246 if (font_idx >= 0)
247 {
248 struct font_info *fontp2 = font_table + font_idx;
249
250 if (fontp2->size != fontp->size * CHARSET_WIDTH (i))
251 fontsetp->font_indexes[i] = FONT_NOT_OPENED;
252 else if (fontsetp->height < fontp->height)
253 fontsetp->height = fontp->height;
254 }
255 }
256 }
257 else if (fontsetp->height < fontp->height)
258 fontsetp->height = fontp->height;
259 }
260
261 return fontp;
262}
263
264/* Return ID of the fontset named NAME on frame F. */
265
266int
267fs_query_fontset (f, name)
268 FRAME_PTR f;
269 char *name;
270{
271 struct fontset_data *fontset_data = FRAME_FONTSET_DATA (f);
272 int i;
273
274 for (i = 0; i < fontset_data->n_fontsets; i++)
275 if (!my_strcasecmp(name, fontset_data->fontset_table[i]->name))
276 return i;
277 return -1;
278}
279
280/* Register a fontset specified by FONTSET_INFO for frame FRAME.
281 Return the fontset ID if successfully registered, else return -1.
282 FONTSET_INFO is a cons of name of the fontset and FONTLIST, where
283 FONTLIST is an alist of charsets vs fontnames. */
284
285int
286fs_register_fontset (f, fontset_info)
287 FRAME_PTR f;
288 Lisp_Object fontset_info;
289{
290 struct fontset_data *fontset_data = FRAME_FONTSET_DATA (f);
291 Lisp_Object name, fontlist;
292 int fontset;
293 struct fontset_info *fontsetp;
294 int i;
295
296 if (!CONSP (fontset_info)
297 || !STRINGP (XCONS (fontset_info)->car)
298 || !CONSP (XCONS (fontset_info)->cdr))
299 /* Invalid data in FONTSET_INFO. */
300 return -1;
301
302 name = XCONS (fontset_info)->car;
303 if ((fontset = fs_query_fontset (f, XSTRING (name)->data)) >= 0)
304 /* This fontset already exists on frame F. */
305 return fontset;
306
307 fontsetp = (struct fontset_info *) xmalloc (sizeof (struct fontset_info));
308
309 fontsetp->name = (char *) xmalloc (XSTRING (name)->size + 1);
310 bcopy(XSTRING (name)->data, fontsetp->name, XSTRING (name)->size + 1);
311
312 fontsetp->size = fontsetp->height = 0;
313
314 for (i = 0; i < MAX_CHARSET; i++)
315 {
316 fontsetp->fontname[i] = (char *) 0;
317 fontsetp->font_indexes[i] = FONT_NOT_OPENED;
318 }
319
320 for (fontlist = XCONS (fontset_info)->cdr; CONSP (fontlist);
321 fontlist = XCONS (fontlist)->cdr)
322 {
323 Lisp_Object tem = Fcar (fontlist);
324 int charset;
325
326 if (CONSP (tem)
327 && (charset = get_charset_id (XCONS (tem)->car)) >= 0
328 && STRINGP (XCONS (tem)->cdr))
329 {
330 fontsetp->fontname[charset]
331 = (char *) xmalloc (XSTRING (XCONS (tem)->cdr)->size + 1);
332 bcopy (XSTRING (XCONS (tem)->cdr)->data,
333 fontsetp->fontname[charset],
334 XSTRING (XCONS (tem)->cdr)->size + 1);
335 }
336 else
337 /* Broken or invalid data structure. */
338 return -1;
339 }
340
341 /* Do we need to create the table? */
342 if (fontset_data->fontset_table_size == 0)
343 {
344 fontset_data->fontset_table_size = 8;
345 fontset_data->fontset_table
346 = (struct fontset_info **) xmalloc (fontset_data->fontset_table_size
347 * sizeof (struct fontset_info *));
348 }
349 /* Do we need to grow the table? */
350 else if (fontset_data->n_fontsets >= fontset_data->fontset_table_size)
351 {
352 fontset_data->fontset_table_size += 8;
353 fontset_data->fontset_table
354 = (struct fontset_info **) xrealloc (fontset_data->fontset_table,
355 fontset_data->fontset_table_size
356 * sizeof (struct fontset_info *));
357 }
358 fontset = fontset_data->n_fontsets++;
359 fontset_data->fontset_table[fontset] = fontsetp;
360
361 return fontset;
362}
363
364/* Cache data used by fontset_pattern_regexp. The car part is a
365 pattern string containing at least one wild card, the cdr part is
366 the corresponding regular expression. */
367static Lisp_Object Vcached_fontset_data;
368
369#define CACHED_FONTSET_NAME (XSTRING (XCONS (Vcached_fontset_data)->car)->data)
370#define CACHED_FONTSET_REGEX (XCONS (Vcached_fontset_data)->cdr)
371
372/* If fontset name PATTERN contains any wild card, return regular
373 expression corresponding to PATTERN. */
374
375Lisp_Object
376fontset_pattern_regexp (pattern)
377 Lisp_Object pattern;
378{
379 int nickname = 0;
380
381 if (!index (XSTRING (pattern)->data, '*')
382 && !index (XSTRING (pattern)->data, '?'))
383 /* PATTERN does not contain any wild cards. */
384 {
385 if (XSTRING (pattern)->size > 8
386 && ! bcmp (XSTRING (pattern)->data, "fontset-", 8))
387 /* Just a nickname of a fontset is specified. */
388 nickname = 1;
389 else
390 return Qnil;
391 }
392
393 if (!CONSP (Vcached_fontset_data)
394 || strcmp (XSTRING (pattern)->data, CACHED_FONTSET_NAME))
395 {
396 /* We must at first update the cached data. */
397 char *regex = (char *) alloca (XSTRING (pattern)->size * 2 + 3);
398 char *p0, *p1 = regex;
399
400 if (nickname)
401 {
402 /* Just prepend ".*-" to PATTERN. */
403 *p1++= '.'; *p1++= '*', *p1++= '-';
404 bcopy (XSTRING (pattern)->data, p1, XSTRING (pattern)->size);
405 p1 += XSTRING (pattern)->size;
406 }
407 else
408 {
409 /* Convert "*" to ".*", "?" to ".". */
410 *p1++ = '^';
411 for (p0 = XSTRING (pattern)->data; *p0; p0++)
412 {
413 if (*p0 == '*')
414 {
415 *p1++ = '.';
416 *p1++ = '*';
417 }
418 else if (*p0 == '?')
419 *p1++ == '.';
420 else
421 *p1++ = *p0;
422 }
423 }
424 *p1++ = '$';
425 *p1++ = 0;
426
427 Vcached_fontset_data = Fcons (build_string (XSTRING (pattern)->data),
428 build_string (regex));
429 }
430
431 return CACHED_FONTSET_REGEX;
432}
433
434DEFUN ("query-fontset", Fquery_fontset, Squery_fontset, 1, 1, 0,
435 "Return a fontset name which matches PATTERN, nil if no matching fontset.\n\
436PATTERN can contain `*' or `?' as a wild card\n\
437just like X's font name matching algorithm allows.")
438 (pattern)
439 Lisp_Object pattern;
440{
441 Lisp_Object regexp, tem;
442
443 (*check_window_system_func) ();
444
445 CHECK_STRING (pattern, 0);
446
447 if (XSTRING (pattern)->size == 0)
448 return Qnil;
449
450 regexp = fontset_pattern_regexp (pattern);
451
452 for (tem = Vglobal_fontset_alist; CONSP (tem); tem = XCONS (tem)->cdr)
453 {
454 Lisp_Object fontset_name = XCONS (XCONS (tem)->car)->car;
455 if (!NILP (regexp))
456 {
457 if (fast_string_match_ignore_case (regexp,
458 XSTRING (fontset_name)->data)
459 >= 0)
460 return fontset_name;
461 }
462 else
463 {
464 if (!my_strcasecmp (XSTRING (pattern)->data,
465 XSTRING (fontset_name)->data))
466 return fontset_name;
467 }
468 }
469
470 return Qnil;
471}
472
473Lisp_Object Fframe_char_width ();
474
475/* Return a list of names of available fontsets matching PATTERN on
476 frame F. If SIZE is not 0, it is the size (maximum bound width) of
477 fontsets to be listed. */
478
479Lisp_Object
480list_fontsets (f, pattern, size)
481 FRAME_PTR f;
482 Lisp_Object pattern;
483 int size;
484{
485 int i;
486 Lisp_Object regexp, val;
487
488 regexp = fontset_pattern_regexp (pattern);
489
490 val = Qnil;
491 for (i = 0; i < FRAME_FONTSET_DATA (f)->n_fontsets; i++)
492 {
493 struct fontset_info *fontsetp = FRAME_FONTSET_DATA (f)->fontset_table[i];
494 int name_matched = 0;
495 int size_matched = 0;
496
497 if (!NILP (regexp))
498 {
499 if (fast_string_match_ignore_case (regexp, fontsetp->name) >= 0)
500 name_matched = 1;
501 }
502 else
503 {
504 if (!my_strcasecmp (XSTRING (pattern)->data, fontsetp->name))
505 name_matched = 1;
506 }
507
508 if (name_matched)
509 {
510 if (!size || fontsetp->size == size)
511 size_matched = 1;
512 else if (fontsetp->size == 0)
513 {
514 /* No font of this fontset has loaded yet. Try loading
515 one with SIZE. */
516 int j;
517
518 for (j = 0; j < MAX_CHARSET; j++)
519 if (fontsetp->fontname[j])
520 {
521 if ((*load_font_func) (f, fontsetp->fontname[j], size))
522 size_matched = 1;
523 break;
524 }
525 }
526
527 if (size_matched)
528 val = Fcons (build_string (fontsetp->name), val);
529 }
530 }
531
532 return val;
533}
534
535DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0,
536 "Create a new fontset NAME which contains fonts in FONTLIST.\n\
537FONTLIST is an alist of charsets vs corresponding font names.")
538 (name, fontlist)
539 Lisp_Object name, fontlist;
540{
541 Lisp_Object fullname, fontset_info;
542 Lisp_Object tail;
543
544 (*check_window_system_func) ();
545
546 CHECK_STRING (name, 0);
547 CHECK_LIST (fontlist, 1);
548
549 fullname = Fquery_fontset (name);
550 if (!NILP (fullname))
551 error ("Fontset \"%s\" matches the existing fontset \"%s\"",
552 XSTRING (name)->data, XSTRING (fullname)->data);
553
554 /* Check the validity of FONTLIST. */
555 for (tail = fontlist; CONSP (tail); tail = XCONS (tail)->cdr)
556 {
557 Lisp_Object tem = XCONS (tail)->car;
558 int charset;
559
560 if (!CONSP (tem)
561 || (charset = get_charset_id (XCONS (tem)->car)) < 0
562 || !STRINGP (XCONS (tem)->cdr))
563 error ("Elements of fontlist must be a cons of charset and font name");
564 }
565
566 fontset_info = Fcons (name, fontlist);
567 Vglobal_fontset_alist = Fcons (fontset_info, Vglobal_fontset_alist);
568
569 /* Register this fontset for all existing frames. */
570 {
571 Lisp_Object framelist, frame;
572
573 FOR_EACH_FRAME (framelist, frame)
574 if (!FRAME_TERMCAP_P (XFRAME (frame)))
575 fs_register_fontset (XFRAME (frame), fontset_info);
576 }
577
578 return Qnil;
579}
580
581extern Lisp_Object Fframe_parameters ();
582extern Lisp_Object Qfont;
583Lisp_Object Qfontset;
584
585DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 4, 0,
586 "Set FONTNAME for a font of CHARSET in fontset NAME on frame FRAME.\n\
587If FRAME is omitted or nil, all frames are affected.")
588 (name, charset_symbol, fontname, frame)
589 Lisp_Object name, charset_symbol, fontname, frame;
590{
591 int charset;
592 Lisp_Object fullname, fontlist;
593
594 (*check_window_system_func) ();
595
596 CHECK_STRING (name, 0);
597 CHECK_SYMBOL (charset_symbol, 1);
598 CHECK_STRING (fontname, 2);
599 if (!NILP (frame))
600 CHECK_LIVE_FRAME (frame, 3);
601
602 if ((charset = get_charset_id (charset_symbol)) < 0)
603 error ("Invalid charset: %s", XSYMBOL (charset_symbol)->name->data);
604
605 fullname = Fquery_fontset (name);
606 if (NILP (fullname))
607 error ("Fontset \"%s\" does not exist", XSTRING (name)->data);
608
609 /* If FRAME is not specified, we must, at first, update contents of
610 `global-fontset-alist' for a frame created in the future. */
611 if (NILP (frame))
612 {
613 Lisp_Object fontset_info = Fassoc (fullname, Vglobal_fontset_alist);
614 Lisp_Object tem = Fassq (charset, XCONS (fontset_info)->cdr);
615
616 if (NILP (tem))
617 XCONS (fontset_info)->cdr
618 = Fcons (Fcons (charset, fontname), XCONS (fontset_info)->cdr);
619 else
620 XCONS (tem)->cdr = fontname;
621 }
622
623 /* Then, update information in the specified frame or all existing
624 frames. */
625 {
626 Lisp_Object framelist, tem;
627
628 FOR_EACH_FRAME (framelist, tem)
629 if (!FRAME_TERMCAP_P (XFRAME (tem))
630 && (NILP (frame) || EQ (frame, tem)))
631 {
632 FRAME_PTR f = XFRAME (tem);
633 int fontset = fs_query_fontset (f, XSTRING (fullname)->data);
634 struct fontset_info *fontsetp
635 = FRAME_FONTSET_DATA (f)->fontset_table[fontset];
636
637 if (fontsetp->fontname[XINT (charset)])
638 xfree (fontsetp->fontname[XINT (charset)]);
639 fontsetp->fontname[XINT (charset)]
640 = (char *) xmalloc (XSTRING (fontname)->size + 1);
641 bcopy (XSTRING (fontname)->data, fontsetp->fontname[XINT (charset)],
642 XSTRING (fontname)->size + 1);
643 fontsetp->font_indexes[XINT (charset)] = FONT_NOT_OPENED;
644
645 if (charset == CHARSET_ASCII)
646 {
647 Lisp_Object font_param = Fassq (Qfont, Fframe_parameters (tem));
648
649 if (set_frame_fontset_func
650 && !NILP (font_param)
651 && !strcmp (XSTRING (fullname)->data,
652 XSTRING (XCONS (font_param)->cdr)->data))
653 /* This fontset is the default fontset on frame TEM.
654 We may have to resize this frame because of new
655 ASCII font. */
656 (*set_frame_fontset_func) (f, fullname, Qnil);
657 }
658 }
659 }
660
661 return Qnil;
662}
663
664DEFUN ("font-info", Ffont_info, Sfont_info, 1, 2, 0,
665 "Return information about a font named NAME on frame FRAME.\n\
666If FRAME is omitted or nil, use the selected frame.\n\
667The returned value is a vector of OPENED-NAME, FULL-NAME, CHARSET, SIZE,\n\
668 HEIGHT, BASELINE-OFFSET, and RELATIVE-COMPOSE,\n\
669where\n\
670 OPENED-NAME is the name used for opening the font,\n\
671 FULL-NAME is the full name of the font,\n\
672 CHARSET is the charset displayed by the font,\n\
673 SIZE is the minimum bound width of the font,\n\
674 HEIGHT is the height of the font,\n\
675 BASELINE-OFFSET is the upward offset pixels from ASCII baseline,\n\
676 RELATIVE-COMPOSE is the number controlling how to compose characters.\n\
677If the named font is not yet loaded, return nil.")
678 (name, frame)
679 Lisp_Object name, frame;
680{
681 FRAME_PTR f;
682 struct font_info *fontp;
683 Lisp_Object info;
684
685 (*check_window_system_func) ();
686
687 CHECK_STRING (name, 0);
688 if (NILP (frame))
689 f = selected_frame;
690 else
691 {
692 CHECK_LIVE_FRAME (frame, 1);
693 f = XFRAME (frame);
694 }
695
696 if (!query_font_func)
697 error ("Font query function is not supported");
698
699 fontp = (*query_font_func) (f, XSTRING (name)->data);
700 if (!fontp)
701 return Qnil;
702
703 info = Fmake_vector (make_number (6), Qnil);
704
705 XVECTOR (info)->contents[0] = build_string (fontp->name);
706 XVECTOR (info)->contents[1] = build_string (fontp->full_name);
707 XVECTOR (info)->contents[2] = CHARSET_SYMBOL (fontp->charset);
708 XVECTOR (info)->contents[3] = make_number (fontp->size);
709 XVECTOR (info)->contents[4] = make_number (fontp->height);
710 XVECTOR (info)->contents[5] = make_number (fontp->baseline_offset);
711 XVECTOR (info)->contents[6] = make_number (fontp->relative_compose);
712
713 return info;
714}
715
716DEFUN ("fontset-info", Ffontset_info, Sfontset_info, 1, 2, 0,
717 "Return information about a fontset named NAME on frame FRAME.\n\
718If FRAME is omitted or nil, use the selected frame.\n\
719The returned value is a vector of SIZE, HEIGHT, and FONT-LIST,\n\
720where\n\
721 SIZE is the minimum bound width of ASCII font of the fontset,\n\
722 HEIGHT is the height of the tallest font in the fontset, and\n\
723 FONT-LIST is an alist of the format:\n\
724 (CHARSET REQUESTED-FONT-NAME LOADED-FONT-NAME).\n\
725LOADED-FONT-NAME t means the font is not yet loaded, nil means the\n\
726loading failed.")
727 (name, frame)
728 Lisp_Object name, frame;
729{
730 FRAME_PTR f;
731 int fontset;
732 struct fontset_info *fontsetp;
733 Lisp_Object info, val;
734 int i;
735
736 (*check_window_system_func) ();
737
738 CHECK_STRING(name, 0);
739 if (NILP (frame))
740 f = selected_frame;
741 else
742 {
743 CHECK_LIVE_FRAME (frame, 1);
744 f = XFRAME (frame);
745 }
746
747 fontset = fs_query_fontset (f, XSTRING (name)->data);
748 if (fontset < 0)
749 error ("Fontset \"%s\" does not exist", XSTRING (name)->data);
750
751 info = Fmake_vector (make_number (3), Qnil);
752
753 fontsetp = FRAME_FONTSET_DATA (f)->fontset_table[fontset];
754
755 XVECTOR (info)->contents[0] = make_number (fontsetp->size);
756 XVECTOR (info)->contents[1] = make_number (fontsetp->height);
757 val = Qnil;
758 for (i = 0; i < MAX_CHARSET; i++)
759 if (fontsetp->fontname[i])
760 {
761 int font_idx = fontsetp->font_indexes[i];
762 Lisp_Object loaded;
763
764 if (font_idx == FONT_NOT_OPENED)
765 loaded = Qt;
766 else if (font_idx == FONT_NOT_FOUND)
767 loaded = Qnil;
768 else
769 loaded
770 = build_string ((*get_font_info_func) (f, font_idx)->full_name);
771 val = Fcons (Fcons (CHARSET_SYMBOL (i),
772 Fcons (build_string (fontsetp->fontname[i]),
773 Fcons (loaded, Qnil))),
774 val);
775 }
776 XVECTOR (info)->contents[2] = val;
777 return info;
778}
779
780syms_of_fontset ()
781{
782 int i;
783
784 for (i = 0; i < 256; i++)
785 my_strcasetbl[i] = (i >= 'A' && i <= 'Z') ? i + 'a' - 'A' : i;
786
787 if (!load_font_func)
788 /* Window system initializer should have set proper functions. */
789 abort ();
790
791 staticpro (&Qfontset);
792
793 Vcached_fontset_data = Qnil;
794 staticpro (&Vcached_fontset_data);
795
796 DEFVAR_LISP ("global-fontset-alist", &Vglobal_fontset_alist,
797 "Internal data for fontset. Not for external use.\n\
798This is an alist associating fontset names with the lists of fonts\n\
799 contained in them.\n\
800Newly created frames make their own fontset database from here.");
801 Vglobal_fontset_alist = Qnil;
802
803 DEFVAR_LISP ("font-encoding-alist", &Vfont_encoding_alist,
804 "Alist of fontname patterns vs corresponding encoding info.\n\
805Each element looks like (REGEXP . ENCODING-INFO),\n\
806 where ENCODING-INFO is an alist of CHARSET vs ENCODING.\n\
807ENCODING is one of the following integer values:\n\
808 0: code points 0x20..0x7F or 0x2020..0x7F7F are used,\n\
809 1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used,\n\
810 2: code points 0x20A0..0x7FFF are used,\n\
811 3: code points 0xA020..0xFF7F are used.");
812 Vfont_encoding_alist = Qnil;
813
814 defsubr (&Squery_fontset);
815 defsubr (&Snew_fontset);
816 defsubr (&Sset_fontset_font);
817 defsubr (&Sfont_info);
818 defsubr (&Sfontset_info);
819}
diff --git a/src/fontset.h b/src/fontset.h
new file mode 100644
index 00000000000..902f1691d6f
--- /dev/null
+++ b/src/fontset.h
@@ -0,0 +1,201 @@
1/* Header for fontset handler.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21#ifndef _FONTSET_H
22#define _FONTSET_H
23
24/*
25
26#define GENERIC_FONT_PTR void
27
28/* This data type is used for the font_table field of window system
29 depending data area (e.g. struct x_display_info on X window). */
30
31struct font_info
32{
33 /* Pointer to window system dependent font structure. On X window,
34 this value should be coerced to (XFontStruct *). */
35 void *font;
36
37 /* Index number of the font. */
38 int font_idx;
39
40 /* Name to be used to find the font. */
41 char *name;
42
43 /* Full name of the font given by a window system. */
44 char *full_name;
45
46 /* Charset of characters displayed by the font. */
47 int charset;
48
49 /* Maximum bound width over all existing characters of the font. On
50 X window, this is same as (font->max_bounds.width) */
51 int size;
52
53 /* Height of the font. On X window, this is same as (font->ascent
54 + font->descent). */
55 int height;
56
57 /* Encodings of the font indexed by CHARSET. The value an integer
58 0, 1, 2, or 3:
59 0: code points 0x20..0x7F or 0x2020..0x7F7F are used
60 1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used
61 2: code points 0x20A0..0x7FFF are used
62 3: code points 0xA020..0xFF7F are used
63 For instance, ASCII and Latin-1 characters may use the same font
64 but different code points (ASCII uses 0x20..0x7F and Latin-1 uses
65 0xA0..0xFF).
66
67 If the value can't be decided from information of the font, we
68 consult `font-encoding-alist' to get of the corresponding charset
69 whose default value is defined in lisp/fontset.el. Since there's
70 no charset whose id is 1, we use encoding[1] to store the
71 encoding information decided by the font itself. */
72 char encoding[MAX_CHARSET];
73
74 /* The baseline position of a font is normally `ascent' value of the
75 font. However, there exists many fonts which don't set `ascent'
76 an appropriate value to be used as baseline position. This is
77 typical in such ASCII fonts which are designed to be used with
78 Chinese, Japanese, Korean characters. When we use mixture of
79 such fonts and normal fonts (having correct `ascent' value), a
80 display line gets very ugly. Since we have no way to fix it
81 automatically, it is users responsibility to supply well designed
82 fonts or correct `ascent' value of fonts. But, the latter
83 requires heavy work (modifying all bitmap data in BDF files).
84 So, Emacs accepts a private font property
85 `_MULE_BASELINE_OFFSET'. If a font has this property, we
86 calculate the baseline position by subtracting the value from
87 `ascent'. In other words, the value indicates how many bits
88 higher we should draw a character of the font than normal ASCII
89 text for a better looking.
90
91 We also have to consider the fact that the concept of `baseline'
92 differs among languages to which each character belongs. For
93 instance, baseline should be at the bottom most position of all
94 glyphs for Chinese, Japanese, and Korean. But, many of existing
95 fonts for those characters doesn't have correct `ascent' values
96 because they are designed to be used with ASCII fonts. To
97 display characters of different language on the same line, the
98 best way will be to arrange them in the middle of the line. So,
99 in such a case, again, we utilize the font property
100 `_MULE_BASELINE_OFFSET'. If the value is larger than `ascent' we
101 calculate baseline so that a character is arranged in the middle
102 of a line. */
103
104 int baseline_offset;
105
106 /* Non zero means a character should be composed at a position
107 relative to the height (or depth) of previous glyphs in the
108 following cases:
109 (1) The bottom of the character is higher than this value. In
110 this case, the character is drawn above the previous glyphs.
111 (2) The top of the character is lower than 0 (i.e. baseline
112 height). In this case, the character is drawn beneath the
113 previous glyphs.
114
115 This value is take from a private font property
116 `_MULE_RELATIVE_COMPOSE' which is introduced by Emacs. */
117 int relative_compose;
118
119 /* CCL program to calculate code points of the font. */
120 struct ccl_program *font_encoder;
121};
122
123#define FONT_NOT_OPENED -1
124#define FONT_NOT_FOUND -2
125
126struct fontset_info
127{
128 /* Name of the fontset. */
129 char *name;
130
131 /* Size of the fontset. This is the same as the size of ASCII font
132 of this fontset. */
133 int size;
134
135 /* Height of the tallest font in the fontset. */
136 int height;
137
138 /* Table of font name for each character set. */
139 char *fontname[MAX_CHARSET];
140
141 /* Table of index numbers of fonts indexed by charset. If a font is
142 not yet loaded, the value is -1 (FONT_NOT_OPENED). If font
143 loading is failed, the value is -2 (FONT_NOT_FOUND). */
144 int font_indexes[MAX_CHARSET];
145};
146
147/* This data type is used for the fontset_data field of struct frame. */
148
149struct fontset_data
150{
151 /* A table of pointers to all the fontsets. */
152 struct fontset_info **fontset_table;
153
154 /* The current capacity of fontset_table. */
155 int fontset_table_size;
156
157 /* The number of fontsets actually stored in fontset_table.
158 fontset_table[n] is used and valid iff 0 <= n < n_fontsets.
159 0 <= n_fontsets <= fontset_table_size. */
160 int n_fontsets;
161};
162
163/* The following six are window system dependent functions.
164 Initialization routine of each window system should set appropriate
165 functions to these variables. For instance, in case of X window,
166 x_term_init does this. */
167
168/* Return a pointer to struct font_info of font FONT_IDX of frame F. */
169extern struct font_info *(*get_font_info_func) (/* FRAME_PTR f;
170 int font_idx */);
171
172/* Return a list of font names which matches PATTERN. See the document of
173 `x-list-fonts' for more detail. */
174extern Lisp_Object (*list_fonts_func) (/* Lisp_Object pattern, face, frame,
175 width */);
176
177/* Load a font named NAME for frame F and return a pointer to the
178 information of the loaded font. If loading is failed, return -1. */
179extern struct font_info *(*load_font_func) (/* FRAME_PTR f; char *name */);
180
181/* Return a pointer to struct font_info of a font named NAME for frame F.
182 If no such font is loaded, return NULL. */
183extern struct font_info *(*query_font_func) (/* FRAME_PTR f; char *name */);
184
185/* Additional function for setting fontset or changing fontset
186 contents of frame F. This function may change the coordinate of
187 the frame. */
188extern void (*set_frame_fontset_func) (/* FRAME_PTR f; Lisp_Object arg, oldval */);
189
190/* Check if any window system is used now. */
191extern void (*check_window_system_func) ();
192
193extern struct fontset_data *alloc_fontset_data ();
194extern void free_fontset_data ();
195extern struct font_info *fs_load_font ();
196extern Lisp_Object list_fontsets ();
197extern Lisp_Object Vglobal_fontset_alist;
198
199extern Lisp_Object Qfontset;
200
201#endif /* _FONTSET_H */