aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2002-03-01 02:22:38 +0000
committerKenichi Handa2002-03-01 02:22:38 +0000
commitc184177262bc93a25ae7c781dcabbcc1ba213b04 (patch)
treec9486650246da0f5b26d8ae6ebaaa6370df695c5
parent2c390c27dcc603e16c247a992419d4f0b207f113 (diff)
downloademacs-c184177262bc93a25ae7c781dcabbcc1ba213b04.tar.gz
emacs-c184177262bc93a25ae7c781dcabbcc1ba213b04.zip
(char-valid-p): Make it an alias of characterp.
(define-charset): Fully re-designed. (charset-quoted-standard-p): Deleted. (charsetp): Moved to charset.c. (charset-info, charset-id, charset-bytes, charset-width, charset-directioin, charset-iso-graphic-plane, charset-reverse-charset): Deleted. (charset-dimension, charset-chars, charset-iso-final-char, charset-description, charset-short-name, charset-long-name): Call charset-plist instead of charset-info. (charset-plist, set-charset-plist): Moved to charset.c. (get-charset-property, put-charset-property): Moved from mule-cmds.el. Call charset-plist and set-charset-plist. (make-char): Deleted. (generic-char-p): Make it always return nil. (decode-char, encode-char): Moved to charset.c. (coding-spec-XXX-idx): Variables deleted. (coding-system-iso-2022-flags): New variable. (define-coding-system): New function. (transform-make-coding-system-args, make-coding-system): Deleted. (set-coding-priority): Make it obsolete. (after-insert-file-set-buffer-file-coding-system): Adjusted for the new coding system structure. (find-new-buffer-file-coding-system): Likewise.
-rw-r--r--lisp/international/mule.el1411
1 files changed, 500 insertions, 911 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 8235ce58e65..47e18a91b9d 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -3,6 +3,9 @@
3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4;; Licensed to the Free Software Foundation. 4;; Licensed to the Free Software Foundation.
5;; Copyright (C) 2001 Free Software Foundation, Inc. 5;; Copyright (C) 2001 Free Software Foundation, Inc.
6;; Copyright (C) 2001, 2002
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009
6 9
7;; Keywords: mule, multilingual, character set, coding system 10;; Keywords: mule, multilingual, character set, coding system
8 11
@@ -27,12 +30,165 @@
27 30
28;;; Code: 31;;; Code:
29 32
30(defconst mule-version "5.0 (SAKAKI)" "\ 33(defconst mule-version "7.0 (SAKAKI)" "\
31Version number and name of this version of MULE (multilingual environment).") 34Version number and name of this version of MULE (multilingual environment).")
32 35
33(defconst mule-version-date "1999.12.7" "\ 36(defconst mule-version-date "2002.2.28" "\
34Distribution date of this version of MULE (multilingual environment).") 37Distribution date of this version of MULE (multilingual environment).")
35 38
39
40
41;;; CHARACTER
42(defalias 'char-valid-p 'characterp)
43(make-obsolete 'char-valid-p 'characterp "22.1")
44
45
46;;; CHARSET
47
48(defun define-charset (name docstring &rest props)
49 "Define NAME (symbol) as a charset with DOCSTRING.
50The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
51may be any symbol. The followings have special meanings, and one of
52`:code-offset', `:map', `:parents' must be specified.
53
54`:short-name'
55
56VALUE must be a short string to identify the charset. If omitted,
57NAME is used.
58
59`:long-name'
60
61VALUE must be a string longer than `:short-name' to identify the
62charset. If omitted, the value of `:short-name' attribute is used.
63
64`:dimension'
65
66VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
67code-points of the charsets. If omitted, it is calculated from a
68value of `:code-space' attribute.
69
70`:code-space'
71
72VALUE must be a vector of length at most 8 specifying the byte code
73range of each dimension in this format:
74 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
75where, MIN-N is the minimum byte value of Nth dimension of code-point,
76MAX-N is the maximum byte value of that.
77
78`:iso-final-char'
79
80VALUE must be a character in the range 32 to 127 (inclusive)
81specifying the final char of the charset for ISO-2022 encoding. If
82omitted, the charset can't be encoded by ISO-2022 based
83coding-systems.
84
85`:iso-revision-number'
86
87VALUE must be an integer in the range 0..63, specifying the revision
88number of the charset for ISO-2022 encoding.
89
90`:emacs-mule-id'
91
92VALUE must be an integer of 0, 128..255. If omitted, the charset
93can't be encoded by coding-systems of type `emacs-mule'.
94
95`:ascii-compatible-p'
96
97VALUE must be nil or t. If the VALUE is nil, the charset is a not
98compatible with ASCII. The default value is nil.
99
100`:supplementary-p'
101
102VALUE must be nil or t. If the VALUE is t, the charset is
103supplementary, which means the charset is used only as a parent of
104some other charset.
105
106`:invalid-code'
107
108VALUE must be a nonnegative integer that can be used as an invalid
109code point of the charset. If the minimum code is 0 and the maximum
110code is greater than Emacs' maximum integer value, `:invalid-code'
111should not be omitted.
112
113`:code-offset'
114
115VALUE must be an integer added to an index number of character to get
116the corresponding character code.
117
118`:map'
119
120VALUE must be vector or string.
121
122If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
123where CODE-n is a code-point of the charset, and CHAR-n is the
124corresponding charcter code.
125
126If it is a string, it is a name of file that contains the above
127information.
128
129`:parents'
130
131VALUE must be a list of parent charsets. The charset inherits
132characters from them. Each element of the list may be a cons (PARENT
133. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
134value to add to a code point of this charset to get the corresponding
135code point of PARENT.
136
137`:unify-map'
138
139VALUE must be vector or string.
140
141If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
142where CODE-n is a code-point of the charset, and CHAR-n is the
143corresponding unified charcter code.
144
145If it is a string, it is a name of file that contains the above
146information."
147 (let ((attrs (mapcar 'list '(:dimension
148 :code-space
149 :iso-final-char
150 :iso-revision-number
151 :emacs-mule-id
152 :ascii-compatible-p
153 :supplementary-p
154 :invalid-code
155 :code-offset
156 :map
157 :parents
158 :unify-map
159 :plist))))
160
161 ;; If :dimension is omitted, get the dimension from :code-space.
162 (let ((dimension (plist-get props :dimension)))
163 (or dimension
164 (progn
165 (setq dimension (/ (length (plist-get props :code-space)) 2))
166 (setq props (plist-put props :dimension dimension)))))
167
168 (dolist (slot attrs)
169 (setcdr slot (plist-get props (car slot))))
170
171 ;; Make sure that the value of :code-space is a vector of 8
172 ;; elements.
173 (let* ((slot (assq :code-space attrs))
174 (val (cdr slot))
175 (len (length val)))
176 (if (< len 8)
177 (setcdr slot
178 (vconcat val (make-vector (- 8 len) 0)))))
179
180 ;; Add :name and :docstring properties to PROPS.
181 (setq props
182 (cons :name (cons name (cons :docstring (cons docstring props)))))
183 (or (plist-get props :short-name)
184 (plist-put props :short-name (symbol-name name)))
185 (or (plist-get props :long-name)
186 (plist-put props :long-name (plist-get props :short-name)))
187 (setcdr (assq :plist attrs) props)
188
189 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
190
191
36(defun load-with-code-conversion (fullname file &optional noerror nomessage) 192(defun load-with-code-conversion (fullname file &optional noerror nomessage)
37 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. 193 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
38The file contents are decoded before evaluation if necessary. 194The file contents are decoded before evaluation if necessary.
@@ -103,190 +259,46 @@ Return t if file exists."
103 259
104;; API (Application Program Interface) for charsets. 260;; API (Application Program Interface) for charsets.
105 261
106(defsubst charset-quoted-standard-p (obj) 262;;; Charset property
107 "Return t if OBJ is a quoted symbol, and is the name of a standard charset." 263
108 (and (listp obj) (eq (car obj) 'quote) 264(defun get-charset-property (charset propname)
109 (symbolp (car-safe (cdr obj))) 265 "Return the value of CHARSET's PROPNAME property.
110 (let ((vector (get (car-safe (cdr obj)) 'charset))) 266This is the last value stored with
111 (and (vectorp vector) 267 (put-charset-property CHARSET PROPNAME VALUE)."
112 (< (aref vector 0) 160))))) 268 (plist-get (charset-plist charset) propname))
113 269
114(defsubst charsetp (object) 270(defun put-charset-property (charset propname value)
115 "T if OBJECT is a charset." 271 "Store CHARSETS's PROPNAME property with value VALUE.
116 (and (symbolp object) (vectorp (get object 'charset)))) 272It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
117 273 (set-charset-plist charset
118(defsubst charset-info (charset) 274 (plist-put (charset-plist charset) propname value)))
119 "Return a vector of information of CHARSET. 275
120The elements of the vector are: 276
121 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, 277(defun charset-description (charset)
122 LEADING-CODE-BASE, LEADING-CODE-EXT, 278 "Return description string of CHARSET."
123 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE, 279 (plist-get (charset-plist charset) :docstring))
124 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION, 280
125 PLIST, 281(defun charset-dimension (charset)
126where 282 "Return dimension string of CHARSET."
127CHARSET-ID (integer) is the identification number of the charset. 283 (plist-get (charset-plist charset) :dimension))
128BYTES (integer) is the length of multi-byte form of a character in 284
129 the charset: one of 1, 2, 3, and 4. 285(defun charset-chars (charset)
130DIMENSION (integer) is the number of bytes to represent a character of 286 "Return character numbers contained in a dimension of CHARSET."
131the charset: 1 or 2. 287 (let ((code-space (plist-get (cahrset-plist charset) :code-space)))
132CHARS (integer) is the number of characters in a dimension: 94 or 96. 288 (1+ (- (aref code-space 1) (aref code-space 0)))))
133WIDTH (integer) is the number of columns a character in the charset 289
134 occupies on the screen: one of 0, 1, and 2. 290(defun charset-iso-final-char (charset)
135DIRECTION (integer) is the rendering direction of characters in the 291 "Return final char of CHARSET."
136 charset when rendering. If 0, render from left to right, else 292 (or (plist-get (charset-plist charset) :iso-final-char)
137 render from right to left. 293 -1))
138LEADING-CODE-BASE (integer) is the base leading-code for the
139 charset.
140LEADING-CODE-EXT (integer) is the extended leading-code for the
141 charset. All charsets of less than 0xA0 has the value 0.
142ISO-FINAL-CHAR (character) is the final character of the
143 corresponding ISO 2022 charset. If the charset is not assigned
144 any final character, the value is -1.
145ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
146 while encoding to variants of ISO 2022 coding system, one of the
147 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
148 If the charset is not assigned any final character, the value is -1.
149REVERSE-CHARSET (integer) is the charset which differs only in
150 LEFT-TO-RIGHT value from the charset. If there's no such a
151 charset, the value is -1.
152SHORT-NAME (string) is the short name to refer to the charset.
153LONG-NAME (string) is the long name to refer to the charset
154DESCRIPTION (string) is the description string of the charset.
155PLIST (property list) may contain any type of information a user
156 want to put and get by functions `put-charset-property' and
157 `get-charset-property' respectively."
158 (get charset 'charset))
159
160;; It is better not to use backquote in this file,
161;; because that makes a bootstrapping problem
162;; if you need to recompile all the Lisp files using interpreted code.
163
164(defmacro charset-id (charset)
165 "Return charset identification number of CHARSET."
166 (if (charset-quoted-standard-p charset)
167 (aref (charset-info (nth 1 charset)) 0)
168 (list 'aref (list 'charset-info charset) 0)))
169
170(defmacro charset-bytes (charset)
171 "Return bytes of CHARSET.
172See the function `charset-info' for more detail."
173 (if (charset-quoted-standard-p charset)
174 (aref (charset-info (nth 1 charset)) 1)
175 (list 'aref (list 'charset-info charset) 1)))
176
177(defmacro charset-dimension (charset)
178 "Return dimension of CHARSET.
179See the function `charset-info' for more detail."
180 (if (charset-quoted-standard-p charset)
181 (aref (charset-info (nth 1 charset)) 2)
182 (list 'aref (list 'charset-info charset) 2)))
183
184(defmacro charset-chars (charset)
185 "Return character numbers contained in a dimension of CHARSET.
186See the function `charset-info' for more detail."
187 (if (charset-quoted-standard-p charset)
188 (aref (charset-info (nth 1 charset)) 3)
189 (list 'aref (list 'charset-info charset) 3)))
190
191(defmacro charset-width (charset)
192 "Return width (how many column occupied on a screen) of CHARSET.
193See the function `charset-info' for more detail."
194 (if (charset-quoted-standard-p charset)
195 (aref (charset-info (nth 1 charset)) 4)
196 (list 'aref (list 'charset-info charset) 4)))
197
198(defmacro charset-direction (charset)
199 "Return direction of CHARSET.
200See the function `charset-info' for more detail."
201 (if (charset-quoted-standard-p charset)
202 (aref (charset-info (nth 1 charset)) 5)
203 (list 'aref (list 'charset-info charset) 5)))
204
205(defmacro charset-iso-final-char (charset)
206 "Return final char of CHARSET.
207See the function `charset-info' for more detail."
208 (if (charset-quoted-standard-p charset)
209 (aref (charset-info (nth 1 charset)) 8)
210 (list 'aref (list 'charset-info charset) 8)))
211
212(defmacro charset-iso-graphic-plane (charset)
213 "Return graphic plane of CHARSET.
214See the function `charset-info' for more detail."
215 (if (charset-quoted-standard-p charset)
216 (aref (charset-info (nth 1 charset)) 9)
217 (list 'aref (list 'charset-info charset) 9)))
218
219(defmacro charset-reverse-charset (charset)
220 "Return reverse charset of CHARSET.
221See the function `charset-info' for more detail."
222 (if (charset-quoted-standard-p charset)
223 (aref (charset-info (nth 1 charset)) 10)
224 (list 'aref (list 'charset-info charset) 10)))
225 294
226(defmacro charset-short-name (charset) 295(defmacro charset-short-name (charset)
227 "Return short name of CHARSET. 296 "Return short name of CHARSET."
228See the function `charset-info' for more detail." 297 (plist-get (charset-plist charset) :short-name))
229 (if (charset-quoted-standard-p charset)
230 (aref (charset-info (nth 1 charset)) 11)
231 (list 'aref (list 'charset-info charset) 11)))
232 298
233(defmacro charset-long-name (charset) 299(defmacro charset-long-name (charset)
234 "Return long name of CHARSET. 300 "Return long name of CHARSET."
235See the function `charset-info' for more detail." 301 (plist-get (charset-plist charset) :long-name))
236 (if (charset-quoted-standard-p charset)
237 (aref (charset-info (nth 1 charset)) 12)
238 (list 'aref (list 'charset-info charset) 12)))
239
240(defmacro charset-description (charset)
241 "Return description of CHARSET.
242See the function `charset-info' for more detail."
243 (if (charset-quoted-standard-p charset)
244 (aref (charset-info (nth 1 charset)) 13)
245 (list 'aref (list 'charset-info charset) 13)))
246
247(defmacro charset-plist (charset)
248 "Return list charset property of CHARSET.
249See the function `charset-info' for more detail."
250 (list 'aref
251 (if (charset-quoted-standard-p charset)
252 (charset-info (nth 1 charset))
253 (list 'charset-info charset))
254 14))
255
256(defun set-charset-plist (charset plist)
257 "Set CHARSET's property list to PLIST, and return PLIST."
258 (aset (charset-info charset) 14 plist))
259
260(defun make-char (charset &optional code1 code2)
261 "Return a character of CHARSET whose position codes are CODE1 and CODE2.
262CODE1 and CODE2 are optional, but if you don't supply
263sufficient position codes, return a generic character which stands for
264all characters or group of characters in the character set.
265A generic character can be used to index a char table (e.g. syntax-table).
266
267Such character sets as ascii, eight-bit-control, and eight-bit-graphic
268don't have corresponding generic characters. If CHARSET is one of
269them and you don't supply CODE1, return the character of the smallest
270code in CHARSET.
271
272If CODE1 or CODE2 are invalid (out of range), this function signals an
273error. However, the eighth bit of both CODE1 and CODE2 is zeroed
274before they are used to index CHARSET. Thus you may use, say, the
275actual ISO 8859 character code rather than subtracting 128, as you
276would need to index the corresponding Emacs charset."
277 (make-char-internal (charset-id charset) code1 code2))
278
279(put 'make-char 'byte-compile
280 (function
281 (lambda (form)
282 (let ((charset (nth 1 form)))
283 (if (charset-quoted-standard-p charset)
284 (byte-compile-normal-call
285 (cons 'make-char-internal
286 (cons (charset-id (nth 1 charset)) (nthcdr 2 form))))
287 (byte-compile-normal-call
288 (cons 'make-char-internal
289 (cons (list 'charset-id charset) (nthcdr 2 form)))))))))
290 302
291(defun charset-list () 303(defun charset-list ()
292 "Return list of charsets ever defined. 304 "Return list of charsets ever defined.
@@ -295,152 +307,314 @@ This function is provided for backward compatibility.
295Now we have the variable `charset-list'." 307Now we have the variable `charset-list'."
296 charset-list) 308 charset-list)
297 309
298(defsubst generic-char-p (char) 310(defun generic-char-p (char)
299 "Return t if and only if CHAR is a generic character. 311 "Always return nil. This exists only for backward compatibility."
300See also the documentation of `make-char'." 312 nil)
301 (and (>= char 0400)
302 (let ((l (split-char char)))
303 (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0))
304 (not (eq (car l) 'composition))))))
305
306(defun decode-char (ccs code-point &optional restriction)
307 "Return character specified by coded character set CCS and CODE-POINT in it.
308Return nil if such a character is not supported.
309Currently the only supported coded character set is `ucs' (ISO/IEC
31010646: Universal Multi-Octet Coded Character Set).
311
312Optional argument RESTRICTION specifies a way to map the pair of CCS
313and CODE-POINT to a character. Currently not supported and just ignored."
314 (cond ((eq ccs 'ucs)
315 (cond ((< code-point 160)
316 code-point)
317 ((< code-point 256)
318 (make-char 'latin-iso8859-1 code-point))
319 ((< code-point #x2500)
320 (setq code-point (- code-point #x0100))
321 (make-char 'mule-unicode-0100-24ff
322 (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
323 ((< code-point #x3400)
324 (setq code-point (- code-point #x2500))
325 (make-char 'mule-unicode-2500-33ff
326 (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
327 ((and (>= code-point #xe000) (< code-point #x10000))
328 (setq code-point (- code-point #xe000))
329 (make-char 'mule-unicode-e000-ffff
330 (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
331 ))))
332
333(defun encode-char (char ccs &optional restriction)
334 "Return code-point in coded character set CCS that corresponds to CHAR.
335Return nil if CHAR is not included in CCS.
336Currently the only supported coded character set is `ucs' (ISO/IEC
33710646: Universal Multi-Octet Coded Character Set).
338
339CHAR should be in one of these charsets:
340 ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
341 mule-unicode-e000-ffff, eight-bit-control
342Otherwise, return nil.
343
344Optional argument RESTRICTION specifies a way to map CHAR to a
345code-point in CCS. Currently not supported and just ignored."
346 (let* ((split (split-char char))
347 (charset (car split)))
348 (cond ((eq ccs 'ucs)
349 (cond ((eq charset 'ascii)
350 char)
351 ((eq charset 'latin-iso8859-1)
352 (+ (nth 1 split) 128))
353 ((eq charset 'mule-unicode-0100-24ff)
354 (+ #x0100 (+ (* (- (nth 1 split) 32) 96)
355 (- (nth 2 split) 32))))
356 ((eq charset 'mule-unicode-2500-33ff)
357 (+ #x2500 (+ (* (- (nth 1 split) 32) 96)
358 (- (nth 2 split) 32))))
359 ((eq charset 'mule-unicode-e000-ffff)
360 (+ #xe000 (+ (* (- (nth 1 split) 32) 96)
361 (- (nth 2 split) 32))))
362 ((eq charset 'eight-bit-control)
363 char))))))
364
365 313
366;; Coding system stuff 314;; Coding system stuff
367 315
368;; Coding system is a symbol that has the property `coding-system'. 316;; Coding system is a symbol that has been defined by the function
369;; 317;; `define-coding-system'.
370;; The value of the property `coding-system' is a vector of the
371;; following format:
372;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
373;; We call this vector as coding-spec. See comments in src/coding.c
374;; for more detail.
375
376(defconst coding-spec-type-idx 0)
377(defconst coding-spec-mnemonic-idx 1)
378(defconst coding-spec-doc-string-idx 2)
379(defconst coding-spec-plist-idx 3)
380(defconst coding-spec-flags-idx 4)
381
382;; PLIST is a property list of a coding system. To share PLIST among
383;; alias coding systems, a coding system has PLIST in coding-spec
384;; instead of having it in normal property list of Lisp symbol.
385;; Here's a list of coding system properties currently being used.
386;;
387;; o coding-category
388;;
389;; The value is a coding category the coding system belongs to. The
390;; function `make-coding-system' sets this value automatically
391;; unless its argument PROPERTIES specifies this property.
392;;
393;; o alias-coding-systems
394;;
395;; The value is a list of coding systems of the same alias group. The
396;; first element is the coding system made at first, which we call as
397;; `base coding system'. The function `make-coding-system' sets this
398;; value automatically and `define-coding-system-alias' updates it.
399;;
400;; See the documentation of make-coding-system for the meanings of the
401;; following properties.
402;;
403;; o post-read-conversion
404;; o pre-write-conversion
405;; o translation-table-for-decode
406;; o translation-table-for-encode
407;; o safe-chars
408;; o safe-charsets
409;; o mime-charset
410;; o valid-codes (meaningful only for a coding system based on CCL)
411 318
319(defconst coding-system-iso-2022-flags
320 '(long-form
321 ascii-at-eol
322 ascii-at-cntl
323 7-bit
324 locking-shift
325 single-shift
326 designation
327 revision
328 direction
329 init-at-bol
330 designate-at-bol
331 safe
332 latin-extra
333 composition
334 euc-tw-shift)
335 "List of symbols that control ISO-2022 encoder/decoder.
412 336
413(defsubst coding-system-spec (coding-system) 337The value of `:flags' attribute in the argument of the function
414 "Return coding-spec of CODING-SYSTEM." 338`define-coding-system' must be one of them.
415 (get (check-coding-system coding-system) 'coding-system))
416 339
417(defun coding-system-type (coding-system) 340If `long-form' is specified, use a long designation sequence on
418 "Return the coding type of CODING-SYSTEM. 341encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
419A coding type is an integer value indicating the encoding method 342and `japanese-jisx0208'. The long designation sequence doesn't
420of CODING-SYSTEM. See the function `make-coding-system' for more detail." 343conform to ISO 2022, but used by such a coding system as
421 (aref (coding-system-spec coding-system) coding-spec-type-idx)) 344`compound-text'.
345
346If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
347on encoding.
348
349If `ascii-at-cntl' is specified, designate ASCII to g0 before control
350codes and SPC on encoding.
351
352If `7-bit' is specified, use 7-bit code only on encoding.
353
354If `locking-shift' is specified, decode locking-shift code correctly
355on decoding, and use locking-shift to invoke a graphic element on
356encoding.
357
358If `single-shift' is specified, decode single-shift code correctly on
359decoding, and use single-shift to invoke a graphic element on encoding.
360
361If `designation' is specified, decode designation code correctly on
362decoding, and use designation to designate a charset to a graphic
363element on encoding.
364
365If `revision' is specified, produce an escape sequence to specify
366revision number of a charset on encoding. Such an escape sequence is
367always correctly decoded on decoding.
368
369If `direction' is specified, decode ISO6429's code for specifying
370direction correctly, and produced the code on encoding.
371
372If `init-at-bol' is specified, on encoding, it is assumed that
373invocation and designation statuses are reset at each beginning of
374line even if `ascii-at-eol' is not specified thus no code for
375resetting them are produced.
376
377If `safe' is specified, on encoding, characters not supported by a
378coding are replaced with `?'.
379
380If `latin-extra' is specified, code-detection routine assumes that a
381code specified in `latin-extra-code-table' (which see) is valid.
382
383If `composition' is specified, an escape sequence to specify
384composition sequence is correctly decode on decoding, and is produced
385on encoding.
386
387If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
388correctly decoded on decoding, and is produced on encoding.")
389
390(defun define-coding-system (name docstring &rest props)
391 "Define NAME (symbol) as a coding system with DOCSTRING and attributes.
392The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
393may be any symbol.
394
395The following attributes have special meanings. If labeled as
396\"(required)\", it should not be omitted.
397
398`:mnemonic' (required)
399
400VALUE is a character to display on mode line for the coding system.
401
402`:coding-type' (required)
403
404VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
405`emacs-mule', `sjis', `big5', `ccl', `raw-text', `undecided'.
406
407`:eol-type' (optional)
408
409VALUE is an EOL (end-of-line) format of the coding system. It must be
410one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
411\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
412and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
413decoding by the coding system, Emacs automatically detects an EOL
414format of the source text.
415
416`:charset-list' (required)
417
418VALUE must be a list of charsets supported by the coding system. On
419encoding by the coding system, if a character belongs to multiple
420charsets in the list, a charset that comes earlier in the list is
421selected.
422
423`:ascii-compatible-p' (optional)
424
425If VALUE is non-nil, the coding system decodes all 7-bit bytes into
426the correponding ASCII characters, and encodes all ASCII characters
427back to the correponding 7-bit bytes. If omitted, the VALUE defaults
428to nil.
429
430`:decode-translation-table' (optional)
431
432VALUE must be a translation table to use on decoding.
433
434`:encode-translation-table' (optional)
435
436VALUE must be a translation table to use on encoding.
437
438`:post-read-conversion' (optional)
439
440VALUE must be a function to call after some text is inserted and
441decoded by the coding system itself and before any functions in
442`after-insert-functions' are called. The arguments to this function
443is the same as those of a function in `after-insert-functions',
444i.e. LENGTH of a text while putting point at the head of the text to
445be decoded
446
447`:pre-write-conversion'
448
449VALUE must be a function to call after all functions in
450`write-region-annotate-functions' and `buffer-file-format' are called,
451and before the text is encoded by the coding system itself. The
452arguments to this function is the same as those of a function in
453`write-region-annotate-functions', i.e. FROM and TO specifying region
454of a text.
455
456`:default-char'
457
458VALUE must be a character. On encoding, a character not supported by
459the coding system is replaced with VALUE.
460
461`:eol-type'
462
463VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
464EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
465EOL (CR). If omitted, on decoding, the coding system detect EOL
466format automatically, and on encoding, used Unix-like EOL.
467
468`:mime-charset'
469
470VALUE must be a symbol who has MIME-charset name.
471
472`:flags'
473
474VALUE must be a list of symbols that control ISO-2022 converter. Each
475symbol must be a member of the variable `coding-system-iso-2022-flags'
476\(which see). This attribute has a meaning only when `:coding-type'
477is `iso-2022'.
478
479`:designation'
480
481VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
482GN-USAGE specifies the usage of graphic register GN as follows.
483
484If it is nil, no charset can be designated to GN.
485
486If it is a charset, the charset is initially designaged to GN, and
487never used by the other charsets.
488
489If it is a list, the elements must be charsets, nil, 94, or 96. GN
490can be used by all listed charsets. If the list contains 94, any
491charsets whose iso-chars is 94 can be designaged to GN. If the list
492contains 96, any charsets whose iso-chars is 96 can be designaged to
493GN. If the first element is a charset, the charset is initially
494designaged to GN.
495
496This attribute has a meaning only when `:coding-type' is `iso-2022'.
497
498`:bom'
499
500VALUE must nil, t, or cons of coding systems whose `:coding-type' is
501`utf-16'.
502
503This attribute has a meaning only when `:coding-type' is `utf-16'.
504
505`:endian'
506
507VALUE must be t or nil. See the above description for the detail.
508
509This attribute has a meaning only when `:coding-type' is `utf-16'.
510
511`:ccl-decoder'
512
513This attribute has a meaning only when `:coding-type' is `ccl'.
514
515`:ccl-encoder'
516
517This attribute has a meaning only when `:coding-type' is `ccl'."
518 (let* ((common-attrs (mapcar 'list
519 '(:mnemonic
520 :coding-type
521 :charset-list
522 :ascii-compatible-p
523 :docode-translation-table
524 :encode-translation-table
525 :post-read-conversion
526 :pre-write-conversion
527 :default-char
528 :plist
529 :eol-type)))
530 (coding-type (plist-get props :coding-type))
531 (spec-attrs (mapcar 'list
532 (cond ((eq coding-type 'iso-2022)
533 '(:initial
534 :reg-usage
535 :request
536 :flags))
537 ((eq coding-type 'utf-16)
538 '(:bom
539 :endian))
540 ((eq coding-type 'ccl)
541 '(:ccl-decoder
542 :ccl-encoder
543 :valids))))))
544
545 (dolist (slot common-attrs)
546 (setcdr slot (plist-get props (car slot))))
547
548 (dolist (slot spec-attrs)
549 (setcdr slot (plist-get props (car slot))))
550
551 (if (eq coding-type 'iso-2022)
552 (let ((designation (plist-get props :designation))
553 (flags (plist-get props :flags))
554 (initial (make-vector 4 nil))
555 (reg-usage (cons 4 4))
556 request elt)
557 (dotimes (i 4)
558 (setq elt (aref designation i))
559 (cond ((charsetp elt)
560 (aset initial i elt)
561 (setq request (cons (cons elt i) request)))
562 ((consp elt)
563 (aset initial i (car elt))
564 (if (charsetp (car elt))
565 (setq request (cons (cons (car elt) i) request)))
566 (dolist (e (cdr elt))
567 (cond ((charsetp e)
568 (setq request (cons (cons e i) request)))
569 ((eq e 94)
570 (setcar reg-usage i))
571 ((eq e 96)
572 (setcdr reg-usage i))
573 ((eq e t)
574 (setcar reg-usage i)
575 (setcdr reg-usage i)))))))
576 (setcdr (assq :initial spec-attrs) initial)
577 (setcdr (assq :reg-usage spec-attrs) reg-usage)
578 (setcdr (assq :request spec-attrs) request)
579
580 ;; Change :flags value from a list to a bit-mask.
581 (let ((bits 0)
582 (i 0))
583 (dolist (elt coding-system-iso-2022-flags)
584 (if (memq elt flags)
585 (setq bits (logior bits (lsh 1 i))))
586 (setq i (1+ i)))
587 (setcdr (assq :flags spec-attrs) bits))))
588
589 ;; Add :name and :docstring properties to PROPS.
590 (setq props
591 (cons :name (cons name (cons :docstring (cons docstring props)))))
592 (setcdr (assq :plist common-attrs) props)
593
594 (apply 'define-coding-system-internal
595 name (mapcar 'cdr (append common-attrs spec-attrs)))))
596
597(defun coding-system-doc-string (coding-system)
598 "Return the documentation string for CODING-SYSTEM."
599 (plist-get (coding-system-plist coding-system) :docstring))
422 600
423(defun coding-system-mnemonic (coding-system) 601(defun coding-system-mnemonic (coding-system)
424 "Return the mnemonic character of CODING-SYSTEM. 602 "Return the mnemonic character of CODING-SYSTEM.
425The mnemonic character of a coding system is used in mode line 603The mnemonic character of a coding system is used in mode line
426to indicate the coding system. If the arg is nil, return ?-." 604to indicate the coding system. If the arg is nil, return ?-."
427 (let ((spec (coding-system-spec coding-system))) 605 (plist-get (coding-system-plist coding-system) :mnemonic))
428 (if spec (aref spec coding-spec-mnemonic-idx) ?-)))
429
430(defun coding-system-doc-string (coding-system)
431 "Return the documentation string for CODING-SYSTEM."
432 (aref (coding-system-spec coding-system) coding-spec-doc-string-idx))
433 606
434(defun coding-system-plist (coding-system) 607(defun coding-system-type (coding-system)
435 "Return the property list of CODING-SYSTEM." 608 "Return the coding type of CODING-SYSTEM.
436 (aref (coding-system-spec coding-system) coding-spec-plist-idx)) 609A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
610See the function `define-coding-system' for more detail."
611 (plist-get (coding-system-plist coding-system) :coding-type))
437 612
438(defun coding-system-flags (coding-system) 613(defun coding-system-charset-list (coding-system)
439 "Return `flags' of CODING-SYSTEM. 614 "Return list of charsets supported by COIDNG-SYSTEM.
440A `flags' of a coding system is a vector of length 32 indicating detailed 615If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
441information of a coding system. See the function `make-coding-system' 616If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
442for more detail." 617 (plist-get (coding-system-plist coding-system) :charset-list))
443 (aref (coding-system-spec coding-system) coding-spec-flags-idx))
444 618
445(defun coding-system-get (coding-system prop) 619(defun coding-system-get (coding-system prop)
446 "Extract a value from CODING-SYSTEM's property list for property PROP." 620 "Extract a value from CODING-SYSTEM's property list for property PROP."
@@ -448,22 +622,7 @@ for more detail."
448 622
449(defun coding-system-put (coding-system prop val) 623(defun coding-system-put (coding-system prop val)
450 "Change value in CODING-SYSTEM's property list PROP to VAL." 624 "Change value in CODING-SYSTEM's property list PROP to VAL."
451 (let ((plist (coding-system-plist coding-system))) 625 (plist-put (coding-system-plist coding-system) prop val))
452 (if plist
453 (plist-put plist prop val)
454 (aset (coding-system-spec coding-system) coding-spec-plist-idx
455 (list prop val)))))
456
457(defun coding-system-category (coding-system)
458 "Return the coding category of CODING-SYSTEM.
459See also `coding-category-list'."
460 (coding-system-get coding-system 'coding-category))
461
462(defun coding-system-base (coding-system)
463 "Return the base coding system of CODING-SYSTEM.
464A base coding system is what made by `make-coding-system'.
465Any alias nor subsidiary coding systems are not base coding system."
466 (car (coding-system-get coding-system 'alias-coding-systems)))
467 626
468(defalias 'coding-system-parent 'coding-system-base) 627(defalias 'coding-system-parent 'coding-system-base)
469(make-obsolete 'coding-system-parent 'coding-system-base "20.3") 628(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
@@ -478,18 +637,6 @@ Any alias nor subsidiary coding systems are not base coding system."
478;; automatically. Nth element of the vector is the subsidiary coding 637;; automatically. Nth element of the vector is the subsidiary coding
479;; system whose `eol-type' property is N. 638;; system whose `eol-type' property is N.
480 639
481(defun coding-system-eol-type (coding-system)
482 "Return eol-type of CODING-SYSTEM.
483An eol-type is integer 0, 1, 2, or a vector of coding systems.
484
485Integer values 0, 1, and 2 indicate a format of end-of-line; LF,
486CRLF, and CR respectively.
487
488A vector value indicates that a format of end-of-line should be
489detected automatically. Nth element of the vector is the subsidiary
490coding system whose eol-type is N."
491 (get coding-system 'eol-type))
492
493(defun coding-system-lessp (x y) 640(defun coding-system-lessp (x y)
494 (cond ((eq x 'no-conversion) t) 641 (cond ((eq x 'no-conversion) t)
495 ((eq y 'no-conversion) nil) 642 ((eq y 'no-conversion) nil)
@@ -540,566 +687,6 @@ formats (e.g. iso-latin-1-unix, koi8-r-dos)."
540 (setq tail (cdr tail))))) 687 (setq tail (cdr tail)))))
541 codings)) 688 codings))
542 689
543(defun map-charset-chars (func charset)
544 "Use FUNC to map over all characters in CHARSET for side effects.
545FUNC is a function of two args, the start and end (inclusive) of a
546character code range. Thus FUNC should iterate over [START, END]."
547 (let* ((dim (charset-dimension charset))
548 (chars (charset-chars charset))
549 (start (if (= chars 94)
550 33
551 32)))
552 (if (= dim 1)
553 (funcall func
554 (make-char charset start)
555 (make-char charset (+ start chars -1)))
556 (dotimes (i chars)
557 (funcall func
558 (make-char charset (+ i start) start)
559 (make-char charset (+ i start) (+ start chars -1)))))))
560
561(defun register-char-codings (coding-system safe-chars)
562 "Add entries for CODING-SYSTEM to `char-coding-system-table'.
563If SAFE-CHARS is a char-table, its non-nil entries specify characters
564which CODING-SYSTEM encodes safely. If SAFE-CHARS is t, register
565CODING-SYSTEM as a general one which can encode all characters."
566 (let ((general (char-table-extra-slot char-coding-system-table 0))
567 ;; Charsets which have some members in the table, but not all
568 ;; of them (i.e. not just a generic character):
569 (partials (char-table-extra-slot char-coding-system-table 1)))
570 (if (eq safe-chars t)
571 (or (memq coding-system general)
572 (set-char-table-extra-slot char-coding-system-table 0
573 (cons coding-system general)))
574 (map-char-table
575 (lambda (key val)
576 (if (and (>= key 128) val)
577 (let ((codings (aref char-coding-system-table key))
578 (charset (char-charset key)))
579 (unless (memq coding-system codings)
580 (if (and (generic-char-p key)
581 (memq charset partials))
582 ;; The generic char would clobber individual
583 ;; entries already in the table. First save the
584 ;; separate existing entries for all chars of the
585 ;; charset (with the generic entry added, if
586 ;; necessary).
587 (let (entry existing)
588 (map-charset-chars
589 (lambda (start end)
590 (while (<= start end)
591 (setq entry (aref char-coding-system-table start))
592 (when entry
593 (push (cons
594 start
595 (if (memq coding-system entry)
596 entry
597 (cons coding-system entry)))
598 existing))
599 (setq start (1+ start))))
600 charset)
601 ;; Update the generic entry.
602 (aset char-coding-system-table key
603 (cons coding-system codings))
604 ;; Override with the saved entries.
605 (dolist (elt existing)
606 (aset char-coding-system-table (car elt) (cdr elt))))
607 (aset char-coding-system-table key
608 (cons coding-system codings))
609 (unless (or (memq charset partials)
610 (generic-char-p key))
611 (push charset partials)))))))
612 safe-chars)
613 (set-char-table-extra-slot char-coding-system-table 1 partials))))
614
615
616(defun make-subsidiary-coding-system (coding-system)
617 "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM."
618 (let ((coding-spec (coding-system-spec coding-system))
619 (subsidiaries (vector (intern (format "%s-unix" coding-system))
620 (intern (format "%s-dos" coding-system))
621 (intern (format "%s-mac" coding-system))))
622 (i 0)
623 temp)
624 (while (< i 3)
625 (put (aref subsidiaries i) 'coding-system coding-spec)
626 (put (aref subsidiaries i) 'eol-type i)
627 (add-to-coding-system-list (aref subsidiaries i))
628 (setq coding-system-alist
629 (cons (list (symbol-name (aref subsidiaries i)))
630 coding-system-alist))
631 (setq i (1+ i)))
632 subsidiaries))
633
634(defun transform-make-coding-system-args (name type &optional doc-string props)
635 "For internal use only.
636Transform XEmacs style args for `make-coding-system' to Emacs style.
637Value is a list of transformed arguments."
638 (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
639 (eol-type (plist-get props 'eol-type))
640 properties tmp)
641 (cond
642 ((eq eol-type 'lf) (setq eol-type 'unix))
643 ((eq eol-type 'crlf) (setq eol-type 'dos))
644 ((eq eol-type 'cr) (setq eol-type 'mac)))
645 (if (setq tmp (plist-get props 'post-read-conversion))
646 (setq properties (plist-put properties 'post-read-conversion tmp)))
647 (if (setq tmp (plist-get props 'pre-write-conversion))
648 (setq properties (plist-put properties 'pre-write-conversion tmp)))
649 (cond
650 ((eq type 'shift-jis)
651 `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
652 ((eq type 'iso2022) ; This is not perfect.
653 (if (plist-get props 'escape-quoted)
654 (error "escape-quoted is not supported: %S"
655 `(,name ,type ,doc-string ,props)))
656 (let ((g0 (plist-get props 'charset-g0))
657 (g1 (plist-get props 'charset-g1))
658 (g2 (plist-get props 'charset-g2))
659 (g3 (plist-get props 'charset-g3))
660 (use-roman
661 (and
662 (eq (cadr (assoc 'latin-jisx0201
663 (plist-get props 'input-charset-conversion)))
664 'ascii)
665 (eq (cadr (assoc 'ascii
666 (plist-get props 'output-charset-conversion)))
667 'latin-jisx0201)))
668 (use-oldjis
669 (and
670 (eq (cadr (assoc 'japanese-jisx0208-1978
671 (plist-get props 'input-charset-conversion)))
672 'japanese-jisx0208)
673 (eq (cadr (assoc 'japanese-jisx0208
674 (plist-get props 'output-charset-conversion)))
675 'japanese-jisx0208-1978))))
676 (if (charsetp g0)
677 (if (plist-get props 'force-g0-on-output)
678 (setq g0 `(nil ,g0))
679 (setq g0 `(,g0 t))))
680 (if (charsetp g1)
681 (if (plist-get props 'force-g1-on-output)
682 (setq g1 `(nil ,g1))
683 (setq g1 `(,g1 t))))
684 (if (charsetp g2)
685 (if (plist-get props 'force-g2-on-output)
686 (setq g2 `(nil ,g2))
687 (setq g2 `(,g2 t))))
688 (if (charsetp g3)
689 (if (plist-get props 'force-g3-on-output)
690 (setq g3 `(nil ,g3))
691 (setq g3 `(,g3 t))))
692 `(,name 2 ,mnemonic ,doc-string
693 (,g0 ,g1 ,g2 ,g3
694 ,(plist-get props 'short)
695 ,(not (plist-get props 'no-ascii-eol))
696 ,(not (plist-get props 'no-ascii-cntl))
697 ,(plist-get props 'seven)
698 t
699 ,(not (plist-get props 'lock-shift))
700 ,use-roman
701 ,use-oldjis
702 ,(plist-get props 'no-iso6429)
703 nil nil nil nil)
704 ,properties ,eol-type)))
705 ((eq type 'big5)
706 `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
707 ((eq type 'ccl)
708 `(,name 4 ,mnemonic ,doc-string
709 (,(plist-get props 'decode) . ,(plist-get props 'encode))
710 ,properties ,eol-type))
711 (t
712 (error "unsupported XEmacs style make-coding-style arguments: %S"
713 `(,name ,type ,doc-string ,props))))))
714
715(defun make-coding-system (coding-system type mnemonic doc-string
716 &optional
717 flags
718 properties
719 eol-type)
720 "Define a new coding system CODING-SYSTEM (symbol).
721Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional),
722and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM
723in the following format:
724 [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
725
726TYPE is an integer value indicating the type of the coding system as follows:
727 0: Emacs internal format,
728 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
729 2: ISO-2022 including many variants,
730 3: Big5 used mainly on Chinese PC,
731 4: private, CCL programs provide encoding/decoding algorithm,
732 5: Raw-text, which means that text contains random 8-bit codes.
733
734MNEMONIC is a character to be displayed on mode line for the coding system.
735
736DOC-STRING is a documentation string for the coding system.
737
738FLAGS specifies more detailed information of the coding system as follows:
739
740 If TYPE is 2 (ISO-2022), FLAGS is a list of these elements:
741 CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
742 ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
743 USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL,
744 SAFE, ACCEPT-LATIN-EXTRA-CODE.
745 CHARSETn are character sets initially designated to Gn graphic registers.
746 If CHARSETn is nil, Gn is never used.
747 If CHARSETn is t, Gn can be used but nothing designated initially.
748 If CHARSETn is a list of character sets, those character sets are
749 designated to Gn on output, but nothing designated to Gn initially.
750 But, character set `ascii' can be designated only to G0.
751 SHORT-FORM non-nil means use short designation sequence on output.
752 ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
753 ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
754 SPACE on output.
755 SEVEN non-nil means use 7-bit code only on output.
756 LOCKING-SHIFT non-nil means use locking-shift.
757 SINGLE-SHIFT non-nil means use single-shift.
758 USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
759 USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
760 NO-ISO6429 non-nil means not use ISO6429's direction specification.
761 INIT-BOL non-nil means any designation state is assumed to be reset
762 to initial at each beginning of line on output.
763 DESIGNATION-BOL non-nil means designation sequences should be placed
764 at beginning of line on output.
765 SAFE non-nil means convert unsafe characters to `?' on output.
766 Characters not specified in the property `safe-charsets' nor
767 `safe-chars' are unsafe.
768 ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts
769 a code specified in `latin-extra-code-table' (which see) as a valid
770 code of the coding system.
771
772 If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for
773 decoding and encoding. CCL programs should be specified by their
774 symbols.
775
776PROPERTIES is an alist of properties vs the corresponding values. The
777following properties are recognized:
778
779 o post-read-conversion
780
781 The value is a function to call after some text is inserted and
782 decoded by the coding system itself and before any functions in
783 `after-insert-functions' are called. The argument of this
784 function is the same as for a function in
785 `after-insert-file-functions', i.e. LENGTH of the text inserted,
786 with point at the head of the text to be decoded.
787
788 o pre-write-conversion
789
790 The value is a function to call after all functions in
791 `write-region-annotate-functions' and `buffer-file-format' are
792 called, and before the text is encoded by the coding system itself.
793 The arguments to this function are the same as those of a function
794 in `write-region-annotate-functions', i.e. FROM and TO, specifying
795 a region of text.
796
797 o translation-table-for-decode
798
799 The value is a translation table to be applied on decoding. See
800 the function `make-translation-table' for the format of translation
801 table. This is not applicable to type 4 (CCL-based) coding systems.
802
803 o translation-table-for-encode
804
805 The value is a translation table to be applied on encoding. This is
806 not applicable to type 4 (CCL-based) coding systems.
807
808 o safe-chars
809
810 The value is a char table. If a character has non-nil value in it,
811 the character is safely supported by the coding system. This
812 overrides the specification of safe-charsets.
813
814 o safe-charsets
815
816 The value is a list of charsets safely supported by the coding
817 system. The value t means that all charsets Emacs handles are
818 supported. Even if some charset is not in this list, it doesn't
819 mean that the charset can't be encoded in the coding system;
820 it just means that some other receiver of text encoded
821 in the coding system won't be able to handle that charset.
822
823 o mime-charset
824
825 The value is a symbol of which name is `MIME-charset' parameter of
826 the coding system.
827
828 o valid-codes (meaningful only for a coding system based on CCL)
829
830 The value is a list to indicate valid byte ranges of the encoded
831 file. Each element of the list is an integer or a cons of integer.
832 In the former case, the integer value is a valid byte code. In the
833 latter case, the integers specify the range of valid byte codes.
834
835These properties are set in PLIST, a property list. This function
836also sets properties `coding-category' and `alias-coding-systems'
837automatically.
838
839EOL-TYPE specifies the EOL type of the coding-system in one of the
840following formats:
841
842 o symbol (unix, dos, or mac)
843
844 The symbol `unix' means Unix-like EOL (LF), `dos' means
845 DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR).
846
847 o number (0, 1, or 2)
848
849 The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL
850 respectively.
851
852 o vector of coding-systems of length 3
853
854 The EOL type is detected automatically for the coding system.
855 And, according to the detected EOL type, one of the coding
856 systems in the vector is selected. Elements of the vector
857 corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL
858 in this order.
859
860Kludgy features for backward compatibility:
861
8621. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is
863treated as a compiled CCL code.
864
8652. If PROPERTIES is just a list of character sets, the list is set as
866a value of `safe-charsets' in PLIST."
867
868 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
869 ;; is a symbol, perhaps, this function is called with XEmacs-style
870 ;; arguments. Here, try to transform that kind of arguments to
871 ;; Emacs style.
872 (if (symbolp type)
873 (let ((args (transform-make-coding-system-args coding-system type
874 mnemonic doc-string)))
875 (setq coding-system (car args)
876 type (nth 1 args)
877 mnemonic (nth 2 args)
878 doc-string (nth 3 args)
879 flags (nth 4 args)
880 properties (nth 5 args)
881 eol-type (nth 6 args))))
882
883 ;; Set a value of `coding-system' property.
884 (let ((coding-spec (make-vector 5 nil))
885 (no-initial-designation t)
886 (no-alternative-designation t)
887 (accept-latin-extra-code nil)
888 coding-category)
889 (if (or (not (integerp type)) (< type 0) (> type 5))
890 (error "TYPE argument must be 0..5"))
891 (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127))
892 (error "MNEMONIC argument must be an ASCII printable character"))
893 (aset coding-spec coding-spec-type-idx type)
894 (aset coding-spec coding-spec-mnemonic-idx mnemonic)
895 (aset coding-spec coding-spec-doc-string-idx
896 (purecopy (if (stringp doc-string) doc-string "")))
897 (cond ((= type 0)
898 (setq coding-category 'coding-category-emacs-mule))
899 ((= type 1)
900 (setq coding-category 'coding-category-sjis))
901 ((= type 2) ; ISO2022
902 (let ((i 0)
903 (vec (make-vector 32 nil))
904 (g1-designation nil)
905 (fl flags))
906 (while (< i 4)
907 (let ((charset (car fl)))
908 (if (and no-initial-designation
909 (> i 0)
910 (or (charsetp charset)
911 (and (consp charset)
912 (charsetp (car charset)))))
913 (setq no-initial-designation nil))
914 (if (charsetp charset)
915 (if (= i 1) (setq g1-designation charset))
916 (if (consp charset)
917 (let ((tail charset)
918 elt)
919 (while tail
920 (setq elt (car tail))
921 (if (eq elt t)
922 (setq no-alternative-designation nil)
923 (if (and elt (not (charsetp elt)))
924 (error "Invalid charset: %s" elt)))
925 (setq tail (cdr tail)))
926 (setq g1-designation (car charset)))
927 (if charset
928 (if (eq charset t)
929 (setq no-alternative-designation nil)
930 (error "Invalid charset: %s" charset)))))
931 (aset vec i charset))
932 (setq fl (cdr fl) i (1+ i)))
933 (while (and (< i 32) fl)
934 (aset vec i (car fl))
935 (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE
936 (car fl))
937 (setq accept-latin-extra-code t))
938 (setq fl (cdr fl) i (1+ i)))
939 (aset coding-spec 4 vec)
940 (setq coding-category
941 (if (aref vec 8) ; Use locking-shift.
942 (or (and (aref vec 7) 'coding-category-iso-7-else)
943 'coding-category-iso-8-else)
944 (if (aref vec 7) ; 7-bit only.
945 (if (aref vec 9) ; Use single-shift.
946 'coding-category-iso-7-else
947 (if no-alternative-designation
948 'coding-category-iso-7-tight
949 'coding-category-iso-7))
950 (if (or no-initial-designation
951 (not no-alternative-designation))
952 'coding-category-iso-8-else
953 (if (and (charsetp g1-designation)
954 (= (charset-dimension g1-designation) 2))
955 'coding-category-iso-8-2
956 'coding-category-iso-8-1)))))))
957 ((= type 3)
958 (setq coding-category 'coding-category-big5))
959 ((= type 4) ; private
960 (setq coding-category 'coding-category-ccl)
961 (if (not (consp flags))
962 (error "Invalid FLAGS argument for TYPE 4 (CCL)")
963 (let ((decoder (check-ccl-program
964 (car flags)
965 (intern (format "%s-decoder" coding-system))))
966 (encoder (check-ccl-program
967 (cdr flags)
968 (intern (format "%s-encoder" coding-system)))))
969 (if (and decoder encoder)
970 (aset coding-spec 4 (cons decoder encoder))
971 (error "Invalid FLAGS argument for TYPE 4 (CCL)")))))
972 (t ; i.e. (= type 5)
973 (setq coding-category 'coding-category-raw-text)))
974
975 (let ((plist (list 'coding-category coding-category
976 'alias-coding-systems (list coding-system))))
977 (if no-initial-designation
978 (plist-put plist 'no-initial-designation t))
979 (if (and properties
980 (or (eq properties t)
981 (not (consp (car properties)))))
982 ;; In the old version, the arg PROPERTIES is a list to be
983 ;; set in PLIST as a value of property `safe-charsets'.
984 (setq properties (list (cons 'safe-charsets properties))))
985 ;; In the current version PROPERTIES is a property list.
986 ;; Reflect it into PLIST one by one while handling safe-chars
987 ;; specially.
988 (let ((safe-charsets (cdr (assq 'safe-charsets properties)))
989 (safe-chars (cdr (assq 'safe-chars properties)))
990 (l properties)
991 prop val)
992 ;; If only safe-charsets is specified, make a char-table from
993 ;; it, and store that char-table as the value of `safe-chars'.
994 (if (and (not safe-chars) safe-charsets)
995 (let (charset)
996 (if (eq safe-charsets t)
997 (setq safe-chars t)
998 (setq safe-chars (make-char-table 'safe-chars))
999 (while safe-charsets
1000 (setq charset (car safe-charsets)
1001 safe-charsets (cdr safe-charsets))
1002 (cond ((eq charset 'ascii)) ; just ignore
1003 ((eq charset 'eight-bit-control)
1004 (let ((i 128))
1005 (while (< i 160)
1006 (aset safe-chars i t)
1007 (setq i (1+ i)))))
1008 ((eq charset 'eight-bit-graphic)
1009 (let ((i 160))
1010 (while (< i 256)
1011 (aset safe-chars i t)
1012 (setq i (1+ i)))))
1013 (t
1014 (aset safe-chars (make-char charset) t))))
1015 (if accept-latin-extra-code
1016 (let ((i 128))
1017 (while (< i 160)
1018 (if (aref latin-extra-code-table i)
1019 (aset safe-chars i t))
1020 (setq i (1+ i))))))
1021 (setq l (cons (cons 'safe-chars safe-chars) l))))
1022 (while l
1023 (setq prop (car (car l)) val (cdr (car l)) l (cdr l))
1024 (if (eq prop 'safe-chars)
1025 (progn
1026 (if (and (symbolp val)
1027 (get val 'translation-table))
1028 (setq safe-chars (get val 'translation-table)))
1029 (register-char-codings coding-system safe-chars)
1030 (setq val safe-chars)))
1031 (plist-put plist prop val)))
1032 ;; The property `coding-category' may have been set differently
1033 ;; through PROPERTIES.
1034 (setq coding-category (plist-get plist 'coding-category))
1035 (aset coding-spec coding-spec-plist-idx plist))
1036 (put coding-system 'coding-system coding-spec)
1037 (put coding-category 'coding-systems
1038 (cons coding-system (get coding-category 'coding-systems))))
1039
1040 ;; Next, set a value of `eol-type' property.
1041 (if (not eol-type)
1042 ;; If EOL-TYPE is nil, set a vector of subsidiary coding
1043 ;; systems, each corresponds to a coding system for the detected
1044 ;; EOL format.
1045 (setq eol-type (make-subsidiary-coding-system coding-system)))
1046 (setq eol-type
1047 (cond ((or (eq eol-type 'unix) (null eol-type))
1048 0)
1049 ((eq eol-type 'dos)
1050 1)
1051 ((eq eol-type 'mac)
1052 2)
1053 ((or (and (vectorp eol-type)
1054 (= (length eol-type) 3))
1055 (and (numberp eol-type)
1056 (and (>= eol-type 0)
1057 (<= eol-type 2))))
1058 eol-type)
1059 (t
1060 (error "Invalid EOL-TYPE spec:%S" eol-type))))
1061 (put coding-system 'eol-type eol-type)
1062
1063 ;; At last, register CODING-SYSTEM in `coding-system-list' and
1064 ;; `coding-system-alist'.
1065 (add-to-coding-system-list coding-system)
1066 (setq coding-system-alist (cons (list (symbol-name coding-system))
1067 coding-system-alist))
1068
1069 ;; For a coding system of cateogory iso-8-1 and iso-8-2, create
1070 ;; XXX-with-esc variants.
1071 (let ((coding-category (coding-system-category coding-system)))
1072 (if (or (eq coding-category 'coding-category-iso-8-1)
1073 (eq coding-category 'coding-category-iso-8-2))
1074 (let ((esc (intern (concat (symbol-name coding-system) "-with-esc")))
1075 (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system))
1076 (safe-charsets (assq 'safe-charsets properties))
1077 (mime-charset (assq 'mime-charset properties)))
1078 (if safe-charsets
1079 (setcdr safe-charsets t)
1080 (setq properties (cons (cons 'safe-charsets t) properties)))
1081 (if mime-charset
1082 (setcdr mime-charset nil))
1083 (make-coding-system esc type mnemonic doc
1084 (if (listp (car flags))
1085 (cons (append (car flags) '(t)) (cdr flags))
1086 (cons (list (car flags) t) (cdr flags)))
1087 properties))))
1088
1089 coding-system)
1090
1091(defun define-coding-system-alias (alias coding-system)
1092 "Define ALIAS as an alias for coding system CODING-SYSTEM."
1093 (put alias 'coding-system (coding-system-spec coding-system))
1094 (nconc (coding-system-get alias 'alias-coding-systems) (list alias))
1095 (add-to-coding-system-list alias)
1096 (setq coding-system-alist (cons (list (symbol-name alias))
1097 coding-system-alist))
1098 (let ((eol-type (coding-system-eol-type coding-system)))
1099 (if (vectorp eol-type)
1100 (put alias 'eol-type (make-subsidiary-coding-system alias))
1101 (put alias 'eol-type eol-type))))
1102
1103(defun set-buffer-file-coding-system (coding-system &optional force) 690(defun set-buffer-file-coding-system (coding-system &optional force)
1104 "Set the file coding-system of the current buffer to CODING-SYSTEM. 691 "Set the file coding-system of the current buffer to CODING-SYSTEM.
1105This means that when you save the buffer, it will be converted 692This means that when you save the buffer, it will be converted
@@ -1268,7 +855,10 @@ This setting is effective for the next communication only."
1268 855
1269(defun set-coding-priority (arg) 856(defun set-coding-priority (arg)
1270 "Set priority of coding categories according to ARG. 857 "Set priority of coding categories according to ARG.
1271ARG is a list of coding categories ordered by priority." 858ARG is a list of coding categories ordered by priority.
859
860This function is provided for backward compatibility.
861Now we have more convenient function `set-coding-system-priority'."
1272 (let ((l arg) 862 (let ((l arg)
1273 (current-list (copy-sequence coding-category-list))) 863 (current-list (copy-sequence coding-category-list)))
1274 ;; Check the validity of ARG while deleting coding categories in 864 ;; Check the validity of ARG while deleting coding categories in
@@ -1457,6 +1047,8 @@ text, and convert it in the temporary buffer. Otherwise, convert in-place."
1457 ;; Must return nil, as build_annotations_2 expects that. 1047 ;; Must return nil, as build_annotations_2 expects that.
1458 nil) 1048 nil)
1459 1049
1050(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0")
1051
1460;;; FILE I/O 1052;;; FILE I/O
1461 1053
1462(defcustom auto-coding-alist 1054(defcustom auto-coding-alist
@@ -1626,8 +1218,7 @@ function by default."
1626 (when coding-system 1218 (when coding-system
1627 (set-buffer-file-coding-system coding-system t) 1219 (set-buffer-file-coding-system coding-system t)
1628 (if (and enable-multibyte-characters 1220 (if (and enable-multibyte-characters
1629 (or (eq coding-system 'no-conversion) 1221 (or (eq (coding-system-type coding-system) 'raw-text))
1630 (eq (coding-system-type coding-system) 5))
1631 ;; If buffer was unmodified and the size is the 1222 ;; If buffer was unmodified and the size is the
1632 ;; same as INSERTED, we must be visiting it. 1223 ;; same as INSERTED, we must be visiting it.
1633 (not modified-p) 1224 (not modified-p)
@@ -1667,8 +1258,8 @@ Return nil if there's no need to set `buffer-file-coding-system'."
1667 ;; But eol-type is not yet set. 1258 ;; But eol-type is not yet set.
1668 (setq local-eol nil)) 1259 (setq local-eol nil))
1669 (if (and buffer-file-coding-system 1260 (if (and buffer-file-coding-system
1670 (not (eq (coding-system-type buffer-file-coding-system) t))) 1261 (not (eq (coding-system-type buffer-file-coding-system)
1671 ;; This is not `undecided'. 1262 'undecided)))
1672 (setq local-coding (coding-system-base buffer-file-coding-system))) 1263 (setq local-coding (coding-system-base buffer-file-coding-system)))
1673 1264
1674 (if (and (local-variable-p 'buffer-file-coding-system) 1265 (if (and (local-variable-p 'buffer-file-coding-system)
@@ -1682,9 +1273,7 @@ Return nil if there's no need to set `buffer-file-coding-system'."
1682 ;; But eol-type is not found. 1273 ;; But eol-type is not found.
1683 ;; If EOL conversions are inhibited, force unix eol-type. 1274 ;; If EOL conversions are inhibited, force unix eol-type.
1684 (setq found-eol (if inhibit-eol-conversion 0))) 1275 (setq found-eol (if inhibit-eol-conversion 0)))
1685 (if (eq (coding-system-type coding) t) 1276 (setq found-coding (coding-system-base coding))
1686 (setq found-coding 'undecided)
1687 (setq found-coding (coding-system-base coding)))
1688 1277
1689 (if (and (not found-eol) (eq found-coding 'undecided)) 1278 (if (and (not found-eol) (eq found-coding 'undecided))
1690 ;; No valid coding information found. 1279 ;; No valid coding information found.