diff options
| author | Kenichi Handa | 2002-03-01 02:22:38 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-03-01 02:22:38 +0000 |
| commit | c184177262bc93a25ae7c781dcabbcc1ba213b04 (patch) | |
| tree | c9486650246da0f5b26d8ae6ebaaa6370df695c5 | |
| parent | 2c390c27dcc603e16c247a992419d4f0b207f113 (diff) | |
| download | emacs-c184177262bc93a25ae7c781dcabbcc1ba213b04.tar.gz emacs-c184177262bc93a25ae7c781dcabbcc1ba213b04.zip | |
(char-valid-p): Make it an alias of characterp.
(define-charset): Fully re-designed.
(charset-quoted-standard-p): Deleted.
(charsetp): Moved to charset.c.
(charset-info, charset-id, charset-bytes, charset-width,
charset-directioin, charset-iso-graphic-plane,
charset-reverse-charset): Deleted.
(charset-dimension, charset-chars, charset-iso-final-char,
charset-description, charset-short-name, charset-long-name): Call
charset-plist instead of charset-info.
(charset-plist, set-charset-plist): Moved to charset.c.
(get-charset-property, put-charset-property): Moved from
mule-cmds.el. Call charset-plist and set-charset-plist.
(make-char): Deleted.
(generic-char-p): Make it always return nil.
(decode-char, encode-char): Moved to charset.c.
(coding-spec-XXX-idx): Variables deleted.
(coding-system-iso-2022-flags): New variable.
(define-coding-system): New function.
(transform-make-coding-system-args, make-coding-system): Deleted.
(set-coding-priority): Make it obsolete.
(after-insert-file-set-buffer-file-coding-system): Adjusted for
the new coding system structure.
(find-new-buffer-file-coding-system): Likewise.
| -rw-r--r-- | lisp/international/mule.el | 1411 |
1 files changed, 500 insertions, 911 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 8235ce58e65..47e18a91b9d 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -3,6 +3,9 @@ | |||
| 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. | 4 | ;; Licensed to the Free Software Foundation. |
| 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. | 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. |
| 6 | ;; Copyright (C) 2001, 2002 | ||
| 7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 8 | ;; Registration Number H13PRO009 | ||
| 6 | 9 | ||
| 7 | ;; Keywords: mule, multilingual, character set, coding system | 10 | ;; Keywords: mule, multilingual, character set, coding system |
| 8 | 11 | ||
| @@ -27,12 +30,165 @@ | |||
| 27 | 30 | ||
| 28 | ;;; Code: | 31 | ;;; Code: |
| 29 | 32 | ||
| 30 | (defconst mule-version "5.0 (SAKAKI)" "\ | 33 | (defconst mule-version "7.0 (SAKAKI)" "\ |
| 31 | Version number and name of this version of MULE (multilingual environment).") | 34 | Version number and name of this version of MULE (multilingual environment).") |
| 32 | 35 | ||
| 33 | (defconst mule-version-date "1999.12.7" "\ | 36 | (defconst mule-version-date "2002.2.28" "\ |
| 34 | Distribution date of this version of MULE (multilingual environment).") | 37 | Distribution date of this version of MULE (multilingual environment).") |
| 35 | 38 | ||
| 39 | |||
| 40 | |||
| 41 | ;;; CHARACTER | ||
| 42 | (defalias 'char-valid-p 'characterp) | ||
| 43 | (make-obsolete 'char-valid-p 'characterp "22.1") | ||
| 44 | |||
| 45 | |||
| 46 | ;;; CHARSET | ||
| 47 | |||
| 48 | (defun define-charset (name docstring &rest props) | ||
| 49 | "Define NAME (symbol) as a charset with DOCSTRING. | ||
| 50 | The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE | ||
| 51 | may be any symbol. The followings have special meanings, and one of | ||
| 52 | `:code-offset', `:map', `:parents' must be specified. | ||
| 53 | |||
| 54 | `:short-name' | ||
| 55 | |||
| 56 | VALUE must be a short string to identify the charset. If omitted, | ||
| 57 | NAME is used. | ||
| 58 | |||
| 59 | `:long-name' | ||
| 60 | |||
| 61 | VALUE must be a string longer than `:short-name' to identify the | ||
| 62 | charset. If omitted, the value of `:short-name' attribute is used. | ||
| 63 | |||
| 64 | `:dimension' | ||
| 65 | |||
| 66 | VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of | ||
| 67 | code-points of the charsets. If omitted, it is calculated from a | ||
| 68 | value of `:code-space' attribute. | ||
| 69 | |||
| 70 | `:code-space' | ||
| 71 | |||
| 72 | VALUE must be a vector of length at most 8 specifying the byte code | ||
| 73 | range of each dimension in this format: | ||
| 74 | [ MIN-1 MAX-1 MIN-2 MAX-2 ... ] | ||
| 75 | where, MIN-N is the minimum byte value of Nth dimension of code-point, | ||
| 76 | MAX-N is the maximum byte value of that. | ||
| 77 | |||
| 78 | `:iso-final-char' | ||
| 79 | |||
| 80 | VALUE must be a character in the range 32 to 127 (inclusive) | ||
| 81 | specifying the final char of the charset for ISO-2022 encoding. If | ||
| 82 | omitted, the charset can't be encoded by ISO-2022 based | ||
| 83 | coding-systems. | ||
| 84 | |||
| 85 | `:iso-revision-number' | ||
| 86 | |||
| 87 | VALUE must be an integer in the range 0..63, specifying the revision | ||
| 88 | number of the charset for ISO-2022 encoding. | ||
| 89 | |||
| 90 | `:emacs-mule-id' | ||
| 91 | |||
| 92 | VALUE must be an integer of 0, 128..255. If omitted, the charset | ||
| 93 | can't be encoded by coding-systems of type `emacs-mule'. | ||
| 94 | |||
| 95 | `:ascii-compatible-p' | ||
| 96 | |||
| 97 | VALUE must be nil or t. If the VALUE is nil, the charset is a not | ||
| 98 | compatible with ASCII. The default value is nil. | ||
| 99 | |||
| 100 | `:supplementary-p' | ||
| 101 | |||
| 102 | VALUE must be nil or t. If the VALUE is t, the charset is | ||
| 103 | supplementary, which means the charset is used only as a parent of | ||
| 104 | some other charset. | ||
| 105 | |||
| 106 | `:invalid-code' | ||
| 107 | |||
| 108 | VALUE must be a nonnegative integer that can be used as an invalid | ||
| 109 | code point of the charset. If the minimum code is 0 and the maximum | ||
| 110 | code is greater than Emacs' maximum integer value, `:invalid-code' | ||
| 111 | should not be omitted. | ||
| 112 | |||
| 113 | `:code-offset' | ||
| 114 | |||
| 115 | VALUE must be an integer added to an index number of character to get | ||
| 116 | the corresponding character code. | ||
| 117 | |||
| 118 | `:map' | ||
| 119 | |||
| 120 | VALUE must be vector or string. | ||
| 121 | |||
| 122 | If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], | ||
| 123 | where CODE-n is a code-point of the charset, and CHAR-n is the | ||
| 124 | corresponding charcter code. | ||
| 125 | |||
| 126 | If it is a string, it is a name of file that contains the above | ||
| 127 | information. | ||
| 128 | |||
| 129 | `:parents' | ||
| 130 | |||
| 131 | VALUE must be a list of parent charsets. The charset inherits | ||
| 132 | characters from them. Each element of the list may be a cons (PARENT | ||
| 133 | . OFFSET), where PARENT is a parent charset, and OFFSET is an offset | ||
| 134 | value to add to a code point of this charset to get the corresponding | ||
| 135 | code point of PARENT. | ||
| 136 | |||
| 137 | `:unify-map' | ||
| 138 | |||
| 139 | VALUE must be vector or string. | ||
| 140 | |||
| 141 | If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], | ||
| 142 | where CODE-n is a code-point of the charset, and CHAR-n is the | ||
| 143 | corresponding unified charcter code. | ||
| 144 | |||
| 145 | If it is a string, it is a name of file that contains the above | ||
| 146 | information." | ||
| 147 | (let ((attrs (mapcar 'list '(:dimension | ||
| 148 | :code-space | ||
| 149 | :iso-final-char | ||
| 150 | :iso-revision-number | ||
| 151 | :emacs-mule-id | ||
| 152 | :ascii-compatible-p | ||
| 153 | :supplementary-p | ||
| 154 | :invalid-code | ||
| 155 | :code-offset | ||
| 156 | :map | ||
| 157 | :parents | ||
| 158 | :unify-map | ||
| 159 | :plist)))) | ||
| 160 | |||
| 161 | ;; If :dimension is omitted, get the dimension from :code-space. | ||
| 162 | (let ((dimension (plist-get props :dimension))) | ||
| 163 | (or dimension | ||
| 164 | (progn | ||
| 165 | (setq dimension (/ (length (plist-get props :code-space)) 2)) | ||
| 166 | (setq props (plist-put props :dimension dimension))))) | ||
| 167 | |||
| 168 | (dolist (slot attrs) | ||
| 169 | (setcdr slot (plist-get props (car slot)))) | ||
| 170 | |||
| 171 | ;; Make sure that the value of :code-space is a vector of 8 | ||
| 172 | ;; elements. | ||
| 173 | (let* ((slot (assq :code-space attrs)) | ||
| 174 | (val (cdr slot)) | ||
| 175 | (len (length val))) | ||
| 176 | (if (< len 8) | ||
| 177 | (setcdr slot | ||
| 178 | (vconcat val (make-vector (- 8 len) 0))))) | ||
| 179 | |||
| 180 | ;; Add :name and :docstring properties to PROPS. | ||
| 181 | (setq props | ||
| 182 | (cons :name (cons name (cons :docstring (cons docstring props))))) | ||
| 183 | (or (plist-get props :short-name) | ||
| 184 | (plist-put props :short-name (symbol-name name))) | ||
| 185 | (or (plist-get props :long-name) | ||
| 186 | (plist-put props :long-name (plist-get props :short-name))) | ||
| 187 | (setcdr (assq :plist attrs) props) | ||
| 188 | |||
| 189 | (apply 'define-charset-internal name (mapcar 'cdr attrs)))) | ||
| 190 | |||
| 191 | |||
| 36 | (defun load-with-code-conversion (fullname file &optional noerror nomessage) | 192 | (defun load-with-code-conversion (fullname file &optional noerror nomessage) |
| 37 | "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. | 193 | "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. |
| 38 | The file contents are decoded before evaluation if necessary. | 194 | The file contents are decoded before evaluation if necessary. |
| @@ -103,190 +259,46 @@ Return t if file exists." | |||
| 103 | 259 | ||
| 104 | ;; API (Application Program Interface) for charsets. | 260 | ;; API (Application Program Interface) for charsets. |
| 105 | 261 | ||
| 106 | (defsubst charset-quoted-standard-p (obj) | 262 | ;;; Charset property |
| 107 | "Return t if OBJ is a quoted symbol, and is the name of a standard charset." | 263 | |
| 108 | (and (listp obj) (eq (car obj) 'quote) | 264 | (defun get-charset-property (charset propname) |
| 109 | (symbolp (car-safe (cdr obj))) | 265 | "Return the value of CHARSET's PROPNAME property. |
| 110 | (let ((vector (get (car-safe (cdr obj)) 'charset))) | 266 | This is the last value stored with |
| 111 | (and (vectorp vector) | 267 | (put-charset-property CHARSET PROPNAME VALUE)." |
| 112 | (< (aref vector 0) 160))))) | 268 | (plist-get (charset-plist charset) propname)) |
| 113 | 269 | ||
| 114 | (defsubst charsetp (object) | 270 | (defun put-charset-property (charset propname value) |
| 115 | "T if OBJECT is a charset." | 271 | "Store CHARSETS's PROPNAME property with value VALUE. |
| 116 | (and (symbolp object) (vectorp (get object 'charset)))) | 272 | It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." |
| 117 | 273 | (set-charset-plist charset | |
| 118 | (defsubst charset-info (charset) | 274 | (plist-put (charset-plist charset) propname value))) |
| 119 | "Return a vector of information of CHARSET. | 275 | |
| 120 | The elements of the vector are: | 276 | |
| 121 | CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, | 277 | (defun charset-description (charset) |
| 122 | LEADING-CODE-BASE, LEADING-CODE-EXT, | 278 | "Return description string of CHARSET." |
| 123 | ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE, | 279 | (plist-get (charset-plist charset) :docstring)) |
| 124 | REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION, | 280 | |
| 125 | PLIST, | 281 | (defun charset-dimension (charset) |
| 126 | where | 282 | "Return dimension string of CHARSET." |
| 127 | CHARSET-ID (integer) is the identification number of the charset. | 283 | (plist-get (charset-plist charset) :dimension)) |
| 128 | BYTES (integer) is the length of multi-byte form of a character in | 284 | |
| 129 | the charset: one of 1, 2, 3, and 4. | 285 | (defun charset-chars (charset) |
| 130 | DIMENSION (integer) is the number of bytes to represent a character of | 286 | "Return character numbers contained in a dimension of CHARSET." |
| 131 | the charset: 1 or 2. | 287 | (let ((code-space (plist-get (cahrset-plist charset) :code-space))) |
| 132 | CHARS (integer) is the number of characters in a dimension: 94 or 96. | 288 | (1+ (- (aref code-space 1) (aref code-space 0))))) |
| 133 | WIDTH (integer) is the number of columns a character in the charset | 289 | |
| 134 | occupies on the screen: one of 0, 1, and 2. | 290 | (defun charset-iso-final-char (charset) |
| 135 | DIRECTION (integer) is the rendering direction of characters in the | 291 | "Return final char of CHARSET." |
| 136 | charset when rendering. If 0, render from left to right, else | 292 | (or (plist-get (charset-plist charset) :iso-final-char) |
| 137 | render from right to left. | 293 | -1)) |
| 138 | LEADING-CODE-BASE (integer) is the base leading-code for the | ||
| 139 | charset. | ||
| 140 | LEADING-CODE-EXT (integer) is the extended leading-code for the | ||
| 141 | charset. All charsets of less than 0xA0 has the value 0. | ||
| 142 | ISO-FINAL-CHAR (character) is the final character of the | ||
| 143 | corresponding ISO 2022 charset. If the charset is not assigned | ||
| 144 | any final character, the value is -1. | ||
| 145 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked | ||
| 146 | while encoding to variants of ISO 2022 coding system, one of the | ||
| 147 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). | ||
| 148 | If the charset is not assigned any final character, the value is -1. | ||
| 149 | REVERSE-CHARSET (integer) is the charset which differs only in | ||
| 150 | LEFT-TO-RIGHT value from the charset. If there's no such a | ||
| 151 | charset, the value is -1. | ||
| 152 | SHORT-NAME (string) is the short name to refer to the charset. | ||
| 153 | LONG-NAME (string) is the long name to refer to the charset | ||
| 154 | DESCRIPTION (string) is the description string of the charset. | ||
| 155 | PLIST (property list) may contain any type of information a user | ||
| 156 | want to put and get by functions `put-charset-property' and | ||
| 157 | `get-charset-property' respectively." | ||
| 158 | (get charset 'charset)) | ||
| 159 | |||
| 160 | ;; It is better not to use backquote in this file, | ||
| 161 | ;; because that makes a bootstrapping problem | ||
| 162 | ;; if you need to recompile all the Lisp files using interpreted code. | ||
| 163 | |||
| 164 | (defmacro charset-id (charset) | ||
| 165 | "Return charset identification number of CHARSET." | ||
| 166 | (if (charset-quoted-standard-p charset) | ||
| 167 | (aref (charset-info (nth 1 charset)) 0) | ||
| 168 | (list 'aref (list 'charset-info charset) 0))) | ||
| 169 | |||
| 170 | (defmacro charset-bytes (charset) | ||
| 171 | "Return bytes of CHARSET. | ||
| 172 | See the function `charset-info' for more detail." | ||
| 173 | (if (charset-quoted-standard-p charset) | ||
| 174 | (aref (charset-info (nth 1 charset)) 1) | ||
| 175 | (list 'aref (list 'charset-info charset) 1))) | ||
| 176 | |||
| 177 | (defmacro charset-dimension (charset) | ||
| 178 | "Return dimension of CHARSET. | ||
| 179 | See the function `charset-info' for more detail." | ||
| 180 | (if (charset-quoted-standard-p charset) | ||
| 181 | (aref (charset-info (nth 1 charset)) 2) | ||
| 182 | (list 'aref (list 'charset-info charset) 2))) | ||
| 183 | |||
| 184 | (defmacro charset-chars (charset) | ||
| 185 | "Return character numbers contained in a dimension of CHARSET. | ||
| 186 | See the function `charset-info' for more detail." | ||
| 187 | (if (charset-quoted-standard-p charset) | ||
| 188 | (aref (charset-info (nth 1 charset)) 3) | ||
| 189 | (list 'aref (list 'charset-info charset) 3))) | ||
| 190 | |||
| 191 | (defmacro charset-width (charset) | ||
| 192 | "Return width (how many column occupied on a screen) of CHARSET. | ||
| 193 | See the function `charset-info' for more detail." | ||
| 194 | (if (charset-quoted-standard-p charset) | ||
| 195 | (aref (charset-info (nth 1 charset)) 4) | ||
| 196 | (list 'aref (list 'charset-info charset) 4))) | ||
| 197 | |||
| 198 | (defmacro charset-direction (charset) | ||
| 199 | "Return direction of CHARSET. | ||
| 200 | See the function `charset-info' for more detail." | ||
| 201 | (if (charset-quoted-standard-p charset) | ||
| 202 | (aref (charset-info (nth 1 charset)) 5) | ||
| 203 | (list 'aref (list 'charset-info charset) 5))) | ||
| 204 | |||
| 205 | (defmacro charset-iso-final-char (charset) | ||
| 206 | "Return final char of CHARSET. | ||
| 207 | See the function `charset-info' for more detail." | ||
| 208 | (if (charset-quoted-standard-p charset) | ||
| 209 | (aref (charset-info (nth 1 charset)) 8) | ||
| 210 | (list 'aref (list 'charset-info charset) 8))) | ||
| 211 | |||
| 212 | (defmacro charset-iso-graphic-plane (charset) | ||
| 213 | "Return graphic plane of CHARSET. | ||
| 214 | See the function `charset-info' for more detail." | ||
| 215 | (if (charset-quoted-standard-p charset) | ||
| 216 | (aref (charset-info (nth 1 charset)) 9) | ||
| 217 | (list 'aref (list 'charset-info charset) 9))) | ||
| 218 | |||
| 219 | (defmacro charset-reverse-charset (charset) | ||
| 220 | "Return reverse charset of CHARSET. | ||
| 221 | See the function `charset-info' for more detail." | ||
| 222 | (if (charset-quoted-standard-p charset) | ||
| 223 | (aref (charset-info (nth 1 charset)) 10) | ||
| 224 | (list 'aref (list 'charset-info charset) 10))) | ||
| 225 | 294 | ||
| 226 | (defmacro charset-short-name (charset) | 295 | (defmacro charset-short-name (charset) |
| 227 | "Return short name of CHARSET. | 296 | "Return short name of CHARSET." |
| 228 | See the function `charset-info' for more detail." | 297 | (plist-get (charset-plist charset) :short-name)) |
| 229 | (if (charset-quoted-standard-p charset) | ||
| 230 | (aref (charset-info (nth 1 charset)) 11) | ||
| 231 | (list 'aref (list 'charset-info charset) 11))) | ||
| 232 | 298 | ||
| 233 | (defmacro charset-long-name (charset) | 299 | (defmacro charset-long-name (charset) |
| 234 | "Return long name of CHARSET. | 300 | "Return long name of CHARSET." |
| 235 | See the function `charset-info' for more detail." | 301 | (plist-get (charset-plist charset) :long-name)) |
| 236 | (if (charset-quoted-standard-p charset) | ||
| 237 | (aref (charset-info (nth 1 charset)) 12) | ||
| 238 | (list 'aref (list 'charset-info charset) 12))) | ||
| 239 | |||
| 240 | (defmacro charset-description (charset) | ||
| 241 | "Return description of CHARSET. | ||
| 242 | See the function `charset-info' for more detail." | ||
| 243 | (if (charset-quoted-standard-p charset) | ||
| 244 | (aref (charset-info (nth 1 charset)) 13) | ||
| 245 | (list 'aref (list 'charset-info charset) 13))) | ||
| 246 | |||
| 247 | (defmacro charset-plist (charset) | ||
| 248 | "Return list charset property of CHARSET. | ||
| 249 | See the function `charset-info' for more detail." | ||
| 250 | (list 'aref | ||
| 251 | (if (charset-quoted-standard-p charset) | ||
| 252 | (charset-info (nth 1 charset)) | ||
| 253 | (list 'charset-info charset)) | ||
| 254 | 14)) | ||
| 255 | |||
| 256 | (defun set-charset-plist (charset plist) | ||
| 257 | "Set CHARSET's property list to PLIST, and return PLIST." | ||
| 258 | (aset (charset-info charset) 14 plist)) | ||
| 259 | |||
| 260 | (defun make-char (charset &optional code1 code2) | ||
| 261 | "Return a character of CHARSET whose position codes are CODE1 and CODE2. | ||
| 262 | CODE1 and CODE2 are optional, but if you don't supply | ||
| 263 | sufficient position codes, return a generic character which stands for | ||
| 264 | all characters or group of characters in the character set. | ||
| 265 | A generic character can be used to index a char table (e.g. syntax-table). | ||
| 266 | |||
| 267 | Such character sets as ascii, eight-bit-control, and eight-bit-graphic | ||
| 268 | don't have corresponding generic characters. If CHARSET is one of | ||
| 269 | them and you don't supply CODE1, return the character of the smallest | ||
| 270 | code in CHARSET. | ||
| 271 | |||
| 272 | If CODE1 or CODE2 are invalid (out of range), this function signals an | ||
| 273 | error. However, the eighth bit of both CODE1 and CODE2 is zeroed | ||
| 274 | before they are used to index CHARSET. Thus you may use, say, the | ||
| 275 | actual ISO 8859 character code rather than subtracting 128, as you | ||
| 276 | would need to index the corresponding Emacs charset." | ||
| 277 | (make-char-internal (charset-id charset) code1 code2)) | ||
| 278 | |||
| 279 | (put 'make-char 'byte-compile | ||
| 280 | (function | ||
| 281 | (lambda (form) | ||
| 282 | (let ((charset (nth 1 form))) | ||
| 283 | (if (charset-quoted-standard-p charset) | ||
| 284 | (byte-compile-normal-call | ||
| 285 | (cons 'make-char-internal | ||
| 286 | (cons (charset-id (nth 1 charset)) (nthcdr 2 form)))) | ||
| 287 | (byte-compile-normal-call | ||
| 288 | (cons 'make-char-internal | ||
| 289 | (cons (list 'charset-id charset) (nthcdr 2 form))))))))) | ||
| 290 | 302 | ||
| 291 | (defun charset-list () | 303 | (defun charset-list () |
| 292 | "Return list of charsets ever defined. | 304 | "Return list of charsets ever defined. |
| @@ -295,152 +307,314 @@ This function is provided for backward compatibility. | |||
| 295 | Now we have the variable `charset-list'." | 307 | Now we have the variable `charset-list'." |
| 296 | charset-list) | 308 | charset-list) |
| 297 | 309 | ||
| 298 | (defsubst generic-char-p (char) | 310 | (defun generic-char-p (char) |
| 299 | "Return t if and only if CHAR is a generic character. | 311 | "Always return nil. This exists only for backward compatibility." |
| 300 | See also the documentation of `make-char'." | 312 | nil) |
| 301 | (and (>= char 0400) | ||
| 302 | (let ((l (split-char char))) | ||
| 303 | (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0)) | ||
| 304 | (not (eq (car l) 'composition)))))) | ||
| 305 | |||
| 306 | (defun decode-char (ccs code-point &optional restriction) | ||
| 307 | "Return character specified by coded character set CCS and CODE-POINT in it. | ||
| 308 | Return nil if such a character is not supported. | ||
| 309 | Currently the only supported coded character set is `ucs' (ISO/IEC | ||
| 310 | 10646: Universal Multi-Octet Coded Character Set). | ||
| 311 | |||
| 312 | Optional argument RESTRICTION specifies a way to map the pair of CCS | ||
| 313 | and CODE-POINT to a character. Currently not supported and just ignored." | ||
| 314 | (cond ((eq ccs 'ucs) | ||
| 315 | (cond ((< code-point 160) | ||
| 316 | code-point) | ||
| 317 | ((< code-point 256) | ||
| 318 | (make-char 'latin-iso8859-1 code-point)) | ||
| 319 | ((< code-point #x2500) | ||
| 320 | (setq code-point (- code-point #x0100)) | ||
| 321 | (make-char 'mule-unicode-0100-24ff | ||
| 322 | (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) | ||
| 323 | ((< code-point #x3400) | ||
| 324 | (setq code-point (- code-point #x2500)) | ||
| 325 | (make-char 'mule-unicode-2500-33ff | ||
| 326 | (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) | ||
| 327 | ((and (>= code-point #xe000) (< code-point #x10000)) | ||
| 328 | (setq code-point (- code-point #xe000)) | ||
| 329 | (make-char 'mule-unicode-e000-ffff | ||
| 330 | (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) | ||
| 331 | )))) | ||
| 332 | |||
| 333 | (defun encode-char (char ccs &optional restriction) | ||
| 334 | "Return code-point in coded character set CCS that corresponds to CHAR. | ||
| 335 | Return nil if CHAR is not included in CCS. | ||
| 336 | Currently the only supported coded character set is `ucs' (ISO/IEC | ||
| 337 | 10646: Universal Multi-Octet Coded Character Set). | ||
| 338 | |||
| 339 | CHAR should be in one of these charsets: | ||
| 340 | ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff, | ||
| 341 | mule-unicode-e000-ffff, eight-bit-control | ||
| 342 | Otherwise, return nil. | ||
| 343 | |||
| 344 | Optional argument RESTRICTION specifies a way to map CHAR to a | ||
| 345 | code-point in CCS. Currently not supported and just ignored." | ||
| 346 | (let* ((split (split-char char)) | ||
| 347 | (charset (car split))) | ||
| 348 | (cond ((eq ccs 'ucs) | ||
| 349 | (cond ((eq charset 'ascii) | ||
| 350 | char) | ||
| 351 | ((eq charset 'latin-iso8859-1) | ||
| 352 | (+ (nth 1 split) 128)) | ||
| 353 | ((eq charset 'mule-unicode-0100-24ff) | ||
| 354 | (+ #x0100 (+ (* (- (nth 1 split) 32) 96) | ||
| 355 | (- (nth 2 split) 32)))) | ||
| 356 | ((eq charset 'mule-unicode-2500-33ff) | ||
| 357 | (+ #x2500 (+ (* (- (nth 1 split) 32) 96) | ||
| 358 | (- (nth 2 split) 32)))) | ||
| 359 | ((eq charset 'mule-unicode-e000-ffff) | ||
| 360 | (+ #xe000 (+ (* (- (nth 1 split) 32) 96) | ||
| 361 | (- (nth 2 split) 32)))) | ||
| 362 | ((eq charset 'eight-bit-control) | ||
| 363 | char)))))) | ||
| 364 | |||
| 365 | 313 | ||
| 366 | ;; Coding system stuff | 314 | ;; Coding system stuff |
| 367 | 315 | ||
| 368 | ;; Coding system is a symbol that has the property `coding-system'. | 316 | ;; Coding system is a symbol that has been defined by the function |
| 369 | ;; | 317 | ;; `define-coding-system'. |
| 370 | ;; The value of the property `coding-system' is a vector of the | ||
| 371 | ;; following format: | ||
| 372 | ;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS] | ||
| 373 | ;; We call this vector as coding-spec. See comments in src/coding.c | ||
| 374 | ;; for more detail. | ||
| 375 | |||
| 376 | (defconst coding-spec-type-idx 0) | ||
| 377 | (defconst coding-spec-mnemonic-idx 1) | ||
| 378 | (defconst coding-spec-doc-string-idx 2) | ||
| 379 | (defconst coding-spec-plist-idx 3) | ||
| 380 | (defconst coding-spec-flags-idx 4) | ||
| 381 | |||
| 382 | ;; PLIST is a property list of a coding system. To share PLIST among | ||
| 383 | ;; alias coding systems, a coding system has PLIST in coding-spec | ||
| 384 | ;; instead of having it in normal property list of Lisp symbol. | ||
| 385 | ;; Here's a list of coding system properties currently being used. | ||
| 386 | ;; | ||
| 387 | ;; o coding-category | ||
| 388 | ;; | ||
| 389 | ;; The value is a coding category the coding system belongs to. The | ||
| 390 | ;; function `make-coding-system' sets this value automatically | ||
| 391 | ;; unless its argument PROPERTIES specifies this property. | ||
| 392 | ;; | ||
| 393 | ;; o alias-coding-systems | ||
| 394 | ;; | ||
| 395 | ;; The value is a list of coding systems of the same alias group. The | ||
| 396 | ;; first element is the coding system made at first, which we call as | ||
| 397 | ;; `base coding system'. The function `make-coding-system' sets this | ||
| 398 | ;; value automatically and `define-coding-system-alias' updates it. | ||
| 399 | ;; | ||
| 400 | ;; See the documentation of make-coding-system for the meanings of the | ||
| 401 | ;; following properties. | ||
| 402 | ;; | ||
| 403 | ;; o post-read-conversion | ||
| 404 | ;; o pre-write-conversion | ||
| 405 | ;; o translation-table-for-decode | ||
| 406 | ;; o translation-table-for-encode | ||
| 407 | ;; o safe-chars | ||
| 408 | ;; o safe-charsets | ||
| 409 | ;; o mime-charset | ||
| 410 | ;; o valid-codes (meaningful only for a coding system based on CCL) | ||
| 411 | 318 | ||
| 319 | (defconst coding-system-iso-2022-flags | ||
| 320 | '(long-form | ||
| 321 | ascii-at-eol | ||
| 322 | ascii-at-cntl | ||
| 323 | 7-bit | ||
| 324 | locking-shift | ||
| 325 | single-shift | ||
| 326 | designation | ||
| 327 | revision | ||
| 328 | direction | ||
| 329 | init-at-bol | ||
| 330 | designate-at-bol | ||
| 331 | safe | ||
| 332 | latin-extra | ||
| 333 | composition | ||
| 334 | euc-tw-shift) | ||
| 335 | "List of symbols that control ISO-2022 encoder/decoder. | ||
| 412 | 336 | ||
| 413 | (defsubst coding-system-spec (coding-system) | 337 | The value of `:flags' attribute in the argument of the function |
| 414 | "Return coding-spec of CODING-SYSTEM." | 338 | `define-coding-system' must be one of them. |
| 415 | (get (check-coding-system coding-system) 'coding-system)) | ||
| 416 | 339 | ||
| 417 | (defun coding-system-type (coding-system) | 340 | If `long-form' is specified, use a long designation sequence on |
| 418 | "Return the coding type of CODING-SYSTEM. | 341 | encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312', |
| 419 | A coding type is an integer value indicating the encoding method | 342 | and `japanese-jisx0208'. The long designation sequence doesn't |
| 420 | of CODING-SYSTEM. See the function `make-coding-system' for more detail." | 343 | conform to ISO 2022, but used by such a coding system as |
| 421 | (aref (coding-system-spec coding-system) coding-spec-type-idx)) | 344 | `compound-text'. |
| 345 | |||
| 346 | If `ascii-at-eol' is specified, designate ASCII to g0 at end of line | ||
| 347 | on encoding. | ||
| 348 | |||
| 349 | If `ascii-at-cntl' is specified, designate ASCII to g0 before control | ||
| 350 | codes and SPC on encoding. | ||
| 351 | |||
| 352 | If `7-bit' is specified, use 7-bit code only on encoding. | ||
| 353 | |||
| 354 | If `locking-shift' is specified, decode locking-shift code correctly | ||
| 355 | on decoding, and use locking-shift to invoke a graphic element on | ||
| 356 | encoding. | ||
| 357 | |||
| 358 | If `single-shift' is specified, decode single-shift code correctly on | ||
| 359 | decoding, and use single-shift to invoke a graphic element on encoding. | ||
| 360 | |||
| 361 | If `designation' is specified, decode designation code correctly on | ||
| 362 | decoding, and use designation to designate a charset to a graphic | ||
| 363 | element on encoding. | ||
| 364 | |||
| 365 | If `revision' is specified, produce an escape sequence to specify | ||
| 366 | revision number of a charset on encoding. Such an escape sequence is | ||
| 367 | always correctly decoded on decoding. | ||
| 368 | |||
| 369 | If `direction' is specified, decode ISO6429's code for specifying | ||
| 370 | direction correctly, and produced the code on encoding. | ||
| 371 | |||
| 372 | If `init-at-bol' is specified, on encoding, it is assumed that | ||
| 373 | invocation and designation statuses are reset at each beginning of | ||
| 374 | line even if `ascii-at-eol' is not specified thus no code for | ||
| 375 | resetting them are produced. | ||
| 376 | |||
| 377 | If `safe' is specified, on encoding, characters not supported by a | ||
| 378 | coding are replaced with `?'. | ||
| 379 | |||
| 380 | If `latin-extra' is specified, code-detection routine assumes that a | ||
| 381 | code specified in `latin-extra-code-table' (which see) is valid. | ||
| 382 | |||
| 383 | If `composition' is specified, an escape sequence to specify | ||
| 384 | composition sequence is correctly decode on decoding, and is produced | ||
| 385 | on encoding. | ||
| 386 | |||
| 387 | If `euc-tw-shift' is specified, the EUC-TW specific shifting code is | ||
| 388 | correctly decoded on decoding, and is produced on encoding.") | ||
| 389 | |||
| 390 | (defun define-coding-system (name docstring &rest props) | ||
| 391 | "Define NAME (symbol) as a coding system with DOCSTRING and attributes. | ||
| 392 | The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE | ||
| 393 | may be any symbol. | ||
| 394 | |||
| 395 | The following attributes have special meanings. If labeled as | ||
| 396 | \"(required)\", it should not be omitted. | ||
| 397 | |||
| 398 | `:mnemonic' (required) | ||
| 399 | |||
| 400 | VALUE is a character to display on mode line for the coding system. | ||
| 401 | |||
| 402 | `:coding-type' (required) | ||
| 403 | |||
| 404 | VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', | ||
| 405 | `emacs-mule', `sjis', `big5', `ccl', `raw-text', `undecided'. | ||
| 406 | |||
| 407 | `:eol-type' (optional) | ||
| 408 | |||
| 409 | VALUE is an EOL (end-of-line) format of the coding system. It must be | ||
| 410 | one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL | ||
| 411 | \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), | ||
| 412 | and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on | ||
| 413 | decoding by the coding system, Emacs automatically detects an EOL | ||
| 414 | format of the source text. | ||
| 415 | |||
| 416 | `:charset-list' (required) | ||
| 417 | |||
| 418 | VALUE must be a list of charsets supported by the coding system. On | ||
| 419 | encoding by the coding system, if a character belongs to multiple | ||
| 420 | charsets in the list, a charset that comes earlier in the list is | ||
| 421 | selected. | ||
| 422 | |||
| 423 | `:ascii-compatible-p' (optional) | ||
| 424 | |||
| 425 | If VALUE is non-nil, the coding system decodes all 7-bit bytes into | ||
| 426 | the correponding ASCII characters, and encodes all ASCII characters | ||
| 427 | back to the correponding 7-bit bytes. If omitted, the VALUE defaults | ||
| 428 | to nil. | ||
| 429 | |||
| 430 | `:decode-translation-table' (optional) | ||
| 431 | |||
| 432 | VALUE must be a translation table to use on decoding. | ||
| 433 | |||
| 434 | `:encode-translation-table' (optional) | ||
| 435 | |||
| 436 | VALUE must be a translation table to use on encoding. | ||
| 437 | |||
| 438 | `:post-read-conversion' (optional) | ||
| 439 | |||
| 440 | VALUE must be a function to call after some text is inserted and | ||
| 441 | decoded by the coding system itself and before any functions in | ||
| 442 | `after-insert-functions' are called. The arguments to this function | ||
| 443 | is the same as those of a function in `after-insert-functions', | ||
| 444 | i.e. LENGTH of a text while putting point at the head of the text to | ||
| 445 | be decoded | ||
| 446 | |||
| 447 | `:pre-write-conversion' | ||
| 448 | |||
| 449 | VALUE must be a function to call after all functions in | ||
| 450 | `write-region-annotate-functions' and `buffer-file-format' are called, | ||
| 451 | and before the text is encoded by the coding system itself. The | ||
| 452 | arguments to this function is the same as those of a function in | ||
| 453 | `write-region-annotate-functions', i.e. FROM and TO specifying region | ||
| 454 | of a text. | ||
| 455 | |||
| 456 | `:default-char' | ||
| 457 | |||
| 458 | VALUE must be a character. On encoding, a character not supported by | ||
| 459 | the coding system is replaced with VALUE. | ||
| 460 | |||
| 461 | `:eol-type' | ||
| 462 | |||
| 463 | VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like | ||
| 464 | EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like | ||
| 465 | EOL (CR). If omitted, on decoding, the coding system detect EOL | ||
| 466 | format automatically, and on encoding, used Unix-like EOL. | ||
| 467 | |||
| 468 | `:mime-charset' | ||
| 469 | |||
| 470 | VALUE must be a symbol who has MIME-charset name. | ||
| 471 | |||
| 472 | `:flags' | ||
| 473 | |||
| 474 | VALUE must be a list of symbols that control ISO-2022 converter. Each | ||
| 475 | symbol must be a member of the variable `coding-system-iso-2022-flags' | ||
| 476 | \(which see). This attribute has a meaning only when `:coding-type' | ||
| 477 | is `iso-2022'. | ||
| 478 | |||
| 479 | `:designation' | ||
| 480 | |||
| 481 | VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE]. | ||
| 482 | GN-USAGE specifies the usage of graphic register GN as follows. | ||
| 483 | |||
| 484 | If it is nil, no charset can be designated to GN. | ||
| 485 | |||
| 486 | If it is a charset, the charset is initially designaged to GN, and | ||
| 487 | never used by the other charsets. | ||
| 488 | |||
| 489 | If it is a list, the elements must be charsets, nil, 94, or 96. GN | ||
| 490 | can be used by all listed charsets. If the list contains 94, any | ||
| 491 | charsets whose iso-chars is 94 can be designaged to GN. If the list | ||
| 492 | contains 96, any charsets whose iso-chars is 96 can be designaged to | ||
| 493 | GN. If the first element is a charset, the charset is initially | ||
| 494 | designaged to GN. | ||
| 495 | |||
| 496 | This attribute has a meaning only when `:coding-type' is `iso-2022'. | ||
| 497 | |||
| 498 | `:bom' | ||
| 499 | |||
| 500 | VALUE must nil, t, or cons of coding systems whose `:coding-type' is | ||
| 501 | `utf-16'. | ||
| 502 | |||
| 503 | This attribute has a meaning only when `:coding-type' is `utf-16'. | ||
| 504 | |||
| 505 | `:endian' | ||
| 506 | |||
| 507 | VALUE must be t or nil. See the above description for the detail. | ||
| 508 | |||
| 509 | This attribute has a meaning only when `:coding-type' is `utf-16'. | ||
| 510 | |||
| 511 | `:ccl-decoder' | ||
| 512 | |||
| 513 | This attribute has a meaning only when `:coding-type' is `ccl'. | ||
| 514 | |||
| 515 | `:ccl-encoder' | ||
| 516 | |||
| 517 | This attribute has a meaning only when `:coding-type' is `ccl'." | ||
| 518 | (let* ((common-attrs (mapcar 'list | ||
| 519 | '(:mnemonic | ||
| 520 | :coding-type | ||
| 521 | :charset-list | ||
| 522 | :ascii-compatible-p | ||
| 523 | :docode-translation-table | ||
| 524 | :encode-translation-table | ||
| 525 | :post-read-conversion | ||
| 526 | :pre-write-conversion | ||
| 527 | :default-char | ||
| 528 | :plist | ||
| 529 | :eol-type))) | ||
| 530 | (coding-type (plist-get props :coding-type)) | ||
| 531 | (spec-attrs (mapcar 'list | ||
| 532 | (cond ((eq coding-type 'iso-2022) | ||
| 533 | '(:initial | ||
| 534 | :reg-usage | ||
| 535 | :request | ||
| 536 | :flags)) | ||
| 537 | ((eq coding-type 'utf-16) | ||
| 538 | '(:bom | ||
| 539 | :endian)) | ||
| 540 | ((eq coding-type 'ccl) | ||
| 541 | '(:ccl-decoder | ||
| 542 | :ccl-encoder | ||
| 543 | :valids)))))) | ||
| 544 | |||
| 545 | (dolist (slot common-attrs) | ||
| 546 | (setcdr slot (plist-get props (car slot)))) | ||
| 547 | |||
| 548 | (dolist (slot spec-attrs) | ||
| 549 | (setcdr slot (plist-get props (car slot)))) | ||
| 550 | |||
| 551 | (if (eq coding-type 'iso-2022) | ||
| 552 | (let ((designation (plist-get props :designation)) | ||
| 553 | (flags (plist-get props :flags)) | ||
| 554 | (initial (make-vector 4 nil)) | ||
| 555 | (reg-usage (cons 4 4)) | ||
| 556 | request elt) | ||
| 557 | (dotimes (i 4) | ||
| 558 | (setq elt (aref designation i)) | ||
| 559 | (cond ((charsetp elt) | ||
| 560 | (aset initial i elt) | ||
| 561 | (setq request (cons (cons elt i) request))) | ||
| 562 | ((consp elt) | ||
| 563 | (aset initial i (car elt)) | ||
| 564 | (if (charsetp (car elt)) | ||
| 565 | (setq request (cons (cons (car elt) i) request))) | ||
| 566 | (dolist (e (cdr elt)) | ||
| 567 | (cond ((charsetp e) | ||
| 568 | (setq request (cons (cons e i) request))) | ||
| 569 | ((eq e 94) | ||
| 570 | (setcar reg-usage i)) | ||
| 571 | ((eq e 96) | ||
| 572 | (setcdr reg-usage i)) | ||
| 573 | ((eq e t) | ||
| 574 | (setcar reg-usage i) | ||
| 575 | (setcdr reg-usage i))))))) | ||
| 576 | (setcdr (assq :initial spec-attrs) initial) | ||
| 577 | (setcdr (assq :reg-usage spec-attrs) reg-usage) | ||
| 578 | (setcdr (assq :request spec-attrs) request) | ||
| 579 | |||
| 580 | ;; Change :flags value from a list to a bit-mask. | ||
| 581 | (let ((bits 0) | ||
| 582 | (i 0)) | ||
| 583 | (dolist (elt coding-system-iso-2022-flags) | ||
| 584 | (if (memq elt flags) | ||
| 585 | (setq bits (logior bits (lsh 1 i)))) | ||
| 586 | (setq i (1+ i))) | ||
| 587 | (setcdr (assq :flags spec-attrs) bits)))) | ||
| 588 | |||
| 589 | ;; Add :name and :docstring properties to PROPS. | ||
| 590 | (setq props | ||
| 591 | (cons :name (cons name (cons :docstring (cons docstring props))))) | ||
| 592 | (setcdr (assq :plist common-attrs) props) | ||
| 593 | |||
| 594 | (apply 'define-coding-system-internal | ||
| 595 | name (mapcar 'cdr (append common-attrs spec-attrs))))) | ||
| 596 | |||
| 597 | (defun coding-system-doc-string (coding-system) | ||
| 598 | "Return the documentation string for CODING-SYSTEM." | ||
| 599 | (plist-get (coding-system-plist coding-system) :docstring)) | ||
| 422 | 600 | ||
| 423 | (defun coding-system-mnemonic (coding-system) | 601 | (defun coding-system-mnemonic (coding-system) |
| 424 | "Return the mnemonic character of CODING-SYSTEM. | 602 | "Return the mnemonic character of CODING-SYSTEM. |
| 425 | The mnemonic character of a coding system is used in mode line | 603 | The mnemonic character of a coding system is used in mode line |
| 426 | to indicate the coding system. If the arg is nil, return ?-." | 604 | to indicate the coding system. If the arg is nil, return ?-." |
| 427 | (let ((spec (coding-system-spec coding-system))) | 605 | (plist-get (coding-system-plist coding-system) :mnemonic)) |
| 428 | (if spec (aref spec coding-spec-mnemonic-idx) ?-))) | ||
| 429 | |||
| 430 | (defun coding-system-doc-string (coding-system) | ||
| 431 | "Return the documentation string for CODING-SYSTEM." | ||
| 432 | (aref (coding-system-spec coding-system) coding-spec-doc-string-idx)) | ||
| 433 | 606 | ||
| 434 | (defun coding-system-plist (coding-system) | 607 | (defun coding-system-type (coding-system) |
| 435 | "Return the property list of CODING-SYSTEM." | 608 | "Return the coding type of CODING-SYSTEM. |
| 436 | (aref (coding-system-spec coding-system) coding-spec-plist-idx)) | 609 | A coding type is a symbol indicating the encoding method of CODING-SYSTEM. |
| 610 | See the function `define-coding-system' for more detail." | ||
| 611 | (plist-get (coding-system-plist coding-system) :coding-type)) | ||
| 437 | 612 | ||
| 438 | (defun coding-system-flags (coding-system) | 613 | (defun coding-system-charset-list (coding-system) |
| 439 | "Return `flags' of CODING-SYSTEM. | 614 | "Return list of charsets supported by COIDNG-SYSTEM. |
| 440 | A `flags' of a coding system is a vector of length 32 indicating detailed | 615 | If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'. |
| 441 | information of a coding system. See the function `make-coding-system' | 616 | If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'." |
| 442 | for more detail." | 617 | (plist-get (coding-system-plist coding-system) :charset-list)) |
| 443 | (aref (coding-system-spec coding-system) coding-spec-flags-idx)) | ||
| 444 | 618 | ||
| 445 | (defun coding-system-get (coding-system prop) | 619 | (defun coding-system-get (coding-system prop) |
| 446 | "Extract a value from CODING-SYSTEM's property list for property PROP." | 620 | "Extract a value from CODING-SYSTEM's property list for property PROP." |
| @@ -448,22 +622,7 @@ for more detail." | |||
| 448 | 622 | ||
| 449 | (defun coding-system-put (coding-system prop val) | 623 | (defun coding-system-put (coding-system prop val) |
| 450 | "Change value in CODING-SYSTEM's property list PROP to VAL." | 624 | "Change value in CODING-SYSTEM's property list PROP to VAL." |
| 451 | (let ((plist (coding-system-plist coding-system))) | 625 | (plist-put (coding-system-plist coding-system) prop val)) |
| 452 | (if plist | ||
| 453 | (plist-put plist prop val) | ||
| 454 | (aset (coding-system-spec coding-system) coding-spec-plist-idx | ||
| 455 | (list prop val))))) | ||
| 456 | |||
| 457 | (defun coding-system-category (coding-system) | ||
| 458 | "Return the coding category of CODING-SYSTEM. | ||
| 459 | See also `coding-category-list'." | ||
| 460 | (coding-system-get coding-system 'coding-category)) | ||
| 461 | |||
| 462 | (defun coding-system-base (coding-system) | ||
| 463 | "Return the base coding system of CODING-SYSTEM. | ||
| 464 | A base coding system is what made by `make-coding-system'. | ||
| 465 | Any alias nor subsidiary coding systems are not base coding system." | ||
| 466 | (car (coding-system-get coding-system 'alias-coding-systems))) | ||
| 467 | 626 | ||
| 468 | (defalias 'coding-system-parent 'coding-system-base) | 627 | (defalias 'coding-system-parent 'coding-system-base) |
| 469 | (make-obsolete 'coding-system-parent 'coding-system-base "20.3") | 628 | (make-obsolete 'coding-system-parent 'coding-system-base "20.3") |
| @@ -478,18 +637,6 @@ Any alias nor subsidiary coding systems are not base coding system." | |||
| 478 | ;; automatically. Nth element of the vector is the subsidiary coding | 637 | ;; automatically. Nth element of the vector is the subsidiary coding |
| 479 | ;; system whose `eol-type' property is N. | 638 | ;; system whose `eol-type' property is N. |
| 480 | 639 | ||
| 481 | (defun coding-system-eol-type (coding-system) | ||
| 482 | "Return eol-type of CODING-SYSTEM. | ||
| 483 | An eol-type is integer 0, 1, 2, or a vector of coding systems. | ||
| 484 | |||
| 485 | Integer values 0, 1, and 2 indicate a format of end-of-line; LF, | ||
| 486 | CRLF, and CR respectively. | ||
| 487 | |||
| 488 | A vector value indicates that a format of end-of-line should be | ||
| 489 | detected automatically. Nth element of the vector is the subsidiary | ||
| 490 | coding system whose eol-type is N." | ||
| 491 | (get coding-system 'eol-type)) | ||
| 492 | |||
| 493 | (defun coding-system-lessp (x y) | 640 | (defun coding-system-lessp (x y) |
| 494 | (cond ((eq x 'no-conversion) t) | 641 | (cond ((eq x 'no-conversion) t) |
| 495 | ((eq y 'no-conversion) nil) | 642 | ((eq y 'no-conversion) nil) |
| @@ -540,566 +687,6 @@ formats (e.g. iso-latin-1-unix, koi8-r-dos)." | |||
| 540 | (setq tail (cdr tail))))) | 687 | (setq tail (cdr tail))))) |
| 541 | codings)) | 688 | codings)) |
| 542 | 689 | ||
| 543 | (defun map-charset-chars (func charset) | ||
| 544 | "Use FUNC to map over all characters in CHARSET for side effects. | ||
| 545 | FUNC is a function of two args, the start and end (inclusive) of a | ||
| 546 | character code range. Thus FUNC should iterate over [START, END]." | ||
| 547 | (let* ((dim (charset-dimension charset)) | ||
| 548 | (chars (charset-chars charset)) | ||
| 549 | (start (if (= chars 94) | ||
| 550 | 33 | ||
| 551 | 32))) | ||
| 552 | (if (= dim 1) | ||
| 553 | (funcall func | ||
| 554 | (make-char charset start) | ||
| 555 | (make-char charset (+ start chars -1))) | ||
| 556 | (dotimes (i chars) | ||
| 557 | (funcall func | ||
| 558 | (make-char charset (+ i start) start) | ||
| 559 | (make-char charset (+ i start) (+ start chars -1))))))) | ||
| 560 | |||
| 561 | (defun register-char-codings (coding-system safe-chars) | ||
| 562 | "Add entries for CODING-SYSTEM to `char-coding-system-table'. | ||
| 563 | If SAFE-CHARS is a char-table, its non-nil entries specify characters | ||
| 564 | which CODING-SYSTEM encodes safely. If SAFE-CHARS is t, register | ||
| 565 | CODING-SYSTEM as a general one which can encode all characters." | ||
| 566 | (let ((general (char-table-extra-slot char-coding-system-table 0)) | ||
| 567 | ;; Charsets which have some members in the table, but not all | ||
| 568 | ;; of them (i.e. not just a generic character): | ||
| 569 | (partials (char-table-extra-slot char-coding-system-table 1))) | ||
| 570 | (if (eq safe-chars t) | ||
| 571 | (or (memq coding-system general) | ||
| 572 | (set-char-table-extra-slot char-coding-system-table 0 | ||
| 573 | (cons coding-system general))) | ||
| 574 | (map-char-table | ||
| 575 | (lambda (key val) | ||
| 576 | (if (and (>= key 128) val) | ||
| 577 | (let ((codings (aref char-coding-system-table key)) | ||
| 578 | (charset (char-charset key))) | ||
| 579 | (unless (memq coding-system codings) | ||
| 580 | (if (and (generic-char-p key) | ||
| 581 | (memq charset partials)) | ||
| 582 | ;; The generic char would clobber individual | ||
| 583 | ;; entries already in the table. First save the | ||
| 584 | ;; separate existing entries for all chars of the | ||
| 585 | ;; charset (with the generic entry added, if | ||
| 586 | ;; necessary). | ||
| 587 | (let (entry existing) | ||
| 588 | (map-charset-chars | ||
| 589 | (lambda (start end) | ||
| 590 | (while (<= start end) | ||
| 591 | (setq entry (aref char-coding-system-table start)) | ||
| 592 | (when entry | ||
| 593 | (push (cons | ||
| 594 | start | ||
| 595 | (if (memq coding-system entry) | ||
| 596 | entry | ||
| 597 | (cons coding-system entry))) | ||
| 598 | existing)) | ||
| 599 | (setq start (1+ start)))) | ||
| 600 | charset) | ||
| 601 | ;; Update the generic entry. | ||
| 602 | (aset char-coding-system-table key | ||
| 603 | (cons coding-system codings)) | ||
| 604 | ;; Override with the saved entries. | ||
| 605 | (dolist (elt existing) | ||
| 606 | (aset char-coding-system-table (car elt) (cdr elt)))) | ||
| 607 | (aset char-coding-system-table key | ||
| 608 | (cons coding-system codings)) | ||
| 609 | (unless (or (memq charset partials) | ||
| 610 | (generic-char-p key)) | ||
| 611 | (push charset partials))))))) | ||
| 612 | safe-chars) | ||
| 613 | (set-char-table-extra-slot char-coding-system-table 1 partials)))) | ||
| 614 | |||
| 615 | |||
| 616 | (defun make-subsidiary-coding-system (coding-system) | ||
| 617 | "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM." | ||
| 618 | (let ((coding-spec (coding-system-spec coding-system)) | ||
| 619 | (subsidiaries (vector (intern (format "%s-unix" coding-system)) | ||
| 620 | (intern (format "%s-dos" coding-system)) | ||
| 621 | (intern (format "%s-mac" coding-system)))) | ||
| 622 | (i 0) | ||
| 623 | temp) | ||
| 624 | (while (< i 3) | ||
| 625 | (put (aref subsidiaries i) 'coding-system coding-spec) | ||
| 626 | (put (aref subsidiaries i) 'eol-type i) | ||
| 627 | (add-to-coding-system-list (aref subsidiaries i)) | ||
| 628 | (setq coding-system-alist | ||
| 629 | (cons (list (symbol-name (aref subsidiaries i))) | ||
| 630 | coding-system-alist)) | ||
| 631 | (setq i (1+ i))) | ||
| 632 | subsidiaries)) | ||
| 633 | |||
| 634 | (defun transform-make-coding-system-args (name type &optional doc-string props) | ||
| 635 | "For internal use only. | ||
| 636 | Transform XEmacs style args for `make-coding-system' to Emacs style. | ||
| 637 | Value is a list of transformed arguments." | ||
| 638 | (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?"))) | ||
| 639 | (eol-type (plist-get props 'eol-type)) | ||
| 640 | properties tmp) | ||
| 641 | (cond | ||
| 642 | ((eq eol-type 'lf) (setq eol-type 'unix)) | ||
| 643 | ((eq eol-type 'crlf) (setq eol-type 'dos)) | ||
| 644 | ((eq eol-type 'cr) (setq eol-type 'mac))) | ||
| 645 | (if (setq tmp (plist-get props 'post-read-conversion)) | ||
| 646 | (setq properties (plist-put properties 'post-read-conversion tmp))) | ||
| 647 | (if (setq tmp (plist-get props 'pre-write-conversion)) | ||
| 648 | (setq properties (plist-put properties 'pre-write-conversion tmp))) | ||
| 649 | (cond | ||
| 650 | ((eq type 'shift-jis) | ||
| 651 | `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type)) | ||
| 652 | ((eq type 'iso2022) ; This is not perfect. | ||
| 653 | (if (plist-get props 'escape-quoted) | ||
| 654 | (error "escape-quoted is not supported: %S" | ||
| 655 | `(,name ,type ,doc-string ,props))) | ||
| 656 | (let ((g0 (plist-get props 'charset-g0)) | ||
| 657 | (g1 (plist-get props 'charset-g1)) | ||
| 658 | (g2 (plist-get props 'charset-g2)) | ||
| 659 | (g3 (plist-get props 'charset-g3)) | ||
| 660 | (use-roman | ||
| 661 | (and | ||
| 662 | (eq (cadr (assoc 'latin-jisx0201 | ||
| 663 | (plist-get props 'input-charset-conversion))) | ||
| 664 | 'ascii) | ||
| 665 | (eq (cadr (assoc 'ascii | ||
| 666 | (plist-get props 'output-charset-conversion))) | ||
| 667 | 'latin-jisx0201))) | ||
| 668 | (use-oldjis | ||
| 669 | (and | ||
| 670 | (eq (cadr (assoc 'japanese-jisx0208-1978 | ||
| 671 | (plist-get props 'input-charset-conversion))) | ||
| 672 | 'japanese-jisx0208) | ||
| 673 | (eq (cadr (assoc 'japanese-jisx0208 | ||
| 674 | (plist-get props 'output-charset-conversion))) | ||
| 675 | 'japanese-jisx0208-1978)))) | ||
| 676 | (if (charsetp g0) | ||
| 677 | (if (plist-get props 'force-g0-on-output) | ||
| 678 | (setq g0 `(nil ,g0)) | ||
| 679 | (setq g0 `(,g0 t)))) | ||
| 680 | (if (charsetp g1) | ||
| 681 | (if (plist-get props 'force-g1-on-output) | ||
| 682 | (setq g1 `(nil ,g1)) | ||
| 683 | (setq g1 `(,g1 t)))) | ||
| 684 | (if (charsetp g2) | ||
| 685 | (if (plist-get props 'force-g2-on-output) | ||
| 686 | (setq g2 `(nil ,g2)) | ||
| 687 | (setq g2 `(,g2 t)))) | ||
| 688 | (if (charsetp g3) | ||
| 689 | (if (plist-get props 'force-g3-on-output) | ||
| 690 | (setq g3 `(nil ,g3)) | ||
| 691 | (setq g3 `(,g3 t)))) | ||
| 692 | `(,name 2 ,mnemonic ,doc-string | ||
| 693 | (,g0 ,g1 ,g2 ,g3 | ||
| 694 | ,(plist-get props 'short) | ||
| 695 | ,(not (plist-get props 'no-ascii-eol)) | ||
| 696 | ,(not (plist-get props 'no-ascii-cntl)) | ||
| 697 | ,(plist-get props 'seven) | ||
| 698 | t | ||
| 699 | ,(not (plist-get props 'lock-shift)) | ||
| 700 | ,use-roman | ||
| 701 | ,use-oldjis | ||
| 702 | ,(plist-get props 'no-iso6429) | ||
| 703 | nil nil nil nil) | ||
| 704 | ,properties ,eol-type))) | ||
| 705 | ((eq type 'big5) | ||
| 706 | `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type)) | ||
| 707 | ((eq type 'ccl) | ||
| 708 | `(,name 4 ,mnemonic ,doc-string | ||
| 709 | (,(plist-get props 'decode) . ,(plist-get props 'encode)) | ||
| 710 | ,properties ,eol-type)) | ||
| 711 | (t | ||
| 712 | (error "unsupported XEmacs style make-coding-style arguments: %S" | ||
| 713 | `(,name ,type ,doc-string ,props)))))) | ||
| 714 | |||
| 715 | (defun make-coding-system (coding-system type mnemonic doc-string | ||
| 716 | &optional | ||
| 717 | flags | ||
| 718 | properties | ||
| 719 | eol-type) | ||
| 720 | "Define a new coding system CODING-SYSTEM (symbol). | ||
| 721 | Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional), | ||
| 722 | and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM | ||
| 723 | in the following format: | ||
| 724 | [TYPE MNEMONIC DOC-STRING PLIST FLAGS] | ||
| 725 | |||
| 726 | TYPE is an integer value indicating the type of the coding system as follows: | ||
| 727 | 0: Emacs internal format, | ||
| 728 | 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC, | ||
| 729 | 2: ISO-2022 including many variants, | ||
| 730 | 3: Big5 used mainly on Chinese PC, | ||
| 731 | 4: private, CCL programs provide encoding/decoding algorithm, | ||
| 732 | 5: Raw-text, which means that text contains random 8-bit codes. | ||
| 733 | |||
| 734 | MNEMONIC is a character to be displayed on mode line for the coding system. | ||
| 735 | |||
| 736 | DOC-STRING is a documentation string for the coding system. | ||
| 737 | |||
| 738 | FLAGS specifies more detailed information of the coding system as follows: | ||
| 739 | |||
| 740 | If TYPE is 2 (ISO-2022), FLAGS is a list of these elements: | ||
| 741 | CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM, | ||
| 742 | ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT, | ||
| 743 | USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL, | ||
| 744 | SAFE, ACCEPT-LATIN-EXTRA-CODE. | ||
| 745 | CHARSETn are character sets initially designated to Gn graphic registers. | ||
| 746 | If CHARSETn is nil, Gn is never used. | ||
| 747 | If CHARSETn is t, Gn can be used but nothing designated initially. | ||
| 748 | If CHARSETn is a list of character sets, those character sets are | ||
| 749 | designated to Gn on output, but nothing designated to Gn initially. | ||
| 750 | But, character set `ascii' can be designated only to G0. | ||
| 751 | SHORT-FORM non-nil means use short designation sequence on output. | ||
| 752 | ASCII-EOL non-nil means designate ASCII to g0 at end of line on output. | ||
| 753 | ASCII-CNTL non-nil means designate ASCII to g0 before control codes and | ||
| 754 | SPACE on output. | ||
| 755 | SEVEN non-nil means use 7-bit code only on output. | ||
| 756 | LOCKING-SHIFT non-nil means use locking-shift. | ||
| 757 | SINGLE-SHIFT non-nil means use single-shift. | ||
| 758 | USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII. | ||
| 759 | USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983. | ||
| 760 | NO-ISO6429 non-nil means not use ISO6429's direction specification. | ||
| 761 | INIT-BOL non-nil means any designation state is assumed to be reset | ||
| 762 | to initial at each beginning of line on output. | ||
| 763 | DESIGNATION-BOL non-nil means designation sequences should be placed | ||
| 764 | at beginning of line on output. | ||
| 765 | SAFE non-nil means convert unsafe characters to `?' on output. | ||
| 766 | Characters not specified in the property `safe-charsets' nor | ||
| 767 | `safe-chars' are unsafe. | ||
| 768 | ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts | ||
| 769 | a code specified in `latin-extra-code-table' (which see) as a valid | ||
| 770 | code of the coding system. | ||
| 771 | |||
| 772 | If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for | ||
| 773 | decoding and encoding. CCL programs should be specified by their | ||
| 774 | symbols. | ||
| 775 | |||
| 776 | PROPERTIES is an alist of properties vs the corresponding values. The | ||
| 777 | following properties are recognized: | ||
| 778 | |||
| 779 | o post-read-conversion | ||
| 780 | |||
| 781 | The value is a function to call after some text is inserted and | ||
| 782 | decoded by the coding system itself and before any functions in | ||
| 783 | `after-insert-functions' are called. The argument of this | ||
| 784 | function is the same as for a function in | ||
| 785 | `after-insert-file-functions', i.e. LENGTH of the text inserted, | ||
| 786 | with point at the head of the text to be decoded. | ||
| 787 | |||
| 788 | o pre-write-conversion | ||
| 789 | |||
| 790 | The value is a function to call after all functions in | ||
| 791 | `write-region-annotate-functions' and `buffer-file-format' are | ||
| 792 | called, and before the text is encoded by the coding system itself. | ||
| 793 | The arguments to this function are the same as those of a function | ||
| 794 | in `write-region-annotate-functions', i.e. FROM and TO, specifying | ||
| 795 | a region of text. | ||
| 796 | |||
| 797 | o translation-table-for-decode | ||
| 798 | |||
| 799 | The value is a translation table to be applied on decoding. See | ||
| 800 | the function `make-translation-table' for the format of translation | ||
| 801 | table. This is not applicable to type 4 (CCL-based) coding systems. | ||
| 802 | |||
| 803 | o translation-table-for-encode | ||
| 804 | |||
| 805 | The value is a translation table to be applied on encoding. This is | ||
| 806 | not applicable to type 4 (CCL-based) coding systems. | ||
| 807 | |||
| 808 | o safe-chars | ||
| 809 | |||
| 810 | The value is a char table. If a character has non-nil value in it, | ||
| 811 | the character is safely supported by the coding system. This | ||
| 812 | overrides the specification of safe-charsets. | ||
| 813 | |||
| 814 | o safe-charsets | ||
| 815 | |||
| 816 | The value is a list of charsets safely supported by the coding | ||
| 817 | system. The value t means that all charsets Emacs handles are | ||
| 818 | supported. Even if some charset is not in this list, it doesn't | ||
| 819 | mean that the charset can't be encoded in the coding system; | ||
| 820 | it just means that some other receiver of text encoded | ||
| 821 | in the coding system won't be able to handle that charset. | ||
| 822 | |||
| 823 | o mime-charset | ||
| 824 | |||
| 825 | The value is a symbol of which name is `MIME-charset' parameter of | ||
| 826 | the coding system. | ||
| 827 | |||
| 828 | o valid-codes (meaningful only for a coding system based on CCL) | ||
| 829 | |||
| 830 | The value is a list to indicate valid byte ranges of the encoded | ||
| 831 | file. Each element of the list is an integer or a cons of integer. | ||
| 832 | In the former case, the integer value is a valid byte code. In the | ||
| 833 | latter case, the integers specify the range of valid byte codes. | ||
| 834 | |||
| 835 | These properties are set in PLIST, a property list. This function | ||
| 836 | also sets properties `coding-category' and `alias-coding-systems' | ||
| 837 | automatically. | ||
| 838 | |||
| 839 | EOL-TYPE specifies the EOL type of the coding-system in one of the | ||
| 840 | following formats: | ||
| 841 | |||
| 842 | o symbol (unix, dos, or mac) | ||
| 843 | |||
| 844 | The symbol `unix' means Unix-like EOL (LF), `dos' means | ||
| 845 | DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR). | ||
| 846 | |||
| 847 | o number (0, 1, or 2) | ||
| 848 | |||
| 849 | The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL | ||
| 850 | respectively. | ||
| 851 | |||
| 852 | o vector of coding-systems of length 3 | ||
| 853 | |||
| 854 | The EOL type is detected automatically for the coding system. | ||
| 855 | And, according to the detected EOL type, one of the coding | ||
| 856 | systems in the vector is selected. Elements of the vector | ||
| 857 | corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL | ||
| 858 | in this order. | ||
| 859 | |||
| 860 | Kludgy features for backward compatibility: | ||
| 861 | |||
| 862 | 1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is | ||
| 863 | treated as a compiled CCL code. | ||
| 864 | |||
| 865 | 2. If PROPERTIES is just a list of character sets, the list is set as | ||
| 866 | a value of `safe-charsets' in PLIST." | ||
| 867 | |||
| 868 | ;; For compatiblity with XEmacs, we check the type of TYPE. If it | ||
| 869 | ;; is a symbol, perhaps, this function is called with XEmacs-style | ||
| 870 | ;; arguments. Here, try to transform that kind of arguments to | ||
| 871 | ;; Emacs style. | ||
| 872 | (if (symbolp type) | ||
| 873 | (let ((args (transform-make-coding-system-args coding-system type | ||
| 874 | mnemonic doc-string))) | ||
| 875 | (setq coding-system (car args) | ||
| 876 | type (nth 1 args) | ||
| 877 | mnemonic (nth 2 args) | ||
| 878 | doc-string (nth 3 args) | ||
| 879 | flags (nth 4 args) | ||
| 880 | properties (nth 5 args) | ||
| 881 | eol-type (nth 6 args)))) | ||
| 882 | |||
| 883 | ;; Set a value of `coding-system' property. | ||
| 884 | (let ((coding-spec (make-vector 5 nil)) | ||
| 885 | (no-initial-designation t) | ||
| 886 | (no-alternative-designation t) | ||
| 887 | (accept-latin-extra-code nil) | ||
| 888 | coding-category) | ||
| 889 | (if (or (not (integerp type)) (< type 0) (> type 5)) | ||
| 890 | (error "TYPE argument must be 0..5")) | ||
| 891 | (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127)) | ||
| 892 | (error "MNEMONIC argument must be an ASCII printable character")) | ||
| 893 | (aset coding-spec coding-spec-type-idx type) | ||
| 894 | (aset coding-spec coding-spec-mnemonic-idx mnemonic) | ||
| 895 | (aset coding-spec coding-spec-doc-string-idx | ||
| 896 | (purecopy (if (stringp doc-string) doc-string ""))) | ||
| 897 | (cond ((= type 0) | ||
| 898 | (setq coding-category 'coding-category-emacs-mule)) | ||
| 899 | ((= type 1) | ||
| 900 | (setq coding-category 'coding-category-sjis)) | ||
| 901 | ((= type 2) ; ISO2022 | ||
| 902 | (let ((i 0) | ||
| 903 | (vec (make-vector 32 nil)) | ||
| 904 | (g1-designation nil) | ||
| 905 | (fl flags)) | ||
| 906 | (while (< i 4) | ||
| 907 | (let ((charset (car fl))) | ||
| 908 | (if (and no-initial-designation | ||
| 909 | (> i 0) | ||
| 910 | (or (charsetp charset) | ||
| 911 | (and (consp charset) | ||
| 912 | (charsetp (car charset))))) | ||
| 913 | (setq no-initial-designation nil)) | ||
| 914 | (if (charsetp charset) | ||
| 915 | (if (= i 1) (setq g1-designation charset)) | ||
| 916 | (if (consp charset) | ||
| 917 | (let ((tail charset) | ||
| 918 | elt) | ||
| 919 | (while tail | ||
| 920 | (setq elt (car tail)) | ||
| 921 | (if (eq elt t) | ||
| 922 | (setq no-alternative-designation nil) | ||
| 923 | (if (and elt (not (charsetp elt))) | ||
| 924 | (error "Invalid charset: %s" elt))) | ||
| 925 | (setq tail (cdr tail))) | ||
| 926 | (setq g1-designation (car charset))) | ||
| 927 | (if charset | ||
| 928 | (if (eq charset t) | ||
| 929 | (setq no-alternative-designation nil) | ||
| 930 | (error "Invalid charset: %s" charset))))) | ||
| 931 | (aset vec i charset)) | ||
| 932 | (setq fl (cdr fl) i (1+ i))) | ||
| 933 | (while (and (< i 32) fl) | ||
| 934 | (aset vec i (car fl)) | ||
| 935 | (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE | ||
| 936 | (car fl)) | ||
| 937 | (setq accept-latin-extra-code t)) | ||
| 938 | (setq fl (cdr fl) i (1+ i))) | ||
| 939 | (aset coding-spec 4 vec) | ||
| 940 | (setq coding-category | ||
| 941 | (if (aref vec 8) ; Use locking-shift. | ||
| 942 | (or (and (aref vec 7) 'coding-category-iso-7-else) | ||
| 943 | 'coding-category-iso-8-else) | ||
| 944 | (if (aref vec 7) ; 7-bit only. | ||
| 945 | (if (aref vec 9) ; Use single-shift. | ||
| 946 | 'coding-category-iso-7-else | ||
| 947 | (if no-alternative-designation | ||
| 948 | 'coding-category-iso-7-tight | ||
| 949 | 'coding-category-iso-7)) | ||
| 950 | (if (or no-initial-designation | ||
| 951 | (not no-alternative-designation)) | ||
| 952 | 'coding-category-iso-8-else | ||
| 953 | (if (and (charsetp g1-designation) | ||
| 954 | (= (charset-dimension g1-designation) 2)) | ||
| 955 | 'coding-category-iso-8-2 | ||
| 956 | 'coding-category-iso-8-1))))))) | ||
| 957 | ((= type 3) | ||
| 958 | (setq coding-category 'coding-category-big5)) | ||
| 959 | ((= type 4) ; private | ||
| 960 | (setq coding-category 'coding-category-ccl) | ||
| 961 | (if (not (consp flags)) | ||
| 962 | (error "Invalid FLAGS argument for TYPE 4 (CCL)") | ||
| 963 | (let ((decoder (check-ccl-program | ||
| 964 | (car flags) | ||
| 965 | (intern (format "%s-decoder" coding-system)))) | ||
| 966 | (encoder (check-ccl-program | ||
| 967 | (cdr flags) | ||
| 968 | (intern (format "%s-encoder" coding-system))))) | ||
| 969 | (if (and decoder encoder) | ||
| 970 | (aset coding-spec 4 (cons decoder encoder)) | ||
| 971 | (error "Invalid FLAGS argument for TYPE 4 (CCL)"))))) | ||
| 972 | (t ; i.e. (= type 5) | ||
| 973 | (setq coding-category 'coding-category-raw-text))) | ||
| 974 | |||
| 975 | (let ((plist (list 'coding-category coding-category | ||
| 976 | 'alias-coding-systems (list coding-system)))) | ||
| 977 | (if no-initial-designation | ||
| 978 | (plist-put plist 'no-initial-designation t)) | ||
| 979 | (if (and properties | ||
| 980 | (or (eq properties t) | ||
| 981 | (not (consp (car properties))))) | ||
| 982 | ;; In the old version, the arg PROPERTIES is a list to be | ||
| 983 | ;; set in PLIST as a value of property `safe-charsets'. | ||
| 984 | (setq properties (list (cons 'safe-charsets properties)))) | ||
| 985 | ;; In the current version PROPERTIES is a property list. | ||
| 986 | ;; Reflect it into PLIST one by one while handling safe-chars | ||
| 987 | ;; specially. | ||
| 988 | (let ((safe-charsets (cdr (assq 'safe-charsets properties))) | ||
| 989 | (safe-chars (cdr (assq 'safe-chars properties))) | ||
| 990 | (l properties) | ||
| 991 | prop val) | ||
| 992 | ;; If only safe-charsets is specified, make a char-table from | ||
| 993 | ;; it, and store that char-table as the value of `safe-chars'. | ||
| 994 | (if (and (not safe-chars) safe-charsets) | ||
| 995 | (let (charset) | ||
| 996 | (if (eq safe-charsets t) | ||
| 997 | (setq safe-chars t) | ||
| 998 | (setq safe-chars (make-char-table 'safe-chars)) | ||
| 999 | (while safe-charsets | ||
| 1000 | (setq charset (car safe-charsets) | ||
| 1001 | safe-charsets (cdr safe-charsets)) | ||
| 1002 | (cond ((eq charset 'ascii)) ; just ignore | ||
| 1003 | ((eq charset 'eight-bit-control) | ||
| 1004 | (let ((i 128)) | ||
| 1005 | (while (< i 160) | ||
| 1006 | (aset safe-chars i t) | ||
| 1007 | (setq i (1+ i))))) | ||
| 1008 | ((eq charset 'eight-bit-graphic) | ||
| 1009 | (let ((i 160)) | ||
| 1010 | (while (< i 256) | ||
| 1011 | (aset safe-chars i t) | ||
| 1012 | (setq i (1+ i))))) | ||
| 1013 | (t | ||
| 1014 | (aset safe-chars (make-char charset) t)))) | ||
| 1015 | (if accept-latin-extra-code | ||
| 1016 | (let ((i 128)) | ||
| 1017 | (while (< i 160) | ||
| 1018 | (if (aref latin-extra-code-table i) | ||
| 1019 | (aset safe-chars i t)) | ||
| 1020 | (setq i (1+ i)))))) | ||
| 1021 | (setq l (cons (cons 'safe-chars safe-chars) l)))) | ||
| 1022 | (while l | ||
| 1023 | (setq prop (car (car l)) val (cdr (car l)) l (cdr l)) | ||
| 1024 | (if (eq prop 'safe-chars) | ||
| 1025 | (progn | ||
| 1026 | (if (and (symbolp val) | ||
| 1027 | (get val 'translation-table)) | ||
| 1028 | (setq safe-chars (get val 'translation-table))) | ||
| 1029 | (register-char-codings coding-system safe-chars) | ||
| 1030 | (setq val safe-chars))) | ||
| 1031 | (plist-put plist prop val))) | ||
| 1032 | ;; The property `coding-category' may have been set differently | ||
| 1033 | ;; through PROPERTIES. | ||
| 1034 | (setq coding-category (plist-get plist 'coding-category)) | ||
| 1035 | (aset coding-spec coding-spec-plist-idx plist)) | ||
| 1036 | (put coding-system 'coding-system coding-spec) | ||
| 1037 | (put coding-category 'coding-systems | ||
| 1038 | (cons coding-system (get coding-category 'coding-systems)))) | ||
| 1039 | |||
| 1040 | ;; Next, set a value of `eol-type' property. | ||
| 1041 | (if (not eol-type) | ||
| 1042 | ;; If EOL-TYPE is nil, set a vector of subsidiary coding | ||
| 1043 | ;; systems, each corresponds to a coding system for the detected | ||
| 1044 | ;; EOL format. | ||
| 1045 | (setq eol-type (make-subsidiary-coding-system coding-system))) | ||
| 1046 | (setq eol-type | ||
| 1047 | (cond ((or (eq eol-type 'unix) (null eol-type)) | ||
| 1048 | 0) | ||
| 1049 | ((eq eol-type 'dos) | ||
| 1050 | 1) | ||
| 1051 | ((eq eol-type 'mac) | ||
| 1052 | 2) | ||
| 1053 | ((or (and (vectorp eol-type) | ||
| 1054 | (= (length eol-type) 3)) | ||
| 1055 | (and (numberp eol-type) | ||
| 1056 | (and (>= eol-type 0) | ||
| 1057 | (<= eol-type 2)))) | ||
| 1058 | eol-type) | ||
| 1059 | (t | ||
| 1060 | (error "Invalid EOL-TYPE spec:%S" eol-type)))) | ||
| 1061 | (put coding-system 'eol-type eol-type) | ||
| 1062 | |||
| 1063 | ;; At last, register CODING-SYSTEM in `coding-system-list' and | ||
| 1064 | ;; `coding-system-alist'. | ||
| 1065 | (add-to-coding-system-list coding-system) | ||
| 1066 | (setq coding-system-alist (cons (list (symbol-name coding-system)) | ||
| 1067 | coding-system-alist)) | ||
| 1068 | |||
| 1069 | ;; For a coding system of cateogory iso-8-1 and iso-8-2, create | ||
| 1070 | ;; XXX-with-esc variants. | ||
| 1071 | (let ((coding-category (coding-system-category coding-system))) | ||
| 1072 | (if (or (eq coding-category 'coding-category-iso-8-1) | ||
| 1073 | (eq coding-category 'coding-category-iso-8-2)) | ||
| 1074 | (let ((esc (intern (concat (symbol-name coding-system) "-with-esc"))) | ||
| 1075 | (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system)) | ||
| 1076 | (safe-charsets (assq 'safe-charsets properties)) | ||
| 1077 | (mime-charset (assq 'mime-charset properties))) | ||
| 1078 | (if safe-charsets | ||
| 1079 | (setcdr safe-charsets t) | ||
| 1080 | (setq properties (cons (cons 'safe-charsets t) properties))) | ||
| 1081 | (if mime-charset | ||
| 1082 | (setcdr mime-charset nil)) | ||
| 1083 | (make-coding-system esc type mnemonic doc | ||
| 1084 | (if (listp (car flags)) | ||
| 1085 | (cons (append (car flags) '(t)) (cdr flags)) | ||
| 1086 | (cons (list (car flags) t) (cdr flags))) | ||
| 1087 | properties)))) | ||
| 1088 | |||
| 1089 | coding-system) | ||
| 1090 | |||
| 1091 | (defun define-coding-system-alias (alias coding-system) | ||
| 1092 | "Define ALIAS as an alias for coding system CODING-SYSTEM." | ||
| 1093 | (put alias 'coding-system (coding-system-spec coding-system)) | ||
| 1094 | (nconc (coding-system-get alias 'alias-coding-systems) (list alias)) | ||
| 1095 | (add-to-coding-system-list alias) | ||
| 1096 | (setq coding-system-alist (cons (list (symbol-name alias)) | ||
| 1097 | coding-system-alist)) | ||
| 1098 | (let ((eol-type (coding-system-eol-type coding-system))) | ||
| 1099 | (if (vectorp eol-type) | ||
| 1100 | (put alias 'eol-type (make-subsidiary-coding-system alias)) | ||
| 1101 | (put alias 'eol-type eol-type)))) | ||
| 1102 | |||
| 1103 | (defun set-buffer-file-coding-system (coding-system &optional force) | 690 | (defun set-buffer-file-coding-system (coding-system &optional force) |
| 1104 | "Set the file coding-system of the current buffer to CODING-SYSTEM. | 691 | "Set the file coding-system of the current buffer to CODING-SYSTEM. |
| 1105 | This means that when you save the buffer, it will be converted | 692 | This means that when you save the buffer, it will be converted |
| @@ -1268,7 +855,10 @@ This setting is effective for the next communication only." | |||
| 1268 | 855 | ||
| 1269 | (defun set-coding-priority (arg) | 856 | (defun set-coding-priority (arg) |
| 1270 | "Set priority of coding categories according to ARG. | 857 | "Set priority of coding categories according to ARG. |
| 1271 | ARG is a list of coding categories ordered by priority." | 858 | ARG is a list of coding categories ordered by priority. |
| 859 | |||
| 860 | This function is provided for backward compatibility. | ||
| 861 | Now we have more convenient function `set-coding-system-priority'." | ||
| 1272 | (let ((l arg) | 862 | (let ((l arg) |
| 1273 | (current-list (copy-sequence coding-category-list))) | 863 | (current-list (copy-sequence coding-category-list))) |
| 1274 | ;; Check the validity of ARG while deleting coding categories in | 864 | ;; Check the validity of ARG while deleting coding categories in |
| @@ -1457,6 +1047,8 @@ text, and convert it in the temporary buffer. Otherwise, convert in-place." | |||
| 1457 | ;; Must return nil, as build_annotations_2 expects that. | 1047 | ;; Must return nil, as build_annotations_2 expects that. |
| 1458 | nil) | 1048 | nil) |
| 1459 | 1049 | ||
| 1050 | (make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0") | ||
| 1051 | |||
| 1460 | ;;; FILE I/O | 1052 | ;;; FILE I/O |
| 1461 | 1053 | ||
| 1462 | (defcustom auto-coding-alist | 1054 | (defcustom auto-coding-alist |
| @@ -1626,8 +1218,7 @@ function by default." | |||
| 1626 | (when coding-system | 1218 | (when coding-system |
| 1627 | (set-buffer-file-coding-system coding-system t) | 1219 | (set-buffer-file-coding-system coding-system t) |
| 1628 | (if (and enable-multibyte-characters | 1220 | (if (and enable-multibyte-characters |
| 1629 | (or (eq coding-system 'no-conversion) | 1221 | (or (eq (coding-system-type coding-system) 'raw-text)) |
| 1630 | (eq (coding-system-type coding-system) 5)) | ||
| 1631 | ;; If buffer was unmodified and the size is the | 1222 | ;; If buffer was unmodified and the size is the |
| 1632 | ;; same as INSERTED, we must be visiting it. | 1223 | ;; same as INSERTED, we must be visiting it. |
| 1633 | (not modified-p) | 1224 | (not modified-p) |
| @@ -1667,8 +1258,8 @@ Return nil if there's no need to set `buffer-file-coding-system'." | |||
| 1667 | ;; But eol-type is not yet set. | 1258 | ;; But eol-type is not yet set. |
| 1668 | (setq local-eol nil)) | 1259 | (setq local-eol nil)) |
| 1669 | (if (and buffer-file-coding-system | 1260 | (if (and buffer-file-coding-system |
| 1670 | (not (eq (coding-system-type buffer-file-coding-system) t))) | 1261 | (not (eq (coding-system-type buffer-file-coding-system) |
| 1671 | ;; This is not `undecided'. | 1262 | 'undecided))) |
| 1672 | (setq local-coding (coding-system-base buffer-file-coding-system))) | 1263 | (setq local-coding (coding-system-base buffer-file-coding-system))) |
| 1673 | 1264 | ||
| 1674 | (if (and (local-variable-p 'buffer-file-coding-system) | 1265 | (if (and (local-variable-p 'buffer-file-coding-system) |
| @@ -1682,9 +1273,7 @@ Return nil if there's no need to set `buffer-file-coding-system'." | |||
| 1682 | ;; But eol-type is not found. | 1273 | ;; But eol-type is not found. |
| 1683 | ;; If EOL conversions are inhibited, force unix eol-type. | 1274 | ;; If EOL conversions are inhibited, force unix eol-type. |
| 1684 | (setq found-eol (if inhibit-eol-conversion 0))) | 1275 | (setq found-eol (if inhibit-eol-conversion 0))) |
| 1685 | (if (eq (coding-system-type coding) t) | 1276 | (setq found-coding (coding-system-base coding)) |
| 1686 | (setq found-coding 'undecided) | ||
| 1687 | (setq found-coding (coding-system-base coding))) | ||
| 1688 | 1277 | ||
| 1689 | (if (and (not found-eol) (eq found-coding 'undecided)) | 1278 | (if (and (not found-eol) (eq found-coding 'undecided)) |
| 1690 | ;; No valid coding information found. | 1279 | ;; No valid coding information found. |