aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2003-06-21 02:26:13 +0000
committerKenichi Handa2003-06-21 02:26:13 +0000
commit65a0e5fec33ad437fbf36e4da674a5b8bd927e3b (patch)
tree3ced5873d5abde2276086cd739948beaa126aa7f
parent9ef9b28e03a8f88556d563ad6eebe01dd3c176a6 (diff)
downloademacs-65a0e5fec33ad437fbf36e4da674a5b8bd927e3b.tar.gz
emacs-65a0e5fec33ad437fbf36e4da674a5b8bd927e3b.zip
Many name changes: utf-16-{be,le} -> utf-16{be,le}.
(mule-utf-16-le, utf-16-le, mule-utf-16-be, utf-16-be): New coding system aliases for backward compatibility.
-rw-r--r--lisp/international/utf-16.el127
1 files changed, 70 insertions, 57 deletions
diff --git a/lisp/international/utf-16.el b/lisp/international/utf-16.el
index 3faf6938d42..6e416c91f6d 100644
--- a/lisp/international/utf-16.el
+++ b/lisp/international/utf-16.el
@@ -26,12 +26,18 @@
26 26
27;; Support for UTF-16, which is a two-byte encoding (modulo 27;; Support for UTF-16, which is a two-byte encoding (modulo
28;; surrogates) of Unicode, written either in little or big endian 28;; surrogates) of Unicode, written either in little or big endian
29;; order: coding-systems `mule-utf-16-le' and `mule-utf-16-be'. 29;; order and either with or without the leading BOM (a two-byte
30;; (utf-16-le is used by the DozeN'T clipboard, for instance.) The 30;; signature which identifies their byte sex).
31;; data are preceded by a two-byte signature which identifies their 31;;
32;; byte sex. These are used by the coding-category-utf-16-{b,l}e code 32;; We provide these base coding systems.
33;; to identify the coding, but ignored on decoding. 33;; name endian BOM
34 34;; ---- ------ ---
35;; mule-utf-16le little no
36;; mule-utf-16be big no
37;; mule-utf-16le-with-signature little yes
38;; mule-utf-16be-with-signature big yes
39;; mule-utf-16 both yes
40;;
35;; Note that un-decodable sequences aren't (yet?) preserved as raw 41;; Note that un-decodable sequences aren't (yet?) preserved as raw
36;; bytes, as they are with utf-8, so reading and writing as utf-16 can 42;; bytes, as they are with utf-8, so reading and writing as utf-16 can
37;; corrupt data. 43;; corrupt data.
@@ -112,7 +118,7 @@
112 (r1 %= 96) 118 (r1 %= 96)
113 (r1 += (r2 + 32))))))))))))) 119 (r1 += (r2 + 32)))))))))))))
114 120
115(defconst utf-16-le-decode-loop 121(defconst utf-16le-decode-loop
116 `(loop 122 `(loop
117 (read r3 r4) 123 (read r3 r4)
118 (r1 = (r4 <8 r3)) 124 (r1 = (r4 <8 r3))
@@ -121,7 +127,7 @@
121 (write-multibyte-character r0 r1) 127 (write-multibyte-character r0 r1)
122 (repeat))) 128 (repeat)))
123 129
124(defconst utf-16-be-decode-loop 130(defconst utf-16be-decode-loop
125 `(loop 131 `(loop
126 (read r3 r4) 132 (read r3 r4)
127 (r1 = (r3 <8 r4)) 133 (r1 = (r3 <8 r4))
@@ -132,35 +138,35 @@
132 138
133) 139)
134 140
135(define-ccl-program ccl-decode-mule-utf-16-le 141(define-ccl-program ccl-decode-mule-utf-16le
136 `(2 ; 2 bytes -> 1 to 4 bytes 142 `(2 ; 2 bytes -> 1 to 4 bytes
137 ,utf-16-le-decode-loop) 143 ,utf-16le-decode-loop)
138 "Decode UTF-16LE (little endian without signature bytes). 144 "Decode UTF-16LE (little endian without signature bytes).
139Basic decoding is done into the charsets ascii, latin-iso8859-1 and 145Basic decoding is done into the charsets ascii, latin-iso8859-1 and
140mule-unicode-*. Un-representable Unicode characters are decoded as 146mule-unicode-*. Un-representable Unicode characters are decoded as
141U+fffd. The result is run through the translation-table named 147U+fffd. The result is run through the translation-table named
142`utf-translation-table-for-decode'.") 148`utf-translation-table-for-decode'.")
143 149
144(define-ccl-program ccl-decode-mule-utf-16-be 150(define-ccl-program ccl-decode-mule-utf-16be
145 `(2 ; 2 bytes -> 1 to 4 bytes 151 `(2 ; 2 bytes -> 1 to 4 bytes
146 ,utf-16-be-decode-loop) 152 ,utf-16be-decode-loop)
147 "Decode UTF-16BE (big endian without signature bytes). 153 "Decode UTF-16BE (big endian without signature bytes).
148Basic decoding is done into the charsets ascii, latin-iso8859-1 and 154Basic decoding is done into the charsets ascii, latin-iso8859-1 and
149mule-unicode-*. Un-representable Unicode characters are 155mule-unicode-*. Un-representable Unicode characters are
150decoded as U+fffd. The result is run through the translation-table of 156decoded as U+fffd. The result is run through the translation-table of
151name `utf-translation-table-for-decode'.") 157name `utf-translation-table-for-decode'.")
152 158
153(define-ccl-program ccl-decode-mule-utf-16-le-with-signature 159(define-ccl-program ccl-decode-mule-utf-16le-with-signature
154 `(2 160 `(2
155 ((read r3 r4) 161 ((read r3 r4)
156 ,utf-16-le-decode-loop)) 162 ,utf-16le-decode-loop))
157 "Like ccl-decode-utf-16-le but skip the first 2-byte BOM.") 163 "Like ccl-decode-utf-16le but skip the first 2-byte BOM.")
158 164
159(define-ccl-program ccl-decode-mule-utf-16-be-with-signature 165(define-ccl-program ccl-decode-mule-utf-16be-with-signature
160 `(2 166 `(2
161 ((read r3 r4) 167 ((read r3 r4)
162 ,utf-16-be-decode-loop)) 168 ,utf-16be-decode-loop))
163 "Like ccl-decode-utf-16-be but skip the first 2-byte BOM.") 169 "Like ccl-decode-utf-16be but skip the first 2-byte BOM.")
164 170
165(define-ccl-program ccl-decode-mule-utf-16 171(define-ccl-program ccl-decode-mule-utf-16
166 `(2 172 `(2
@@ -172,7 +178,7 @@ name `utf-translation-table-for-decode'.")
172 ;; function. 178 ;; function.
173 (,@utf-16-decode-ucs 179 (,@utf-16-decode-ucs
174 (write-multibyte-character r0 r1) 180 (write-multibyte-character r0 r1)
175 ,utf-16-le-decode-loop) 181 ,utf-16le-decode-loop)
176 ((if (r1 == #xFEFF) 182 ((if (r1 == #xFEFF)
177 ;; R1 is a BOM for big endian, but we can't keep that 183 ;; R1 is a BOM for big endian, but we can't keep that
178 ;; character in the output because it can't be 184 ;; character in the output because it can't be
@@ -184,12 +190,12 @@ name `utf-translation-table-for-decode'.")
184 (,@utf-16-decode-ucs 190 (,@utf-16-decode-ucs
185 (translate-character utf-translation-table-for-decode r0 r1))) 191 (translate-character utf-translation-table-for-decode r0 r1)))
186 (write-multibyte-character r0 r1) 192 (write-multibyte-character r0 r1)
187 ,utf-16-be-decode-loop)))) 193 ,utf-16be-decode-loop))))
188 "Like ccl-decode-utf-16-be/le but check the first BOM.") 194 "Like ccl-decode-utf-16be/le but check the first BOM.")
189 195
190(makunbound 'utf-16-decode-ucs) ; done with it 196(makunbound 'utf-16-decode-ucs) ; done with it
191(makunbound 'utf-16-le-decode-loop) 197(makunbound 'utf-16le-decode-loop)
192(makunbound 'utf-16-be-decode-loop) 198(makunbound 'utf-16be-decode-loop)
193 199
194(eval-and-compile 200(eval-and-compile
195(defconst utf-16-decode-to-ucs 201(defconst utf-16-decode-to-ucs
@@ -216,7 +222,7 @@ name `utf-translation-table-for-decode'.")
216 (r0 = (r3 + #xe000)) 222 (r0 = (r3 + #xe000))
217 (r0 = #xfffd)))))))))) 223 (r0 = #xfffd))))))))))
218 224
219(defconst utf-16-le-encode-loop 225(defconst utf-16le-encode-loop
220 `(loop 226 `(loop
221 (read-multibyte-character r0 r1) 227 (read-multibyte-character r0 r1)
222 (lookup-character utf-subst-table-for-encode r0 r1) 228 (lookup-character utf-subst-table-for-encode r0 r1)
@@ -227,7 +233,7 @@ name `utf-translation-table-for-decode'.")
227 (write (r0 >> 8)) 233 (write (r0 >> 8))
228 (repeat))) 234 (repeat)))
229 235
230(defconst utf-16-be-encode-loop 236(defconst utf-16be-encode-loop
231 `(loop 237 `(loop
232 (read-multibyte-character r0 r1) 238 (read-multibyte-character r0 r1)
233 (lookup-character utf-subst-table-for-encode r0 r1) 239 (lookup-character utf-subst-table-for-encode r0 r1)
@@ -239,9 +245,10 @@ name `utf-translation-table-for-decode'.")
239 (repeat))) 245 (repeat)))
240) 246)
241 247
242(define-ccl-program ccl-encode-mule-utf-16-le 248
249(define-ccl-program ccl-encode-mule-utf-16le
243 `(1 250 `(1
244 ,utf-16-le-encode-loop) 251 ,utf-16le-encode-loop)
245 "Encode to UTF-16LE (little endian without signature). 252 "Encode to UTF-16LE (little endian without signature).
246Characters from the charsets ascii, eight-bit-control, 253Characters from the charsets ascii, eight-bit-control,
247eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded 254eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
@@ -249,9 +256,9 @@ after translation through the translation-table of name
249`utf-translation-table-for-encode'. 256`utf-translation-table-for-encode'.
250Others are encoded as U+FFFD.") 257Others are encoded as U+FFFD.")
251 258
252(define-ccl-program ccl-encode-mule-utf-16-be 259(define-ccl-program ccl-encode-mule-utf-16be
253 `(1 260 `(1
254 ,utf-16-be-encode-loop) 261 ,utf-16be-encode-loop)
255 "Encode to UTF-16BE (big endian without signature). 262 "Encode to UTF-16BE (big endian without signature).
256Characters from the charsets ascii, eight-bit-control, 263Characters from the charsets ascii, eight-bit-control,
257eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded 264eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
@@ -259,11 +266,11 @@ after translation through the translation-table named
259`utf-translation-table-for-encode'. 266`utf-translation-table-for-encode'.
260Others are encoded as U+FFFD.") 267Others are encoded as U+FFFD.")
261 268
262(define-ccl-program ccl-encode-mule-utf-16-le-with-signature 269(define-ccl-program ccl-encode-mule-utf-16le-with-signature
263 `(1 270 `(1
264 ((write #xFF) 271 ((write #xFF)
265 (write #xFE) 272 (write #xFE)
266 ,utf-16-le-encode-loop)) 273 ,utf-16le-encode-loop))
267 "Encode to UTF-16 (little endian with signature). 274 "Encode to UTF-16 (little endian with signature).
268Characters from the charsets ascii, eight-bit-control, 275Characters from the charsets ascii, eight-bit-control,
269eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded 276eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
@@ -271,11 +278,11 @@ after translation through the translation-table of name
271`utf-translation-table-for-encode'. 278`utf-translation-table-for-encode'.
272Others are encoded as U+FFFD.") 279Others are encoded as U+FFFD.")
273 280
274(define-ccl-program ccl-encode-mule-utf-16-be-with-signature 281(define-ccl-program ccl-encode-mule-utf-16be-with-signature
275 `(1 282 `(1
276 ((write #xFE) 283 ((write #xFE)
277 (write #xFF) 284 (write #xFF)
278 ,utf-16-be-encode-loop)) 285 ,utf-16be-encode-loop))
279 "Encode to UTF-16 (big endian with signature). 286 "Encode to UTF-16 (big endian with signature).
280Characters from the charsets ascii, eight-bit-control, 287Characters from the charsets ascii, eight-bit-control,
281eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded 288eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
@@ -284,8 +291,8 @@ after translation through the translation-table named
284Others are encoded as U+FFFD.") 291Others are encoded as U+FFFD.")
285 292
286(makunbound 'utf-16-decode-to-ucs) 293(makunbound 'utf-16-decode-to-ucs)
287(makunbound 'utf-16-le-encode-loop) 294(makunbound 'utf-16le-encode-loop)
288(makunbound 'utf-16-be-encode-loop) 295(makunbound 'utf-16be-encode-loop)
289 296
290(defun mule-utf-16-post-read-conversion (length) 297(defun mule-utf-16-post-read-conversion (length)
291 (when (> length 0) 298 (when (> length 0)
@@ -295,17 +302,17 @@ Others are encoded as U+FFFD.")
295 (setq last-coding-system-used 302 (setq last-coding-system-used
296 (coding-system-change-text-conversion 303 (coding-system-change-text-conversion
297 last-coding-system-used 304 last-coding-system-used
298 'mule-utf-16-le-with-signature)) 305 'mule-utf-16le-with-signature))
299 (setq length (1- length))) 306 (setq length (1- length)))
300 ((= char (decode-char 'ucs #xFFFF)) 307 ((= char (decode-char 'ucs #xFFFF))
301 (delete-char 1) 308 (delete-char 1)
302 (setq last-coding-system-used 309 (setq last-coding-system-used
303 (coding-system-change-text-conversion 310 (coding-system-change-text-conversion
304 last-coding-system-used 311 last-coding-system-used
305 'mule-utf-16-be-with-signature)) 312 'mule-utf-16be-with-signature))
306 (setq length (1- length))) 313 (setq length (1- length)))
307 (t 314 (t
308 (setq last-coding-system-used 'mule-utf-16-be))))) 315 (setq last-coding-system-used 'mule-utf-16be)))))
309 length) 316 length)
310 317
311(let ((doc " 318(let ((doc "
@@ -324,13 +331,13 @@ On encoding (e.g. writing a file), Emacs characters not belonging to
324any of the character sets listed above are encoded into the byte 331any of the character sets listed above are encoded into the byte
325sequence representing U+FFFD (REPLACEMENT CHARACTER).")) 332sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
326 (make-coding-system 333 (make-coding-system
327 'mule-utf-16-le 4 334 'mule-utf-16le 4
328 ?u ; Mule-UCS uses ?U, but code-pages uses that for koi8-u. 335 ?u ; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
329 (concat 336 (concat
330 "Little endian UTF-16 encoding for Emacs-supported Unicode characters." 337 "UTF-16LE encoding for Emacs-supported Unicode characters."
331 doc) 338 doc)
332 339
333 '(ccl-decode-mule-utf-16-le . ccl-encode-mule-utf-16-le) 340 '(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le)
334 '((safe-charsets 341 '((safe-charsets
335 ascii 342 ascii
336 eight-bit-control 343 eight-bit-control
@@ -346,12 +353,12 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
346 utf-translate-cjk))) 353 utf-translate-cjk)))
347 354
348 (make-coding-system 355 (make-coding-system
349 'mule-utf-16-be 4 ?u 356 'mule-utf-16be 4 ?u
350 (concat 357 (concat
351 "Big endian UTF-16 encoding for Emacs-supported Unicode characters." 358 "UTF-16BE encoding for Emacs-supported Unicode characters."
352 doc) 359 doc)
353 360
354 '(ccl-decode-mule-utf-16-be . ccl-encode-mule-utf-16-be) 361 '(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be)
355 '((safe-charsets 362 '((safe-charsets
356 ascii 363 ascii
357 eight-bit-control 364 eight-bit-control
@@ -367,13 +374,13 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
367 utf-translate-cjk))) 374 utf-translate-cjk)))
368 375
369 (make-coding-system 376 (make-coding-system
370 'mule-utf-16-le-with-signature 4 ?u 377 'mule-utf-16le-with-signature 4 ?u
371 (concat 378 (concat
372 "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters." 379 "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
373 doc) 380 doc)
374 381
375 '(ccl-decode-mule-utf-16-le-with-signature 382 '(ccl-decode-mule-utf-16le-with-signature
376 . ccl-encode-mule-utf-16-le-with-signature) 383 . ccl-encode-mule-utf-16le-with-signature)
377 '((safe-charsets 384 '((safe-charsets
378 ascii 385 ascii
379 eight-bit-control 386 eight-bit-control
@@ -390,13 +397,13 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
390 utf-translate-cjk))) 397 utf-translate-cjk)))
391 398
392 (make-coding-system 399 (make-coding-system
393 'mule-utf-16-be-with-signature 4 ?u 400 'mule-utf-16be-with-signature 4 ?u
394 (concat 401 (concat
395 "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters." 402 "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
396 doc) 403 doc)
397 404
398 '(ccl-decode-mule-utf-16-be-with-signature 405 '(ccl-decode-mule-utf-16be-with-signature
399 . ccl-encode-mule-utf-16-be-with-signature) 406 . ccl-encode-mule-utf-16be-with-signature)
400 '((safe-charsets 407 '((safe-charsets
401 ascii 408 ascii
402 eight-bit-control 409 eight-bit-control
@@ -418,7 +425,7 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
418 "UTF-16 (with or without BOM) for Emacs-supported Unicode characters." 425 "UTF-16 (with or without BOM) for Emacs-supported Unicode characters."
419 doc) 426 doc)
420 427
421 '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16-be-with-signature) 428 '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature)
422 '((safe-charsets 429 '((safe-charsets
423 ascii 430 ascii
424 eight-bit-control 431 eight-bit-control
@@ -436,12 +443,18 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
436 (post-read-conversion . mule-utf-16-post-read-conversion))) 443 (post-read-conversion . mule-utf-16-post-read-conversion)))
437) 444)
438 445
439(define-coding-system-alias 'utf-16-le 'mule-utf-16-le) 446(define-coding-system-alias 'utf-16le 'mule-utf-16le)
440(define-coding-system-alias 'utf-16-be 'mule-utf-16-be) 447(define-coding-system-alias 'utf-16be 'mule-utf-16be)
441(define-coding-system-alias 'utf-16-le-with-signature 448(define-coding-system-alias 'utf-16le-with-signature
442 'mule-utf-16-le-with-signature) 449 'mule-utf-16le-with-signature)
443(define-coding-system-alias 'utf-16-be-with-signature 450(define-coding-system-alias 'utf-16be-with-signature
444 'mule-utf-16-be-with-signature) 451 'mule-utf-16be-with-signature)
445(define-coding-system-alias 'utf-16 'mule-utf-16) 452(define-coding-system-alias 'utf-16 'mule-utf-16)
446 453
454;; For backward compatibility.
455(define-coding-system-alias 'mule-utf-16-le 'mule-utf-16le-with-signature)
456(define-coding-system-alias 'utf-16-le 'mule-utf-16le-with-signature)
457(define-coding-system-alias 'mule-utf-16-be 'mule-utf-16be-with-signature)
458(define-coding-system-alias 'utf-16-be 'mule-utf-16be-with-signature)
459
447;;; utf-16.el ends here 460;;; utf-16.el ends here