aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2002-07-29 05:05:19 +0000
committerKenichi Handa2002-07-29 05:05:19 +0000
commit5c88a01e1e8a7f7fefda2ee3c1e16e0782fa02e5 (patch)
tree1c60f16b2c0a35241cfa96f2e1cc2c7ce8acfd98
parent930ca8e8456912a10ab89c400bdfecb32be64a22 (diff)
downloademacs-5c88a01e1e8a7f7fefda2ee3c1e16e0782fa02e5.tar.gz
emacs-5c88a01e1e8a7f7fefda2ee3c1e16e0782fa02e5.zip
(ctext-post-read-conversion): Add support for emboded utf-8 encodng
(ESC % G ... ESC % @).
-rw-r--r--lisp/international/mule.el124
1 files changed, 72 insertions, 52 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 346133053cd..fc5b10bcb9b 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -429,7 +429,8 @@ code-point in CCS. Currently not supported and just ignored."
429 "Return the coding type of CODING-SYSTEM. 429 "Return the coding type of CODING-SYSTEM.
430A coding type is an integer value indicating the encoding method 430A coding type is an integer value indicating the encoding method
431of CODING-SYSTEM. See the function `make-coding-system' for more detail." 431of CODING-SYSTEM. See the function `make-coding-system' for more detail."
432 (aref (coding-system-spec coding-system) coding-spec-type-idx)) 432 (let ((spec (coding-system-spec coding-system)))
433 (if spec (aref spec coding-spec-type-idx))))
433 434
434(defun coding-system-mnemonic (coding-system) 435(defun coding-system-mnemonic (coding-system)
435 "Return the mnemonic character of CODING-SYSTEM. 436 "Return the mnemonic character of CODING-SYSTEM.
@@ -440,18 +441,21 @@ to indicate the coding system. If the arg is nil, return ?-."
440 441
441(defun coding-system-doc-string (coding-system) 442(defun coding-system-doc-string (coding-system)
442 "Return the documentation string for CODING-SYSTEM." 443 "Return the documentation string for CODING-SYSTEM."
443 (aref (coding-system-spec coding-system) coding-spec-doc-string-idx)) 444 (let ((spec (coding-system-spec coding-system)))
445 (if spec (aref spec coding-spec-doc-string-idx))))
444 446
445(defun coding-system-plist (coding-system) 447(defun coding-system-plist (coding-system)
446 "Return the property list of CODING-SYSTEM." 448 "Return the property list of CODING-SYSTEM."
447 (aref (coding-system-spec coding-system) coding-spec-plist-idx)) 449 (let ((spec (coding-system-spec coding-system)))
450 (if spec (aref spec coding-spec-plist-idx))))
448 451
449(defun coding-system-flags (coding-system) 452(defun coding-system-flags (coding-system)
450 "Return `flags' of CODING-SYSTEM. 453 "Return `flags' of CODING-SYSTEM.
451A `flags' of a coding system is a vector of length 32 indicating detailed 454A `flags' of a coding system is a vector of length 32 indicating detailed
452information of a coding system. See the function `make-coding-system' 455information of a coding system. See the function `make-coding-system'
453for more detail." 456for more detail."
454 (aref (coding-system-spec coding-system) coding-spec-flags-idx)) 457 (let ((spec (coding-system-spec coding-system)))
458 (if spec (aref spec coding-spec-flags-idx))))
455 459
456(defun coding-system-get (coding-system prop) 460(defun coding-system-get (coding-system prop)
457 "Extract a value from CODING-SYSTEM's property list for property PROP." 461 "Extract a value from CODING-SYSTEM's property list for property PROP."
@@ -462,8 +466,8 @@ for more detail."
462 (let ((plist (coding-system-plist coding-system))) 466 (let ((plist (coding-system-plist coding-system)))
463 (if plist 467 (if plist
464 (plist-put plist prop val) 468 (plist-put plist prop val)
465 (aset (coding-system-spec coding-system) coding-spec-plist-idx 469 (let ((spec (coding-system-spec coding-system)))
466 (list prop val))))) 470 (if spec (aset spec coding-spec-plist-idx (list prop val)))))))
467 471
468(defun coding-system-category (coding-system) 472(defun coding-system-category (coding-system)
469 "Return the coding category of CODING-SYSTEM. 473 "Return the coding category of CODING-SYSTEM.
@@ -1307,10 +1311,15 @@ ARG is a list of coding categories ordered by priority."
1307charsets or coding systems.") 1311charsets or coding systems.")
1308 1312
1309;; Functions to support "Non-Standard Character Set Encodings" defined 1313;; Functions to support "Non-Standard Character Set Encodings" defined
1310;; by the ICCCM spec. We support that by converting the leading 1314;; by the COMPOUND-TEXT spec.
1311;; sequence of the ``extended segment'' to the corresponding ISO-2022 1315;; We support that by converting the leading sequence of the
1312;; sequences (if the leading sequence names an Emacs charset), or decode 1316;; ``extended segment'' to the corresponding ISO-2022 sequences (if
1313;; the segment (if it names a coding system). Encoding does the reverse. 1317;; the leading sequence names an Emacs charset), or decode the segment
1318;; (if it names a coding system). Encoding does the reverse.
1319;; This function also supports "The UTF-8 encoding" described in the
1320;; section 7 of the documentation fo COMPOUND-TEXT distributed with
1321;; XFree86.
1322
1314(defun ctext-post-read-conversion (len) 1323(defun ctext-post-read-conversion (len)
1315 "Decode LEN characters encoded as Compound Text with Extended Segments." 1324 "Decode LEN characters encoded as Compound Text with Extended Segments."
1316 (buffer-disable-undo) ; minimize consing due to insertions and deletions 1325 (buffer-disable-undo) ; minimize consing due to insertions and deletions
@@ -1324,54 +1333,65 @@ charsets or coding systems.")
1324 last-coding-system-used 1333 last-coding-system-used
1325 encoding textlen chset) 1334 encoding textlen chset)
1326 (while (re-search-forward 1335 (while (re-search-forward
1327 "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002" 1336 "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@"
1328 nil 'move) 1337 nil 'move)
1329 (set-marker newpt (point)) 1338 (set-marker newpt (point))
1330 (set-marker pt (match-beginning 0)) 1339 (set-marker pt (match-beginning 0))
1331 (setq encoding (match-string 3)) 1340 (if (= (preceding-char) ?@)
1332 (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) 1341 ;; We found embedded utf-8 sequence.
1333 (- (aref (match-string 2) 1) 128)) 1342 (progn
1334 (1+ (length encoding)))) 1343 (delete-char -3) ; delete ESC % @ at the tail
1335 (setq 1344 (goto-char pt)
1336 chset (cdr (assoc-ignore-case encoding 1345 (delete-char 3) ; delete ESC % G at the head
1337 non-standard-icccm-encodings-alist))) 1346 (if (> pt oldpt)
1338 (cond ((null chset) 1347 (decode-coding-region oldpt pt 'ctext-no-compositions))
1339 ;; This charset is not supported--leave this extended 1348 (decode-coding-region pt newpt 'mule-utf-8)
1340 ;; segment unaltered and skip over it. 1349 (goto-char newpt)
1341 (goto-char (+ (point) textlen))) 1350 (set-marker oldpt newpt))
1342 ((charsetp chset) 1351 (setq encoding (match-string 3))
1343 ;; If it's a charset, replace the leading escape sequence 1352 (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
1344 ;; with a standard ISO-2022 sequence. We will decode all 1353 (- (aref (match-string 2) 1) 128))
1345 ;; such segments later, in one go, when we exit the loop 1354 (1+ (length encoding))))
1346 ;; or find an extended segment that names a coding 1355 (setq
1347 ;; system, not a charset. 1356 chset (cdr (assoc-ignore-case encoding
1348 (replace-match 1357 non-standard-icccm-encodings-alist)))
1349 (concat "\\1" 1358 (cond ((null chset)
1350 (if (= 0 (charset-iso-graphic-plane chset)) 1359 ;; This charset is not supported--leave this extended
1351 ;; GL charsets 1360 ;; segment unaltered and skip over it.
1352 (if (= 1 (charset-dimension chset)) "(" "$(") 1361 (goto-char (+ (point) textlen)))
1353 ;; GR charsets 1362 ((charsetp chset)
1354 (if (= 96 (charset-chars chset)) 1363 ;; If it's a charset, replace the leading escape sequence
1355 "-" 1364 ;; with a standard ISO-2022 sequence. We will decode all
1356 (if (= 1 (charset-dimension chset)) ")" "$)"))) 1365 ;; such segments later, in one go, when we exit the loop
1357 (string (charset-iso-final-char chset))) 1366 ;; or find an extended segment that names a coding
1358 t) 1367 ;; system, not a charset.
1359 (goto-char (+ (point) textlen))) 1368 (replace-match
1360 ((coding-system-p chset) 1369 (concat "\\1"
1361 ;; If it's a coding system, we need to decode the segment 1370 (if (= 0 (charset-iso-graphic-plane chset))
1362 ;; right away. But first, decode what we've skipped 1371 ;; GL charsets
1363 ;; across until now. 1372 (if (= 1 (charset-dimension chset)) "(" "$(")
1364 (when (> pt oldpt) 1373 ;; GR charsets
1365 (decode-coding-region oldpt pt 'ctext-no-compositions)) 1374 (if (= 96 (charset-chars chset))
1366 (delete-region pt newpt) 1375 "-"
1367 (set-marker newpt (+ newpt textlen)) 1376 (if (= 1 (charset-dimension chset)) ")" "$)")))
1368 (decode-coding-region pt newpt chset) 1377 (string (charset-iso-final-char chset)))
1369 (goto-char newpt) 1378 t)
1370 (set-marker oldpt newpt)))) 1379 (goto-char (+ (point) textlen)))
1380 ((coding-system-p chset)
1381 ;; If it's a coding system, we need to decode the segment
1382 ;; right away. But first, decode what we've skipped
1383 ;; across until now.
1384 (when (> pt oldpt)
1385 (decode-coding-region oldpt pt 'ctext-no-compositions))
1386 (delete-region pt newpt)
1387 (set-marker newpt (+ newpt textlen))
1388 (decode-coding-region pt newpt chset)
1389 (goto-char newpt)
1390 (set-marker oldpt newpt)))))
1371 ;; Decode what's left. 1391 ;; Decode what's left.
1372 (when (> (point) oldpt) 1392 (when (> (point) oldpt)
1373 (decode-coding-region oldpt (point) 'ctext-no-compositions)) 1393 (decode-coding-region oldpt (point) 'ctext-no-compositions))
1374 ;; This buffer started as unibyte, because the string we get from 1394 ;; This buffer started as unibyte, because the string we get from
1375 ;; the X selection is a unibyte string. We must now make it 1395 ;; the X selection is a unibyte string. We must now make it
1376 ;; multibyte, so that the decoded text is inserted as multibyte 1396 ;; multibyte, so that the decoded text is inserted as multibyte
1377 ;; into its buffer. 1397 ;; into its buffer.