1 files changed, 72 insertions, 52 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 346133053cd..fc5b10bcb9b 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -429,7 +429,8 @@ code-point in CCS.  Currently not supported and just ignored."
  "Return the coding type of CODING-SYSTEM.
 A coding type is an integer value indicating the encoding method
 of CODING-SYSTEM.  See the function `make-coding-system' for more detail."
-  (aref (coding-system-spec coding-system) coding-spec-type-idx))
+  (let ((spec (coding-system-spec coding-system)))
+    (if spec (aref spec coding-spec-type-idx))))
 (defun coding-system-mnemonic (coding-system)
  "Return the mnemonic character of CODING-SYSTEM.
@@ -440,18 +441,21 @@ to indicate the coding system.  If the arg is nil, return ?-."
 (defun coding-system-doc-string (coding-system)
  "Return the documentation string for CODING-SYSTEM."
-  (aref (coding-system-spec coding-system) coding-spec-doc-string-idx))
+  (let ((spec (coding-system-spec coding-system)))
+    (if spec (aref spec coding-spec-doc-string-idx))))
 (defun coding-system-plist (coding-system)
  "Return the property list of CODING-SYSTEM."
-  (aref (coding-system-spec coding-system) coding-spec-plist-idx))
+  (let ((spec (coding-system-spec coding-system)))
+    (if spec (aref spec coding-spec-plist-idx))))
 (defun coding-system-flags (coding-system)
  "Return `flags' of CODING-SYSTEM.
 A `flags' of a coding system is a vector of length 32 indicating detailed
 information of a coding system.  See the function `make-coding-system'
 for more detail."
-  (aref (coding-system-spec coding-system) coding-spec-flags-idx))
+  (let ((spec (coding-system-spec coding-system)))
+    (if spec (aref spec coding-spec-flags-idx))))
 (defun coding-system-get (coding-system prop)
  "Extract a value from CODING-SYSTEM's property list for property PROP."
@@ -462,8 +466,8 @@ for more detail."
  (let ((plist (coding-system-plist coding-system)))
    (if plist
        (plist-put plist prop val)
-      (aset (coding-system-spec coding-system) coding-spec-plist-idx
+      (let ((spec (coding-system-spec coding-system)))
-            (list prop val)))))
+        (if spec (aset spec coding-spec-plist-idx (list prop val)))))))
 (defun coding-system-category (coding-system)
  "Return the coding category of CODING-SYSTEM.
@@ -1307,10 +1311,15 @@ ARG is a list of coding categories ordered by priority."
 charsets or coding systems.")
 ;; Functions to support "Non-Standard Character Set Encodings" defined
-;; by the ICCCM spec.  We support that by converting the leading
+;; by the COMPOUND-TEXT spec.
-;; sequence of the ``extended segment'' to the corresponding ISO-2022
+;; We support that by converting the leading sequence of the
-;; sequences (if the leading sequence names an Emacs charset), or decode
+;; ``extended segment'' to the corresponding ISO-2022 sequences (if
-;; the segment (if it names a coding system).  Encoding does the reverse.
+;; the leading sequence names an Emacs charset), or decode the segment
+;; (if it names a coding system).  Encoding does the reverse.
+;; This function also supports "The UTF-8 encoding" described in the
+;; section 7 of the documentation fo COMPOUND-TEXT distributed with
+;; XFree86.
 (defun ctext-post-read-conversion (len)
  "Decode LEN characters encoded as Compound Text with Extended Segments."
  (buffer-disable-undo) ; minimize consing due to insertions and deletions
@@ -1324,54 +1333,65 @@ charsets or coding systems.")
          last-coding-system-used
          encoding textlen chset)
      (while (re-search-forward
-              "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
+              "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@"
              nil 'move)
        (set-marker newpt (point))
        (set-marker pt (match-beginning 0))
-        (setq encoding (match-string 3))
+        (if (= (preceding-char) ?@)
-        (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
+            ;; We found embedded utf-8 sequence.
-                            (- (aref (match-string 2) 1) 128))
+            (progn
-                         (1+ (length encoding))))
+              (delete-char -3)          ; delete ESC % @ at the tail
-        (setq
+              (goto-char pt)
-         chset (cdr (assoc-ignore-case encoding
+              (delete-char 3)           ; delete ESC % G at the head
-                                       non-standard-icccm-encodings-alist)))
+              (if (> pt oldpt)
-        (cond ((null chset)
+                  (decode-coding-region oldpt pt 'ctext-no-compositions))
-               ;; This charset is not supported--leave this extended
+              (decode-coding-region pt newpt 'mule-utf-8)
-               ;; segment unaltered and skip over it.
+              (goto-char newpt)
-               (goto-char (+ (point) textlen)))
+              (set-marker oldpt newpt))
-              ((charsetp chset)
+          (setq encoding (match-string 3))
-             ;; If it's a charset, replace the leading escape sequence
+          (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
-             ;; with a standard ISO-2022 sequence.  We will decode all
+                              (- (aref (match-string 2) 1) 128))
-              ;; such segments later, in one go, when we exit the loop
+                           (1+ (length encoding))))
-               ;; or find an extended segment that names a coding
+          (setq
-               ;; system, not a charset.
+           chset (cdr (assoc-ignore-case encoding
-               (replace-match
+                                         non-standard-icccm-encodings-alist)))
-                (concat "\\1"
+          (cond ((null chset)
-                        (if (= 0 (charset-iso-graphic-plane chset))
+                 ;; This charset is not supported--leave this extended
-                            ;; GL charsets
+                 ;; segment unaltered and skip over it.
-                            (if (= 1 (charset-dimension chset)) "(" "$(")
+                 (goto-char (+ (point) textlen)))
-                          ;; GR charsets
+                ((charsetp chset)
-                          (if (= 96 (charset-chars chset))
+                 ;; If it's a charset, replace the leading escape sequence
-                              "-"
+                 ;; with a standard ISO-2022 sequence.  We will decode all
-                            (if (= 1 (charset-dimension chset)) ")" "$)")))
+                 ;; such segments later, in one go, when we exit the loop
-                        (string (charset-iso-final-char chset)))
+                 ;; or find an extended segment that names a coding
-                t)
+                 ;; system, not a charset.
-               (goto-char (+ (point) textlen)))
+                 (replace-match
-              ((coding-system-p chset)
+                  (concat "\\1"
-             ;; If it's a coding system, we need to decode the segment
+                          (if (= 0 (charset-iso-graphic-plane chset))
-               ;; right away.  But first, decode what we've skipped
+                              ;; GL charsets
-               ;; across until now.
+                              (if (= 1 (charset-dimension chset)) "(" "$(")
-               (when (> pt oldpt)
+                            ;; GR charsets
-                 (decode-coding-region oldpt pt 'ctext-no-compositions))
+                            (if (= 96 (charset-chars chset))
-               (delete-region pt newpt)
+                                "-"
-               (set-marker newpt (+ newpt textlen))
+                              (if (= 1 (charset-dimension chset)) ")" "$)")))
-               (decode-coding-region pt newpt chset)
+                          (string (charset-iso-final-char chset)))
-               (goto-char newpt)
+                  t)
-               (set-marker oldpt newpt))))
+                 (goto-char (+ (point) textlen)))
+                ((coding-system-p chset)
+                 ;; If it's a coding system, we need to decode the segment
+                 ;; right away.  But first, decode what we've skipped
+                 ;; across until now.
+                 (when (> pt oldpt)
+                   (decode-coding-region oldpt pt 'ctext-no-compositions))
+                 (delete-region pt newpt)
+                 (set-marker newpt (+ newpt textlen))
+                 (decode-coding-region pt newpt chset)
+                 (goto-char newpt)
+                 (set-marker oldpt newpt)))))
      ;; Decode what's left.
      (when (> (point) oldpt)
        (decode-coding-region oldpt (point) 'ctext-no-compositions))
-     ;; This buffer started as unibyte, because the string we get from
+      ;; This buffer started as unibyte, because the string we get from
      ;; the X selection is a unibyte string.  We must now make it
      ;; multibyte, so that the decoded text is inserted as multibyte
      ;; into its buffer.

diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 346133053cd..fc5b10bcb9b 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el
@@ -429,7 +429,8 @@ code-point in CCS. Currently not supported and just ignored."
429	"Return the coding type of CODING-SYSTEM.	429	"Return the coding type of CODING-SYSTEM.
430	A coding type is an integer value indicating the encoding method	430	A coding type is an integer value indicating the encoding method
431	of CODING-SYSTEM. See the function `make-coding-system' for more detail."	431	of CODING-SYSTEM. See the function `make-coding-system' for more detail."
432	(aref (coding-system-spec coding-system) coding-spec-type-idx))	432	(let ((spec (coding-system-spec coding-system)))
		433	(if spec (aref spec coding-spec-type-idx))))
433		434
434	(defun coding-system-mnemonic (coding-system)	435	(defun coding-system-mnemonic (coding-system)
435	"Return the mnemonic character of CODING-SYSTEM.	436	"Return the mnemonic character of CODING-SYSTEM.
@@ -440,18 +441,21 @@ to indicate the coding system. If the arg is nil, return ?-."
440		441
441	(defun coding-system-doc-string (coding-system)	442	(defun coding-system-doc-string (coding-system)
442	"Return the documentation string for CODING-SYSTEM."	443	"Return the documentation string for CODING-SYSTEM."
443	(aref (coding-system-spec coding-system) coding-spec-doc-string-idx))	444	(let ((spec (coding-system-spec coding-system)))
		445	(if spec (aref spec coding-spec-doc-string-idx))))
444		446
445	(defun coding-system-plist (coding-system)	447	(defun coding-system-plist (coding-system)
446	"Return the property list of CODING-SYSTEM."	448	"Return the property list of CODING-SYSTEM."
447	(aref (coding-system-spec coding-system) coding-spec-plist-idx))	449	(let ((spec (coding-system-spec coding-system)))
		450	(if spec (aref spec coding-spec-plist-idx))))
448		451
449	(defun coding-system-flags (coding-system)	452	(defun coding-system-flags (coding-system)
450	"Return `flags' of CODING-SYSTEM.	453	"Return `flags' of CODING-SYSTEM.
451	A `flags' of a coding system is a vector of length 32 indicating detailed	454	A `flags' of a coding system is a vector of length 32 indicating detailed
452	information of a coding system. See the function `make-coding-system'	455	information of a coding system. See the function `make-coding-system'
453	for more detail."	456	for more detail."
454	(aref (coding-system-spec coding-system) coding-spec-flags-idx))	457	(let ((spec (coding-system-spec coding-system)))
		458	(if spec (aref spec coding-spec-flags-idx))))
455		459
456	(defun coding-system-get (coding-system prop)	460	(defun coding-system-get (coding-system prop)
457	"Extract a value from CODING-SYSTEM's property list for property PROP."	461	"Extract a value from CODING-SYSTEM's property list for property PROP."
@@ -462,8 +466,8 @@ for more detail."
462	(let ((plist (coding-system-plist coding-system)))	466	(let ((plist (coding-system-plist coding-system)))
463	(if plist	467	(if plist
464	(plist-put plist prop val)	468	(plist-put plist prop val)
465	(aset (coding-system-spec coding-system) coding-spec-plist-idx	469	(let ((spec (coding-system-spec coding-system)))
466	(list prop val)))))	470	(if spec (aset spec coding-spec-plist-idx (list prop val)))))))
467		471
468	(defun coding-system-category (coding-system)	472	(defun coding-system-category (coding-system)
469	"Return the coding category of CODING-SYSTEM.	473	"Return the coding category of CODING-SYSTEM.
@@ -1307,10 +1311,15 @@ ARG is a list of coding categories ordered by priority."
1307	charsets or coding systems.")	1311	charsets or coding systems.")
1308		1312
1309	;; Functions to support "Non-Standard Character Set Encodings" defined	1313	;; Functions to support "Non-Standard Character Set Encodings" defined
1310	;; by the ICCCM spec. We support that by converting the leading	1314	;; by the COMPOUND-TEXT spec.
1311	;; sequence of the ``extended segment'' to the corresponding ISO-2022	1315	;; We support that by converting the leading sequence of the
1312	;; sequences (if the leading sequence names an Emacs charset), or decode	1316	;; ``extended segment'' to the corresponding ISO-2022 sequences (if
1313	;; the segment (if it names a coding system). Encoding does the reverse.	1317	;; the leading sequence names an Emacs charset), or decode the segment
		1318	;; (if it names a coding system). Encoding does the reverse.
		1319	;; This function also supports "The UTF-8 encoding" described in the
		1320	;; section 7 of the documentation fo COMPOUND-TEXT distributed with
		1321	;; XFree86.
		1322
1314	(defun ctext-post-read-conversion (len)	1323	(defun ctext-post-read-conversion (len)
1315	"Decode LEN characters encoded as Compound Text with Extended Segments."	1324	"Decode LEN characters encoded as Compound Text with Extended Segments."
1316	(buffer-disable-undo) ; minimize consing due to insertions and deletions	1325	(buffer-disable-undo) ; minimize consing due to insertions and deletions
@@ -1324,54 +1333,65 @@ charsets or coding systems.")
1324	last-coding-system-used	1333	last-coding-system-used
1325	encoding textlen chset)	1334	encoding textlen chset)
1326	(while (re-search-forward	1335	(while (re-search-forward
1327	"\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"	1336	"\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\\|\e%G[^\e]+\e%@"
1328	nil 'move)	1337	nil 'move)
1329	(set-marker newpt (point))	1338	(set-marker newpt (point))
1330	(set-marker pt (match-beginning 0))	1339	(set-marker pt (match-beginning 0))
1331	(setq encoding (match-string 3))	1340	(if (= (preceding-char) ?@)
1332	(setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)	1341	;; We found embedded utf-8 sequence.
1333	(- (aref (match-string 2) 1) 128))	1342	(progn
1334	(1+ (length encoding))))	1343	(delete-char -3) ; delete ESC % @ at the tail
1335	(setq	1344	(goto-char pt)
1336	chset (cdr (assoc-ignore-case encoding	1345	(delete-char 3) ; delete ESC % G at the head
1337	non-standard-icccm-encodings-alist)))	1346	(if (> pt oldpt)
1338	(cond ((null chset)	1347	(decode-coding-region oldpt pt 'ctext-no-compositions))
1339	;; This charset is not supported--leave this extended	1348	(decode-coding-region pt newpt 'mule-utf-8)
1340	;; segment unaltered and skip over it.	1349	(goto-char newpt)
1341	(goto-char (+ (point) textlen)))	1350	(set-marker oldpt newpt))
1342	((charsetp chset)	1351	(setq encoding (match-string 3))
1343	;; If it's a charset, replace the leading escape sequence	1352	(setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
1344	;; with a standard ISO-2022 sequence. We will decode all	1353	(- (aref (match-string 2) 1) 128))
1345	;; such segments later, in one go, when we exit the loop	1354	(1+ (length encoding))))
1346	;; or find an extended segment that names a coding	1355	(setq
1347	;; system, not a charset.	1356	chset (cdr (assoc-ignore-case encoding
1348	(replace-match	1357	non-standard-icccm-encodings-alist)))
1349	(concat "\\1"	1358	(cond ((null chset)
1350	(if (= 0 (charset-iso-graphic-plane chset))	1359	;; This charset is not supported--leave this extended
1351	;; GL charsets	1360	;; segment unaltered and skip over it.
1352	(if (= 1 (charset-dimension chset)) "(" "$(")	1361	(goto-char (+ (point) textlen)))
1353	;; GR charsets	1362	((charsetp chset)
1354	(if (= 96 (charset-chars chset))	1363	;; If it's a charset, replace the leading escape sequence
1355	"-"	1364	;; with a standard ISO-2022 sequence. We will decode all
1356	(if (= 1 (charset-dimension chset)) ")" "$)")))	1365	;; such segments later, in one go, when we exit the loop
1357	(string (charset-iso-final-char chset)))	1366	;; or find an extended segment that names a coding
1358	t)	1367	;; system, not a charset.
1359	(goto-char (+ (point) textlen)))	1368	(replace-match
1360	((coding-system-p chset)	1369	(concat "\\1"
1361	;; If it's a coding system, we need to decode the segment	1370	(if (= 0 (charset-iso-graphic-plane chset))
1362	;; right away. But first, decode what we've skipped	1371	;; GL charsets
1363	;; across until now.	1372	(if (= 1 (charset-dimension chset)) "(" "$(")
1364	(when (> pt oldpt)	1373	;; GR charsets
1365	(decode-coding-region oldpt pt 'ctext-no-compositions))	1374	(if (= 96 (charset-chars chset))
1366	(delete-region pt newpt)	1375	"-"
1367	(set-marker newpt (+ newpt textlen))	1376	(if (= 1 (charset-dimension chset)) ")" "$)")))
1368	(decode-coding-region pt newpt chset)	1377	(string (charset-iso-final-char chset)))
1369	(goto-char newpt)	1378	t)
1370	(set-marker oldpt newpt))))	1379	(goto-char (+ (point) textlen)))
		1380	((coding-system-p chset)
		1381	;; If it's a coding system, we need to decode the segment
		1382	;; right away. But first, decode what we've skipped
		1383	;; across until now.
		1384	(when (> pt oldpt)
		1385	(decode-coding-region oldpt pt 'ctext-no-compositions))
		1386	(delete-region pt newpt)
		1387	(set-marker newpt (+ newpt textlen))
		1388	(decode-coding-region pt newpt chset)
		1389	(goto-char newpt)
		1390	(set-marker oldpt newpt)))))
1371	;; Decode what's left.	1391	;; Decode what's left.
1372	(when (> (point) oldpt)	1392	(when (> (point) oldpt)
1373	(decode-coding-region oldpt (point) 'ctext-no-compositions))	1393	(decode-coding-region oldpt (point) 'ctext-no-compositions))
1374	;; This buffer started as unibyte, because the string we get from	1394	;; This buffer started as unibyte, because the string we get from
1375	;; the X selection is a unibyte string. We must now make it	1395	;; the X selection is a unibyte string. We must now make it
1376	;; multibyte, so that the decoded text is inserted as multibyte	1396	;; multibyte, so that the decoded text is inserted as multibyte
1377	;; into its buffer.	1397	;; into its buffer.