diff options
| -rw-r--r-- | lisp/international/mule.el | 124 |
1 files changed, 72 insertions, 52 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 346133053cd..fc5b10bcb9b 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -429,7 +429,8 @@ code-point in CCS. Currently not supported and just ignored." | |||
| 429 | "Return the coding type of CODING-SYSTEM. | 429 | "Return the coding type of CODING-SYSTEM. |
| 430 | A coding type is an integer value indicating the encoding method | 430 | A coding type is an integer value indicating the encoding method |
| 431 | of CODING-SYSTEM. See the function `make-coding-system' for more detail." | 431 | of CODING-SYSTEM. See the function `make-coding-system' for more detail." |
| 432 | (aref (coding-system-spec coding-system) coding-spec-type-idx)) | 432 | (let ((spec (coding-system-spec coding-system))) |
| 433 | (if spec (aref spec coding-spec-type-idx)))) | ||
| 433 | 434 | ||
| 434 | (defun coding-system-mnemonic (coding-system) | 435 | (defun coding-system-mnemonic (coding-system) |
| 435 | "Return the mnemonic character of CODING-SYSTEM. | 436 | "Return the mnemonic character of CODING-SYSTEM. |
| @@ -440,18 +441,21 @@ to indicate the coding system. If the arg is nil, return ?-." | |||
| 440 | 441 | ||
| 441 | (defun coding-system-doc-string (coding-system) | 442 | (defun coding-system-doc-string (coding-system) |
| 442 | "Return the documentation string for CODING-SYSTEM." | 443 | "Return the documentation string for CODING-SYSTEM." |
| 443 | (aref (coding-system-spec coding-system) coding-spec-doc-string-idx)) | 444 | (let ((spec (coding-system-spec coding-system))) |
| 445 | (if spec (aref spec coding-spec-doc-string-idx)))) | ||
| 444 | 446 | ||
| 445 | (defun coding-system-plist (coding-system) | 447 | (defun coding-system-plist (coding-system) |
| 446 | "Return the property list of CODING-SYSTEM." | 448 | "Return the property list of CODING-SYSTEM." |
| 447 | (aref (coding-system-spec coding-system) coding-spec-plist-idx)) | 449 | (let ((spec (coding-system-spec coding-system))) |
| 450 | (if spec (aref spec coding-spec-plist-idx)))) | ||
| 448 | 451 | ||
| 449 | (defun coding-system-flags (coding-system) | 452 | (defun coding-system-flags (coding-system) |
| 450 | "Return `flags' of CODING-SYSTEM. | 453 | "Return `flags' of CODING-SYSTEM. |
| 451 | A `flags' of a coding system is a vector of length 32 indicating detailed | 454 | A `flags' of a coding system is a vector of length 32 indicating detailed |
| 452 | information of a coding system. See the function `make-coding-system' | 455 | information of a coding system. See the function `make-coding-system' |
| 453 | for more detail." | 456 | for more detail." |
| 454 | (aref (coding-system-spec coding-system) coding-spec-flags-idx)) | 457 | (let ((spec (coding-system-spec coding-system))) |
| 458 | (if spec (aref spec coding-spec-flags-idx)))) | ||
| 455 | 459 | ||
| 456 | (defun coding-system-get (coding-system prop) | 460 | (defun coding-system-get (coding-system prop) |
| 457 | "Extract a value from CODING-SYSTEM's property list for property PROP." | 461 | "Extract a value from CODING-SYSTEM's property list for property PROP." |
| @@ -462,8 +466,8 @@ for more detail." | |||
| 462 | (let ((plist (coding-system-plist coding-system))) | 466 | (let ((plist (coding-system-plist coding-system))) |
| 463 | (if plist | 467 | (if plist |
| 464 | (plist-put plist prop val) | 468 | (plist-put plist prop val) |
| 465 | (aset (coding-system-spec coding-system) coding-spec-plist-idx | 469 | (let ((spec (coding-system-spec coding-system))) |
| 466 | (list prop val))))) | 470 | (if spec (aset spec coding-spec-plist-idx (list prop val))))))) |
| 467 | 471 | ||
| 468 | (defun coding-system-category (coding-system) | 472 | (defun coding-system-category (coding-system) |
| 469 | "Return the coding category of CODING-SYSTEM. | 473 | "Return the coding category of CODING-SYSTEM. |
| @@ -1307,10 +1311,15 @@ ARG is a list of coding categories ordered by priority." | |||
| 1307 | charsets or coding systems.") | 1311 | charsets or coding systems.") |
| 1308 | 1312 | ||
| 1309 | ;; Functions to support "Non-Standard Character Set Encodings" defined | 1313 | ;; Functions to support "Non-Standard Character Set Encodings" defined |
| 1310 | ;; by the ICCCM spec. We support that by converting the leading | 1314 | ;; by the COMPOUND-TEXT spec. |
| 1311 | ;; sequence of the ``extended segment'' to the corresponding ISO-2022 | 1315 | ;; We support that by converting the leading sequence of the |
| 1312 | ;; sequences (if the leading sequence names an Emacs charset), or decode | 1316 | ;; ``extended segment'' to the corresponding ISO-2022 sequences (if |
| 1313 | ;; the segment (if it names a coding system). Encoding does the reverse. | 1317 | ;; the leading sequence names an Emacs charset), or decode the segment |
| 1318 | ;; (if it names a coding system). Encoding does the reverse. | ||
| 1319 | ;; This function also supports "The UTF-8 encoding" described in the | ||
| 1320 | ;; section 7 of the documentation fo COMPOUND-TEXT distributed with | ||
| 1321 | ;; XFree86. | ||
| 1322 | |||
| 1314 | (defun ctext-post-read-conversion (len) | 1323 | (defun ctext-post-read-conversion (len) |
| 1315 | "Decode LEN characters encoded as Compound Text with Extended Segments." | 1324 | "Decode LEN characters encoded as Compound Text with Extended Segments." |
| 1316 | (buffer-disable-undo) ; minimize consing due to insertions and deletions | 1325 | (buffer-disable-undo) ; minimize consing due to insertions and deletions |
| @@ -1324,54 +1333,65 @@ charsets or coding systems.") | |||
| 1324 | last-coding-system-used | 1333 | last-coding-system-used |
| 1325 | encoding textlen chset) | 1334 | encoding textlen chset) |
| 1326 | (while (re-search-forward | 1335 | (while (re-search-forward |
| 1327 | "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002" | 1336 | "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@" |
| 1328 | nil 'move) | 1337 | nil 'move) |
| 1329 | (set-marker newpt (point)) | 1338 | (set-marker newpt (point)) |
| 1330 | (set-marker pt (match-beginning 0)) | 1339 | (set-marker pt (match-beginning 0)) |
| 1331 | (setq encoding (match-string 3)) | 1340 | (if (= (preceding-char) ?@) |
| 1332 | (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) | 1341 | ;; We found embedded utf-8 sequence. |
| 1333 | (- (aref (match-string 2) 1) 128)) | 1342 | (progn |
| 1334 | (1+ (length encoding)))) | 1343 | (delete-char -3) ; delete ESC % @ at the tail |
| 1335 | (setq | 1344 | (goto-char pt) |
| 1336 | chset (cdr (assoc-ignore-case encoding | 1345 | (delete-char 3) ; delete ESC % G at the head |
| 1337 | non-standard-icccm-encodings-alist))) | 1346 | (if (> pt oldpt) |
| 1338 | (cond ((null chset) | 1347 | (decode-coding-region oldpt pt 'ctext-no-compositions)) |
| 1339 | ;; This charset is not supported--leave this extended | 1348 | (decode-coding-region pt newpt 'mule-utf-8) |
| 1340 | ;; segment unaltered and skip over it. | 1349 | (goto-char newpt) |
| 1341 | (goto-char (+ (point) textlen))) | 1350 | (set-marker oldpt newpt)) |
| 1342 | ((charsetp chset) | 1351 | (setq encoding (match-string 3)) |
| 1343 | ;; If it's a charset, replace the leading escape sequence | 1352 | (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) |
| 1344 | ;; with a standard ISO-2022 sequence. We will decode all | 1353 | (- (aref (match-string 2) 1) 128)) |
| 1345 | ;; such segments later, in one go, when we exit the loop | 1354 | (1+ (length encoding)))) |
| 1346 | ;; or find an extended segment that names a coding | 1355 | (setq |
| 1347 | ;; system, not a charset. | 1356 | chset (cdr (assoc-ignore-case encoding |
| 1348 | (replace-match | 1357 | non-standard-icccm-encodings-alist))) |
| 1349 | (concat "\\1" | 1358 | (cond ((null chset) |
| 1350 | (if (= 0 (charset-iso-graphic-plane chset)) | 1359 | ;; This charset is not supported--leave this extended |
| 1351 | ;; GL charsets | 1360 | ;; segment unaltered and skip over it. |
| 1352 | (if (= 1 (charset-dimension chset)) "(" "$(") | 1361 | (goto-char (+ (point) textlen))) |
| 1353 | ;; GR charsets | 1362 | ((charsetp chset) |
| 1354 | (if (= 96 (charset-chars chset)) | 1363 | ;; If it's a charset, replace the leading escape sequence |
| 1355 | "-" | 1364 | ;; with a standard ISO-2022 sequence. We will decode all |
| 1356 | (if (= 1 (charset-dimension chset)) ")" "$)"))) | 1365 | ;; such segments later, in one go, when we exit the loop |
| 1357 | (string (charset-iso-final-char chset))) | 1366 | ;; or find an extended segment that names a coding |
| 1358 | t) | 1367 | ;; system, not a charset. |
| 1359 | (goto-char (+ (point) textlen))) | 1368 | (replace-match |
| 1360 | ((coding-system-p chset) | 1369 | (concat "\\1" |
| 1361 | ;; If it's a coding system, we need to decode the segment | 1370 | (if (= 0 (charset-iso-graphic-plane chset)) |
| 1362 | ;; right away. But first, decode what we've skipped | 1371 | ;; GL charsets |
| 1363 | ;; across until now. | 1372 | (if (= 1 (charset-dimension chset)) "(" "$(") |
| 1364 | (when (> pt oldpt) | 1373 | ;; GR charsets |
| 1365 | (decode-coding-region oldpt pt 'ctext-no-compositions)) | 1374 | (if (= 96 (charset-chars chset)) |
| 1366 | (delete-region pt newpt) | 1375 | "-" |
| 1367 | (set-marker newpt (+ newpt textlen)) | 1376 | (if (= 1 (charset-dimension chset)) ")" "$)"))) |
| 1368 | (decode-coding-region pt newpt chset) | 1377 | (string (charset-iso-final-char chset))) |
| 1369 | (goto-char newpt) | 1378 | t) |
| 1370 | (set-marker oldpt newpt)))) | 1379 | (goto-char (+ (point) textlen))) |
| 1380 | ((coding-system-p chset) | ||
| 1381 | ;; If it's a coding system, we need to decode the segment | ||
| 1382 | ;; right away. But first, decode what we've skipped | ||
| 1383 | ;; across until now. | ||
| 1384 | (when (> pt oldpt) | ||
| 1385 | (decode-coding-region oldpt pt 'ctext-no-compositions)) | ||
| 1386 | (delete-region pt newpt) | ||
| 1387 | (set-marker newpt (+ newpt textlen)) | ||
| 1388 | (decode-coding-region pt newpt chset) | ||
| 1389 | (goto-char newpt) | ||
| 1390 | (set-marker oldpt newpt))))) | ||
| 1371 | ;; Decode what's left. | 1391 | ;; Decode what's left. |
| 1372 | (when (> (point) oldpt) | 1392 | (when (> (point) oldpt) |
| 1373 | (decode-coding-region oldpt (point) 'ctext-no-compositions)) | 1393 | (decode-coding-region oldpt (point) 'ctext-no-compositions)) |
| 1374 | ;; This buffer started as unibyte, because the string we get from | 1394 | ;; This buffer started as unibyte, because the string we get from |
| 1375 | ;; the X selection is a unibyte string. We must now make it | 1395 | ;; the X selection is a unibyte string. We must now make it |
| 1376 | ;; multibyte, so that the decoded text is inserted as multibyte | 1396 | ;; multibyte, so that the decoded text is inserted as multibyte |
| 1377 | ;; into its buffer. | 1397 | ;; into its buffer. |