diff options
| author | Eli Zaretskii | 2002-02-22 13:44:21 +0000 |
|---|---|---|
| committer | Eli Zaretskii | 2002-02-22 13:44:21 +0000 |
| commit | 835cbadb3fa1792c6fb9716b502f7502d018f57b (patch) | |
| tree | 978743f3f71b1c1b1acef71763478cd4f1f691da | |
| parent | 23e16093ecd087847f494aaebbf58816415eed06 (diff) | |
| download | emacs-835cbadb3fa1792c6fb9716b502f7502d018f57b.tar.gz emacs-835cbadb3fa1792c6fb9716b502f7502d018f57b.zip | |
(non-standard-icccm-encodings-alist, non-standard-designations-alist): New
variables.
(ctext-post-read-conversion, ctext-pre-write-conversion): New functions.
| -rw-r--r-- | lisp/international/mule.el | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 124761c0d2a..9bb9c4bf5cc 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -1284,6 +1284,161 @@ ARG is a list of coding categories ordered by priority." | |||
| 1284 | (setq coding-category-list (append arg current-list)) | 1284 | (setq coding-category-list (append arg current-list)) |
| 1285 | (set-coding-priority-internal))) | 1285 | (set-coding-priority-internal))) |
| 1286 | 1286 | ||
| 1287 | ;;; X selections | ||
| 1288 | |||
| 1289 | (defvar non-standard-icccm-encodings-alist | ||
| 1290 | '(("ISO8859-15" . latin-iso8859-15) | ||
| 1291 | ("ISO8859-14" . latin-iso8859-14) | ||
| 1292 | ("KOI8-R" . koi8-r) | ||
| 1293 | ("BIG5-0" . big5)) | ||
| 1294 | "Alist of font charset names defined by XLFD, and the corresponding Emacs | ||
| 1295 | charsets or coding systems.") | ||
| 1296 | |||
| 1297 | ;; Functions to support "Non-Standard Character Set Encodings" defined | ||
| 1298 | ;; by the ICCCM spec. We support that by converting the leading | ||
| 1299 | ;; sequence of the ``extended segment'' to the corresponding ISO-2022 | ||
| 1300 | ;; sequences (if the leading sequence names an Emacs charset), or decode | ||
| 1301 | ;; the segment (if it names a coding system). Encoding does the reverse. | ||
| 1302 | (defun ctext-post-read-conversion (len) | ||
| 1303 | "Decode LEN characters encoded as Compound Text with Extended Segments." | ||
| 1304 | (buffer-disable-undo) ; minimize consing due to insertions and deletions | ||
| 1305 | (narrow-to-region (point) (+ (point) len)) | ||
| 1306 | (save-match-data | ||
| 1307 | (let ((pt (point-marker)) | ||
| 1308 | (oldpt (point-marker)) | ||
| 1309 | (newpt (make-marker)) | ||
| 1310 | (modified-p (buffer-modified-p)) | ||
| 1311 | (case-fold-search nil) | ||
| 1312 | last-coding-system-used | ||
| 1313 | encoding textlen chset) | ||
| 1314 | (while (re-search-forward | ||
| 1315 | "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002" | ||
| 1316 | nil 'move) | ||
| 1317 | (set-marker newpt (point)) | ||
| 1318 | (set-marker pt (match-beginning 0)) | ||
| 1319 | (setq encoding (match-string 3)) | ||
| 1320 | (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) | ||
| 1321 | (- (aref (match-string 2) 1) 128)) | ||
| 1322 | (1+ (length encoding)))) | ||
| 1323 | (setq | ||
| 1324 | chset (cdr (assoc-ignore-case encoding | ||
| 1325 | non-standard-icccm-encodings-alist))) | ||
| 1326 | (cond ((null chset) | ||
| 1327 | ;; This charset is not supported--leave this extended | ||
| 1328 | ;; segment unaltered and skip over it. | ||
| 1329 | (goto-char (+ (point) textlen))) | ||
| 1330 | ((charsetp chset) | ||
| 1331 | ;; If it's a charset, replace the leading escape sequence | ||
| 1332 | ;; with a standard ISO-2022 sequence. We will decode all | ||
| 1333 | ;; such segments later, in one go, when we exit the loop | ||
| 1334 | ;; or find an extended segment that names a coding | ||
| 1335 | ;; system, not a charset. | ||
| 1336 | (replace-match | ||
| 1337 | (concat "\\1" | ||
| 1338 | (if (= 0 (charset-iso-graphic-plane chset)) | ||
| 1339 | ;; GL charsets | ||
| 1340 | (if (= 1 (charset-dimension chset)) "(" "$(") | ||
| 1341 | ;; GR charsets | ||
| 1342 | (if (= 96 (charset-chars chset)) | ||
| 1343 | "-" | ||
| 1344 | (if (= 1 (charset-dimension chset)) ")" "$)"))) | ||
| 1345 | (string (charset-iso-final-char chset))) | ||
| 1346 | t) | ||
| 1347 | (goto-char (+ (point) textlen))) | ||
| 1348 | ((coding-system-p chset) | ||
| 1349 | ;; If it's a coding system, we need to decode the segment | ||
| 1350 | ;; right away. But first, decode what we've skipped | ||
| 1351 | ;; across until now. | ||
| 1352 | (when (> pt oldpt) | ||
| 1353 | (decode-coding-region oldpt pt 'ctext-no-compositions)) | ||
| 1354 | (delete-region pt newpt) | ||
| 1355 | (set-marker newpt (+ newpt textlen)) | ||
| 1356 | (decode-coding-region pt newpt chset) | ||
| 1357 | (goto-char newpt) | ||
| 1358 | (set-marker oldpt newpt)))) | ||
| 1359 | ;; Decode what's left. | ||
| 1360 | (when (> (point) oldpt) | ||
| 1361 | (decode-coding-region oldpt (point) 'ctext-no-compositions)) | ||
| 1362 | ;; This buffer started as unibyte, because the string we get from | ||
| 1363 | ;; the X selection is a unibyte string. We must now make it | ||
| 1364 | ;; multibyte, so that the decoded text is inserted as multibyte | ||
| 1365 | ;; into its buffer. | ||
| 1366 | (set-buffer-multibyte t) | ||
| 1367 | (set-buffer-modified-p modified-p) | ||
| 1368 | (- (point-max) (point-min))))) | ||
| 1369 | |||
| 1370 | (defvar non-standard-designations-alist | ||
| 1371 | '(("$(0" . (big5 "big5-0" 2)) | ||
| 1372 | ("$(1" . (big5 "big5-0" 2)) | ||
| 1373 | ("-V" . (t "iso8859-10" 1)) | ||
| 1374 | ("-Y" . (t "iso8859-13" 1)) | ||
| 1375 | ("-_" . (t "iso8859-14" 1)) | ||
| 1376 | ("-b" . (t "iso8859-15" 1)) | ||
| 1377 | ("-f" . (t "iso8859-16" 1))) | ||
| 1378 | "Alist of ctext control sequences that introduce character sets which | ||
| 1379 | are not in the list of approved ICCCM encodings, and the corresponding | ||
| 1380 | coding system, identifier string, and number of octets per encoded | ||
| 1381 | character. | ||
| 1382 | |||
| 1383 | Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ | ||
| 1384 | is the control sequence (sans the leading ESC) that introduces the character | ||
| 1385 | set in the text encoded by compound-text. ENCODING is a coding system | ||
| 1386 | symbol; if it is t, it means that the ctext coding system already encodes | ||
| 1387 | the text correctly, and only the leading control sequence needs to be altered. | ||
| 1388 | If ENCODING is a coding system, we need to re-encode the text with that | ||
| 1389 | coding system. CHARSET is the ICCCM name of the charset we need to put into | ||
| 1390 | the leading control sequence. NOCTETS is the number of octets (bytes) that | ||
| 1391 | encode each character in this charset. NOCTETS can be 0 (meaning the number | ||
| 1392 | of octets per character is variable), 1, 2, 3, or 4.") | ||
| 1393 | |||
| 1394 | (defun ctext-pre-write-conversion (from to) | ||
| 1395 | "Encode characters between FROM and TO as Compound Text w/Extended Segments." | ||
| 1396 | (buffer-disable-undo) ; minimize consing due to insertions and deletions | ||
| 1397 | (narrow-to-region from to) | ||
| 1398 | (encode-coding-region from to 'ctext-no-compositions) | ||
| 1399 | ;; Replace ISO-2022 charset designations with extended segments, for | ||
| 1400 | ;; those charsets that are not part of the official X registry. | ||
| 1401 | (save-match-data | ||
| 1402 | (goto-char (point-min)) | ||
| 1403 | (let ((newpt (make-marker)) | ||
| 1404 | (case-fold-search nil) | ||
| 1405 | pt desig encode-info encoding chset noctets textlen) | ||
| 1406 | (set-buffer-multibyte nil) | ||
| 1407 | (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move) | ||
| 1408 | (setq desig (match-string 1) | ||
| 1409 | pt (point-marker) | ||
| 1410 | encode-info (cdr (assoc desig non-standard-designations-alist)) | ||
| 1411 | encoding (car encode-info) | ||
| 1412 | chset (cadr encode-info) | ||
| 1413 | noctets (car (cddr encode-info))) | ||
| 1414 | (skip-chars-forward "^\e") | ||
| 1415 | (set-marker newpt (point)) | ||
| 1416 | (cond | ||
| 1417 | ((eq encoding t) ; only the leading sequence needs to be changed | ||
| 1418 | (setq textlen (+ (- newpt pt) (length chset) 1)) | ||
| 1419 | (replace-match (format "\e%%/%d%c%c%s" | ||
| 1420 | noctets | ||
| 1421 | (+ (/ textlen 128) 128) | ||
| 1422 | (+ (% textlen 128) 128) | ||
| 1423 | chset) | ||
| 1424 | t t)) | ||
| 1425 | ((coding-system-p encoding) ; need to recode the entire segment... | ||
| 1426 | (set-marker pt (match-beginning 0)) | ||
| 1427 | (decode-coding-region pt newpt 'ctext-no-compositions) | ||
| 1428 | (set-buffer-multibyte t) | ||
| 1429 | (encode-coding-region pt newpt encoding) | ||
| 1430 | (set-buffer-multibyte nil) | ||
| 1431 | (setq textlen (+ (- newpt pt) (length chset) 1)) | ||
| 1432 | (goto-char pt) | ||
| 1433 | (insert (format "\e%%/%d%c%c%s" | ||
| 1434 | noctets | ||
| 1435 | (+ (/ textlen 128) 128) | ||
| 1436 | (+ (% textlen 128) 128) | ||
| 1437 | chset)))) | ||
| 1438 | (goto-char newpt)))) | ||
| 1439 | (set-buffer-multibyte t) | ||
| 1440 | nil) | ||
| 1441 | |||
| 1287 | ;;; FILE I/O | 1442 | ;;; FILE I/O |
| 1288 | 1443 | ||
| 1289 | (defcustom auto-coding-alist | 1444 | (defcustom auto-coding-alist |