aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Zaretskii2002-02-22 13:44:21 +0000
committerEli Zaretskii2002-02-22 13:44:21 +0000
commit835cbadb3fa1792c6fb9716b502f7502d018f57b (patch)
tree978743f3f71b1c1b1acef71763478cd4f1f691da
parent23e16093ecd087847f494aaebbf58816415eed06 (diff)
downloademacs-835cbadb3fa1792c6fb9716b502f7502d018f57b.tar.gz
emacs-835cbadb3fa1792c6fb9716b502f7502d018f57b.zip
(non-standard-icccm-encodings-alist, non-standard-designations-alist): New
variables. (ctext-post-read-conversion, ctext-pre-write-conversion): New functions.
-rw-r--r--lisp/international/mule.el155
1 files changed, 155 insertions, 0 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 124761c0d2a..9bb9c4bf5cc 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -1284,6 +1284,161 @@ ARG is a list of coding categories ordered by priority."
1284 (setq coding-category-list (append arg current-list)) 1284 (setq coding-category-list (append arg current-list))
1285 (set-coding-priority-internal))) 1285 (set-coding-priority-internal)))
1286 1286
1287;;; X selections
1288
1289(defvar non-standard-icccm-encodings-alist
1290 '(("ISO8859-15" . latin-iso8859-15)
1291 ("ISO8859-14" . latin-iso8859-14)
1292 ("KOI8-R" . koi8-r)
1293 ("BIG5-0" . big5))
1294 "Alist of font charset names defined by XLFD, and the corresponding Emacs
1295charsets or coding systems.")
1296
1297;; Functions to support "Non-Standard Character Set Encodings" defined
1298;; by the ICCCM spec. We support that by converting the leading
1299;; sequence of the ``extended segment'' to the corresponding ISO-2022
1300;; sequences (if the leading sequence names an Emacs charset), or decode
1301;; the segment (if it names a coding system). Encoding does the reverse.
1302(defun ctext-post-read-conversion (len)
1303 "Decode LEN characters encoded as Compound Text with Extended Segments."
1304 (buffer-disable-undo) ; minimize consing due to insertions and deletions
1305 (narrow-to-region (point) (+ (point) len))
1306 (save-match-data
1307 (let ((pt (point-marker))
1308 (oldpt (point-marker))
1309 (newpt (make-marker))
1310 (modified-p (buffer-modified-p))
1311 (case-fold-search nil)
1312 last-coding-system-used
1313 encoding textlen chset)
1314 (while (re-search-forward
1315 "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
1316 nil 'move)
1317 (set-marker newpt (point))
1318 (set-marker pt (match-beginning 0))
1319 (setq encoding (match-string 3))
1320 (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
1321 (- (aref (match-string 2) 1) 128))
1322 (1+ (length encoding))))
1323 (setq
1324 chset (cdr (assoc-ignore-case encoding
1325 non-standard-icccm-encodings-alist)))
1326 (cond ((null chset)
1327 ;; This charset is not supported--leave this extended
1328 ;; segment unaltered and skip over it.
1329 (goto-char (+ (point) textlen)))
1330 ((charsetp chset)
1331 ;; If it's a charset, replace the leading escape sequence
1332 ;; with a standard ISO-2022 sequence. We will decode all
1333 ;; such segments later, in one go, when we exit the loop
1334 ;; or find an extended segment that names a coding
1335 ;; system, not a charset.
1336 (replace-match
1337 (concat "\\1"
1338 (if (= 0 (charset-iso-graphic-plane chset))
1339 ;; GL charsets
1340 (if (= 1 (charset-dimension chset)) "(" "$(")
1341 ;; GR charsets
1342 (if (= 96 (charset-chars chset))
1343 "-"
1344 (if (= 1 (charset-dimension chset)) ")" "$)")))
1345 (string (charset-iso-final-char chset)))
1346 t)
1347 (goto-char (+ (point) textlen)))
1348 ((coding-system-p chset)
1349 ;; If it's a coding system, we need to decode the segment
1350 ;; right away. But first, decode what we've skipped
1351 ;; across until now.
1352 (when (> pt oldpt)
1353 (decode-coding-region oldpt pt 'ctext-no-compositions))
1354 (delete-region pt newpt)
1355 (set-marker newpt (+ newpt textlen))
1356 (decode-coding-region pt newpt chset)
1357 (goto-char newpt)
1358 (set-marker oldpt newpt))))
1359 ;; Decode what's left.
1360 (when (> (point) oldpt)
1361 (decode-coding-region oldpt (point) 'ctext-no-compositions))
1362 ;; This buffer started as unibyte, because the string we get from
1363 ;; the X selection is a unibyte string. We must now make it
1364 ;; multibyte, so that the decoded text is inserted as multibyte
1365 ;; into its buffer.
1366 (set-buffer-multibyte t)
1367 (set-buffer-modified-p modified-p)
1368 (- (point-max) (point-min)))))
1369
1370(defvar non-standard-designations-alist
1371 '(("$(0" . (big5 "big5-0" 2))
1372 ("$(1" . (big5 "big5-0" 2))
1373 ("-V" . (t "iso8859-10" 1))
1374 ("-Y" . (t "iso8859-13" 1))
1375 ("-_" . (t "iso8859-14" 1))
1376 ("-b" . (t "iso8859-15" 1))
1377 ("-f" . (t "iso8859-16" 1)))
1378 "Alist of ctext control sequences that introduce character sets which
1379are not in the list of approved ICCCM encodings, and the corresponding
1380coding system, identifier string, and number of octets per encoded
1381character.
1382
1383Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
1384is the control sequence (sans the leading ESC) that introduces the character
1385set in the text encoded by compound-text. ENCODING is a coding system
1386symbol; if it is t, it means that the ctext coding system already encodes
1387the text correctly, and only the leading control sequence needs to be altered.
1388If ENCODING is a coding system, we need to re-encode the text with that
1389coding system. CHARSET is the ICCCM name of the charset we need to put into
1390the leading control sequence. NOCTETS is the number of octets (bytes) that
1391encode each character in this charset. NOCTETS can be 0 (meaning the number
1392of octets per character is variable), 1, 2, 3, or 4.")
1393
1394(defun ctext-pre-write-conversion (from to)
1395 "Encode characters between FROM and TO as Compound Text w/Extended Segments."
1396 (buffer-disable-undo) ; minimize consing due to insertions and deletions
1397 (narrow-to-region from to)
1398 (encode-coding-region from to 'ctext-no-compositions)
1399 ;; Replace ISO-2022 charset designations with extended segments, for
1400 ;; those charsets that are not part of the official X registry.
1401 (save-match-data
1402 (goto-char (point-min))
1403 (let ((newpt (make-marker))
1404 (case-fold-search nil)
1405 pt desig encode-info encoding chset noctets textlen)
1406 (set-buffer-multibyte nil)
1407 (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
1408 (setq desig (match-string 1)
1409 pt (point-marker)
1410 encode-info (cdr (assoc desig non-standard-designations-alist))
1411 encoding (car encode-info)
1412 chset (cadr encode-info)
1413 noctets (car (cddr encode-info)))
1414 (skip-chars-forward "^\e")
1415 (set-marker newpt (point))
1416 (cond
1417 ((eq encoding t) ; only the leading sequence needs to be changed
1418 (setq textlen (+ (- newpt pt) (length chset) 1))
1419 (replace-match (format "\e%%/%d%c%c%s"
1420 noctets
1421 (+ (/ textlen 128) 128)
1422 (+ (% textlen 128) 128)
1423 chset)
1424 t t))
1425 ((coding-system-p encoding) ; need to recode the entire segment...
1426 (set-marker pt (match-beginning 0))
1427 (decode-coding-region pt newpt 'ctext-no-compositions)
1428 (set-buffer-multibyte t)
1429 (encode-coding-region pt newpt encoding)
1430 (set-buffer-multibyte nil)
1431 (setq textlen (+ (- newpt pt) (length chset) 1))
1432 (goto-char pt)
1433 (insert (format "\e%%/%d%c%c%s"
1434 noctets
1435 (+ (/ textlen 128) 128)
1436 (+ (% textlen 128) 128)
1437 chset))))
1438 (goto-char newpt))))
1439 (set-buffer-multibyte t)
1440 nil)
1441
1287;;; FILE I/O 1442;;; FILE I/O
1288 1443
1289(defcustom auto-coding-alist 1444(defcustom auto-coding-alist