about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kenichi Handa 2010-08-04 17:06:52 +0900
committer Kenichi Handa 2010-08-04 17:06:52 +0900
commit 6b4d96c2f04e5a08c4f9fff144743ff16c151dae (patch)
tree 29def28b96201bf62115bbbadad44b5cf0ffbfe2
parent 4ce5a4ccd4cca6dcdc6d8bafa8cfaea1986e7e6e (diff)
download emacs-6b4d96c2f04e5a08c4f9fff144743ff16c151dae.tar.gz
emacs-6b4d96c2f04e5a08c4f9fff144743ff16c151dae.zip
Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text.
-rw-r--r-- lisp/ChangeLog 17
-rw-r--r-- lisp/international/mule-conf.el 14
-rw-r--r-- lisp/international/mule.el 147
-rw-r--r-- lisp/language/cyrillic.el 7
4 files changed, 104 insertions, 81 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 670f07c2683..775ddcdc2e7 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,20 @@
1 2010-08-04 Kenichi Handa <handa@m17n.org>
2
3 * language/cyrillic.el: Don't add "microsoft-cp1251" to
4 ctext-non-standard-encodings-alist here.
5
6 * international/mule.el (ctext-non-standard-encodings-alist): Add
7 "koi8-r" and "microsoft-cp1251".
8 (ctext-standard-encodings): New variable.
9 (ctext-non-standard-encodings-table): List only elements for
10 non-standard encodings.
11 (ctext-pre-write-conversion): Adjusted for the above change.
12 Check ctext-standard-encodings.
13
14 * international/mule-conf.el (compound-text): Doc fix.
15 (ctext-no-compositions): Doc fix.
16 (compound-text-with-extensions): Doc fix.
17
1 2010-07-23 Juanma Barranquero <lekktu@gmail.com> 18 2010-07-23 Juanma Barranquero <lekktu@gmail.com>
2 19
3 * help-fns.el (find-lisp-object-file-name): Doc fix (bug#6494). 20 * help-fns.el (find-lisp-object-file-name): Doc fix (bug#6494).
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el
index f53b69eed8b..9ee8d22463a 100644
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
@@ -1410,9 +1410,10 @@ is treated as a character."
1410 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) 1410 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
1411 1411
1412(define-coding-system 'compound-text 1412(define-coding-system 'compound-text
1413 "Compound text based generic encoding for decoding unknown messages. 1413 "Compound text based generic encoding.
1414 1414This coding system is an extension of X's \"Compound Text Encoding\".
1415This coding system does not support extended segments of CTEXT." 1415It encodes many characters using the normal ISO-2022 designation sequences,
1416but it doesn't support extended segments of CTEXT."
1416 :coding-type 'iso-2022 1417 :coding-type 'iso-2022
1417 :mnemonic ?x 1418 :mnemonic ?x
1418 :charset-list 'iso-2022 1419 :charset-list 'iso-2022
@@ -1432,7 +1433,7 @@ This coding system does not support extended segments of CTEXT."
1432;; not have a mime-charset property, to prevent it from showing up 1433;; not have a mime-charset property, to prevent it from showing up
1433;; close to the beginning of coding systems ordered by priority. 1434;; close to the beginning of coding systems ordered by priority.
1434(define-coding-system 'ctext-no-compositions 1435(define-coding-system 'ctext-no-compositions
1435 "Compound text based generic encoding for decoding unknown messages. 1436 "Compound text based generic encoding.
1436 1437
1437Like `compound-text', but does not produce escape sequences for compositions." 1438Like `compound-text', but does not produce escape sequences for compositions."
1438 :coding-type 'iso-2022 1439 :coding-type 'iso-2022
@@ -1445,8 +1446,9 @@ Like `compound-text', but does not produce escape sequences for compositions."
1445(define-coding-system 'compound-text-with-extensions 1446(define-coding-system 'compound-text-with-extensions
1446 "Compound text encoding with ICCCM Extended Segment extensions. 1447 "Compound text encoding with ICCCM Extended Segment extensions.
1447 1448
1448See the variable `ctext-non-standard-encodings-alist' for the 1449See the variables `ctext-standard-encodings' and
1449detail about how extended segments are handled. 1450`ctext-non-standard-encodings-alist' for the detail about how
1451extended segments are handled.
1450 1452
1451This coding system should be used only for X selections. It is inappropriate 1453This coding system should be used only for X selections. It is inappropriate
1452for decoding and encoding files, process I/O, etc." 1454for decoding and encoding files, process I/O, etc."
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 7e7e55728c8..e030acbef02 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -1408,7 +1408,9 @@ This function is provided for backward compatibility."
1408 '(("big5-0" big5 2 big5) 1408 '(("big5-0" big5 2 big5)
1409 ("ISO8859-14" iso-8859-14 1 latin-iso8859-14) 1409 ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
1410 ("ISO8859-15" iso-8859-15 1 latin-iso8859-15) 1410 ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
1411 ("gbk-0" gbk 2 chinese-gbk))) 1411 ("gbk-0" gbk 2 chinese-gbk)
1412 ("koi8-r" koi8-r 1 koi8-r)
1413 ("microsoft-cp1251" windows-1251 1 windows-1251)))
1412 "Alist of non-standard encoding names vs the corresponding usages in CTEXT. 1414 "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
1413 1415
1414It controls how extended segments of a compound text are handled 1416It controls how extended segments of a compound text are handled
@@ -1497,6 +1499,20 @@ Each element must be one of the names listed in the variable
1497 (goto-char (point-min)) 1499 (goto-char (point-min))
1498 (- (point-max) (point))))) 1500 (- (point-max) (point)))))
1499 1501
1502(defvar ctext-standard-encodings
1503 '(ascii latin-jisx0201 katakana-jisx0201
1504 latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
1505 greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
1506 latin-iso8859-9
1507 chinese-gb2312 japanese-jisx0208 korean-ksc5601)
1508 "List of approved standard encodings (i.e. charsets) of X's Compound Text.
1509Coding-system `compound-text-with-extensions' encodes a character
1510belonging to any of those charsets using the normal ISO2022
1511designation sequence unless the current language environment or
1512the variable `ctext-non-standard-encodings' decide to use an extended
1513segment of CTEXT for that character. See also the documentation
1514of `ctext-non-standard-encodings-alist'.")
1515
1500;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from 1516;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
1501;; `ctext-non-standard-encodings' and a list specified by the key 1517;; `ctext-non-standard-encodings' and a list specified by the key
1502;; `ctext-non-standard-encodings' for the currrent language 1518;; `ctext-non-standard-encodings' for the currrent language
@@ -1508,77 +1524,74 @@ Each element must be one of the names listed in the variable
1508;; is encoded using UTF-8 encoding extention. 1524;; is encoded using UTF-8 encoding extention.
1509 1525
1510(defun ctext-non-standard-encodings-table () 1526(defun ctext-non-standard-encodings-table ()
1511 (let (table) 1527 (let* ((table (append ctext-non-standard-encodings
1512 ;; Setup charsets specified by the key 1528 (copy-sequence
1513 ;; `ctext-non-standard-encodings' for the current language 1529 (get-language-info current-language-environment
1514 ;; environment and in `ctext-non-standard-encodings'. 1530 'ctext-non-standard-encodings))))
1515 (dolist (encoding (append 1531 (tail table)
1516 (get-language-info current-language-environment 1532 elt)
1517 'ctext-non-standard-encodings) 1533 (while tail
1518 ctext-non-standard-encodings)) 1534 (setq elt (car tail))
1519 (let* ((slot (assoc encoding ctext-non-standard-encodings-alist)) 1535 (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
1520 (charset (nth 3 slot))) 1536 (charset (nth 3 slot)))
1521 (if (charsetp charset) 1537 (if (charsetp charset)
1522 (push (cons charset slot) table) 1538 (setcar tail (cons charset slot))
1523 (dolist (cs charset) 1539 (setcar tail (cons (car charset) slot))
1524 (push (cons cs slot) table))))) 1540 (dolist (cs (cdr charset))
1525 1541 (setcdr tail
1526 ;; Next prepend charsets for ISO2022 designation sequence. 1542 (cons (cons (car cs) slot) (cdr tail)))
1527 (dolist (charset charset-list) 1543 (setq tail (cdr tail))))
1528 (let ((final (plist-get (charset-plist charset) :iso-final-char))) 1544 (setq tail (cdr tail))))
1529 (if (and (integerp final) 1545 table))
1530 (>= final #x40) (<= final #x7e)
1531 ;; Exclude ascii and chinese-cns11643-X.
1532 (not (eq charset 'ascii))
1533 (not (string-match "cns11643" (symbol-name charset))))
1534 (push (cons charset nil) table))))
1535
1536 ;; Returned reversed list so that the charsets specified by the
1537 ;; key `ctext-non-standard-encodings' for the current language
1538 ;; have the highest priority.
1539 (nreverse table)))
1540 1546
1541(defun ctext-pre-write-conversion (from to) 1547(defun ctext-pre-write-conversion (from to)
1542 "Encode characters between FROM and TO as Compound Text w/Extended Segments. 1548 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1543 1549
1544If FROM is a string, or if the current buffer is not the one set up for us 1550If FROM is a string, generate a new temp buffer, insert the text,
1545by `encode-coding-string', generate a new temp buffer, insert the text, 1551and convert it in the temporary buffer. Otherwise, convert
1546and convert it in the temporary buffer. Otherwise, convert in-place." 1552in-place."
1547 (save-match-data 1553 (save-match-data
1548 ;; Setup a working buffer if necessary. 1554 ;; Setup a working buffer if necessary.
1549 (when (stringp from) 1555 (when (stringp from)
1550 (set-buffer (generate-new-buffer " *temp")) 1556 (set-buffer (generate-new-buffer " *temp"))
1551 (set-buffer-multibyte (multibyte-string-p from)) 1557 (set-buffer-multibyte (multibyte-string-p from))
1552 (insert from)) 1558 (insert from)
1553 1559 (setq from 1 to (point-max)))
1554 ;; Now we can encode the whole buffer. 1560 (save-restriction
1555 (let ((encoding-table (ctext-non-standard-encodings-table)) 1561 (narrow-to-region from to)
1556 last-coding-system-used 1562 (let ((encoding-table (ctext-non-standard-encodings-table))
1557 last-pos last-encoding-info 1563 (charset-list ctext-standard-encodings)
1558 encoding-info end-pos ch) 1564 last-coding-system-used
1559 (goto-char (setq last-pos (point-min))) 1565 last-pos last-encoding-info
1560 (setq end-pos (point-marker)) 1566 encoding-info end-pos ch charset)
1561 (while (re-search-forward "[^\000-\177]+" nil t) 1567 (dolist (elt encoding-table)
1562 ;; Found a sequence of non-ASCII characters. 1568 (push (car elt) charset-list))
1563 (setq last-pos (match-beginning 0) 1569 (goto-char (setq last-pos from))
1564 ch (char-after last-pos) 1570 (setq end-pos (point-marker))
1565 last-encoding-info (catch 'tag 1571 (while (re-search-forward "[^\000-\177]+" nil t)
1566 (dolist (elt encoding-table) 1572 ;; Found a sequence of non-ASCII characters.
1567 (if (encode-char ch (car elt)) 1573 (setq last-pos (match-beginning 0)
1568 (throw 'tag (cdr elt)))) 1574 ch (char-after last-pos)
1569 'utf-8)) 1575 charset (char-charset ch charset-list)
1570 (set-marker end-pos (match-end 0)) 1576 last-encoding-info
1571 (goto-char (1+ last-pos)) 1577 (if charset
1572 (catch 'tag 1578 (or (cdr (assq charset encoding-table))
1573 (while t 1579 charset)
1574 (setq encoding-info 1580 'utf-8))
1575 (if (< (point) end-pos) 1581 (set-marker end-pos (match-end 0))
1576 (catch 'tag 1582 (goto-char (1+ last-pos))
1577 (setq ch (following-char)) 1583 (while (marker-position end-pos)
1578 (dolist (elt encoding-table) 1584 (if (< (point) end-pos)
1579 (if (encode-char ch (car elt)) 1585 (progn
1580 (throw 'tag (cdr elt)))) 1586 (setq charset (char-charset (following-char) charset-list)
1581 'utf-8))) 1587 encoding-info
1588 (if charset
1589 (or (cdr (assq charset encoding-table))
1590 charset)
1591 'utf-8))
1592 (forward-char 1))
1593 (setq encoding-info nil)
1594 (set-marker end-pos nil))
1582 (unless (eq last-encoding-info encoding-info) 1595 (unless (eq last-encoding-info encoding-info)
1583 (cond ((consp last-encoding-info) 1596 (cond ((consp last-encoding-info)
1584 ;; Encode the previous range using an extended 1597 ;; Encode the previous range using an extended
@@ -1609,14 +1622,12 @@ and convert it in the temporary buffer. Otherwise, convert in-place."
1609 (save-excursion 1622 (save-excursion
1610 (goto-char last-pos) 1623 (goto-char last-pos)
1611 (insert "\e%G")) 1624 (insert "\e%G"))
1612 (insert "\e%@"))) 1625 (insert "\e%@"))
1626 (t
1627 (put-text-property last-pos (point) 'charset charset)))
1613 (setq last-pos (point) 1628 (setq last-pos (point)
1614 last-encoding-info encoding-info)) 1629 last-encoding-info encoding-info))))
1615 (if (< (point) end-pos) 1630 (goto-char (point-min)))))
1616 (forward-char 1)
1617 (throw 'tag nil)))))
1618 (set-marker end-pos nil)
1619 (goto-char (point-min))))
1620 ;; Must return nil, as build_annotations_2 expects that. 1631 ;; Must return nil, as build_annotations_2 expects that.
1621 nil) 1632 nil)
1622 1633
diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el
index 7d2f082579f..b293ad1ff0b 100644
--- a/lisp/language/cyrillic.el
+++ b/lisp/language/cyrillic.el
@@ -239,13 +239,6 @@ Support for Russian using koi8-r and the russian-computer input method.")
239 (documentation . "Support for Tajik using KOI8-T.")) 239 (documentation . "Support for Tajik using KOI8-T."))
240 '("Cyrillic")) 240 '("Cyrillic"))
241 241
242(let ((elt `("microsoft-cp1251" windows-1251 1
243 ,(get 'encode-windows-1251 'translation-table)))
244 (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
245 (if slot
246 (setcdr slot (cdr elt))
247 (push elt ctext-non-standard-encodings-alist)))
248
249(set-language-info-alist 242(set-language-info-alist
250 "Bulgarian" `((coding-system windows-1251) 243 "Bulgarian" `((coding-system windows-1251)
251 (coding-priority windows-1251) 244 (coding-priority windows-1251)