diff options
| author | Kenichi Handa | 2010-08-04 17:06:52 +0900 |
|---|---|---|
| committer | Kenichi Handa | 2010-08-04 17:06:52 +0900 |
| commit | 6b4d96c2f04e5a08c4f9fff144743ff16c151dae (patch) | |
| tree | 29def28b96201bf62115bbbadad44b5cf0ffbfe2 | |
| parent | 4ce5a4ccd4cca6dcdc6d8bafa8cfaea1986e7e6e (diff) | |
| download | emacs-6b4d96c2f04e5a08c4f9fff144743ff16c151dae.tar.gz emacs-6b4d96c2f04e5a08c4f9fff144743ff16c151dae.zip | |
Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text.
| -rw-r--r-- | lisp/ChangeLog | 17 | ||||
| -rw-r--r-- | lisp/international/mule-conf.el | 14 | ||||
| -rw-r--r-- | lisp/international/mule.el | 147 | ||||
| -rw-r--r-- | lisp/language/cyrillic.el | 7 |
4 files changed, 104 insertions, 81 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 670f07c2683..775ddcdc2e7 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,20 @@ | |||
| 1 | 2010-08-04 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * language/cyrillic.el: Don't add "microsoft-cp1251" to | ||
| 4 | ctext-non-standard-encodings-alist here. | ||
| 5 | |||
| 6 | * international/mule.el (ctext-non-standard-encodings-alist): Add | ||
| 7 | "koi8-r" and "microsoft-cp1251". | ||
| 8 | (ctext-standard-encodings): New variable. | ||
| 9 | (ctext-non-standard-encodings-table): List only elements for | ||
| 10 | non-standard encodings. | ||
| 11 | (ctext-pre-write-conversion): Adjusted for the above change. | ||
| 12 | Check ctext-standard-encodings. | ||
| 13 | |||
| 14 | * international/mule-conf.el (compound-text): Doc fix. | ||
| 15 | (ctext-no-compositions): Doc fix. | ||
| 16 | (compound-text-with-extensions): Doc fix. | ||
| 17 | |||
| 1 | 2010-07-23 Juanma Barranquero <lekktu@gmail.com> | 18 | 2010-07-23 Juanma Barranquero <lekktu@gmail.com> |
| 2 | 19 | ||
| 3 | * help-fns.el (find-lisp-object-file-name): Doc fix (bug#6494). | 20 | * help-fns.el (find-lisp-object-file-name): Doc fix (bug#6494). |
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el index f53b69eed8b..9ee8d22463a 100644 --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el | |||
| @@ -1410,9 +1410,10 @@ is treated as a character." | |||
| 1410 | :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) | 1410 | :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) |
| 1411 | 1411 | ||
| 1412 | (define-coding-system 'compound-text | 1412 | (define-coding-system 'compound-text |
| 1413 | "Compound text based generic encoding for decoding unknown messages. | 1413 | "Compound text based generic encoding. |
| 1414 | 1414 | This coding system is an extension of X's \"Compound Text Encoding\". | |
| 1415 | This coding system does not support extended segments of CTEXT." | 1415 | It encodes many characters using the normal ISO-2022 designation sequences, |
| 1416 | but it doesn't support extended segments of CTEXT." | ||
| 1416 | :coding-type 'iso-2022 | 1417 | :coding-type 'iso-2022 |
| 1417 | :mnemonic ?x | 1418 | :mnemonic ?x |
| 1418 | :charset-list 'iso-2022 | 1419 | :charset-list 'iso-2022 |
| @@ -1432,7 +1433,7 @@ This coding system does not support extended segments of CTEXT." | |||
| 1432 | ;; not have a mime-charset property, to prevent it from showing up | 1433 | ;; not have a mime-charset property, to prevent it from showing up |
| 1433 | ;; close to the beginning of coding systems ordered by priority. | 1434 | ;; close to the beginning of coding systems ordered by priority. |
| 1434 | (define-coding-system 'ctext-no-compositions | 1435 | (define-coding-system 'ctext-no-compositions |
| 1435 | "Compound text based generic encoding for decoding unknown messages. | 1436 | "Compound text based generic encoding. |
| 1436 | 1437 | ||
| 1437 | Like `compound-text', but does not produce escape sequences for compositions." | 1438 | Like `compound-text', but does not produce escape sequences for compositions." |
| 1438 | :coding-type 'iso-2022 | 1439 | :coding-type 'iso-2022 |
| @@ -1445,8 +1446,9 @@ Like `compound-text', but does not produce escape sequences for compositions." | |||
| 1445 | (define-coding-system 'compound-text-with-extensions | 1446 | (define-coding-system 'compound-text-with-extensions |
| 1446 | "Compound text encoding with ICCCM Extended Segment extensions. | 1447 | "Compound text encoding with ICCCM Extended Segment extensions. |
| 1447 | 1448 | ||
| 1448 | See the variable `ctext-non-standard-encodings-alist' for the | 1449 | See the variables `ctext-standard-encodings' and |
| 1449 | detail about how extended segments are handled. | 1450 | `ctext-non-standard-encodings-alist' for the detail about how |
| 1451 | extended segments are handled. | ||
| 1450 | 1452 | ||
| 1451 | This coding system should be used only for X selections. It is inappropriate | 1453 | This coding system should be used only for X selections. It is inappropriate |
| 1452 | for decoding and encoding files, process I/O, etc." | 1454 | for decoding and encoding files, process I/O, etc." |
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 7e7e55728c8..e030acbef02 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -1408,7 +1408,9 @@ This function is provided for backward compatibility." | |||
| 1408 | '(("big5-0" big5 2 big5) | 1408 | '(("big5-0" big5 2 big5) |
| 1409 | ("ISO8859-14" iso-8859-14 1 latin-iso8859-14) | 1409 | ("ISO8859-14" iso-8859-14 1 latin-iso8859-14) |
| 1410 | ("ISO8859-15" iso-8859-15 1 latin-iso8859-15) | 1410 | ("ISO8859-15" iso-8859-15 1 latin-iso8859-15) |
| 1411 | ("gbk-0" gbk 2 chinese-gbk))) | 1411 | ("gbk-0" gbk 2 chinese-gbk) |
| 1412 | ("koi8-r" koi8-r 1 koi8-r) | ||
| 1413 | ("microsoft-cp1251" windows-1251 1 windows-1251))) | ||
| 1412 | "Alist of non-standard encoding names vs the corresponding usages in CTEXT. | 1414 | "Alist of non-standard encoding names vs the corresponding usages in CTEXT. |
| 1413 | 1415 | ||
| 1414 | It controls how extended segments of a compound text are handled | 1416 | It controls how extended segments of a compound text are handled |
| @@ -1497,6 +1499,20 @@ Each element must be one of the names listed in the variable | |||
| 1497 | (goto-char (point-min)) | 1499 | (goto-char (point-min)) |
| 1498 | (- (point-max) (point))))) | 1500 | (- (point-max) (point))))) |
| 1499 | 1501 | ||
| 1502 | (defvar ctext-standard-encodings | ||
| 1503 | '(ascii latin-jisx0201 katakana-jisx0201 | ||
| 1504 | latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4 | ||
| 1505 | greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5 | ||
| 1506 | latin-iso8859-9 | ||
| 1507 | chinese-gb2312 japanese-jisx0208 korean-ksc5601) | ||
| 1508 | "List of approved standard encodings (i.e. charsets) of X's Compound Text. | ||
| 1509 | Coding-system `compound-text-with-extensions' encodes a character | ||
| 1510 | belonging to any of those charsets using the normal ISO2022 | ||
| 1511 | designation sequence unless the current language environment or | ||
| 1512 | the variable `ctext-non-standard-encodings' decides to use an extended | ||
| 1513 | segment of CTEXT for that character. See also the documentation | ||
| 1514 | of `ctext-non-standard-encodings-alist'.") | ||
| 1515 | |||
| 1500 | ;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from | 1516 | ;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from |
| 1501 | ;; `ctext-non-standard-encodings' and a list specified by the key | 1517 | ;; `ctext-non-standard-encodings' and a list specified by the key |
| 1502 | ;; `ctext-non-standard-encodings' for the current language | 1518 | ;; `ctext-non-standard-encodings' for the current language |
| @@ -1508,77 +1524,74 @@ Each element must be one of the names listed in the variable | |||
| 1508 | ;; is encoded using UTF-8 encoding extension. | 1524 | ;; is encoded using UTF-8 encoding extension. |
| 1509 | 1525 | ||
| 1510 | (defun ctext-non-standard-encodings-table () | 1526 | (defun ctext-non-standard-encodings-table () |
| 1511 | (let (table) | 1527 | (let* ((table (append ctext-non-standard-encodings |
| 1512 | ;; Setup charsets specified by the key | 1528 | (copy-sequence |
| 1513 | ;; `ctext-non-standard-encodings' for the current language | 1529 | (get-language-info current-language-environment |
| 1514 | ;; environment and in `ctext-non-standard-encodings'. | 1530 | 'ctext-non-standard-encodings)))) |
| 1515 | (dolist (encoding (append | 1531 | (tail table) |
| 1516 | (get-language-info current-language-environment | 1532 | elt) |
| 1517 | 'ctext-non-standard-encodings) | 1533 | (while tail |
| 1518 | ctext-non-standard-encodings)) | 1534 | (setq elt (car tail)) |
| 1519 | (let* ((slot (assoc encoding ctext-non-standard-encodings-alist)) | 1535 | (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) |
| 1520 | (charset (nth 3 slot))) | 1536 | (charset (nth 3 slot))) |
| 1521 | (if (charsetp charset) | 1537 | (if (charsetp charset) |
| 1522 | (push (cons charset slot) table) | 1538 | (setcar tail (cons charset slot)) |
| 1523 | (dolist (cs charset) | 1539 | (setcar tail (cons (car charset) slot)) |
| 1524 | (push (cons cs slot) table))))) | 1540 | (dolist (cs (cdr charset)) |
| 1525 | 1541 | (setcdr tail | |
| 1526 | ;; Next prepend charsets for ISO2022 designation sequence. | 1542 | (cons (cons (car cs) slot) (cdr tail))) |
| 1527 | (dolist (charset charset-list) | 1543 | (setq tail (cdr tail)))) |
| 1528 | (let ((final (plist-get (charset-plist charset) :iso-final-char))) | 1544 | (setq tail (cdr tail)))) |
| 1529 | (if (and (integerp final) | 1545 | table)) |
| 1530 | (>= final #x40) (<= final #x7e) | ||
| 1531 | ;; Exclude ascii and chinese-cns11643-X. | ||
| 1532 | (not (eq charset 'ascii)) | ||
| 1533 | (not (string-match "cns11643" (symbol-name charset)))) | ||
| 1534 | (push (cons charset nil) table)))) | ||
| 1535 | |||
| 1536 | ;; Returned reversed list so that the charsets specified by the | ||
| 1537 | ;; key `ctext-non-standard-encodings' for the current language | ||
| 1538 | ;; have the highest priority. | ||
| 1539 | (nreverse table))) | ||
| 1540 | 1546 | ||
| 1541 | (defun ctext-pre-write-conversion (from to) | 1547 | (defun ctext-pre-write-conversion (from to) |
| 1542 | "Encode characters between FROM and TO as Compound Text w/Extended Segments. | 1548 | "Encode characters between FROM and TO as Compound Text w/Extended Segments. |
| 1543 | 1549 | ||
| 1544 | If FROM is a string, or if the current buffer is not the one set up for us | 1550 | If FROM is a string, generate a new temp buffer, insert the text, |
| 1545 | by `encode-coding-string', generate a new temp buffer, insert the text, | 1551 | and convert it in the temporary buffer. Otherwise, convert |
| 1546 | and convert it in the temporary buffer. Otherwise, convert in-place." | 1552 | in-place." |
| 1547 | (save-match-data | 1553 | (save-match-data |
| 1548 | ;; Setup a working buffer if necessary. | 1554 | ;; Setup a working buffer if necessary. |
| 1549 | (when (stringp from) | 1555 | (when (stringp from) |
| 1550 | (set-buffer (generate-new-buffer " *temp")) | 1556 | (set-buffer (generate-new-buffer " *temp")) |
| 1551 | (set-buffer-multibyte (multibyte-string-p from)) | 1557 | (set-buffer-multibyte (multibyte-string-p from)) |
| 1552 | (insert from)) | 1558 | (insert from) |
| 1553 | 1559 | (setq from 1 to (point-max))) | |
| 1554 | ;; Now we can encode the whole buffer. | 1560 | (save-restriction |
| 1555 | (let ((encoding-table (ctext-non-standard-encodings-table)) | 1561 | (narrow-to-region from to) |
| 1556 | last-coding-system-used | 1562 | (let ((encoding-table (ctext-non-standard-encodings-table)) |
| 1557 | last-pos last-encoding-info | 1563 | (charset-list ctext-standard-encodings) |
| 1558 | encoding-info end-pos ch) | 1564 | last-coding-system-used |
| 1559 | (goto-char (setq last-pos (point-min))) | 1565 | last-pos last-encoding-info |
| 1560 | (setq end-pos (point-marker)) | 1566 | encoding-info end-pos ch charset) |
| 1561 | (while (re-search-forward "[^\000-\177]+" nil t) | 1567 | (dolist (elt encoding-table) |
| 1562 | ;; Found a sequence of non-ASCII characters. | 1568 | (push (car elt) charset-list)) |
| 1563 | (setq last-pos (match-beginning 0) | 1569 | (goto-char (setq last-pos from)) |
| 1564 | ch (char-after last-pos) | 1570 | (setq end-pos (point-marker)) |
| 1565 | last-encoding-info (catch 'tag | 1571 | (while (re-search-forward "[^\000-\177]+" nil t) |
| 1566 | (dolist (elt encoding-table) | 1572 | ;; Found a sequence of non-ASCII characters. |
| 1567 | (if (encode-char ch (car elt)) | 1573 | (setq last-pos (match-beginning 0) |
| 1568 | (throw 'tag (cdr elt)))) | 1574 | ch (char-after last-pos) |
| 1569 | 'utf-8)) | 1575 | charset (char-charset ch charset-list) |
| 1570 | (set-marker end-pos (match-end 0)) | 1576 | last-encoding-info |
| 1571 | (goto-char (1+ last-pos)) | 1577 | (if charset |
| 1572 | (catch 'tag | 1578 | (or (cdr (assq charset encoding-table)) |
| 1573 | (while t | 1579 | charset) |
| 1574 | (setq encoding-info | 1580 | 'utf-8)) |
| 1575 | (if (< (point) end-pos) | 1581 | (set-marker end-pos (match-end 0)) |
| 1576 | (catch 'tag | 1582 | (goto-char (1+ last-pos)) |
| 1577 | (setq ch (following-char)) | 1583 | (while (marker-position end-pos) |
| 1578 | (dolist (elt encoding-table) | 1584 | (if (< (point) end-pos) |
| 1579 | (if (encode-char ch (car elt)) | 1585 | (progn |
| 1580 | (throw 'tag (cdr elt)))) | 1586 | (setq charset (char-charset (following-char) charset-list) |
| 1581 | 'utf-8))) | 1587 | encoding-info |
| 1588 | (if charset | ||
| 1589 | (or (cdr (assq charset encoding-table)) | ||
| 1590 | charset) | ||
| 1591 | 'utf-8)) | ||
| 1592 | (forward-char 1)) | ||
| 1593 | (setq encoding-info nil) | ||
| 1594 | (set-marker end-pos nil)) | ||
| 1582 | (unless (eq last-encoding-info encoding-info) | 1595 | (unless (eq last-encoding-info encoding-info) |
| 1583 | (cond ((consp last-encoding-info) | 1596 | (cond ((consp last-encoding-info) |
| 1584 | ;; Encode the previous range using an extended | 1597 | ;; Encode the previous range using an extended |
| @@ -1609,14 +1622,12 @@ and convert it in the temporary buffer. Otherwise, convert in-place." | |||
| 1609 | (save-excursion | 1622 | (save-excursion |
| 1610 | (goto-char last-pos) | 1623 | (goto-char last-pos) |
| 1611 | (insert "\e%G")) | 1624 | (insert "\e%G")) |
| 1612 | (insert "\e%@"))) | 1625 | (insert "\e%@")) |
| 1626 | (t | ||
| 1627 | (put-text-property last-pos (point) 'charset charset))) | ||
| 1613 | (setq last-pos (point) | 1628 | (setq last-pos (point) |
| 1614 | last-encoding-info encoding-info)) | 1629 | last-encoding-info encoding-info)))) |
| 1615 | (if (< (point) end-pos) | 1630 | (goto-char (point-min))))) |
| 1616 | (forward-char 1) | ||
| 1617 | (throw 'tag nil))))) | ||
| 1618 | (set-marker end-pos nil) | ||
| 1619 | (goto-char (point-min)))) | ||
| 1620 | ;; Must return nil, as build_annotations_2 expects that. | 1631 | ;; Must return nil, as build_annotations_2 expects that. |
| 1621 | nil) | 1632 | nil) |
| 1622 | 1633 | ||
diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el index 7d2f082579f..b293ad1ff0b 100644 --- a/lisp/language/cyrillic.el +++ b/lisp/language/cyrillic.el | |||
| @@ -239,13 +239,6 @@ Support for Russian using koi8-r and the russian-computer input method.") | |||
| 239 | (documentation . "Support for Tajik using KOI8-T.")) | 239 | (documentation . "Support for Tajik using KOI8-T.")) |
| 240 | '("Cyrillic")) | 240 | '("Cyrillic")) |
| 241 | 241 | ||
| 242 | (let ((elt `("microsoft-cp1251" windows-1251 1 | ||
| 243 | ,(get 'encode-windows-1251 'translation-table))) | ||
| 244 | (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist))) | ||
| 245 | (if slot | ||
| 246 | (setcdr slot (cdr elt)) | ||
| 247 | (push elt ctext-non-standard-encodings-alist))) | ||
| 248 | |||
| 249 | (set-language-info-alist | 242 | (set-language-info-alist |
| 250 | "Bulgarian" `((coding-system windows-1251) | 243 | "Bulgarian" `((coding-system windows-1251) |
| 251 | (coding-priority windows-1251) | 244 | (coding-priority windows-1251) |