diff options
| author | Kenichi Handa | 2010-08-06 17:11:19 +0900 |
|---|---|---|
| committer | Kenichi Handa | 2010-08-06 17:11:19 +0900 |
| commit | 7a84eee5b745ad577d414158716ffe2ee487a117 (patch) | |
| tree | 8941277791414c9648b79f304b5d27de0e46eb76 /lisp | |
| parent | faa28da9b740a4b5f297fc215d79a66d71bf6f78 (diff) | |
| download | emacs-7a84eee5b745ad577d414158716ffe2ee487a117.tar.gz emacs-7a84eee5b745ad577d414158716ffe2ee487a117.zip | |
Improve the encoding by compound-text-with-extensions.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/ChangeLog | 11 | ||||
| -rw-r--r-- | lisp/international/mule.el | 111 |
2 files changed, 59 insertions, 63 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 149de6629f0..1b2f0ebd99e 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,14 @@ | |||
| 1 | 2010-08-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * international/mule.el (define-charset): Store NAME as :base | ||
| 4 | property. | ||
| 5 | (ctext-non-standard-encodings-table): Pay attention to charset | ||
| 6 | aliases. | ||
| 7 | (ctext-pre-write-conversion): Sort ctext-standard-encodings by the | ||
| 8 | current priority. Force using the designation of the specific | ||
| 9 | charset by adding `charset' text property. Improve the whole | ||
| 10 | algorithm. | ||
| 11 | |||
| 1 | 2010-08-04 Kenichi Handa <handa@m17n.org> | 12 | 2010-08-04 Kenichi Handa <handa@m17n.org> |
| 2 | 13 | ||
| 3 | * language/cyrillic.el: Don't add "microsoft-cp1251" to | 14 | * language/cyrillic.el: Don't add "microsoft-cp1251" to |
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index e030acbef02..105163a5d11 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -282,6 +282,7 @@ attribute." | |||
| 282 | (plist-put props :short-name (symbol-name name))) | 282 | (plist-put props :short-name (symbol-name name))) |
| 283 | (or (plist-get props :long-name) | 283 | (or (plist-get props :long-name) |
| 284 | (plist-put props :long-name (plist-get props :short-name))) | 284 | (plist-put props :long-name (plist-get props :short-name))) |
| 285 | (plist-put props :base name) | ||
| 285 | ;; We can probably get a worthwhile amount in purespace. | 286 | ;; We can probably get a worthwhile amount in purespace. |
| 286 | (setq props | 287 | (setq props |
| 287 | (mapcar (lambda (elt) | 288 | (mapcar (lambda (elt) |
| @@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.") | |||
| 1535 | (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) | 1536 | (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) |
| 1536 | (charset (nth 3 slot))) | 1537 | (charset (nth 3 slot))) |
| 1537 | (if (charsetp charset) | 1538 | (if (charsetp charset) |
| 1538 | (setcar tail (cons charset slot)) | 1539 | (setcar tail |
| 1540 | (cons (plist-get (charset-plist charset) :base) slot)) | ||
| 1539 | (setcar tail (cons (car charset) slot)) | 1541 | (setcar tail (cons (car charset) slot)) |
| 1540 | (dolist (cs (cdr charset)) | 1542 | (dolist (cs (cdr charset)) |
| 1541 | (setcdr tail | 1543 | (setcdr tail |
| 1542 | (cons (cons (car cs) slot) (cdr tail))) | 1544 | (cons (cons (plist-get (charset-plist (car cs)) :base) slot) |
| 1545 | (cdr tail))) | ||
| 1543 | (setq tail (cdr tail)))) | 1546 | (setq tail (cdr tail)))) |
| 1544 | (setq tail (cdr tail)))) | 1547 | (setq tail (cdr tail)))) |
| 1545 | table)) | 1548 | table)) |
| @@ -1559,74 +1562,56 @@ in-place." | |||
| 1559 | (setq from 1 to (point-max))) | 1562 | (setq from 1 to (point-max))) |
| 1560 | (save-restriction | 1563 | (save-restriction |
| 1561 | (narrow-to-region from to) | 1564 | (narrow-to-region from to) |
| 1565 | (goto-char from) | ||
| 1562 | (let ((encoding-table (ctext-non-standard-encodings-table)) | 1566 | (let ((encoding-table (ctext-non-standard-encodings-table)) |
| 1563 | (charset-list ctext-standard-encodings) | 1567 | (charset-list (sort-charsets |
| 1568 | (copy-sequence ctext-standard-encodings))) | ||
| 1569 | (end-pos (make-marker)) | ||
| 1564 | last-coding-system-used | 1570 | last-coding-system-used |
| 1565 | last-pos last-encoding-info | 1571 | last-pos charset encoding-info) |
| 1566 | encoding-info end-pos ch charset) | ||
| 1567 | (dolist (elt encoding-table) | 1572 | (dolist (elt encoding-table) |
| 1568 | (push (car elt) charset-list)) | 1573 | (push (car elt) charset-list)) |
| 1569 | (goto-char (setq last-pos from)) | ||
| 1570 | (setq end-pos (point-marker)) | 1574 | (setq end-pos (point-marker)) |
| 1571 | (while (re-search-forward "[^\000-\177]+" nil t) | 1575 | (while (re-search-forward "[^\0-\177]+" nil t) |
| 1572 | ;; Found a sequence of non-ASCII characters. | 1576 | ;; Found a sequence of non-ASCII characters. |
| 1573 | (setq last-pos (match-beginning 0) | ||
| 1574 | ch (char-after last-pos) | ||
| 1575 | charset (char-charset ch charset-list) | ||
| 1576 | last-encoding-info | ||
| 1577 | (if charset | ||
| 1578 | (or (cdr (assq charset encoding-table)) | ||
| 1579 | charset) | ||
| 1580 | 'utf-8)) | ||
| 1581 | (set-marker end-pos (match-end 0)) | 1577 | (set-marker end-pos (match-end 0)) |
| 1582 | (goto-char (1+ last-pos)) | 1578 | (goto-char (match-beginning 0)) |
| 1583 | (while (marker-position end-pos) | 1579 | (setq last-pos (point) |
| 1584 | (if (< (point) end-pos) | 1580 | charset (char-charset (following-char) charset-list)) |
| 1585 | (progn | 1581 | (forward-char 1) |
| 1586 | (setq charset (char-charset (following-char) charset-list) | 1582 | (while (and (< (point) end-pos) |
| 1587 | encoding-info | 1583 | (eq charset (char-charset (following-char) charset-list))) |
| 1588 | (if charset | 1584 | (forward-char 1)) |
| 1589 | (or (cdr (assq charset encoding-table)) | 1585 | (if charset |
| 1590 | charset) | 1586 | (if (setq encoding-info (cdr (assq charset encoding-table))) |
| 1591 | 'utf-8)) | 1587 | ;; Encode this range using an extended segment. |
| 1592 | (forward-char 1)) | 1588 | (let ((encoding-name (car encoding-info)) |
| 1593 | (setq encoding-info nil) | 1589 | (coding-system (nth 1 encoding-info)) |
| 1594 | (set-marker end-pos nil)) | 1590 | (noctets (nth 2 encoding-info)) |
| 1595 | (unless (eq last-encoding-info encoding-info) | 1591 | len) |
| 1596 | (cond ((consp last-encoding-info) | 1592 | (encode-coding-region last-pos (point) coding-system) |
| 1597 | ;; Encode the previous range using an extended | 1593 | (setq len (+ (length encoding-name) 1 |
| 1598 | ;; segment. | 1594 | (- (point) last-pos))) |
| 1599 | (let ((encoding-name (car last-encoding-info)) | 1595 | ;; According to the spec of CTEXT, it is not |
| 1600 | (coding-system (nth 1 last-encoding-info)) | 1596 | ;; necessary to produce this extra designation |
| 1601 | (noctets (nth 2 last-encoding-info)) | 1597 | ;; sequence, but some buggy application |
| 1602 | len) | 1598 | ;; (e.g. crxvt-gb) requires it. |
| 1603 | (encode-coding-region last-pos (point) coding-system) | 1599 | (insert "\e(B") |
| 1604 | (setq len (+ (length encoding-name) 1 | 1600 | (save-excursion |
| 1605 | (- (point) last-pos))) | 1601 | (goto-char last-pos) |
| 1606 | ;; According to the spec of CTEXT, it is not | 1602 | (insert (format "\e%%/%d" noctets)) |
| 1607 | ;; necessary to produce this extra designation | 1603 | (insert-byte (+ (/ len 128) 128) 1) |
| 1608 | ;; sequence, but some buggy application | 1604 | (insert-byte (+ (% len 128) 128) 1) |
| 1609 | ;; (e.g. crxvt-gb) requires it. | 1605 | (insert encoding-name) |
| 1610 | (insert "\e(B") | 1606 | (insert 2))) |
| 1611 | (save-excursion | 1607 | ;; Encode this range as characters in CHARSET. |
| 1612 | (goto-char last-pos) | 1608 | (put-text-property last-pos (point) 'charset charset)) |
| 1613 | (insert (format "\e%%/%d" noctets)) | 1609 | ;; Encode this range using UTF-8 encoding extention. |
| 1614 | (insert-byte (+ (/ len 128) 128) 1) | 1610 | (encode-coding-region last-pos (point) 'mule-utf-8) |
| 1615 | (insert-byte (+ (% len 128) 128) 1) | 1611 | (save-excursion |
| 1616 | (insert encoding-name) | 1612 | (goto-char last-pos) |
| 1617 | (insert 2)))) | 1613 | (insert "\e%G")) |
| 1618 | ((eq last-encoding-info 'utf-8) | 1614 | (insert "\e%@"))) |
| 1619 | ;; Encode the previous range using UTF-8 encoding | ||
| 1620 | ;; extention. | ||
| 1621 | (encode-coding-region last-pos (point) 'mule-utf-8) | ||
| 1622 | (save-excursion | ||
| 1623 | (goto-char last-pos) | ||
| 1624 | (insert "\e%G")) | ||
| 1625 | (insert "\e%@")) | ||
| 1626 | (t | ||
| 1627 | (put-text-property last-pos (point) 'charset charset))) | ||
| 1628 | (setq last-pos (point) | ||
| 1629 | last-encoding-info encoding-info)))) | ||
| 1630 | (goto-char (point-min))))) | 1615 | (goto-char (point-min))))) |
| 1631 | ;; Must return nil, as build_annotations_2 expects that. | 1616 | ;; Must return nil, as build_annotations_2 expects that. |
| 1632 | nil) | 1617 | nil) |