aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorKenichi Handa2010-08-06 17:11:19 +0900
committerKenichi Handa2010-08-06 17:11:19 +0900
commit7a84eee5b745ad577d414158716ffe2ee487a117 (patch)
tree8941277791414c9648b79f304b5d27de0e46eb76 /lisp
parentfaa28da9b740a4b5f297fc215d79a66d71bf6f78 (diff)
downloademacs-7a84eee5b745ad577d414158716ffe2ee487a117.tar.gz
emacs-7a84eee5b745ad577d414158716ffe2ee487a117.zip
Improve the encoding by compound-text-with-extensions.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog11
-rw-r--r--lisp/international/mule.el111
2 files changed, 59 insertions, 63 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 149de6629f0..1b2f0ebd99e 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,14 @@
12010-08-06 Kenichi Handa <handa@m17n.org>
2
3 * international/mule.el (define-charset): Store NAME as :base
4 property.
5 (ctext-non-standard-encodings-table): Pay attention to charset
6 aliases.
7 (ctext-pre-write-conversion): Sort ctext-standard-encodings by the
8 current priority. Force using the designation of the specific
9 charset by adding `charset' text property. Improve the whole
10 algorithm.
11
12010-08-04 Kenichi Handa <handa@m17n.org> 122010-08-04 Kenichi Handa <handa@m17n.org>
2 13
3 * language/cyrillic.el: Don't add "microsoft-cp1251" to 14 * language/cyrillic.el: Don't add "microsoft-cp1251" to
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index e030acbef02..105163a5d11 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -282,6 +282,7 @@ attribute."
282 (plist-put props :short-name (symbol-name name))) 282 (plist-put props :short-name (symbol-name name)))
283 (or (plist-get props :long-name) 283 (or (plist-get props :long-name)
284 (plist-put props :long-name (plist-get props :short-name))) 284 (plist-put props :long-name (plist-get props :short-name)))
285 (plist-put props :base name)
285 ;; We can probably get a worthwhile amount in purespace. 286 ;; We can probably get a worthwhile amount in purespace.
286 (setq props 287 (setq props
287 (mapcar (lambda (elt) 288 (mapcar (lambda (elt)
@@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.")
1535 (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) 1536 (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
1536 (charset (nth 3 slot))) 1537 (charset (nth 3 slot)))
1537 (if (charsetp charset) 1538 (if (charsetp charset)
1538 (setcar tail (cons charset slot)) 1539 (setcar tail
1540 (cons (plist-get (charset-plist charset) :base) slot))
1539 (setcar tail (cons (car charset) slot)) 1541 (setcar tail (cons (car charset) slot))
1540 (dolist (cs (cdr charset)) 1542 (dolist (cs (cdr charset))
1541 (setcdr tail 1543 (setcdr tail
1542 (cons (cons (car cs) slot) (cdr tail))) 1544 (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
1545 (cdr tail)))
1543 (setq tail (cdr tail)))) 1546 (setq tail (cdr tail))))
1544 (setq tail (cdr tail)))) 1547 (setq tail (cdr tail))))
1545 table)) 1548 table))
@@ -1559,74 +1562,56 @@ in-place."
1559 (setq from 1 to (point-max))) 1562 (setq from 1 to (point-max)))
1560 (save-restriction 1563 (save-restriction
1561 (narrow-to-region from to) 1564 (narrow-to-region from to)
1565 (goto-char from)
1562 (let ((encoding-table (ctext-non-standard-encodings-table)) 1566 (let ((encoding-table (ctext-non-standard-encodings-table))
1563 (charset-list ctext-standard-encodings) 1567 (charset-list (sort-charsets
1568 (copy-sequence ctext-standard-encodings)))
1569 (end-pos (make-marker))
1564 last-coding-system-used 1570 last-coding-system-used
1565 last-pos last-encoding-info 1571 last-pos charset encoding-info)
1566 encoding-info end-pos ch charset)
1567 (dolist (elt encoding-table) 1572 (dolist (elt encoding-table)
1568 (push (car elt) charset-list)) 1573 (push (car elt) charset-list))
1569 (goto-char (setq last-pos from))
1570 (setq end-pos (point-marker)) 1574 (setq end-pos (point-marker))
1571 (while (re-search-forward "[^\000-\177]+" nil t) 1575 (while (re-search-forward "[^\0-\177]+" nil t)
1572 ;; Found a sequence of non-ASCII characters. 1576 ;; Found a sequence of non-ASCII characters.
1573 (setq last-pos (match-beginning 0)
1574 ch (char-after last-pos)
1575 charset (char-charset ch charset-list)
1576 last-encoding-info
1577 (if charset
1578 (or (cdr (assq charset encoding-table))
1579 charset)
1580 'utf-8))
1581 (set-marker end-pos (match-end 0)) 1577 (set-marker end-pos (match-end 0))
1582 (goto-char (1+ last-pos)) 1578 (goto-char (match-beginning 0))
1583 (while (marker-position end-pos) 1579 (setq last-pos (point)
1584 (if (< (point) end-pos) 1580 charset (char-charset (following-char) charset-list))
1585 (progn 1581 (forward-char 1)
1586 (setq charset (char-charset (following-char) charset-list) 1582 (while (and (< (point) end-pos)
1587 encoding-info 1583 (eq charset (char-charset (following-char) charset-list)))
1588 (if charset 1584 (forward-char 1))
1589 (or (cdr (assq charset encoding-table)) 1585 (if charset
1590 charset) 1586 (if (setq encoding-info (cdr (assq charset encoding-table)))
1591 'utf-8)) 1587 ;; Encode this range using an extended segment.
1592 (forward-char 1)) 1588 (let ((encoding-name (car encoding-info))
1593 (setq encoding-info nil) 1589 (coding-system (nth 1 encoding-info))
1594 (set-marker end-pos nil)) 1590 (noctets (nth 2 encoding-info))
1595 (unless (eq last-encoding-info encoding-info) 1591 len)
1596 (cond ((consp last-encoding-info) 1592 (encode-coding-region last-pos (point) coding-system)
1597 ;; Encode the previous range using an extended 1593 (setq len (+ (length encoding-name) 1
1598 ;; segment. 1594 (- (point) last-pos)))
1599 (let ((encoding-name (car last-encoding-info)) 1595 ;; According to the spec of CTEXT, it is not
1600 (coding-system (nth 1 last-encoding-info)) 1596 ;; necessary to produce this extra designation
1601 (noctets (nth 2 last-encoding-info)) 1597 ;; sequence, but some buggy application
1602 len) 1598 ;; (e.g. crxvt-gb) requires it.
1603 (encode-coding-region last-pos (point) coding-system) 1599 (insert "\e(B")
1604 (setq len (+ (length encoding-name) 1 1600 (save-excursion
1605 (- (point) last-pos))) 1601 (goto-char last-pos)
1606 ;; According to the spec of CTEXT, it is not 1602 (insert (format "\e%%/%d" noctets))
1607 ;; necessary to produce this extra designation 1603 (insert-byte (+ (/ len 128) 128) 1)
1608 ;; sequence, but some buggy application 1604 (insert-byte (+ (% len 128) 128) 1)
1609 ;; (e.g. crxvt-gb) requires it. 1605 (insert encoding-name)
1610 (insert "\e(B") 1606 (insert 2)))
1611 (save-excursion 1607 ;; Encode this range as characters in CHARSET.
1612 (goto-char last-pos) 1608 (put-text-property last-pos (point) 'charset charset))
1613 (insert (format "\e%%/%d" noctets)) 1609 ;; Encode this range using UTF-8 encoding extention.
1614 (insert-byte (+ (/ len 128) 128) 1) 1610 (encode-coding-region last-pos (point) 'mule-utf-8)
1615 (insert-byte (+ (% len 128) 128) 1) 1611 (save-excursion
1616 (insert encoding-name) 1612 (goto-char last-pos)
1617 (insert 2)))) 1613 (insert "\e%G"))
1618 ((eq last-encoding-info 'utf-8) 1614 (insert "\e%@")))
1619 ;; Encode the previous range using UTF-8 encoding
1620 ;; extention.
1621 (encode-coding-region last-pos (point) 'mule-utf-8)
1622 (save-excursion
1623 (goto-char last-pos)
1624 (insert "\e%G"))
1625 (insert "\e%@"))
1626 (t
1627 (put-text-property last-pos (point) 'charset charset)))
1628 (setq last-pos (point)
1629 last-encoding-info encoding-info))))
1630 (goto-char (point-min))))) 1615 (goto-char (point-min)))))
1631 ;; Must return nil, as build_annotations_2 expects that. 1616 ;; Must return nil, as build_annotations_2 expects that.
1632 nil) 1617 nil)