aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2010-08-06 17:11:19 +0900
committerKenichi Handa2010-08-06 17:11:19 +0900
commit7a84eee5b745ad577d414158716ffe2ee487a117 (patch)
tree8941277791414c9648b79f304b5d27de0e46eb76
parentfaa28da9b740a4b5f297fc215d79a66d71bf6f78 (diff)
downloademacs-7a84eee5b745ad577d414158716ffe2ee487a117.tar.gz
emacs-7a84eee5b745ad577d414158716ffe2ee487a117.zip
Improve the encoding by compound-text-with-extensions.
-rw-r--r--lisp/ChangeLog11
-rw-r--r--lisp/international/mule.el111
-rw-r--r--src/ChangeLog11
-rw-r--r--src/charset.c86
-rw-r--r--src/coding.c2
5 files changed, 146 insertions, 75 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 149de6629f0..1b2f0ebd99e 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,14 @@
12010-08-06 Kenichi Handa <handa@m17n.org>
2
3 * international/mule.el (define-charset): Store NAME as :base
4 property.
5 (ctext-non-standard-encodings-table): Pay attention to charset
6 aliases.
7 (ctext-pre-write-conversion): Sort ctext-standard-encodings by the
8 current priority. Force using the designation of the specific
9 charset by adding `charset' text property. Improve the whole
10 algorithm.
11
12010-08-04 Kenichi Handa <handa@m17n.org> 122010-08-04 Kenichi Handa <handa@m17n.org>
2 13
3 * language/cyrillic.el: Don't add "microsoft-cp1251" to 14 * language/cyrillic.el: Don't add "microsoft-cp1251" to
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index e030acbef02..105163a5d11 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -282,6 +282,7 @@ attribute."
282 (plist-put props :short-name (symbol-name name))) 282 (plist-put props :short-name (symbol-name name)))
283 (or (plist-get props :long-name) 283 (or (plist-get props :long-name)
284 (plist-put props :long-name (plist-get props :short-name))) 284 (plist-put props :long-name (plist-get props :short-name)))
285 (plist-put props :base name)
285 ;; We can probably get a worthwhile amount in purespace. 286 ;; We can probably get a worthwhile amount in purespace.
286 (setq props 287 (setq props
287 (mapcar (lambda (elt) 288 (mapcar (lambda (elt)
@@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.")
1535 (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) 1536 (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
1536 (charset (nth 3 slot))) 1537 (charset (nth 3 slot)))
1537 (if (charsetp charset) 1538 (if (charsetp charset)
1538 (setcar tail (cons charset slot)) 1539 (setcar tail
1540 (cons (plist-get (charset-plist charset) :base) slot))
1539 (setcar tail (cons (car charset) slot)) 1541 (setcar tail (cons (car charset) slot))
1540 (dolist (cs (cdr charset)) 1542 (dolist (cs (cdr charset))
1541 (setcdr tail 1543 (setcdr tail
1542 (cons (cons (car cs) slot) (cdr tail))) 1544 (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
1545 (cdr tail)))
1543 (setq tail (cdr tail)))) 1546 (setq tail (cdr tail))))
1544 (setq tail (cdr tail)))) 1547 (setq tail (cdr tail))))
1545 table)) 1548 table))
@@ -1559,74 +1562,56 @@ in-place."
1559 (setq from 1 to (point-max))) 1562 (setq from 1 to (point-max)))
1560 (save-restriction 1563 (save-restriction
1561 (narrow-to-region from to) 1564 (narrow-to-region from to)
1565 (goto-char from)
1562 (let ((encoding-table (ctext-non-standard-encodings-table)) 1566 (let ((encoding-table (ctext-non-standard-encodings-table))
1563 (charset-list ctext-standard-encodings) 1567 (charset-list (sort-charsets
1568 (copy-sequence ctext-standard-encodings)))
1569 (end-pos (make-marker))
1564 last-coding-system-used 1570 last-coding-system-used
1565 last-pos last-encoding-info 1571 last-pos charset encoding-info)
1566 encoding-info end-pos ch charset)
1567 (dolist (elt encoding-table) 1572 (dolist (elt encoding-table)
1568 (push (car elt) charset-list)) 1573 (push (car elt) charset-list))
1569 (goto-char (setq last-pos from))
1570 (setq end-pos (point-marker)) 1574 (setq end-pos (point-marker))
1571 (while (re-search-forward "[^\000-\177]+" nil t) 1575 (while (re-search-forward "[^\0-\177]+" nil t)
1572 ;; Found a sequence of non-ASCII characters. 1576 ;; Found a sequence of non-ASCII characters.
1573 (setq last-pos (match-beginning 0)
1574 ch (char-after last-pos)
1575 charset (char-charset ch charset-list)
1576 last-encoding-info
1577 (if charset
1578 (or (cdr (assq charset encoding-table))
1579 charset)
1580 'utf-8))
1581 (set-marker end-pos (match-end 0)) 1577 (set-marker end-pos (match-end 0))
1582 (goto-char (1+ last-pos)) 1578 (goto-char (match-beginning 0))
1583 (while (marker-position end-pos) 1579 (setq last-pos (point)
1584 (if (< (point) end-pos) 1580 charset (char-charset (following-char) charset-list))
1585 (progn 1581 (forward-char 1)
1586 (setq charset (char-charset (following-char) charset-list) 1582 (while (and (< (point) end-pos)
1587 encoding-info 1583 (eq charset (char-charset (following-char) charset-list)))
1588 (if charset 1584 (forward-char 1))
1589 (or (cdr (assq charset encoding-table)) 1585 (if charset
1590 charset) 1586 (if (setq encoding-info (cdr (assq charset encoding-table)))
1591 'utf-8)) 1587 ;; Encode this range using an extended segment.
1592 (forward-char 1)) 1588 (let ((encoding-name (car encoding-info))
1593 (setq encoding-info nil) 1589 (coding-system (nth 1 encoding-info))
1594 (set-marker end-pos nil)) 1590 (noctets (nth 2 encoding-info))
1595 (unless (eq last-encoding-info encoding-info) 1591 len)
1596 (cond ((consp last-encoding-info) 1592 (encode-coding-region last-pos (point) coding-system)
1597 ;; Encode the previous range using an extended 1593 (setq len (+ (length encoding-name) 1
1598 ;; segment. 1594 (- (point) last-pos)))
1599 (let ((encoding-name (car last-encoding-info)) 1595 ;; According to the spec of CTEXT, it is not
1600 (coding-system (nth 1 last-encoding-info)) 1596 ;; necessary to produce this extra designation
1601 (noctets (nth 2 last-encoding-info)) 1597 ;; sequence, but some buggy application
1602 len) 1598 ;; (e.g. crxvt-gb) requires it.
1603 (encode-coding-region last-pos (point) coding-system) 1599 (insert "\e(B")
1604 (setq len (+ (length encoding-name) 1 1600 (save-excursion
1605 (- (point) last-pos))) 1601 (goto-char last-pos)
1606 ;; According to the spec of CTEXT, it is not 1602 (insert (format "\e%%/%d" noctets))
1607 ;; necessary to produce this extra designation 1603 (insert-byte (+ (/ len 128) 128) 1)
1608 ;; sequence, but some buggy application 1604 (insert-byte (+ (% len 128) 128) 1)
1609 ;; (e.g. crxvt-gb) requires it. 1605 (insert encoding-name)
1610 (insert "\e(B") 1606 (insert 2)))
1611 (save-excursion 1607 ;; Encode this range as characters in CHARSET.
1612 (goto-char last-pos) 1608 (put-text-property last-pos (point) 'charset charset))
1613 (insert (format "\e%%/%d" noctets)) 1609 ;; Encode this range using UTF-8 encoding extention.
1614 (insert-byte (+ (/ len 128) 128) 1) 1610 (encode-coding-region last-pos (point) 'mule-utf-8)
1615 (insert-byte (+ (% len 128) 128) 1) 1611 (save-excursion
1616 (insert encoding-name) 1612 (goto-char last-pos)
1617 (insert 2)))) 1613 (insert "\e%G"))
1618 ((eq last-encoding-info 'utf-8) 1614 (insert "\e%@")))
1619 ;; Encode the previous range using UTF-8 encoding
1620 ;; extention.
1621 (encode-coding-region last-pos (point) 'mule-utf-8)
1622 (save-excursion
1623 (goto-char last-pos)
1624 (insert "\e%G"))
1625 (insert "\e%@"))
1626 (t
1627 (put-text-property last-pos (point) 'charset charset)))
1628 (setq last-pos (point)
1629 last-encoding-info encoding-info))))
1630 (goto-char (point-min))))) 1615 (goto-char (point-min)))))
1631 ;; Must return nil, as build_annotations_2 expects that. 1616 ;; Must return nil, as build_annotations_2 expects that.
1632 nil) 1617 nil)
diff --git a/src/ChangeLog b/src/ChangeLog
index 2b413a7958c..514c42cd922 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,14 @@
12010-08-06 Kenichi Handa <handa@m17n.org>
2
3 * charset.c: Include <stdlib.h>
4 (struct charset_sort_data): New struct.
5 (charset_compare): New function.
6 (Fsort_charsets): New funciton.
7 (syms_of_charset): Declare Fsort_charsets as a Lisp function.
8
9 * coding.c (decode_coding_iso_2022): Fix checking of dimension
10 number in CTEXT extended segment.
11
12010-08-01 Juanma Barranquero <lekktu@gmail.com> 122010-08-01 Juanma Barranquero <lekktu@gmail.com>
2 13
3 * w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring. 14 * w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring.
diff --git a/src/charset.c b/src/charset.c
index 125c9131687..3b45dc348ed 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28#include <config.h> 28#include <config.h>
29 29
30#include <stdio.h> 30#include <stdio.h>
31#include <stdlib.h>
31#include <unistd.h> 32#include <unistd.h>
32#include <ctype.h> 33#include <ctype.h>
33#include <sys/types.h> 34#include <sys/types.h>
@@ -2139,23 +2140,22 @@ that case, find the charset from what supported by that coding system. */)
2139 charset = CHAR_CHARSET (XINT (ch)); 2140 charset = CHAR_CHARSET (XINT (ch));
2140 else 2141 else
2141 { 2142 {
2142 Lisp_Object charset_list;
2143
2144 if (CONSP (restriction)) 2143 if (CONSP (restriction))
2145 { 2144 {
2146 for (charset_list = Qnil; CONSP (restriction); 2145 int c = XFASTINT (ch);
2147 restriction = XCDR (restriction)) 2146
2147 for (; CONSP (restriction); restriction = XCDR (restriction))
2148 { 2148 {
2149 int id; 2149 struct charset *charset;
2150 2150
2151 CHECK_CHARSET_GET_ID (XCAR (restriction), id); 2151 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2152 charset_list = Fcons (make_number (id), charset_list); 2152 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2153 return XCAR (restriction);
2153 } 2154 }
2154 charset_list = Fnreverse (charset_list); 2155 return Qnil;
2155 } 2156 }
2156 else 2157 restriction = coding_system_charset_list (restriction);
2157 charset_list = coding_system_charset_list (restriction); 2158 charset = char_charset (XINT (ch), restriction, NULL);
2158 charset = char_charset (XINT (ch), charset_list, NULL);
2159 if (! charset) 2159 if (! charset)
2160 return Qnil; 2160 return Qnil;
2161 } 2161 }
@@ -2312,6 +2312,69 @@ Return charset identification number of CHARSET. */)
2312 return make_number (id); 2312 return make_number (id);
2313} 2313}
2314 2314
2315struct charset_sort_data
2316{
2317 Lisp_Object charset;
2318 int id;
2319 int priority;
2320};
2321
2322static int
2323charset_compare (const void *d1, const void *d2)
2324{
2325 const struct charset_sort_data *data1 = d1, *data2 = d2;
2326 return (data1->priority - data2->priority);
2327}
2328
2329DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2330 doc: /* Sort charset list CHARSETS by a priority of each charset.
2331Return the sorted list. CHARSETS is modified by side effects.
2332See also `charset-priority-list' and `set-charset-priority'. */)
2333 (Lisp_Object charsets)
2334{
2335 Lisp_Object len = Flength (charsets);
2336 int n = XFASTINT (len), i, j, done;
2337 Lisp_Object tail, elt, attrs;
2338 struct charset_sort_data *sort_data;
2339 int id, min_id, max_id;
2340 USE_SAFE_ALLOCA;
2341
2342 if (n == 0)
2343 return Qnil;
2344 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2345 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2346 {
2347 elt = XCAR (tail);
2348 CHECK_CHARSET_GET_ATTR (elt, attrs);
2349 sort_data[i].charset = elt;
2350 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2351 if (i == 0)
2352 min_id = max_id = id;
2353 else if (id < min_id)
2354 min_id = id;
2355 else if (id > max_id)
2356 max_id = id;
2357 }
2358 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2359 done < n && CONSP (tail); tail = XCDR (tail), i++)
2360 {
2361 elt = XCAR (tail);
2362 id = XFASTINT (elt);
2363 if (id >= min_id && id <= max_id)
2364 for (j = 0; j < n; j++)
2365 if (sort_data[j].id == id)
2366 {
2367 sort_data[j].priority = i;
2368 done++;
2369 }
2370 }
2371 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2372 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2373 XSETCAR (tail, sort_data[i].charset);
2374 SAFE_FREE ();
2375 return charsets;
2376}
2377
2315 2378
2316void 2379void
2317init_charset () 2380init_charset ()
@@ -2414,6 +2477,7 @@ syms_of_charset ()
2414 defsubr (&Scharset_priority_list); 2477 defsubr (&Scharset_priority_list);
2415 defsubr (&Sset_charset_priority); 2478 defsubr (&Sset_charset_priority);
2416 defsubr (&Scharset_id_internal); 2479 defsubr (&Scharset_id_internal);
2480 defsubr (&Ssort_charsets);
2417 2481
2418 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path, 2482 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2419 doc: /* *List of directories to search for charset map files. */); 2483 doc: /* *List of directories to search for charset map files. */);
diff --git a/src/coding.c b/src/coding.c
index bdc37cb7c53..aef80f5cc80 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -3935,7 +3935,7 @@ decode_coding_iso_2022 (coding)
3935 int size; 3935 int size;
3936 3936
3937 ONE_MORE_BYTE (dim); 3937 ONE_MORE_BYTE (dim);
3938 if (dim < 0 || dim > 4) 3938 if (dim < '0' || dim > '4')
3939 goto invalid_code; 3939 goto invalid_code;
3940 ONE_MORE_BYTE (M); 3940 ONE_MORE_BYTE (M);
3941 if (M < 128) 3941 if (M < 128)