diff options
| author | Kenichi Handa | 2010-08-06 17:11:19 +0900 |
|---|---|---|
| committer | Kenichi Handa | 2010-08-06 17:11:19 +0900 |
| commit | 7a84eee5b745ad577d414158716ffe2ee487a117 (patch) | |
| tree | 8941277791414c9648b79f304b5d27de0e46eb76 | |
| parent | faa28da9b740a4b5f297fc215d79a66d71bf6f78 (diff) | |
| download | emacs-7a84eee5b745ad577d414158716ffe2ee487a117.tar.gz emacs-7a84eee5b745ad577d414158716ffe2ee487a117.zip | |
Improve the encoding by compound-text-with-extensions.
| -rw-r--r-- | lisp/ChangeLog | 11 | ||||
| -rw-r--r-- | lisp/international/mule.el | 111 | ||||
| -rw-r--r-- | src/ChangeLog | 11 | ||||
| -rw-r--r-- | src/charset.c | 86 | ||||
| -rw-r--r-- | src/coding.c | 2 |
5 files changed, 146 insertions, 75 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 149de6629f0..1b2f0ebd99e 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,14 @@ | |||
| 1 | 2010-08-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * international/mule.el (define-charset): Store NAME as :base | ||
| 4 | property. | ||
| 5 | (ctext-non-standard-encodings-table): Pay attention to charset | ||
| 6 | aliases. | ||
| 7 | (ctext-pre-write-conversion): Sort ctext-standard-encodings by the | ||
| 8 | current priority. Force using the designation of the specific | ||
| 9 | charset by adding `charset' text property. Improve the whole | ||
| 10 | algorithm. | ||
| 11 | |||
| 1 | 2010-08-04 Kenichi Handa <handa@m17n.org> | 12 | 2010-08-04 Kenichi Handa <handa@m17n.org> |
| 2 | 13 | ||
| 3 | * language/cyrillic.el: Don't add "microsoft-cp1251" to | 14 | * language/cyrillic.el: Don't add "microsoft-cp1251" to |
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index e030acbef02..105163a5d11 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el | |||
| @@ -282,6 +282,7 @@ attribute." | |||
| 282 | (plist-put props :short-name (symbol-name name))) | 282 | (plist-put props :short-name (symbol-name name))) |
| 283 | (or (plist-get props :long-name) | 283 | (or (plist-get props :long-name) |
| 284 | (plist-put props :long-name (plist-get props :short-name))) | 284 | (plist-put props :long-name (plist-get props :short-name))) |
| 285 | (plist-put props :base name) | ||
| 285 | ;; We can probably get a worthwhile amount in purespace. | 286 | ;; We can probably get a worthwhile amount in purespace. |
| 286 | (setq props | 287 | (setq props |
| 287 | (mapcar (lambda (elt) | 288 | (mapcar (lambda (elt) |
| @@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.") | |||
| 1535 | (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) | 1536 | (let* ((slot (assoc elt ctext-non-standard-encodings-alist)) |
| 1536 | (charset (nth 3 slot))) | 1537 | (charset (nth 3 slot))) |
| 1537 | (if (charsetp charset) | 1538 | (if (charsetp charset) |
| 1538 | (setcar tail (cons charset slot)) | 1539 | (setcar tail |
| 1540 | (cons (plist-get (charset-plist charset) :base) slot)) | ||
| 1539 | (setcar tail (cons (car charset) slot)) | 1541 | (setcar tail (cons (car charset) slot)) |
| 1540 | (dolist (cs (cdr charset)) | 1542 | (dolist (cs (cdr charset)) |
| 1541 | (setcdr tail | 1543 | (setcdr tail |
| 1542 | (cons (cons (car cs) slot) (cdr tail))) | 1544 | (cons (cons (plist-get (charset-plist (car cs)) :base) slot) |
| 1545 | (cdr tail))) | ||
| 1543 | (setq tail (cdr tail)))) | 1546 | (setq tail (cdr tail)))) |
| 1544 | (setq tail (cdr tail)))) | 1547 | (setq tail (cdr tail)))) |
| 1545 | table)) | 1548 | table)) |
| @@ -1559,74 +1562,56 @@ in-place." | |||
| 1559 | (setq from 1 to (point-max))) | 1562 | (setq from 1 to (point-max))) |
| 1560 | (save-restriction | 1563 | (save-restriction |
| 1561 | (narrow-to-region from to) | 1564 | (narrow-to-region from to) |
| 1565 | (goto-char from) | ||
| 1562 | (let ((encoding-table (ctext-non-standard-encodings-table)) | 1566 | (let ((encoding-table (ctext-non-standard-encodings-table)) |
| 1563 | (charset-list ctext-standard-encodings) | 1567 | (charset-list (sort-charsets |
| 1568 | (copy-sequence ctext-standard-encodings))) | ||
| 1569 | (end-pos (make-marker)) | ||
| 1564 | last-coding-system-used | 1570 | last-coding-system-used |
| 1565 | last-pos last-encoding-info | 1571 | last-pos charset encoding-info) |
| 1566 | encoding-info end-pos ch charset) | ||
| 1567 | (dolist (elt encoding-table) | 1572 | (dolist (elt encoding-table) |
| 1568 | (push (car elt) charset-list)) | 1573 | (push (car elt) charset-list)) |
| 1569 | (goto-char (setq last-pos from)) | ||
| 1570 | (setq end-pos (point-marker)) | 1574 | (setq end-pos (point-marker)) |
| 1571 | (while (re-search-forward "[^\000-\177]+" nil t) | 1575 | (while (re-search-forward "[^\0-\177]+" nil t) |
| 1572 | ;; Found a sequence of non-ASCII characters. | 1576 | ;; Found a sequence of non-ASCII characters. |
| 1573 | (setq last-pos (match-beginning 0) | ||
| 1574 | ch (char-after last-pos) | ||
| 1575 | charset (char-charset ch charset-list) | ||
| 1576 | last-encoding-info | ||
| 1577 | (if charset | ||
| 1578 | (or (cdr (assq charset encoding-table)) | ||
| 1579 | charset) | ||
| 1580 | 'utf-8)) | ||
| 1581 | (set-marker end-pos (match-end 0)) | 1577 | (set-marker end-pos (match-end 0)) |
| 1582 | (goto-char (1+ last-pos)) | 1578 | (goto-char (match-beginning 0)) |
| 1583 | (while (marker-position end-pos) | 1579 | (setq last-pos (point) |
| 1584 | (if (< (point) end-pos) | 1580 | charset (char-charset (following-char) charset-list)) |
| 1585 | (progn | 1581 | (forward-char 1) |
| 1586 | (setq charset (char-charset (following-char) charset-list) | 1582 | (while (and (< (point) end-pos) |
| 1587 | encoding-info | 1583 | (eq charset (char-charset (following-char) charset-list))) |
| 1588 | (if charset | 1584 | (forward-char 1)) |
| 1589 | (or (cdr (assq charset encoding-table)) | 1585 | (if charset |
| 1590 | charset) | 1586 | (if (setq encoding-info (cdr (assq charset encoding-table))) |
| 1591 | 'utf-8)) | 1587 | ;; Encode this range using an extended segment. |
| 1592 | (forward-char 1)) | 1588 | (let ((encoding-name (car encoding-info)) |
| 1593 | (setq encoding-info nil) | 1589 | (coding-system (nth 1 encoding-info)) |
| 1594 | (set-marker end-pos nil)) | 1590 | (noctets (nth 2 encoding-info)) |
| 1595 | (unless (eq last-encoding-info encoding-info) | 1591 | len) |
| 1596 | (cond ((consp last-encoding-info) | 1592 | (encode-coding-region last-pos (point) coding-system) |
| 1597 | ;; Encode the previous range using an extended | 1593 | (setq len (+ (length encoding-name) 1 |
| 1598 | ;; segment. | 1594 | (- (point) last-pos))) |
| 1599 | (let ((encoding-name (car last-encoding-info)) | 1595 | ;; According to the spec of CTEXT, it is not |
| 1600 | (coding-system (nth 1 last-encoding-info)) | 1596 | ;; necessary to produce this extra designation |
| 1601 | (noctets (nth 2 last-encoding-info)) | 1597 | ;; sequence, but some buggy application |
| 1602 | len) | 1598 | ;; (e.g. crxvt-gb) requires it. |
| 1603 | (encode-coding-region last-pos (point) coding-system) | 1599 | (insert "\e(B") |
| 1604 | (setq len (+ (length encoding-name) 1 | 1600 | (save-excursion |
| 1605 | (- (point) last-pos))) | 1601 | (goto-char last-pos) |
| 1606 | ;; According to the spec of CTEXT, it is not | 1602 | (insert (format "\e%%/%d" noctets)) |
| 1607 | ;; necessary to produce this extra designation | 1603 | (insert-byte (+ (/ len 128) 128) 1) |
| 1608 | ;; sequence, but some buggy application | 1604 | (insert-byte (+ (% len 128) 128) 1) |
| 1609 | ;; (e.g. crxvt-gb) requires it. | 1605 | (insert encoding-name) |
| 1610 | (insert "\e(B") | 1606 | (insert 2))) |
| 1611 | (save-excursion | 1607 | ;; Encode this range as characters in CHARSET. |
| 1612 | (goto-char last-pos) | 1608 | (put-text-property last-pos (point) 'charset charset)) |
| 1613 | (insert (format "\e%%/%d" noctets)) | 1609 | ;; Encode this range using UTF-8 encoding extention. |
| 1614 | (insert-byte (+ (/ len 128) 128) 1) | 1610 | (encode-coding-region last-pos (point) 'mule-utf-8) |
| 1615 | (insert-byte (+ (% len 128) 128) 1) | 1611 | (save-excursion |
| 1616 | (insert encoding-name) | 1612 | (goto-char last-pos) |
| 1617 | (insert 2)))) | 1613 | (insert "\e%G")) |
| 1618 | ((eq last-encoding-info 'utf-8) | 1614 | (insert "\e%@"))) |
| 1619 | ;; Encode the previous range using UTF-8 encoding | ||
| 1620 | ;; extention. | ||
| 1621 | (encode-coding-region last-pos (point) 'mule-utf-8) | ||
| 1622 | (save-excursion | ||
| 1623 | (goto-char last-pos) | ||
| 1624 | (insert "\e%G")) | ||
| 1625 | (insert "\e%@")) | ||
| 1626 | (t | ||
| 1627 | (put-text-property last-pos (point) 'charset charset))) | ||
| 1628 | (setq last-pos (point) | ||
| 1629 | last-encoding-info encoding-info)))) | ||
| 1630 | (goto-char (point-min))))) | 1615 | (goto-char (point-min))))) |
| 1631 | ;; Must return nil, as build_annotations_2 expects that. | 1616 | ;; Must return nil, as build_annotations_2 expects that. |
| 1632 | nil) | 1617 | nil) |
diff --git a/src/ChangeLog b/src/ChangeLog index 2b413a7958c..514c42cd922 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,14 @@ | |||
| 1 | 2010-08-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * charset.c: Include <stdlib.h> | ||
| 4 | (struct charset_sort_data): New struct. | ||
| 5 | (charset_compare): New function. | ||
| 6 | (Fsort_charsets): New funciton. | ||
| 7 | (syms_of_charset): Declare Fsort_charsets as a Lisp function. | ||
| 8 | |||
| 9 | * coding.c (decode_coding_iso_2022): Fix checking of dimension | ||
| 10 | number in CTEXT extended segment. | ||
| 11 | |||
| 1 | 2010-08-01 Juanma Barranquero <lekktu@gmail.com> | 12 | 2010-08-01 Juanma Barranquero <lekktu@gmail.com> |
| 2 | 13 | ||
| 3 | * w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring. | 14 | * w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring. |
diff --git a/src/charset.c b/src/charset.c index 125c9131687..3b45dc348ed 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 28 | #include <config.h> | 28 | #include <config.h> |
| 29 | 29 | ||
| 30 | #include <stdio.h> | 30 | #include <stdio.h> |
| 31 | #include <stdlib.h> | ||
| 31 | #include <unistd.h> | 32 | #include <unistd.h> |
| 32 | #include <ctype.h> | 33 | #include <ctype.h> |
| 33 | #include <sys/types.h> | 34 | #include <sys/types.h> |
| @@ -2139,23 +2140,22 @@ that case, find the charset from what supported by that coding system. */) | |||
| 2139 | charset = CHAR_CHARSET (XINT (ch)); | 2140 | charset = CHAR_CHARSET (XINT (ch)); |
| 2140 | else | 2141 | else |
| 2141 | { | 2142 | { |
| 2142 | Lisp_Object charset_list; | ||
| 2143 | |||
| 2144 | if (CONSP (restriction)) | 2143 | if (CONSP (restriction)) |
| 2145 | { | 2144 | { |
| 2146 | for (charset_list = Qnil; CONSP (restriction); | 2145 | int c = XFASTINT (ch); |
| 2147 | restriction = XCDR (restriction)) | 2146 | |
| 2147 | for (; CONSP (restriction); restriction = XCDR (restriction)) | ||
| 2148 | { | 2148 | { |
| 2149 | int id; | 2149 | struct charset *charset; |
| 2150 | 2150 | ||
| 2151 | CHECK_CHARSET_GET_ID (XCAR (restriction), id); | 2151 | CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset); |
| 2152 | charset_list = Fcons (make_number (id), charset_list); | 2152 | if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)) |
| 2153 | return XCAR (restriction); | ||
| 2153 | } | 2154 | } |
| 2154 | charset_list = Fnreverse (charset_list); | 2155 | return Qnil; |
| 2155 | } | 2156 | } |
| 2156 | else | 2157 | restriction = coding_system_charset_list (restriction); |
| 2157 | charset_list = coding_system_charset_list (restriction); | 2158 | charset = char_charset (XINT (ch), restriction, NULL); |
| 2158 | charset = char_charset (XINT (ch), charset_list, NULL); | ||
| 2159 | if (! charset) | 2159 | if (! charset) |
| 2160 | return Qnil; | 2160 | return Qnil; |
| 2161 | } | 2161 | } |
| @@ -2312,6 +2312,69 @@ Return charset identification number of CHARSET. */) | |||
| 2312 | return make_number (id); | 2312 | return make_number (id); |
| 2313 | } | 2313 | } |
| 2314 | 2314 | ||
| 2315 | struct charset_sort_data | ||
| 2316 | { | ||
| 2317 | Lisp_Object charset; | ||
| 2318 | int id; | ||
| 2319 | int priority; | ||
| 2320 | }; | ||
| 2321 | |||
| 2322 | static int | ||
| 2323 | charset_compare (const void *d1, const void *d2) | ||
| 2324 | { | ||
| 2325 | const struct charset_sort_data *data1 = d1, *data2 = d2; | ||
| 2326 | return (data1->priority - data2->priority); | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, | ||
| 2330 | doc: /* Sort charset list CHARSETS by a priority of each charset. | ||
| 2331 | Return the sorted list. CHARSETS is modified by side effects. | ||
| 2332 | See also `charset-priority-list' and `set-charset-priority'. */) | ||
| 2333 | (Lisp_Object charsets) | ||
| 2334 | { | ||
| 2335 | Lisp_Object len = Flength (charsets); | ||
| 2336 | int n = XFASTINT (len), i, j, done; | ||
| 2337 | Lisp_Object tail, elt, attrs; | ||
| 2338 | struct charset_sort_data *sort_data; | ||
| 2339 | int id, min_id, max_id; | ||
| 2340 | USE_SAFE_ALLOCA; | ||
| 2341 | |||
| 2342 | if (n == 0) | ||
| 2343 | return Qnil; | ||
| 2344 | SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n); | ||
| 2345 | for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) | ||
| 2346 | { | ||
| 2347 | elt = XCAR (tail); | ||
| 2348 | CHECK_CHARSET_GET_ATTR (elt, attrs); | ||
| 2349 | sort_data[i].charset = elt; | ||
| 2350 | sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs)); | ||
| 2351 | if (i == 0) | ||
| 2352 | min_id = max_id = id; | ||
| 2353 | else if (id < min_id) | ||
| 2354 | min_id = id; | ||
| 2355 | else if (id > max_id) | ||
| 2356 | max_id = id; | ||
| 2357 | } | ||
| 2358 | for (done = 0, tail = Vcharset_ordered_list, i = 0; | ||
| 2359 | done < n && CONSP (tail); tail = XCDR (tail), i++) | ||
| 2360 | { | ||
| 2361 | elt = XCAR (tail); | ||
| 2362 | id = XFASTINT (elt); | ||
| 2363 | if (id >= min_id && id <= max_id) | ||
| 2364 | for (j = 0; j < n; j++) | ||
| 2365 | if (sort_data[j].id == id) | ||
| 2366 | { | ||
| 2367 | sort_data[j].priority = i; | ||
| 2368 | done++; | ||
| 2369 | } | ||
| 2370 | } | ||
| 2371 | qsort (sort_data, n, sizeof *sort_data, charset_compare); | ||
| 2372 | for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++) | ||
| 2373 | XSETCAR (tail, sort_data[i].charset); | ||
| 2374 | SAFE_FREE (); | ||
| 2375 | return charsets; | ||
| 2376 | } | ||
| 2377 | |||
| 2315 | 2378 | ||
| 2316 | void | 2379 | void |
| 2317 | init_charset () | 2380 | init_charset () |
| @@ -2414,6 +2477,7 @@ syms_of_charset () | |||
| 2414 | defsubr (&Scharset_priority_list); | 2477 | defsubr (&Scharset_priority_list); |
| 2415 | defsubr (&Sset_charset_priority); | 2478 | defsubr (&Sset_charset_priority); |
| 2416 | defsubr (&Scharset_id_internal); | 2479 | defsubr (&Scharset_id_internal); |
| 2480 | defsubr (&Ssort_charsets); | ||
| 2417 | 2481 | ||
| 2418 | DEFVAR_LISP ("charset-map-path", &Vcharset_map_path, | 2482 | DEFVAR_LISP ("charset-map-path", &Vcharset_map_path, |
| 2419 | doc: /* *List of directories to search for charset map files. */); | 2483 | doc: /* *List of directories to search for charset map files. */); |
diff --git a/src/coding.c b/src/coding.c index bdc37cb7c53..aef80f5cc80 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -3935,7 +3935,7 @@ decode_coding_iso_2022 (coding) | |||
| 3935 | int size; | 3935 | int size; |
| 3936 | 3936 | ||
| 3937 | ONE_MORE_BYTE (dim); | 3937 | ONE_MORE_BYTE (dim); |
| 3938 | if (dim < 0 || dim > 4) | 3938 | if (dim < '0' || dim > '4') |
| 3939 | goto invalid_code; | 3939 | goto invalid_code; |
| 3940 | ONE_MORE_BYTE (M); | 3940 | ONE_MORE_BYTE (M); |
| 3941 | if (M < 128) | 3941 | if (M < 128) |