diff options
| author | Kenichi Handa | 1999-12-15 00:06:45 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1999-12-15 00:06:45 +0000 |
| commit | ec6d2bb84babd6588b3da8d205ff7bea64e812d7 (patch) | |
| tree | 7a2ccd313b2a191f51b1b446caf6c46c59b7ab47 /src/coding.c | |
| parent | 279d9f7b8d033be6b1b5bd3ef6d776513d66db12 (diff) | |
| download | emacs-ec6d2bb84babd6588b3da8d205ff7bea64e812d7.tar.gz emacs-ec6d2bb84babd6588b3da8d205ff7bea64e812d7.zip | |
Include composite.h.
(DECODE_CHARACTER_ASCII): Don't handle composition here.
(DECODE_CHARACTER_DIMENSION1): Likewise. Don't check the validity
of multibyte code here.
(DECODE_CHARACTER_DIMENSION2): Likewise.
(detect_coding_emacs_mule): Change the case label from
EMACS_leading_code_composition to 0x80.
(detect_coding_iso2022): Handle new composition sequence.
(DECODE_ISO_CHARACTER): Likewise.
(check_composing_code): Deleted.
(coding_allocate_composition_data): New function.
(CODING_ADD_COMPOSITION_START) (CODING_ADD_COMPOSITION_END)
(CODING_ADD_COMPOSITION_COMPONENT) (DECODE_COMPOSITION_START)
(DECODE_COMPOSITION_END) (DECODE_COMPOSITION_RULE): New macros.
(decode_coding_iso2022): Handle new composition sequence.
(ENCODE_ISO_CHARACTER): Don't check composition here.
(ENCODE_COMPOSITION_RULE) (ENCODE_COMPOSITION_START): New macros.
(ENCODE_COMPOSITION_NO_RULE_START)
(ENCODE_COMPOSITION_WITH_RULE_START): Deleted.
(ENCODE_COMPOSITION_END): Handle new composition sequence.
(ENCODE_COMPOSITION_FAKE_START): New macro.
(encode_coding_iso2022): Handle new composition sequence.
(ENCODE_SJIS_BIG5_CHARACTER): Delete superfluous `;' at the tail.
(encode_coding_sjis_big5): Ignore composition.
(setup_coding_system): Initialize new members of struct
coding_system. Enable composition only when the coding system has
`composition' property t.
(coding_free_composition_data) (coding_adjust_composition_offset)
(coding_save_composition) (coding_restore_composition): New
functions.
(code_convert_region): Call coding_save_composition for encoding
and coding_allocate_composition_data for decoding. Don't skip
ASCII characters if we handle composition on encoding. Call
signal_after_change with Check_BORDER.
(code_convert_string): Call coding_save_composition for encoding
and coding_allocate_composition_data for decoding. Don't skip
ASCII characters if we handle composition on encoding.
(code_convert_string1): Set Vlast_coding_system_used after calling
code_convert_string.
(code_convert_string_norecord): Disable composition.
(Fset_terminal_coding_system_internal): Likewise.
(Fset_safe_terminal_coding_system_internal): Likewise.
(Fset_keyboard_coding_system_internal): Likewise.
(init_coding_once): Set emacs_code_class[0x80] to
EMACS_invalid_code.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 976 |
1 files changed, 641 insertions, 335 deletions
diff --git a/src/coding.c b/src/coding.c index 05fe37fcabf..14fb12dc344 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -213,22 +213,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 213 | 213 | ||
| 214 | /* Decode one ASCII character C. */ | 214 | /* Decode one ASCII character C. */ |
| 215 | 215 | ||
| 216 | #define DECODE_CHARACTER_ASCII(c) \ | 216 | #define DECODE_CHARACTER_ASCII(c) \ |
| 217 | do { \ | 217 | do { \ |
| 218 | if (COMPOSING_P (coding->composing)) \ | 218 | *dst++ = (c) & 0x7F; \ |
| 219 | { \ | 219 | coding->produced_char++; \ |
| 220 | *dst++ = 0xA0, *dst++ = (c) | 0x80; \ | ||
| 221 | coding->composed_chars++; \ | ||
| 222 | if (((c) | 0x80) < 0xA0) \ | ||
| 223 | coding->fake_multibyte = 1; \ | ||
| 224 | } \ | ||
| 225 | else \ | ||
| 226 | { \ | ||
| 227 | /* If ASCII charset is invoked to GR, \ | ||
| 228 | we must reset MSB now. */ \ | ||
| 229 | *dst++ = (c) & 0x7F; \ | ||
| 230 | coding->produced_char++; \ | ||
| 231 | } \ | ||
| 232 | } while (0) | 220 | } while (0) |
| 233 | 221 | ||
| 234 | /* Decode one DIMENSION1 character whose charset is CHARSET and whose | 222 | /* Decode one DIMENSION1 character whose charset is CHARSET and whose |
| @@ -237,21 +225,12 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 237 | #define DECODE_CHARACTER_DIMENSION1(charset, c) \ | 225 | #define DECODE_CHARACTER_DIMENSION1(charset, c) \ |
| 238 | do { \ | 226 | do { \ |
| 239 | unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ | 227 | unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ |
| 240 | if (COMPOSING_P (coding->composing)) \ | 228 | \ |
| 241 | { \ | 229 | *dst++ = leading_code; \ |
| 242 | *dst++ = leading_code + 0x20; \ | 230 | if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0) \ |
| 243 | coding->composed_chars++; \ | ||
| 244 | } \ | ||
| 245 | else \ | ||
| 246 | { \ | ||
| 247 | *dst++ = leading_code; \ | ||
| 248 | coding->produced_char++; \ | ||
| 249 | } \ | ||
| 250 | if (leading_code = CHARSET_LEADING_CODE_EXT (charset)) \ | ||
| 251 | *dst++ = leading_code; \ | 231 | *dst++ = leading_code; \ |
| 252 | *dst++ = (c) | 0x80; \ | 232 | *dst++ = (c) | 0x80; \ |
| 253 | if (((c) | 0x80) < 0xA0) \ | 233 | coding->produced_char++; \ |
| 254 | coding->fake_multibyte = 1; \ | ||
| 255 | } while (0) | 234 | } while (0) |
| 256 | 235 | ||
| 257 | /* Decode one DIMENSION2 character whose charset is CHARSET and whose | 236 | /* Decode one DIMENSION2 character whose charset is CHARSET and whose |
| @@ -261,8 +240,6 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 261 | do { \ | 240 | do { \ |
| 262 | DECODE_CHARACTER_DIMENSION1 (charset, c1); \ | 241 | DECODE_CHARACTER_DIMENSION1 (charset, c1); \ |
| 263 | *dst++ = (c2) | 0x80; \ | 242 | *dst++ = (c2) | 0x80; \ |
| 264 | if (((c2) | 0x80) < 0xA0) \ | ||
| 265 | coding->fake_multibyte = 1; \ | ||
| 266 | } while (0) | 243 | } while (0) |
| 267 | 244 | ||
| 268 | 245 | ||
| @@ -279,6 +256,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 279 | #include "lisp.h" | 256 | #include "lisp.h" |
| 280 | #include "buffer.h" | 257 | #include "buffer.h" |
| 281 | #include "charset.h" | 258 | #include "charset.h" |
| 259 | #include "composite.h" | ||
| 282 | #include "ccl.h" | 260 | #include "ccl.h" |
| 283 | #include "coding.h" | 261 | #include "coding.h" |
| 284 | #include "window.h" | 262 | #include "window.h" |
| @@ -438,19 +416,10 @@ static int inhibit_pre_post_conversion; | |||
| 438 | the range 0xA0 through 0xFF. See `charset.h' for more details | 416 | the range 0xA0 through 0xFF. See `charset.h' for more details |
| 439 | about leading-code and position-code. | 417 | about leading-code and position-code. |
| 440 | 418 | ||
| 441 | There's one exception to this rule. Special leading-code | ||
| 442 | `leading-code-composition' denotes that the following several | ||
| 443 | characters should be composed into one character. Leading-codes of | ||
| 444 | components (except for ASCII) are added 0x20. An ASCII character | ||
| 445 | component is represented by a 2-byte sequence of `0xA0' and | ||
| 446 | `ASCII-code + 0x80'. See also the comments in `charset.h' for the | ||
| 447 | details of composite character. Hence, we can summarize the code | ||
| 448 | range as follows: | ||
| 449 | |||
| 450 | --- CODE RANGE of Emacs' internal format --- | 419 | --- CODE RANGE of Emacs' internal format --- |
| 451 | (character set) (range) | 420 | (character set) (range) |
| 452 | ASCII 0x00 .. 0x7F | 421 | ASCII 0x00 .. 0x7F |
| 453 | ELSE (1st byte) 0x80 .. 0x9F | 422 | ELSE (1st byte) 0x81 .. 0x9F |
| 454 | (rest bytes) 0xA0 .. 0xFF | 423 | (rest bytes) 0xA0 .. 0xFF |
| 455 | --------------------------------------------- | 424 | --------------------------------------------- |
| 456 | 425 | ||
| @@ -505,13 +474,6 @@ detect_coding_emacs_mule (src, src_end) | |||
| 505 | case EMACS_invalid_code: | 474 | case EMACS_invalid_code: |
| 506 | return 0; | 475 | return 0; |
| 507 | 476 | ||
| 508 | case EMACS_leading_code_composition: /* c == 0x80 */ | ||
| 509 | if (composing) | ||
| 510 | CHECK_CODE_RANGE_A0_FF; | ||
| 511 | else | ||
| 512 | composing = 1; | ||
| 513 | break; | ||
| 514 | |||
| 515 | case EMACS_leading_code_4: | 477 | case EMACS_leading_code_4: |
| 516 | CHECK_CODE_RANGE_A0_FF; | 478 | CHECK_CODE_RANGE_A0_FF; |
| 517 | /* fall down to check it two more times ... */ | 479 | /* fall down to check it two more times ... */ |
| @@ -524,6 +486,13 @@ detect_coding_emacs_mule (src, src_end) | |||
| 524 | CHECK_CODE_RANGE_A0_FF; | 486 | CHECK_CODE_RANGE_A0_FF; |
| 525 | break; | 487 | break; |
| 526 | 488 | ||
| 489 | case 0x80: /* Old leading code for a composite character. */ | ||
| 490 | if (composing) | ||
| 491 | CHECK_CODE_RANGE_A0_FF; | ||
| 492 | else | ||
| 493 | composing = 1; | ||
| 494 | break; | ||
| 495 | |||
| 527 | default: | 496 | default: |
| 528 | label_end_of_switch: | 497 | label_end_of_switch: |
| 529 | break; | 498 | break; |
| @@ -683,10 +652,30 @@ detect_coding_emacs_mule (src, src_end) | |||
| 683 | abbreviated to the escape sequence ESC '[' in a 7-bit environment. | 652 | abbreviated to the escape sequence ESC '[' in a 7-bit environment. |
| 684 | 653 | ||
| 685 | Character composition specification takes the following form: | 654 | Character composition specification takes the following form: |
| 686 | o ESC '0' -- start character composition | 655 | o ESC '0' -- start relative composition |
| 687 | o ESC '1' -- end character composition | 656 | o ESC '1' -- end composition |
| 657 | o ESC '2' -- start rule-base composition (*) | ||
| 658 | o ESC '3' -- start relative composition with alternate chars (**) | ||
| 659 | o ESC '4' -- start rule-base composition with alternate chars (**) | ||
| 688 | Since these are not standard escape sequences of any ISO standard, | 660 | Since these are not standard escape sequences of any ISO standard, |
| 689 | the use of them for these meaning is restricted to Emacs only. */ | 661 | the use of them for these meaning is restricted to Emacs only. |
| 662 | |||
| 663 | (*) This form is used only in Emacs 20.5 and the older versions, | ||
| 664 | but the newer versions can safely decode it. | ||
| 665 | (**) This form is used only in Emacs 21.1 and the newer versions, | ||
| 666 | and the older versions can't decode it. | ||
| 667 | |||
| | | 668 | Here's a list of example usages of these composition escape | ||
| 669 | sequences (categorized by `enum composition_method'). | ||
| 670 | |||
| 671 | COMPOSITION_RELATIVE: | ||
| 672 | ESC 0 CHAR [ CHAR ] ESC 1 | ||
| | | 673 | COMPOSITION_WITH_RULE: | ||
| 674 | ESC 2 CHAR [ RULE CHAR ] ESC 1 | ||
| 675 | COMPOSITION_WITH_ALTCHARS: | ||
| 676 | ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | ||
| 677 | COMPOSITION_WITH_RULE_ALTCHARS: | ||
| 678 | ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | ||
| 690 | 679 | ||
| 691 | enum iso_code_class_type iso_code_class[256]; | 680 | enum iso_code_class_type iso_code_class[256]; |
| 692 | 681 | ||
| @@ -774,9 +763,12 @@ detect_coding_iso2022 (src, src_end) | |||
| 774 | mask &= CODING_CATEGORY_MASK_ISO_7_ELSE; | 763 | mask &= CODING_CATEGORY_MASK_ISO_7_ELSE; |
| 775 | break; | 764 | break; |
| 776 | } | 765 | } |
| 777 | else if (c == '0' || c == '1' || c == '2') | 766 | else if (c >= '0' && c <= '4') |
| 778 | /* ESC <Fp> for start/end composition. Just ignore. */ | 767 | { |
| 779 | break; | 768 | /* ESC <Fp> for start/end composition. */ |
| 769 | mask_found |= CODING_CATEGORY_MASK_ISO; | ||
| 770 | break; | ||
| 771 | } | ||
| 780 | else | 772 | else |
| 781 | /* Invalid escape sequence. Just ignore. */ | 773 | /* Invalid escape sequence. Just ignore. */ |
| 782 | break; | 774 | break; |
| @@ -914,45 +906,52 @@ detect_coding_iso2022 (src, src_end) | |||
| 914 | code is C1. If dimension of CHARSET is 2, the 2nd position code is | 906 | code is C1. If dimension of CHARSET is 2, the 2nd position code is |
| 915 | fetched from SRC and set to C2. If CHARSET is negative, it means | 907 | fetched from SRC and set to C2. If CHARSET is negative, it means |
| 916 | that we are decoding ill formed text, and what we can do is just to | 908 | that we are decoding ill formed text, and what we can do is just to |
| 917 | read C1 as is. */ | 909 | read C1 as is. |
| 918 | 910 | ||
| 919 | #define DECODE_ISO_CHARACTER(charset, c1) \ | 911 | If we are now in the middle of composition sequence, the decoded |
| 920 | do { \ | 912 | character may be ALTCHAR (see the comment above). In that case, |
| 921 | int c_alt, charset_alt = (charset); \ | 913 | the character goes to coding->cmp_data->data instead of DST. */ |
| 922 | if (COMPOSING_HEAD_P (coding->composing)) \ | 914 | |
| 923 | { \ | 915 | #define DECODE_ISO_CHARACTER(charset, c1) \ |
| 924 | *dst++ = LEADING_CODE_COMPOSITION; \ | 916 | do { \ |
| 925 | if (COMPOSING_WITH_RULE_P (coding->composing)) \ | 917 | int c_alt = -1, charset_alt = (charset); \ |
| 926 | /* To tell composition rules are embeded. */ \ | 918 | if (charset_alt >= 0) \ |
| 927 | *dst++ = 0xFF; \ | 919 | { \ |
| 928 | coding->composing += 2; \ | 920 | if (CHARSET_DIMENSION (charset_alt) == 2) \ |
| 929 | } \ | 921 | { \ |
| 930 | if (charset_alt >= 0) \ | 922 | ONE_MORE_BYTE (c2); \ |
| 931 | { \ | 923 | if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ |
| 932 | if (CHARSET_DIMENSION (charset_alt) == 2) \ | 924 | && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ |
| 933 | { \ | 925 | { \ |
| 934 | ONE_MORE_BYTE (c2); \ | 926 | src--; \ |
| 935 | if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ | 927 | charset_alt = CHARSET_ASCII; \ |
| 936 | && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ | 928 | } \ |
| 937 | { \ | 929 | } \ |
| 938 | src--; \ | 930 | if (!NILP (translation_table) \ |
| 939 | charset_alt = CHARSET_ASCII; \ | 931 | && ((c_alt = translate_char (translation_table, \ |
| 940 | } \ | ||
| 941 | } \ | ||
| 942 | if (!NILP (translation_table) \ | ||
| 943 | && ((c_alt = translate_char (translation_table, \ | ||
| 944 | -1, charset_alt, c1, c2)) >= 0)) \ | 932 | -1, charset_alt, c1, c2)) >= 0)) \ |
| 945 | SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | 933 | SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ |
| 946 | } \ | 934 | } \ |
| 947 | if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ | 935 | if (! COMPOSING_P (coding) \ |
| 948 | DECODE_CHARACTER_ASCII (c1); \ | 936 | || coding->composing == COMPOSITION_RELATIVE \ |
| 949 | else if (CHARSET_DIMENSION (charset_alt) == 1) \ | 937 | || coding->composing == COMPOSITION_WITH_RULE) \ |
| 950 | DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ | 938 | { \ |
| 951 | else \ | 939 | if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ |
| 952 | DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | 940 | DECODE_CHARACTER_ASCII (c1); \ |
| 953 | if (COMPOSING_WITH_RULE_P (coding->composing)) \ | 941 | else if (CHARSET_DIMENSION (charset_alt) == 1) \ |
| 954 | /* To tell a composition rule follows. */ \ | 942 | DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ |
| 955 | coding->composing = COMPOSING_WITH_RULE_RULE; \ | 943 | else \ |
| 944 | DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | ||
| 945 | } \ | ||
| 946 | if (COMPOSING_P (coding) \ | ||
| 947 | && coding->composing != COMPOSITION_RELATIVE) \ | ||
| 948 | { \ | ||
| 949 | if (c_alt < 0) \ | ||
| 950 | c_alt = MAKE_CHAR (charset_alt, c1, c2); \ | ||
| 951 | CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt); \ | ||
| 952 | coding->composition_rule_follows \ | ||
| 953 | = coding->composing != COMPOSITION_WITH_ALTCHARS; \ | ||
| 954 | } \ | ||
| 956 | } while (0) | 955 | } while (0) |
| 957 | 956 | ||
| 958 | /* Set designation state into CODING. */ | 957 | /* Set designation state into CODING. */ |
| @@ -991,50 +990,144 @@ detect_coding_iso2022 (src, src_end) | |||
| 991 | } \ | 990 | } \ |
| 992 | } while (0) | 991 | } while (0) |
| 993 | 992 | ||
| 994 | /* Return 0 if there's a valid composing sequence starting at SRC and | 993 | /* Allocate a memory block for storing information about compositions. |
| 995 | ending before SRC_END, else return -1. */ | 994 | The block is chained to the already allocated blocks. */ |
| 996 | 995 | ||
| 997 | int | 996 | static void |
| 998 | check_composing_code (coding, src, src_end) | 997 | coding_allocate_composition_data (coding, char_offset) |
| 999 | struct coding_system *coding; | 998 | struct coding_system *coding; |
| 1000 | unsigned char *src, *src_end; | 999 | int char_offset; |
| 1001 | { | 1000 | { |
| 1002 | int charset, c, c1, dim; | 1001 | struct composition_data *cmp_data |
| 1002 | = (struct composition_data *) xmalloc (sizeof *cmp_data); | ||
| 1003 | |||
| 1004 | cmp_data->char_offset = char_offset; | ||
| 1005 | cmp_data->used = 0; | ||
| 1006 | cmp_data->prev = coding->cmp_data; | ||
| 1007 | cmp_data->next = NULL; | ||
| 1008 | if (coding->cmp_data) | ||
| 1009 | coding->cmp_data->next = cmp_data; | ||
| 1010 | coding->cmp_data = cmp_data; | ||
| 1011 | coding->cmp_data_start = 0; | ||
| 1012 | } | ||
| 1003 | 1013 | ||
| 1004 | while (src < src_end) | 1014 | /* Record the starting position START and METHOD of one composition. */ |
| 1005 | { | 1015 | |
| 1006 | c = *src++; | 1016 | #define CODING_ADD_COMPOSITION_START(coding, start, method) \ |
| 1007 | if (c >= 0x20) | 1017 | do { \ |
| 1008 | continue; | 1018 | struct composition_data *cmp_data = coding->cmp_data; \ |
| 1009 | if (c != ISO_CODE_ESC || src >= src_end) | 1019 | int *data = cmp_data->data + cmp_data->used; \ |
| 1010 | return -1; | 1020 | coding->cmp_data_start = cmp_data->used; \ |
| 1011 | c = *src++; | 1021 | data[0] = -1; \ |
| 1012 | if (c == '1') /* end of compsition */ | 1022 | data[1] = cmp_data->char_offset + start; \ |
| 1013 | return 0; | 1023 | data[3] = (int) method; \ |
| 1014 | if (src + 2 >= src_end | 1024 | cmp_data->used += 4; \ |
| 1015 | || !coding->flags & CODING_FLAG_ISO_DESIGNATION) | 1025 | } while (0) |
| 1016 | return -1; | 1026 | |
| 1017 | 1027 | /* Record the ending position END of the current composition. */ | |
| 1018 | dim = (c == '$'); | 1028 | |
| 1019 | if (dim == 1) | 1029 | #define CODING_ADD_COMPOSITION_END(coding, end) \ |
| 1020 | c = (*src >= '@' && *src <= 'B') ? '(' : *src++; | 1030 | do { \ |
| 1021 | if (c >= '(' && c <= '/') | 1031 | struct composition_data *cmp_data = coding->cmp_data; \ |
| 1022 | { | 1032 | int *data = cmp_data->data + coding->cmp_data_start; \ |
| 1023 | c1 = *src++; | 1033 | data[0] = cmp_data->used - coding->cmp_data_start; \ |
| 1024 | if ((c1 < ' ' || c1 >= 0x80) | 1034 | data[2] = cmp_data->char_offset + end; \ |
| 1025 | || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 | 1035 | } while (0) |
| 1026 | || ! coding->safe_charsets[charset] | 1036 | |
| 1027 | || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 1037 | /* Record one COMPONENT (alternate character or composition rule). */ |
| 1028 | == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) | 1038 | |
| 1029 | return -1; | 1039 | #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ |
| 1030 | } | 1040 | (coding->cmp_data->data[coding->cmp_data->used++] = component) |
| 1031 | else | 1041 | |
| 1032 | return -1; | 1042 | /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */ |
| 1033 | } | 1043 | |
| 1044 | #define DECODE_COMPOSITION_START(c1) \ | ||
| 1045 | do { \ | ||
| 1046 | if (coding->composing == COMPOSITION_DISABLED) \ | ||
| 1047 | { \ | ||
| 1048 | *dst++ = ISO_CODE_ESC; \ | ||
| 1049 | *dst++ = c1 & 0x7f; \ | ||
| 1050 | coding->produced_char += 2; \ | ||
| 1051 | } \ | ||
| 1052 | else if (!COMPOSING_P (coding)) \ | ||
| 1053 | { \ | ||
| 1054 | /* This is surely the start of a composition. We must be sure \ | ||
| 1055 | that coding->cmp_data has enough space to store the \ | ||
| 1056 | information about the composition. If not, terminate the \ | ||
| 1057 | current decoding loop, allocate one more memory block for \ | ||
| | | 1058 | coding->cmp_data in the caller, then start the decoding \ | ||
| 1059 | loop again. We can't allocate memory here directly because \ | ||
| 1060 | it may cause buffer/string relocation. */ \ | ||
| 1061 | if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ | ||
| 1062 | >= COMPOSITION_DATA_SIZE) \ | ||
| 1063 | { \ | ||
| 1064 | result = CODING_FINISH_INSUFFICIENT_CMP; \ | ||
| 1065 | goto label_end_of_loop_2; \ | ||
| 1066 | } \ | ||
| 1067 | coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \ | ||
| 1068 | : c1 == '2' ? COMPOSITION_WITH_RULE \ | ||
| 1069 | : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \ | ||
| 1070 | : COMPOSITION_WITH_RULE_ALTCHARS); \ | ||
| 1071 | CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \ | ||
| 1072 | coding->composing); \ | ||
| 1073 | coding->composition_rule_follows = 0; \ | ||
| 1074 | } \ | ||
| 1075 | else \ | ||
| 1076 | { \ | ||
| 1077 | /* We are already handling a composition. If the method is \ | ||
| 1078 | the following two, the codes following the current escape \ | ||
| 1079 | sequence are actual characters stored in a buffer. */ \ | ||
| 1080 | if (coding->composing == COMPOSITION_WITH_ALTCHARS \ | ||
| 1081 | || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \ | ||
| 1082 | { \ | ||
| 1083 | coding->composing = COMPOSITION_RELATIVE; \ | ||
| 1084 | coding->composition_rule_follows = 0; \ | ||
| 1085 | } \ | ||
| 1086 | } \ | ||
| 1087 | } while (0) | ||
| 1088 | |||
| | | 1089 | /* Handle composition end sequence ESC 1. */ | ||
| 1090 | |||
| 1091 | #define DECODE_COMPOSITION_END(c1) \ | ||
| 1092 | do { \ | ||
| 1093 | if (coding->composing == COMPOSITION_DISABLED) \ | ||
| 1094 | { \ | ||
| 1095 | *dst++ = ISO_CODE_ESC; \ | ||
| 1096 | *dst++ = c1; \ | ||
| 1097 | coding->produced_char += 2; \ | ||
| 1098 | } \ | ||
| 1099 | else \ | ||
| 1100 | { \ | ||
| 1101 | CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \ | ||
| 1102 | coding->composing = COMPOSITION_NO; \ | ||
| 1103 | } \ | ||
| 1104 | } while (0) | ||
| 1105 | |||
| 1106 | /* Decode a composition rule from the byte C1 (and maybe one more byte | ||
| 1107 | from SRC) and store one encoded composition rule in | ||
| 1108 | coding->cmp_data. */ | ||
| 1109 | |||
| 1110 | #define DECODE_COMPOSITION_RULE(c1) \ | ||
| 1111 | do { \ | ||
| 1112 | int rule = 0; \ | ||
| 1113 | (c1) -= 32; \ | ||
| 1114 | if (c1 < 81) /* old format (before ver.21) */ \ | ||
| 1115 | { \ | ||
| 1116 | int gref = (c1) / 9; \ | ||
| 1117 | int nref = (c1) % 9; \ | ||
| 1118 | if (gref == 4) gref = 10; \ | ||
| 1119 | if (nref == 4) nref = 10; \ | ||
| 1120 | rule = COMPOSITION_ENCODE_RULE (gref, nref); \ | ||
| 1121 | } \ | ||
| | | 1122 | else if (c1 < 93) /* new format (after ver.21) */ \ | ||
| 1123 | { \ | ||
| 1124 | ONE_MORE_BYTE (c2); \ | ||
| 1125 | rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \ | ||
| 1126 | } \ | ||
| 1127 | CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \ | ||
| 1128 | coding->composition_rule_follows = 0; \ | ||
| 1129 | } while (0) | ||
| 1034 | 1130 | ||
| 1035 | /* We have not found the sequence "ESC 1". */ | ||
| 1036 | return -1; | ||
| 1037 | } | ||
| 1038 | 1131 | ||
| 1039 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 1132 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 1040 | 1133 | ||
| @@ -1077,11 +1170,16 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1077 | unsigned char *src_base = src; | 1170 | unsigned char *src_base = src; |
| 1078 | int c1 = *src++, c2; | 1171 | int c1 = *src++, c2; |
| 1079 | 1172 | ||
| 1173 | /* We produce no character or one character. */ | ||
| 1080 | switch (iso_code_class [c1]) | 1174 | switch (iso_code_class [c1]) |
| 1081 | { | 1175 | { |
| 1082 | case ISO_0x20_or_0x7F: | 1176 | case ISO_0x20_or_0x7F: |
| 1083 | if (!coding->composing | 1177 | if (COMPOSING_P (coding) && coding->composition_rule_follows) |
| 1084 | && (charset0 < 0 || CHARSET_CHARS (charset0) == 94)) | 1178 | { |
| 1179 | DECODE_COMPOSITION_RULE (c1); | ||
| 1180 | break; | ||
| 1181 | } | ||
| 1182 | if (charset0 < 0 || CHARSET_CHARS (charset0) == 94) | ||
| 1085 | { | 1183 | { |
| 1086 | /* This is SPACE or DEL. */ | 1184 | /* This is SPACE or DEL. */ |
| 1087 | *dst++ = c1; | 1185 | *dst++ = c1; |
| @@ -1091,12 +1189,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1091 | /* This is a graphic character, we fall down ... */ | 1189 | /* This is a graphic character, we fall down ... */ |
| 1092 | 1190 | ||
| 1093 | case ISO_graphic_plane_0: | 1191 | case ISO_graphic_plane_0: |
| 1094 | if (coding->composing == COMPOSING_WITH_RULE_RULE) | 1192 | if (COMPOSING_P (coding) && coding->composition_rule_follows) |
| 1095 | { | 1193 | DECODE_COMPOSITION_RULE (c1); |
| 1096 | /* This is a composition rule. */ | ||
| 1097 | *dst++ = c1 | 0x80; | ||
| 1098 | coding->composing = COMPOSING_WITH_RULE_TAIL; | ||
| 1099 | } | ||
| 1100 | else | 1194 | else |
| 1101 | DECODE_ISO_CHARACTER (charset0, c1); | 1195 | DECODE_ISO_CHARACTER (charset0, c1); |
| 1102 | break; | 1196 | break; |
| @@ -1110,11 +1204,13 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1110 | case ISO_graphic_plane_1: | 1204 | case ISO_graphic_plane_1: |
| 1111 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1205 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
| 1112 | goto label_invalid_code; | 1206 | goto label_invalid_code; |
| 1113 | else | 1207 | DECODE_ISO_CHARACTER (charset1, c1); |
| 1114 | DECODE_ISO_CHARACTER (charset1, c1); | ||
| 1115 | break; | 1208 | break; |
| 1116 | 1209 | ||
| 1117 | case ISO_control_code: | 1210 | case ISO_control_code: |
| 1211 | if (COMPOSING_P (coding)) | ||
| 1212 | DECODE_COMPOSITION_END ('1'); | ||
| 1213 | |||
| 1118 | /* All ISO2022 control characters in this class have the | 1214 | /* All ISO2022 control characters in this class have the |
| 1119 | same representation in Emacs internal format. */ | 1215 | same representation in Emacs internal format. */ |
| 1120 | if (c1 == '\n' | 1216 | if (c1 == '\n' |
| @@ -1127,11 +1223,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1127 | } | 1223 | } |
| 1128 | *dst++ = c1; | 1224 | *dst++ = c1; |
| 1129 | coding->produced_char++; | 1225 | coding->produced_char++; |
| 1130 | if (c1 >= 0x80) | ||
| 1131 | coding->fake_multibyte = 1; | ||
| 1132 | break; | 1226 | break; |
| 1133 | 1227 | ||
| 1134 | case ISO_carriage_return: | 1228 | case ISO_carriage_return: |
| 1229 | if (COMPOSING_P (coding)) | ||
| 1230 | DECODE_COMPOSITION_END ('1'); | ||
| 1231 | |||
| 1135 | if (coding->eol_type == CODING_EOL_CR) | 1232 | if (coding->eol_type == CODING_EOL_CR) |
| 1136 | *dst++ = '\n'; | 1233 | *dst++ = '\n'; |
| 1137 | else if (coding->eol_type == CODING_EOL_CRLF) | 1234 | else if (coding->eol_type == CODING_EOL_CRLF) |
| @@ -1265,58 +1362,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1265 | DECODE_ISO_CHARACTER (charset, c1); | 1362 | DECODE_ISO_CHARACTER (charset, c1); |
| 1266 | break; | 1363 | break; |
| 1267 | 1364 | ||
| 1268 | case '0': case '2': /* start composing */ | 1365 | case '0': case '2': case '3': case '4': /* start composition */ |
| 1269 | /* Before processing composing, we must be sure that all | 1366 | DECODE_COMPOSITION_START (c1); |
| 1270 | characters being composed are supported by CODING. | ||
| 1271 | If not, we must give up composing. */ | ||
| 1272 | if (check_composing_code (coding, src, src_end) == 0) | ||
| 1273 | { | ||
| 1274 | /* We are looking at a valid composition sequence. */ | ||
| 1275 | coding->composing = (c1 == '0' | ||
| 1276 | ? COMPOSING_NO_RULE_HEAD | ||
| 1277 | : COMPOSING_WITH_RULE_HEAD); | ||
| 1278 | coding->composed_chars = 0; | ||
| 1279 | } | ||
| 1280 | else | ||
| 1281 | { | ||
| 1282 | *dst++ = ISO_CODE_ESC; | ||
| 1283 | *dst++ = c1; | ||
| 1284 | coding->produced_char += 2; | ||
| 1285 | } | ||
| 1286 | break; | 1367 | break; |
| 1287 | 1368 | ||
| 1288 | case '1': /* end composing */ | 1369 | case '1': /* end composition */ |
| 1289 | if (!coding->composing) | 1370 | DECODE_COMPOSITION_END (c1); |
| 1290 | { | ||
| 1291 | *dst++ = ISO_CODE_ESC; | ||
| 1292 | *dst++ = c1; | ||
| 1293 | coding->produced_char += 2; | ||
| 1294 | break; | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | if (coding->composed_chars > 0) | ||
| 1298 | { | ||
| 1299 | if (coding->composed_chars == 1) | ||
| 1300 | { | ||
| 1301 | unsigned char *this_char_start = dst; | ||
| 1302 | int this_bytes; | ||
| 1303 | |||
| 1304 | /* Only one character is in the composing | ||
| 1305 | sequence. Make it a normal character. */ | ||
| 1306 | while (*--this_char_start != LEADING_CODE_COMPOSITION); | ||
| 1307 | dst = (this_char_start | ||
| 1308 | + (coding->composing == COMPOSING_NO_RULE_TAIL | ||
| 1309 | ? 1 : 2)); | ||
| 1310 | *dst -= 0x20; | ||
| 1311 | if (*dst == 0x80) | ||
| 1312 | *++dst &= 0x7F; | ||
| 1313 | this_bytes = BYTES_BY_CHAR_HEAD (*dst); | ||
| 1314 | while (this_bytes--) *this_char_start++ = *dst++; | ||
| 1315 | dst = this_char_start; | ||
| 1316 | } | ||
| 1317 | coding->produced_char++; | ||
| 1318 | } | ||
| 1319 | coding->composing = COMPOSING_NO; | ||
| 1320 | break; | 1371 | break; |
| 1321 | 1372 | ||
| 1322 | case '[': /* specification of direction */ | 1373 | case '[': /* specification of direction */ |
| @@ -1377,9 +1428,11 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1377 | break; | 1428 | break; |
| 1378 | 1429 | ||
| 1379 | label_invalid_code: | 1430 | label_invalid_code: |
| 1431 | if (COMPOSING_P (coding)) | ||
| 1432 | DECODE_COMPOSITION_END ('1'); | ||
| 1433 | coding->produced_char += src - src_base; | ||
| 1380 | while (src_base < src) | 1434 | while (src_base < src) |
| 1381 | *dst++ = *src_base++; | 1435 | *dst++ = (*src_base++) & 0x7F; |
| 1382 | coding->fake_multibyte = 1; | ||
| 1383 | } | 1436 | } |
| 1384 | continue; | 1437 | continue; |
| 1385 | 1438 | ||
| @@ -1400,13 +1453,14 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1400 | /* This is the last block of the text to be decoded. We had | 1453 | /* This is the last block of the text to be decoded. We had |
| 1401 | better just flush out all remaining codes in the text | 1454 | better just flush out all remaining codes in the text |
| 1402 | although they are not valid characters. */ | 1455 | although they are not valid characters. */ |
| 1456 | if (COMPOSING_P (coding)) | ||
| 1457 | DECODE_COMPOSITION_END ('1'); | ||
| 1403 | src_bytes = src_end - src; | 1458 | src_bytes = src_end - src; |
| 1404 | if (dst_bytes && (dst_end - dst < src_bytes)) | 1459 | if (dst_bytes && (dst_end - dst < src_end - src)) |
| 1405 | src_bytes = dst_end - dst; | 1460 | src_end = src + (dst_end - dst); |
| 1406 | bcopy (src, dst, src_bytes); | 1461 | coding->produced_char += src_end - src; |
| 1407 | dst += src_bytes; | 1462 | while (src < src_end) |
| 1408 | src += src_bytes; | 1463 | *dst++ = (*src++) & 0x7F; |
| 1409 | coding->fake_multibyte = 1; | ||
| 1410 | } | 1464 | } |
| 1411 | } | 1465 | } |
| 1412 | 1466 | ||
| @@ -1624,6 +1678,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1624 | #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ | 1678 | #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ |
| 1625 | do { \ | 1679 | do { \ |
| 1626 | int c_alt, charset_alt; \ | 1680 | int c_alt, charset_alt; \ |
| 1681 | \ | ||
| 1627 | if (!NILP (translation_table) \ | 1682 | if (!NILP (translation_table) \ |
| 1628 | && ((c_alt = translate_char (translation_table, -1, \ | 1683 | && ((c_alt = translate_char (translation_table, -1, \ |
| 1629 | charset, c1, c2)) \ | 1684 | charset, c1, c2)) \ |
| @@ -1665,8 +1720,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1665 | *dst++ = c2; \ | 1720 | *dst++ = c2; \ |
| 1666 | } \ | 1721 | } \ |
| 1667 | } \ | 1722 | } \ |
| 1668 | if (! COMPOSING_P (coding->composing)) \ | 1723 | coding->consumed_char++; \ |
| 1669 | coding->consumed_char++; \ | ||
| 1670 | } while (0) | 1724 | } while (0) |
| 1671 | 1725 | ||
| 1672 | /* Produce designation and invocation codes at a place pointed by DST | 1726 | /* Produce designation and invocation codes at a place pointed by DST |
| @@ -1732,10 +1786,63 @@ encode_invocation_designation (charset, coding, dst) | |||
| 1732 | return dst; | 1786 | return dst; |
| 1733 | } | 1787 | } |
| 1734 | 1788 | ||
| 1735 | /* The following two macros produce codes for indicating composition. */ | 1789 | /* Produce 2-byte codes for encoded composition rule RULE. */ |
| 1736 | #define ENCODE_COMPOSITION_NO_RULE_START *dst++ = ISO_CODE_ESC, *dst++ = '0' | 1790 | |
| 1737 | #define ENCODE_COMPOSITION_WITH_RULE_START *dst++ = ISO_CODE_ESC, *dst++ = '2' | 1791 | #define ENCODE_COMPOSITION_RULE(rule) \ |
| 1738 | #define ENCODE_COMPOSITION_END *dst++ = ISO_CODE_ESC, *dst++ = '1' | 1792 | do { \ |
| 1793 | int gref, nref; \ | ||
| 1794 | COMPOSITION_DECODE_RULE (rule, gref, nref); \ | ||
| 1795 | *dst++ = 32 + 81 + gref; \ | ||
| 1796 | *dst++ = 32 + nref; \ | ||
| 1797 | } while (0) | ||
| 1798 | |||
| 1799 | /* Produce codes for indicating the start of a composition sequence | ||
| 1800 | (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers | ||
| 1801 | which specify information about the composition. See the comment | ||
| 1802 | in coding.h for the format of DATA. */ | ||
| 1803 | |||
| 1804 | #define ENCODE_COMPOSITION_START(coding, data) \ | ||
| 1805 | do { \ | ||
| 1806 | coding->composing = data[3]; \ | ||
| 1807 | *dst++ = ISO_CODE_ESC; \ | ||
| 1808 | if (coding->composing == COMPOSITION_RELATIVE) \ | ||
| 1809 | *dst++ = '0'; \ | ||
| 1810 | else \ | ||
| 1811 | { \ | ||
| 1812 | *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \ | ||
| 1813 | ? '3' : '4'); \ | ||
| 1814 | coding->cmp_data_index = coding->cmp_data_start + 4; \ | ||
| 1815 | coding->composition_rule_follows = 0; \ | ||
| 1816 | } \ | ||
| 1817 | } while (0) | ||
| 1818 | |||
| 1819 | /* Produce codes for indicating the end of the current composition. */ | ||
| 1820 | |||
| 1821 | #define ENCODE_COMPOSITION_END(coding, data) \ | ||
| 1822 | do { \ | ||
| 1823 | *dst++ = ISO_CODE_ESC; \ | ||
| 1824 | *dst++ = '1'; \ | ||
| 1825 | coding->cmp_data_start += data[0]; \ | ||
| 1826 | coding->composing = COMPOSITION_NO; \ | ||
| 1827 | if (coding->cmp_data_start == coding->cmp_data->used \ | ||
| 1828 | && coding->cmp_data->next) \ | ||
| 1829 | { \ | ||
| 1830 | coding->cmp_data = coding->cmp_data->next; \ | ||
| 1831 | coding->cmp_data_start = 0; \ | ||
| 1832 | } \ | ||
| 1833 | } while (0) | ||
| 1834 | |||
| 1835 | /* Produce composition start sequence ESC 0. Here, this sequence | ||
| 1836 | doesn't mean the start of a new composition but means that we have | ||
| 1837 | just produced components (alternate chars and composition rules) of | ||
| 1838 | the composition and the actual text follows in SRC. */ | ||
| 1839 | |||
| 1840 | #define ENCODE_COMPOSITION_FAKE_START(coding) \ | ||
| 1841 | do { \ | ||
| 1842 | *dst++ = ISO_CODE_ESC; \ | ||
| 1843 | *dst++ = '0'; \ | ||
| 1844 | coding->composing = COMPOSITION_RELATIVE; \ | ||
| 1845 | } while (0) | ||
| 1739 | 1846 | ||
| 1740 | /* The following three macros produce codes for indicating direction | 1847 | /* The following three macros produce codes for indicating direction |
| 1741 | of text. */ | 1848 | of text. */ |
| @@ -1836,10 +1943,10 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1836 | unsigned char *src_end = source + src_bytes; | 1943 | unsigned char *src_end = source + src_bytes; |
| 1837 | unsigned char *dst = destination; | 1944 | unsigned char *dst = destination; |
| 1838 | unsigned char *dst_end = destination + dst_bytes; | 1945 | unsigned char *dst_end = destination + dst_bytes; |
| 1839 | /* Since the maximum bytes produced by each loop is 20, we subtract 19 | 1946 | /* Since the maximum bytes produced by each loop is 14, we subtract 13 |
| 1840 | from DST_END to assure overflow checking is necessary only at the | 1947 | from DST_END to assure overflow checking is necessary only at the |
| 1841 | head of loop. */ | 1948 | head of loop. */ |
| 1842 | unsigned char *adjusted_dst_end = dst_end - 19; | 1949 | unsigned char *adjusted_dst_end = dst_end - 13; |
| 1843 | Lisp_Object translation_table | 1950 | Lisp_Object translation_table |
| 1844 | = coding->translation_table_for_encode; | 1951 | = coding->translation_table_for_encode; |
| 1845 | int result = CODING_FINISH_NORMAL; | 1952 | int result = CODING_FINISH_NORMAL; |
| @@ -1851,7 +1958,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1851 | coding->fake_multibyte = 0; | 1958 | coding->fake_multibyte = 0; |
| 1852 | while (src < src_end && (dst_bytes | 1959 | while (src < src_end && (dst_bytes |
| 1853 | ? (dst < adjusted_dst_end) | 1960 | ? (dst < adjusted_dst_end) |
| 1854 | : (dst < src - 19))) | 1961 | : (dst < src - 13))) |
| 1855 | { | 1962 | { |
| 1856 | /* SRC_BASE remembers the start position in source in each loop. | 1963 | /* SRC_BASE remembers the start position in source in each loop. |
| 1857 | The loop will be exited when there's not enough source text | 1964 | The loop will be exited when there's not enough source text |
| @@ -1870,44 +1977,62 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1870 | CODING_SPEC_ISO_BOL (coding) = 0; | 1977 | CODING_SPEC_ISO_BOL (coding) = 0; |
| 1871 | } | 1978 | } |
| 1872 | 1979 | ||
| 1873 | c1 = *src++; | 1980 | /* Check composition start and end. */ |
| 1874 | /* If we are seeing a component of a composite character, we are | 1981 | if (coding->composing != COMPOSITION_DISABLED |
| 1875 | seeing a leading-code encoded irregularly for composition, or | 1982 | && coding->cmp_data_start < coding->cmp_data->used) |
| 1876 | a composition rule if composing with rule. We must set C1 to | ||
| 1877 | a normal leading-code or an ASCII code. If we are not seeing | ||
| 1878 | a composite character, we must reset composition, | ||
| 1879 | designation, and invocation states. */ | ||
| 1880 | if (COMPOSING_P (coding->composing)) | ||
| 1881 | { | 1983 | { |
| 1882 | if (c1 < 0xA0) | 1984 | struct composition_data *cmp_data = coding->cmp_data; |
| 1985 | int *data = cmp_data->data + coding->cmp_data_start; | ||
| 1986 | int this_pos = cmp_data->char_offset + coding->consumed_char; | ||
| 1987 | |||
| 1988 | if (coding->composing == COMPOSITION_RELATIVE) | ||
| 1883 | { | 1989 | { |
| 1884 | /* We are not in a composite character any longer. */ | 1990 | if (this_pos == data[2]) |
| 1885 | coding->composing = COMPOSING_NO; | 1991 | { |
| 1886 | ENCODE_RESET_PLANE_AND_REGISTER; | 1992 | ENCODE_COMPOSITION_END (coding, data); |
| 1887 | ENCODE_COMPOSITION_END; | 1993 | cmp_data = coding->cmp_data; |
| 1994 | data = cmp_data->data + coding->cmp_data_start; | ||
| 1995 | } | ||
| 1888 | } | 1996 | } |
| 1889 | else | 1997 | else if (COMPOSING_P (coding)) |
| 1890 | { | 1998 | { |
| 1891 | if (coding->composing == COMPOSING_WITH_RULE_RULE) | 1999 | /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */ |
| 2000 | if (coding->cmp_data_index == coding->cmp_data_start + data[0]) | ||
| 2001 | /* We have consumed components of the composition. | ||
| 2002 | What follows in SRC is the composition's base | ||
| 2003 | text. */ | ||
| 2004 | ENCODE_COMPOSITION_FAKE_START (coding); | ||
| 2005 | else | ||
| 1892 | { | 2006 | { |
| 1893 | *dst++ = c1 & 0x7F; | 2007 | int c = cmp_data->data[coding->cmp_data_index++]; |
| 1894 | coding->composing = COMPOSING_WITH_RULE_HEAD; | 2008 | if (coding->composition_rule_follows) |
| 2009 | { | ||
| 2010 | ENCODE_COMPOSITION_RULE (c); | ||
| 2011 | coding->composition_rule_follows = 0; | ||
| 2012 | } | ||
| 2013 | else | ||
| 2014 | { | ||
| 2015 | SPLIT_CHAR (c, charset, c1, c2); | ||
| 2016 | ENCODE_ISO_CHARACTER (charset, c1, c2); | ||
| 2017 | /* But, we didn't consume a character in SRC. */ | ||
| 2018 | coding->consumed_char--; | ||
| 2019 | if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) | ||
| 2020 | coding->composition_rule_follows = 1; | ||
| 2021 | } | ||
| 1895 | continue; | 2022 | continue; |
| 1896 | } | 2023 | } |
| 1897 | else if (coding->composing == COMPOSING_WITH_RULE_HEAD) | 2024 | } |
| 1898 | coding->composing = COMPOSING_WITH_RULE_RULE; | 2025 | if (!COMPOSING_P (coding)) |
| 1899 | if (c1 == 0xA0) | 2026 | { |
| 2027 | if (this_pos == data[1]) | ||
| 1900 | { | 2028 | { |
| 1901 | /* This is an ASCII component. */ | 2029 | ENCODE_COMPOSITION_START (coding, data); |
| 1902 | ONE_MORE_BYTE (c1); | 2030 | continue; |
| 1903 | c1 &= 0x7F; | ||
| 1904 | } | 2031 | } |
| 1905 | else | ||
| 1906 | /* This is a leading-code of non ASCII component. */ | ||
| 1907 | c1 -= 0x20; | ||
| 1908 | } | 2032 | } |
| 1909 | } | 2033 | } |
| 1910 | 2034 | ||
| 2035 | c1 = *src++; | ||
| 1911 | /* Now encode one character. C1 is a control character, an | 2036 | /* Now encode one character. C1 is a control character, an |
| 1912 | ASCII character, or a leading-code of multi-byte character. */ | 2037 | ASCII character, or a leading-code of multi-byte character. */ |
| 1913 | switch (emacs_code_class[c1]) | 2038 | switch (emacs_code_class[c1]) |
| @@ -1996,34 +2121,6 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1996 | ENCODE_ISO_CHARACTER (c2, c3, c4); | 2121 | ENCODE_ISO_CHARACTER (c2, c3, c4); |
| 1997 | break; | 2122 | break; |
| 1998 | 2123 | ||
| 1999 | case EMACS_leading_code_composition: | ||
| 2000 | ONE_MORE_BYTE (c2); | ||
| 2001 | if (c2 < 0xA0) | ||
| 2002 | { | ||
| 2003 | /* invalid sequence */ | ||
| 2004 | *dst++ = c1; | ||
| 2005 | src--; | ||
| 2006 | coding->consumed_char++; | ||
| 2007 | } | ||
| 2008 | else if (c2 == 0xFF) | ||
| 2009 | { | ||
| 2010 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 2011 | coding->composing = COMPOSING_WITH_RULE_HEAD; | ||
| 2012 | ENCODE_COMPOSITION_WITH_RULE_START; | ||
| 2013 | coding->consumed_char++; | ||
| 2014 | } | ||
| 2015 | else | ||
| 2016 | { | ||
| 2017 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 2018 | /* Rewind one byte because it is a character code of | ||
| 2019 | composition elements. */ | ||
| 2020 | src--; | ||
| 2021 | coding->composing = COMPOSING_NO_RULE_HEAD; | ||
| 2022 | ENCODE_COMPOSITION_NO_RULE_START; | ||
| 2023 | coding->consumed_char++; | ||
| 2024 | } | ||
| 2025 | break; | ||
| 2026 | |||
| 2027 | case EMACS_invalid_code: | 2124 | case EMACS_invalid_code: |
| 2028 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 2125 | if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 2029 | ENCODE_RESET_PLANE_AND_REGISTER; | 2126 | ENCODE_RESET_PLANE_AND_REGISTER; |
| @@ -2047,8 +2144,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2047 | if (coding->mode & CODING_MODE_LAST_BLOCK) | 2144 | if (coding->mode & CODING_MODE_LAST_BLOCK) |
| 2048 | { | 2145 | { |
| 2049 | ENCODE_RESET_PLANE_AND_REGISTER; | 2146 | ENCODE_RESET_PLANE_AND_REGISTER; |
| 2050 | if (COMPOSING_P (coding->composing)) | 2147 | if (COMPOSING_P (coding)) |
| 2051 | ENCODE_COMPOSITION_END; | 2148 | *dst++ = ISO_CODE_ESC, *dst++ = '1'; |
| 2052 | if (result == CODING_FINISH_INSUFFICIENT_SRC) | 2149 | if (result == CODING_FINISH_INSUFFICIENT_SRC) |
| 2053 | { | 2150 | { |
| 2054 | while (src < src_end && dst < dst_end) | 2151 | while (src < src_end && dst < dst_end) |
| @@ -2202,7 +2299,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2202 | } \ | 2299 | } \ |
| 2203 | } \ | 2300 | } \ |
| 2204 | coding->consumed_char++; \ | 2301 | coding->consumed_char++; \ |
| 2205 | } while (0); | 2302 | } while (0) |
| 2206 | 2303 | ||
| 2207 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2304 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 2208 | Check if a text is encoded in SJIS. If it is, return | 2305 | Check if a text is encoded in SJIS. If it is, return |
| @@ -2464,19 +2561,6 @@ encode_coding_sjis_big5 (coding, source, destination, | |||
| 2464 | unsigned char *src_base = src; | 2561 | unsigned char *src_base = src; |
| 2465 | unsigned char c1 = *src++, c2, c3, c4; | 2562 | unsigned char c1 = *src++, c2, c3, c4; |
| 2466 | 2563 | ||
| 2467 | if (coding->composing) | ||
| 2468 | { | ||
| 2469 | if (c1 == 0xA0) | ||
| 2470 | { | ||
| 2471 | ONE_MORE_BYTE (c1); | ||
| 2472 | c1 &= 0x7F; | ||
| 2473 | } | ||
| 2474 | else if (c1 >= 0xA0) | ||
| 2475 | c1 -= 0x20; | ||
| 2476 | else | ||
| 2477 | coding->composing = 0; | ||
| 2478 | } | ||
| 2479 | |||
| 2480 | switch (emacs_code_class[c1]) | 2564 | switch (emacs_code_class[c1]) |
| 2481 | { | 2565 | { |
| 2482 | case EMACS_ascii_code: | 2566 | case EMACS_ascii_code: |
| @@ -2523,10 +2607,6 @@ encode_coding_sjis_big5 (coding, source, destination, | |||
| 2523 | ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4); | 2607 | ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4); |
| 2524 | break; | 2608 | break; |
| 2525 | 2609 | ||
| 2526 | case EMACS_leading_code_composition: | ||
| 2527 | coding->composing = 1; | ||
| 2528 | break; | ||
| 2529 | |||
| 2530 | default: /* i.e. case EMACS_invalid_code: */ | 2610 | default: /* i.e. case EMACS_invalid_code: */ |
| 2531 | *dst++ = c1; | 2611 | *dst++ = c1; |
| 2532 | coding->consumed_char++; | 2612 | coding->consumed_char++; |
| @@ -2898,6 +2978,8 @@ setup_coding_system (coding_system, coding) | |||
| 2898 | coding->mode = 0; | 2978 | coding->mode = 0; |
| 2899 | coding->heading_ascii = -1; | 2979 | coding->heading_ascii = -1; |
| 2900 | coding->post_read_conversion = coding->pre_write_conversion = Qnil; | 2980 | coding->post_read_conversion = coding->pre_write_conversion = Qnil; |
| 2981 | coding->composing = COMPOSITION_DISABLED; | ||
| 2982 | coding->cmp_data = NULL; | ||
| 2901 | 2983 | ||
| 2902 | if (NILP (coding_system)) | 2984 | if (NILP (coding_system)) |
| 2903 | goto label_invalid_coding_system; | 2985 | goto label_invalid_coding_system; |
| @@ -2944,10 +3026,6 @@ setup_coding_system (coding_system, coding) | |||
| 2944 | return 0; | 3026 | return 0; |
| 2945 | } | 3027 | } |
| 2946 | 3028 | ||
| 2947 | /* Initialize remaining fields. */ | ||
| 2948 | coding->composing = 0; | ||
| 2949 | coding->composed_chars = 0; | ||
| 2950 | |||
| 2951 | /* Get values of coding system properties: | 3029 | /* Get values of coding system properties: |
| 2952 | `post-read-conversion', `pre-write-conversion', | 3030 | `post-read-conversion', `pre-write-conversion', |
| 2953 | `translation-table-for-decode', `translation-table-for-encode'. */ | 3031 | `translation-table-for-decode', `translation-table-for-encode'. */ |
| @@ -2997,6 +3075,12 @@ setup_coding_system (coding_system, coding) | |||
| 2997 | } | 3075 | } |
| 2998 | } | 3076 | } |
| 2999 | 3077 | ||
| 3078 | /* If the coding system has non-nil `composition' property, enable | ||
| 3079 | composition handling. */ | ||
| 3080 | val = Fplist_get (plist, Qcomposition); | ||
| 3081 | if (!NILP (val)) | ||
| 3082 | coding->composing = COMPOSITION_NO; | ||
| 3083 | |||
| 3000 | switch (XFASTINT (coding_type)) | 3084 | switch (XFASTINT (coding_type)) |
| 3001 | { | 3085 | { |
| 3002 | case 0: | 3086 | case 0: |
| @@ -3237,6 +3321,43 @@ setup_coding_system (coding_system, coding) | |||
| 3237 | return -1; | 3321 | return -1; |
| 3238 | } | 3322 | } |
| 3239 | 3323 | ||
| 3324 | /* Free memory blocks allocated for storing composition information. */ | ||
| 3325 | |||
| 3326 | void | ||
| 3327 | coding_free_composition_data (coding) | ||
| 3328 | struct coding_system *coding; | ||
| 3329 | { | ||
| 3330 | struct composition_data *cmp_data = coding->cmp_data, *next; | ||
| 3331 | |||
| 3332 | if (!cmp_data) | ||
| 3333 | return; | ||
| 3334 | /* Memory blocks are chained. At first, rewind to the first, then, | ||
| 3335 | free blocks one by one. */ | ||
| 3336 | while (cmp_data->prev) | ||
| 3337 | cmp_data = cmp_data->prev; | ||
| 3338 | while (cmp_data) | ||
| 3339 | { | ||
| 3340 | next = cmp_data->next; | ||
| 3341 | xfree (cmp_data); | ||
| 3342 | cmp_data = next; | ||
| 3343 | } | ||
| 3344 | coding->cmp_data = NULL; | ||
| 3345 | } | ||
| 3346 | |||
| 3347 | /* Set `char_offset' member of all memory blocks pointed by | ||
| 3348 | coding->cmp_data to POS. */ | ||
| 3349 | |||
| 3350 | void | ||
| 3351 | coding_adjust_composition_offset (coding, pos) | ||
| 3352 | struct coding_system *coding; | ||
| 3353 | int pos; | ||
| 3354 | { | ||
| 3355 | struct composition_data *cmp_data; | ||
| 3356 | |||
| 3357 | for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next) | ||
| 3358 | cmp_data->char_offset = pos; | ||
| 3359 | } | ||
| 3360 | |||
| 3240 | /* Setup raw-text or one of its subsidiaries in the structure | 3361 | /* Setup raw-text or one of its subsidiaries in the structure |
| 3241 | coding_system CODING according to the already setup value eol_type | 3362 | coding_system CODING according to the already setup value eol_type |
| 3242 | in CODING. CODING should be setup for some coding system in | 3363 | in CODING. CODING should be setup for some coding system in |
| @@ -4246,6 +4367,128 @@ code_convert_region_unwind (dummy) | |||
| 4246 | return Qnil; | 4367 | return Qnil; |
| 4247 | } | 4368 | } |
| 4248 | 4369 | ||
| 4370 | /* Store information about all compositions in the range FROM and TO | ||
| 4371 | of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a | ||
| 4372 | buffer or a string, defaults to the current buffer. */ | ||
| 4373 | |||
| 4374 | void | ||
| 4375 | coding_save_composition (coding, from, to, obj) | ||
| 4376 | struct coding_system *coding; | ||
| 4377 | int from, to; | ||
| 4378 | Lisp_Object obj; | ||
| 4379 | { | ||
| 4380 | Lisp_Object prop; | ||
| 4381 | int start, end; | ||
| 4382 | |||
| 4383 | coding->composing = COMPOSITION_DISABLED; | ||
| 4384 | if (!find_composition (from, to, &start, &end, &prop, obj) | ||
| 4385 | || end > to) | ||
| 4386 | return; | ||
| 4387 | if (start < from | ||
| 4388 | && (!find_composition (end, to, &start, &end, &prop, obj) | ||
| 4389 | || end > to)) | ||
| 4390 | return; | ||
| 4391 | coding->composing = COMPOSITION_NO; | ||
| 4392 | coding_allocate_composition_data (coding, from); | ||
| 4393 | do | ||
| 4394 | { | ||
| 4395 | if (COMPOSITION_VALID_P (start, end, prop)) | ||
| 4396 | { | ||
| 4397 | enum composition_method method = COMPOSITION_METHOD (prop); | ||
| 4398 | if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH | ||
| 4399 | >= COMPOSITION_DATA_SIZE) | ||
| 4400 | coding_allocate_composition_data (coding, from); | ||
| 4401 | /* For relative composition, we remember start and end | ||
| 4402 | positions, for the other compositions, we also remember | ||
| 4403 | components. */ | ||
| 4404 | CODING_ADD_COMPOSITION_START (coding, start - from, method); | ||
| 4405 | if (method != COMPOSITION_RELATIVE) | ||
| 4406 | { | ||
| 4407 | /* We must store the components of the composition. */ | ||
| 4408 | Lisp_Object val, ch; | ||
| 4409 | |||
| 4410 | val = COMPOSITION_COMPONENTS (prop); | ||
| 4411 | if (CONSP (val)) | ||
| 4412 | while (CONSP (val)) | ||
| 4413 | { | ||
| 4414 | ch = XCAR (val), val = XCDR (val); | ||
| 4415 | CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch)); | ||
| 4416 | } | ||
| 4417 | else if (VECTORP (val) || STRINGP (val)) | ||
| 4418 | { | ||
| 4419 | int len = (VECTORP (val) | ||
| 4420 | ? XVECTOR (val)->size : XSTRING (val)->size); | ||
| 4421 | int i; | ||
| 4422 | for (i = 0; i < len; i++) | ||
| 4423 | { | ||
| 4424 | ch = (STRINGP (val) | ||
| 4425 | ? Faref (val, make_number (i)) | ||
| 4426 | : XVECTOR (val)->contents[i]); | ||
| 4427 | CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch)); | ||
| 4428 | } | ||
| 4429 | } | ||
| 4430 | else /* INTEGERP (val) */ | ||
| 4431 | CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val)); | ||
| 4432 | } | ||
| 4433 | CODING_ADD_COMPOSITION_END (coding, end - from); | ||
| 4434 | } | ||
| 4435 | start = end; | ||
| 4436 | } | ||
| 4437 | while (start < to | ||
| 4438 | && find_composition (start, to, &start, &end, &prop, obj) | ||
| 4439 | && end <= to); | ||
| 4440 | |||
| 4441 | /* Make coding->cmp_data point to the first memory block. */ | ||
| 4442 | while (coding->cmp_data->prev) | ||
| 4443 | coding->cmp_data = coding->cmp_data->prev; | ||
| 4444 | coding->cmp_data_start = 0; | ||
| 4445 | } | ||
| 4446 | |||
| 4447 | /* Reflect the saved information about compositions to OBJ. | ||
| 4448 | CODING->cmp_data points to a memory block for the information. OBJ | ||
| 4449 | is a buffer or a string, defaults to the current buffer. */ | ||
| 4450 | |||
| 4451 | static void | ||
| 4452 | coding_restore_composition (coding, obj) | ||
| 4453 | struct coding_system *coding; | ||
| 4454 | Lisp_Object obj; | ||
| 4455 | { | ||
| 4456 | struct composition_data *cmp_data = coding->cmp_data; | ||
| 4457 | |||
| 4458 | if (!cmp_data) | ||
| 4459 | return; | ||
| 4460 | |||
| 4461 | while (cmp_data->prev) | ||
| 4462 | cmp_data = cmp_data->prev; | ||
| 4463 | |||
| 4464 | while (cmp_data) | ||
| 4465 | { | ||
| 4466 | int i; | ||
| 4467 | |||
| 4468 | for (i = 0; i < cmp_data->used; i += cmp_data->data[i]) | ||
| 4469 | { | ||
| 4470 | int *data = cmp_data->data + i; | ||
| 4471 | enum composition_method method = (enum composition_method) data[3]; | ||
| 4472 | Lisp_Object components; | ||
| 4473 | |||
| 4474 | if (method == COMPOSITION_RELATIVE) | ||
| 4475 | components = Qnil; | ||
| 4476 | else | ||
| 4477 | { | ||
| 4478 | int len = data[0] - 4, j; | ||
| 4479 | Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; | ||
| 4480 | |||
| 4481 | for (j = 0; j < len; j++) | ||
| 4482 | args[j] = make_number (data[4 + j]); | ||
| 4483 | components = (method == COMPOSITION_WITH_ALTCHARS | ||
| 4484 | ? Fstring (len, args) : Fvector (len, args)); | ||
| 4485 | } | ||
| 4486 | compose_text (data[1], data[2], components, Qnil, obj); | ||
| 4487 | } | ||
| 4488 | cmp_data = cmp_data->next; | ||
| 4489 | } | ||
| 4490 | } | ||
| 4491 | |||
| 4249 | /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the | 4492 | /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the |
| 4250 | text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by | 4493 | text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by |
| 4251 | coding system CODING, and return the status code of code conversion | 4494 | coding system CODING, and return the status code of code conversion |
| @@ -4329,12 +4572,12 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4329 | } | 4572 | } |
| 4330 | } | 4573 | } |
| 4331 | 4574 | ||
| 4332 | coding->consumed_char = len, coding->consumed = len_byte; | ||
| 4333 | |||
| 4334 | if (encodep | 4575 | if (encodep |
| 4335 | ? ! CODING_REQUIRE_ENCODING (coding) | 4576 | ? ! CODING_REQUIRE_ENCODING (coding) |
| 4336 | : ! CODING_REQUIRE_DECODING (coding)) | 4577 | : ! CODING_REQUIRE_DECODING (coding)) |
| 4337 | { | 4578 | { |
| 4579 | coding->consumed_char = len; | ||
| 4580 | coding->consumed = len_byte; | ||
| 4338 | coding->produced = len_byte; | 4581 | coding->produced = len_byte; |
| 4339 | if (multibyte | 4582 | if (multibyte |
| 4340 | && ! replace | 4583 | && ! replace |
| @@ -4407,33 +4650,48 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4407 | if (replace) | 4650 | if (replace) |
| 4408 | deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1); | 4651 | deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1); |
| 4409 | 4652 | ||
| 4410 | /* Try to skip the heading and tailing ASCIIs. */ | 4653 | if (coding->composing != COMPOSITION_DISABLED) |
| 4411 | { | 4654 | { |
| 4412 | int from_byte_orig = from_byte, to_byte_orig = to_byte; | 4655 | if (encodep) |
| 4413 | 4656 | coding_save_composition (coding, from, to, Fcurrent_buffer ()); | |
| 4414 | if (from < GPT && GPT < to) | 4657 | else |
| 4415 | move_gap_both (from, from_byte); | 4658 | coding_allocate_composition_data (coding, from); |
| 4416 | SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); | 4659 | } |
| 4417 | if (from_byte == to_byte | ||
| 4418 | && coding->type != coding_type_ccl | ||
| 4419 | && ! (coding->mode & CODING_MODE_LAST_BLOCK | ||
| 4420 | && CODING_REQUIRE_FLUSHING (coding))) | ||
| 4421 | { | ||
| 4422 | coding->produced = len_byte; | ||
| 4423 | coding->produced_char = multibyte ? len : len_byte; | ||
| 4424 | if (!replace) | ||
| 4425 | /* We must record and adjust for this new text now. */ | ||
| 4426 | adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); | ||
| 4427 | return 0; | ||
| 4428 | } | ||
| 4429 | 4660 | ||
| 4430 | head_skip = from_byte - from_byte_orig; | 4661 | /* For conversion by CCL program and for encoding with composition |
| 4431 | tail_skip = to_byte_orig - to_byte; | 4662 | handling, we can't skip any character because we may convert or |
| 4432 | total_skip = head_skip + tail_skip; | 4663 | compose even ASCII characters. */ |
| 4433 | from += head_skip; | 4664 | if (coding->type != coding_type_ccl |
| 4434 | to -= tail_skip; | 4665 | && (!encodep || coding->cmp_data == NULL)) |
| 4435 | len -= total_skip; len_byte -= total_skip; | 4666 | { |
| 4436 | } | 4667 | /* Try to skip the heading and tailing ASCIIs. */ |
| 4668 | int from_byte_orig = from_byte, to_byte_orig = to_byte; | ||
| 4669 | |||
| 4670 | if (from < GPT && GPT < to) | ||
| 4671 | move_gap_both (from, from_byte); | ||
| 4672 | SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); | ||
| 4673 | if (from_byte == to_byte | ||
| 4674 | && (encodep || NILP (coding->post_read_conversion)) | ||
| 4675 | && ! CODING_REQUIRE_FLUSHING (coding)) | ||
| 4676 | { | ||
| 4677 | coding->produced = len_byte; | ||
| 4678 | coding->produced_char = multibyte ? len : len_byte; | ||
| 4679 | if (!replace) | ||
| 4680 | /* We must record and adjust for this new text now. */ | ||
| 4681 | adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); | ||
| 4682 | return 0; | ||
| 4683 | } | ||
| 4684 | |||
| 4685 | head_skip = from_byte - from_byte_orig; | ||
| 4686 | tail_skip = to_byte_orig - to_byte; | ||
| 4687 | total_skip = head_skip + tail_skip; | ||
| 4688 | from += head_skip; | ||
| 4689 | to -= tail_skip; | ||
| 4690 | len -= total_skip; len_byte -= total_skip; | ||
| 4691 | |||
| 4692 | if (coding->cmp_data) | ||
| 4693 | coding->cmp_data->char_offset = from; | ||
| 4694 | } | ||
| 4437 | 4695 | ||
| 4438 | /* The code conversion routine can not preserve text properties for | 4696 | /* The code conversion routine can not preserve text properties for |
| 4439 | now. So, we must remove all text properties in the region. | 4697 | now. So, we must remove all text properties in the region. |
| @@ -4458,7 +4716,6 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4458 | move_gap_both (from, from_byte); | 4716 | move_gap_both (from, from_byte); |
| 4459 | 4717 | ||
| 4460 | inserted = inserted_byte = 0; | 4718 | inserted = inserted_byte = 0; |
| 4461 | src = GAP_END_ADDR, dst = GPT_ADDR; | ||
| 4462 | 4719 | ||
| 4463 | GAP_SIZE += len_byte; | 4720 | GAP_SIZE += len_byte; |
| 4464 | ZV -= len; | 4721 | ZV -= len; |
| @@ -4475,18 +4732,23 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4475 | { | 4732 | { |
| 4476 | int result; | 4733 | int result; |
| 4477 | 4734 | ||
| 4478 | /* The buffer memory is changed from: | 4735 | /* The buffer memory is now: |
| 4479 | +--------+converted-text+---------+-------original-text------+---+ | 4736 | +--------+converted-text+---------+-------original-text------+---+ |
| 4480 | |<-from->|<--inserted-->|---------|<-----------len---------->|---| | 4737 | |<-from->|<--inserted-->|---------|<-----------len---------->|---| |
| 4481 | |<------------------- GAP_SIZE -------------------->| */ | 4738 | |<------------------- GAP_SIZE -------------------->| */ |
| 4739 | src = GAP_END_ADDR - len_byte; | ||
| 4740 | dst = GPT_ADDR + inserted_byte; | ||
| 4741 | |||
| 4482 | if (encodep) | 4742 | if (encodep) |
| 4483 | result = encode_coding (coding, src, dst, len_byte, 0); | 4743 | result = encode_coding (coding, src, dst, len_byte, 0); |
| 4484 | else | 4744 | else |
| 4485 | result = decode_coding (coding, src, dst, len_byte, 0); | 4745 | result = decode_coding (coding, src, dst, len_byte, 0); |
| 4486 | /* to: | 4746 | |
| 4747 | /* The buffer memory is now: | ||
| 4487 | +--------+-------converted-text--------+--+---original-text--+---+ | 4748 | +--------+-------converted-text--------+--+---original-text--+---+ |
| 4488 | |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| | 4749 | |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| |
| 4489 | |<------------------- GAP_SIZE -------------------->| */ | 4750 | |<------------------- GAP_SIZE -------------------->| */ |
| 4751 | |||
| 4490 | if (coding->fake_multibyte) | 4752 | if (coding->fake_multibyte) |
| 4491 | fake_multibyte = 1; | 4753 | fake_multibyte = 1; |
| 4492 | 4754 | ||
| @@ -4495,6 +4757,13 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4495 | inserted += coding->produced_char; | 4757 | inserted += coding->produced_char; |
| 4496 | inserted_byte += coding->produced; | 4758 | inserted_byte += coding->produced; |
| 4497 | len_byte -= coding->consumed; | 4759 | len_byte -= coding->consumed; |
| 4760 | |||
| 4761 | if (result == CODING_FINISH_INSUFFICIENT_CMP) | ||
| 4762 | { | ||
| 4763 | coding_allocate_composition_data (coding, from + inserted); | ||
| 4764 | continue; | ||
| 4765 | } | ||
| 4766 | |||
| 4498 | src += coding->consumed; | 4767 | src += coding->consumed; |
| 4499 | dst += coding->produced; | 4768 | dst += coding->produced; |
| 4500 | 4769 | ||
| @@ -4626,9 +4895,6 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4626 | GAP_SIZE += add; | 4895 | GAP_SIZE += add; |
| 4627 | ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | 4896 | ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; |
| 4628 | GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | 4897 | GPT -= inserted_byte; GPT_BYTE -= inserted_byte; |
| 4629 | /* Don't forget to update SRC, DST. */ | ||
| 4630 | src = GAP_END_ADDR - len_byte; | ||
| 4631 | dst = GPT_ADDR + inserted_byte; | ||
| 4632 | } | 4898 | } |
| 4633 | } | 4899 | } |
| 4634 | if (src - dst > 0) *dst = 0; /* Put an anchor. */ | 4900 | if (src - dst > 0) *dst = 0; /* Put an anchor. */ |
| @@ -4657,6 +4923,10 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4657 | adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte); | 4923 | adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte); |
| 4658 | inserted = Z - prev_Z; | 4924 | inserted = Z - prev_Z; |
| 4659 | 4925 | ||
| 4926 | if (!encodep && coding->cmp_data && coding->cmp_data->used) | ||
| 4927 | coding_restore_composition (coding, Fcurrent_buffer ()); | ||
| 4928 | coding_free_composition_data (coding); | ||
| 4929 | |||
| 4660 | if (! encodep && ! NILP (coding->post_read_conversion)) | 4930 | if (! encodep && ! NILP (coding->post_read_conversion)) |
| 4661 | { | 4931 | { |
| 4662 | Lisp_Object val; | 4932 | Lisp_Object val; |
| @@ -4686,7 +4956,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4686 | TEMP_SET_PT (orig_point); | 4956 | TEMP_SET_PT (orig_point); |
| 4687 | } | 4957 | } |
| 4688 | 4958 | ||
| 4689 | signal_after_change (from, to - from, inserted); | 4959 | if (replace) |
| 4960 | { | ||
| 4961 | signal_after_change (from, to - from, inserted); | ||
| 4962 | update_compositions (from, to, CHECK_BORDER); | ||
| 4963 | } | ||
| 4690 | 4964 | ||
| 4691 | { | 4965 | { |
| 4692 | coding->consumed = to_byte - from_byte; | 4966 | coding->consumed = to_byte - from_byte; |
| @@ -4768,16 +5042,33 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 4768 | if (encodep | 5042 | if (encodep |
| 4769 | ? ! CODING_REQUIRE_ENCODING (coding) | 5043 | ? ! CODING_REQUIRE_ENCODING (coding) |
| 4770 | : ! CODING_REQUIRE_DECODING (coding)) | 5044 | : ! CODING_REQUIRE_DECODING (coding)) |
| 4771 | from = to_byte; | 5045 | return (nocopy ? str : Fcopy_sequence (str)); |
| 4772 | else | 5046 | |
| 5047 | if (coding->composing != COMPOSITION_DISABLED) | ||
| 5048 | { | ||
| 5049 | if (encodep) | ||
| 5050 | coding_save_composition (coding, from, to, str); | ||
| 5051 | else | ||
| 5052 | coding_allocate_composition_data (coding, from); | ||
| 5053 | } | ||
| 5054 | |||
| 5055 | /* For conversion by CCL program and for encoding with composition | ||
| 5056 | handling, we can't skip any character because we may convert or | ||
| 5057 | compose even ASCII characters. */ | ||
| 5058 | if (coding->type != coding_type_ccl | ||
| 5059 | && (!encodep || coding->cmp_data == NULL)) | ||
| 4773 | { | 5060 | { |
| 4774 | /* Try to skip the heading and tailing ASCIIs. */ | 5061 | /* Try to skip the heading and tailing ASCIIs. */ |
| 5062 | int from_orig = from; | ||
| 5063 | |||
| 4775 | SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, | 5064 | SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, |
| 4776 | encodep); | 5065 | encodep); |
| 5066 | if (from == to_byte) | ||
| 5067 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 5068 | |||
| 5069 | if (coding->cmp_data) | ||
| 5070 | coding->cmp_data->char_offset = from; | ||
| 4777 | } | 5071 | } |
| 4778 | if (from == to_byte | ||
| 4779 | && coding->type != coding_type_ccl) | ||
| 4780 | return (nocopy ? str : Fcopy_sequence (str)); | ||
| 4781 | 5072 | ||
| 4782 | if (encodep) | 5073 | if (encodep) |
| 4783 | len = encoding_buffer_size (coding, to_byte - from); | 5074 | len = encoding_buffer_size (coding, to_byte - from); |
| @@ -4797,10 +5088,11 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 4797 | buf + from, to_byte - from, len)); | 5088 | buf + from, to_byte - from, len)); |
| 4798 | if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) | 5089 | if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) |
| 4799 | { | 5090 | { |
| 4800 | /* We simple try to decode the whole string again but without | 5091 | /* We simply try to decode the whole string again but without |
| 4801 | eol-conversion this time. */ | 5092 | eol-conversion this time. */ |
| 4802 | coding->eol_type = CODING_EOL_LF; | 5093 | coding->eol_type = CODING_EOL_LF; |
| 4803 | coding->symbol = saved_coding_symbol; | 5094 | coding->symbol = saved_coding_symbol; |
| 5095 | coding_free_composition_data (coding); | ||
| 4804 | return code_convert_string (str, coding, encodep, nocopy); | 5096 | return code_convert_string (str, coding, encodep, nocopy); |
| 4805 | } | 5097 | } |
| 4806 | 5098 | ||
| @@ -4818,6 +5110,10 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 4818 | str = make_multibyte_string (buf, len + chars, len + coding->produced); | 5110 | str = make_multibyte_string (buf, len + chars, len + coding->produced); |
| 4819 | } | 5111 | } |
| 4820 | 5112 | ||
| 5113 | if (!encodep && coding->cmp_data && coding->cmp_data->used) | ||
| 5114 | coding_restore_composition (coding, str); | ||
| 5115 | |||
| 5116 | coding_free_composition_data (coding); | ||
| 4821 | return str; | 5117 | return str; |
| 4822 | } | 5118 | } |
| 4823 | 5119 | ||
| @@ -5077,8 +5373,10 @@ code_convert_string1 (string, coding_system, nocopy, encodep) | |||
| 5077 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 5373 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
| 5078 | 5374 | ||
| 5079 | coding.mode |= CODING_MODE_LAST_BLOCK; | 5375 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 5376 | string = code_convert_string (string, &coding, encodep, !NILP (nocopy)); | ||
| 5080 | Vlast_coding_system_used = coding.symbol; | 5377 | Vlast_coding_system_used = coding.symbol; |
| 5081 | return code_convert_string (string, &coding, encodep, !NILP (nocopy)); | 5378 | |
| 5379 | return string; | ||
| 5082 | } | 5380 | } |
| 5083 | 5381 | ||
| 5084 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, | 5382 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, |
| @@ -5110,7 +5408,10 @@ not fully specified.)") | |||
| 5110 | } | 5408 | } |
| 5111 | 5409 | ||
| 5112 | /* Encode or decode STRING according to CODING_SYSTEM. | 5410 | /* Encode or decode STRING according to CODING_SYSTEM. |
| 5113 | Do not set Vlast_coding_system_used. */ | 5411 | Do not set Vlast_coding_system_used. |
| 5412 | |||
| 5413 | This function is called only from macros DECODE_FILE and | ||
| 5414 | ENCODE_FILE, thus we ignore character composition. */ | ||
| 5114 | 5415 | ||
| 5115 | Lisp_Object | 5416 | Lisp_Object |
| 5116 | code_convert_string_norecord (string, coding_system, encodep) | 5417 | code_convert_string_norecord (string, coding_system, encodep) |
| @@ -5128,6 +5429,7 @@ code_convert_string_norecord (string, coding_system, encodep) | |||
| 5128 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 5429 | if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 5129 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 5430 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
| 5130 | 5431 | ||
| 5432 | coding.composing = COMPOSITION_DISABLED; | ||
| 5131 | coding.mode |= CODING_MODE_LAST_BLOCK; | 5433 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 5132 | return code_convert_string (string, &coding, encodep, Qt); | 5434 | return code_convert_string (string, &coding, encodep, Qt); |
| 5133 | } | 5435 | } |
| @@ -5262,7 +5564,8 @@ DEFUN ("set-terminal-coding-system-internal", | |||
| 5262 | setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); | 5564 | setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); |
| 5263 | /* We had better not send unsafe characters to terminal. */ | 5565 | /* We had better not send unsafe characters to terminal. */ |
| 5264 | terminal_coding.flags |= CODING_FLAG_ISO_SAFE; | 5566 | terminal_coding.flags |= CODING_FLAG_ISO_SAFE; |
| 5265 | 5567 | /* Character composition should be disabled. */ | |
| 5568 | terminal_coding.composing = COMPOSITION_DISABLED; | ||
| 5266 | return Qnil; | 5569 | return Qnil; |
| 5267 | } | 5570 | } |
| 5268 | 5571 | ||
| @@ -5275,6 +5578,8 @@ DEFUN ("set-safe-terminal-coding-system-internal", | |||
| 5275 | CHECK_SYMBOL (coding_system, 0); | 5578 | CHECK_SYMBOL (coding_system, 0); |
| 5276 | setup_coding_system (Fcheck_coding_system (coding_system), | 5579 | setup_coding_system (Fcheck_coding_system (coding_system), |
| 5277 | &safe_terminal_coding); | 5580 | &safe_terminal_coding); |
| 5581 | /* Character composition should be disabled. */ | ||
| 5582 | safe_terminal_coding.composing = COMPOSITION_DISABLED; | ||
| 5278 | return Qnil; | 5583 | return Qnil; |
| 5279 | } | 5584 | } |
| 5280 | 5585 | ||
| @@ -5294,6 +5599,8 @@ DEFUN ("set-keyboard-coding-system-internal", | |||
| 5294 | { | 5599 | { |
| 5295 | CHECK_SYMBOL (coding_system, 0); | 5600 | CHECK_SYMBOL (coding_system, 0); |
| 5296 | setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding); | 5601 | setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding); |
| 5602 | /* Character composition should be disabled. */ | ||
| 5603 | keyboard_coding.composing = COMPOSITION_DISABLED; | ||
| 5297 | return Qnil; | 5604 | return Qnil; |
| 5298 | } | 5605 | } |
| 5299 | 5606 | ||
| @@ -5489,8 +5796,7 @@ init_coding_once () | |||
| 5489 | for (i = 0x21 ; i < 0x7F; i++) | 5796 | for (i = 0x21 ; i < 0x7F; i++) |
| 5490 | emacs_code_class[i] = EMACS_ascii_code; | 5797 | emacs_code_class[i] = EMACS_ascii_code; |
| 5491 | emacs_code_class[0x7F] = EMACS_control_code; | 5798 | emacs_code_class[0x7F] = EMACS_control_code; |
| 5492 | emacs_code_class[0x80] = EMACS_leading_code_composition; | 5799 | for (i = 0x80; i < 0xFF; i++) |
| 5493 | for (i = 0x81; i < 0xFF; i++) | ||
| 5494 | emacs_code_class[i] = EMACS_invalid_code; | 5800 | emacs_code_class[i] = EMACS_invalid_code; |
| 5495 | emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3; | 5801 | emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3; |
| 5496 | emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3; | 5802 | emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3; |