diff options
| author | Kenichi Handa | 2012-01-19 22:19:21 +0800 |
|---|---|---|
| committer | Chong Yidong | 2012-01-19 22:19:21 +0800 |
| commit | a32a7dc7214bf5f618d50d0143fe5f8159445d2d (patch) | |
| tree | 36a206a6908cbbabdfc3a3a7cf843176d7fdcdc4 /src/coding.c | |
| parent | 6689f4b68ad9de3023e656acbec6a8d7245616ef (diff) | |
| download | emacs-a32a7dc7214bf5f618d50d0143fe5f8159445d2d.tar.gz emacs-a32a7dc7214bf5f618d50d0143fe5f8159445d2d.zip | |
Pay attention to buffer relocation on encoding (Bug#9318; backport from trunk).
Backport of 2011-12-05T06:39:26Z!handa@m17n.org from trunk.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 171 |
1 files changed, 131 insertions, 40 deletions
diff --git a/src/coding.c b/src/coding.c
index fbb028f658c..9a2c1f9c3f2 100644
--- a/src/coding.c
+++ b/src/coding.c
| @@ -936,17 +936,16 @@ static int encode_coding_ccl P_ ((struct coding_system *)); | |||
| 936 | static void decode_coding_raw_text P_ ((struct coding_system *)); | 936 | static void decode_coding_raw_text P_ ((struct coding_system *)); |
| 937 | static int encode_coding_raw_text P_ ((struct coding_system *)); | 937 | static int encode_coding_raw_text P_ ((struct coding_system *)); |
| 938 | 938 | ||
| 939 | static void coding_set_source P_ ((struct coding_system *)); | 939 | static EMACS_INT coding_set_source P_ ((struct coding_system *)); |
| 940 | static void coding_set_destination P_ ((struct coding_system *)); | 940 | static EMACS_INT coding_set_destination P_ ((struct coding_system *)); |
| 941 | static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT)); | 941 | static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT)); |
| 942 | static void coding_alloc_by_making_gap P_ ((struct coding_system *, | 942 | static void coding_alloc_by_making_gap P_ ((struct coding_system *, |
| 943 | EMACS_INT, EMACS_INT)); | 943 | EMACS_INT, EMACS_INT)); |
| 944 | static unsigned char *alloc_destination P_ ((struct coding_system *, | 944 | static unsigned char *alloc_destination P_ ((struct coding_system *, |
| 945 | EMACS_INT, unsigned char *)); | 945 | EMACS_INT, unsigned char *)); |
| 946 | static void setup_iso_safe_charsets P_ ((Lisp_Object)); | 946 | static void setup_iso_safe_charsets P_ ((Lisp_Object)); |
| 947 | static unsigned char *encode_designation_at_bol P_ ((struct coding_system *, | 947 | static int encode_designation_at_bol P_ ((struct coding_system *, |
| 948 | int *, int *, | 948 | int *, int *, unsigned char *)); |
| 949 | unsigned char *)); | ||
| 950 | static int detect_eol P_ ((const unsigned char *, | 949 | static int detect_eol P_ ((const unsigned char *, |
| 951 | EMACS_INT, enum coding_category)); | 950 | EMACS_INT, enum coding_category)); |
| 952 | static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); | 951 | static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); |
| @@ -1005,27 +1004,68 @@ record_conversion_result (struct coding_system *coding, | |||
| 1005 | } | 1004 | } |
| 1006 | } | 1005 | } |
| 1007 | 1006 | ||
| 1008 | /* This wrapper macro is used to preserve validity of pointers into | 1007 | /* These wrapper macros are used to preserve validity of pointers into |
| 1009 | buffer text across calls to decode_char, which could cause | 1008 | buffer text across calls to decode_char, encode_char, etc, which |
| 1010 | relocation of buffers if it loads a charset map, because loading a | 1009 | could cause relocation of buffers if it loads a charset map, |
| 1011 | charset map allocates large structures. */ | 1010 | because loading a charset map allocates large structures. */ |
| 1011 | |||
| 1012 | #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ | 1012 | #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ |
| 1013 | do { \ | 1013 | do { \ |
| 1014 | EMACS_INT offset; \ | ||
| 1015 | \ | ||
| 1014 | charset_map_loaded = 0; \ | 1016 | charset_map_loaded = 0; \ |
| 1015 | c = DECODE_CHAR (charset, code); \ | 1017 | c = DECODE_CHAR (charset, code); \ |
| 1016 | if (charset_map_loaded) \ | 1018 | if (charset_map_loaded \ |
| 1019 | && (offset = coding_set_source (coding))) \ | ||
| 1017 | { \ | 1020 | { \ |
| 1018 | const unsigned char *orig = coding->source; \ | ||
| 1019 | EMACS_INT offset; \ | ||
| 1020 | \ | ||
| 1021 | coding_set_source (coding); \ | ||
| 1022 | offset = coding->source - orig; \ | ||
| 1023 | src += offset; \ | 1021 | src += offset; \ |
| 1024 | src_base += offset; \ | 1022 | src_base += offset; \ |
| 1025 | src_end += offset; \ | 1023 | src_end += offset; \ |
| 1026 | } \ | 1024 | } \ |
| 1027 | } while (0) | 1025 | } while (0) |
| 1028 | 1026 | ||
| 1027 | #define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \ | ||
| 1028 | do { \ | ||
| 1029 | EMACS_INT offset; \ | ||
| 1030 | \ | ||
| 1031 | charset_map_loaded = 0; \ | ||
| 1032 | code = ENCODE_CHAR (charset, c); \ | ||
| 1033 | if (charset_map_loaded \ | ||
| 1034 | && (offset = coding_set_destination (coding))) \ | ||
| 1035 | { \ | ||
| 1036 | dst += offset; \ | ||
| 1037 | dst_end += offset; \ | ||
| 1038 | } \ | ||
| 1039 | } while (0) | ||
| 1040 | |||
| 1041 | #define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \ | ||
| 1042 | do { \ | ||
| 1043 | EMACS_INT offset; \ | ||
| 1044 | \ | ||
| 1045 | charset_map_loaded = 0; \ | ||
| 1046 | charset = char_charset (c, charset_list, code_return); \ | ||
| 1047 | if (charset_map_loaded \ | ||
| 1048 | && (offset = coding_set_destination (coding))) \ | ||
| 1049 | { \ | ||
| 1050 | dst += offset; \ | ||
| 1051 | dst_end += offset; \ | ||
| 1052 | } \ | ||
| 1053 | } while (0) | ||
| 1054 | |||
| 1055 | #define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \ | ||
| 1056 | do { \ | ||
| 1057 | EMACS_INT offset; \ | ||
| 1058 | \ | ||
| 1059 | charset_map_loaded = 0; \ | ||
| 1060 | result = CHAR_CHARSET_P (c, charset); \ | ||
| 1061 | if (charset_map_loaded \ | ||
| 1062 | && (offset = coding_set_destination (coding))) \ | ||
| 1063 | { \ | ||
| 1064 | dst += offset; \ | ||
| 1065 | dst_end += offset; \ | ||
| 1066 | } \ | ||
| 1067 | } while (0) | ||
| 1068 | |||
| 1029 | 1069 | ||
| 1030 | /* If there are at least BYTES length of room at dst, allocate memory | 1070 | /* If there are at least BYTES length of room at dst, allocate memory |
| 1031 | for coding->destination and update dst and dst_end. We don't have | 1071 | for coding->destination and update dst and dst_end. We don't have |
| @@ -1105,10 +1145,15 @@ record_conversion_result (struct coding_system *coding, | |||
| 1105 | | ((p)[-1] & 0x3F)))) | 1145 | | ((p)[-1] & 0x3F)))) |
| 1106 | 1146 | ||
| 1107 | 1147 | ||
| 1108 | static void | 1148 | /* Update coding->source from coding->src_object, and return how many |
| 1149 | bytes coding->source was changed. */ | ||
| 1150 | |||
| 1151 | static EMACS_INT | ||
| 1109 | coding_set_source (coding) | 1152 | coding_set_source (coding) |
| 1110 | struct coding_system *coding; | 1153 | struct coding_system *coding; |
| 1111 | { | 1154 | { |
| 1155 | const unsigned char *orig = coding->source; | ||
| 1156 | |||
| 1112 | if (BUFFERP (coding->src_object)) | 1157 | if (BUFFERP (coding->src_object)) |
| 1113 | { | 1158 | { |
| 1114 | struct buffer *buf = XBUFFER (coding->src_object); | 1159 | struct buffer *buf = XBUFFER (coding->src_object); |
| @@ -1126,12 +1171,19 @@ coding_set_source (coding) | |||
| 1126 | /* Otherwise, the source is C string and is never relocated | 1171 | /* Otherwise, the source is C string and is never relocated |
| 1127 | automatically. Thus we don't have to update anything. */ | 1172 | automatically. Thus we don't have to update anything. */ |
| 1128 | ; | 1173 | ; |
| 1174 | |||
| 1175 | return coding->source - orig; | ||
| 1129 | } | 1176 | } |
| 1130 | 1177 | ||
| 1131 | static void | 1178 | /* Update coding->destination from coding->dst_object, and return how |
| 1179 | many bytes coding->destination was changed. */ | ||
| 1180 | |||
| 1181 | static EMACS_INT | ||
| 1132 | coding_set_destination (coding) | 1182 | coding_set_destination (coding) |
| 1133 | struct coding_system *coding; | 1183 | struct coding_system *coding; |
| 1134 | { | 1184 | { |
| 1185 | const unsigned char *orig = coding->destination; | ||
| 1186 | |||
| 1135 | if (BUFFERP (coding->dst_object)) | 1187 | if (BUFFERP (coding->dst_object)) |
| 1136 | { | 1188 | { |
| 1137 | if (coding->src_pos < 0) | 1189 | if (coding->src_pos < 0) |
| @@ -1155,6 +1207,8 @@ coding_set_destination (coding) | |||
| 1155 | /* Otherwise, the destination is C string and is never relocated | 1207 | /* Otherwise, the destination is C string and is never relocated |
| 1156 | automatically. Thus we don't have to update anything. */ | 1208 | automatically. Thus we don't have to update anything. */ |
| 1157 | ; | 1209 | ; |
| 1210 | |||
| 1211 | return coding->destination - orig; | ||
| 1158 | } | 1212 | } |
| 1159 | 1213 | ||
| 1160 | 1214 | ||
| @@ -2778,14 +2832,19 @@ encode_coding_emacs_mule (coding) | |||
| 2778 | 2832 | ||
| 2779 | if (preferred_charset_id >= 0) | 2833 | if (preferred_charset_id >= 0) |
| 2780 | { | 2834 | { |
| 2835 | int result; | ||
| 2836 | |||
| 2781 | charset = CHARSET_FROM_ID (preferred_charset_id); | 2837 | charset = CHARSET_FROM_ID (preferred_charset_id); |
| 2782 | if (CHAR_CHARSET_P (c, charset)) | 2838 | CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); |
| 2839 | if (result) | ||
| 2783 | code = ENCODE_CHAR (charset, c); | 2840 | code = ENCODE_CHAR (charset, c); |
| 2784 | else | 2841 | else |
| 2785 | charset = char_charset (c, charset_list, &code); | 2842 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, |
| 2843 | &code, charset); | ||
| 2786 | } | 2844 | } |
| 2787 | else | 2845 | else |
| 2788 | charset = char_charset (c, charset_list, &code); | 2846 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, |
| 2847 | &code, charset); | ||
| 2789 | if (! charset) | 2848 | if (! charset) |
| 2790 | { | 2849 | { |
| 2791 | c = coding->default_char; | 2850 | c = coding->default_char; |
| @@ -2794,7 +2853,8 @@ encode_coding_emacs_mule (coding) | |||
| 2794 | EMIT_ONE_ASCII_BYTE (c); | 2853 | EMIT_ONE_ASCII_BYTE (c); |
| 2795 | continue; | 2854 | continue; |
| 2796 | } | 2855 | } |
| 2797 | charset = char_charset (c, charset_list, &code); | 2856 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, |
| 2857 | &code, charset); | ||
| 2798 | } | 2858 | } |
| 2799 | dimension = CHARSET_DIMENSION (charset); | 2859 | dimension = CHARSET_DIMENSION (charset); |
| 2800 | emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); | 2860 | emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); |
| @@ -4317,7 +4377,8 @@ decode_coding_iso_2022 (coding) | |||
| 4317 | 4377 | ||
| 4318 | #define ENCODE_ISO_CHARACTER(charset, c) \ | 4378 | #define ENCODE_ISO_CHARACTER(charset, c) \ |
| 4319 | do { \ | 4379 | do { \ |
| 4320 | int code = ENCODE_CHAR ((charset),(c)); \ | 4380 | int code; \ |
| 4381 | CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \ | ||
| 4321 | \ | 4382 | \ |
| 4322 | if (CHARSET_DIMENSION (charset) == 1) \ | 4383 | if (CHARSET_DIMENSION (charset) == 1) \ |
| 4323 | ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ | 4384 | ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ |
| @@ -4441,17 +4502,20 @@ encode_invocation_designation (charset, coding, dst, p_nchars) | |||
| 4441 | 4502 | ||
| 4442 | 4503 | ||
| 4443 | /* Produce designation sequences of charsets in the line started from | 4504 | /* Produce designation sequences of charsets in the line started from |
| 4444 | SRC to a place pointed by DST, and return updated DST. | 4505 | CHARBUF to a place pointed by DST, and return the number of |
| 4506 | produced bytes. DST should not directly point a buffer text area | ||
| 4507 | which may be relocated by char_charset call. | ||
| 4445 | 4508 | ||
| 4446 | If the current block ends before any end-of-line, we may fail to | 4509 | If the current block ends before any end-of-line, we may fail to |
| 4447 | find all the necessary designations. */ | 4510 | find all the necessary designations. */ |
| 4448 | 4511 | ||
| 4449 | static unsigned char * | 4512 | static int |
| 4450 | encode_designation_at_bol (coding, charbuf, charbuf_end, dst) | 4513 | encode_designation_at_bol (coding, charbuf, charbuf_end, dst) |
| 4451 | struct coding_system *coding; | 4514 | struct coding_system *coding; |
| 4452 | int *charbuf, *charbuf_end; | 4515 | int *charbuf, *charbuf_end; |
| 4453 | unsigned char *dst; | 4516 | unsigned char *dst; |
| 4454 | { | 4517 | { |
| 4518 | unsigned char *orig; | ||
| 4455 | struct charset *charset; | 4519 | struct charset *charset; |
| 4456 | /* Table of charsets to be designated to each graphic register. */ | 4520 | /* Table of charsets to be designated to each graphic register. */ |
| 4457 | int r[4]; | 4521 | int r[4]; |
| @@ -4469,7 +4533,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) | |||
| 4469 | for (reg = 0; reg < 4; reg++) | 4533 | for (reg = 0; reg < 4; reg++) |
| 4470 | r[reg] = -1; | 4534 | r[reg] = -1; |
| 4471 | 4535 | ||
| 4472 | while (found < 4) | 4536 | while (charbuf < charbuf_end && found < 4) |
| 4473 | { | 4537 | { |
| 4474 | int id; | 4538 | int id; |
| 4475 | 4539 | ||
| @@ -4494,7 +4558,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) | |||
| 4494 | ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); | 4558 | ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); |
| 4495 | } | 4559 | } |
| 4496 | 4560 | ||
| 4497 | return dst; | 4561 | return dst - orig; |
| 4498 | } | 4562 | } |
| 4499 | 4563 | ||
| 4500 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ | 4564 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ |
| @@ -4539,13 +4603,26 @@ encode_coding_iso_2022 (coding) | |||
| 4539 | 4603 | ||
| 4540 | if (bol_designation) | 4604 | if (bol_designation) |
| 4541 | { | 4605 | { |
| 4542 | unsigned char *dst_prev = dst; | ||
| 4543 | |||
| 4544 | /* We have to produce designation sequences if any now. */ | 4606 | /* We have to produce designation sequences if any now. */ |
| 4545 | dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); | 4607 | unsigned char desig_buf[16]; |
| 4546 | bol_designation = 0; | 4608 | int nbytes; |
| 4609 | EMACS_INT offset; | ||
| 4610 | |||
| 4611 | charset_map_loaded = 0; | ||
| 4612 | nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end, | ||
| 4613 | desig_buf); | ||
| 4614 | if (charset_map_loaded | ||
| 4615 | && (offset = coding_set_destination (coding))) | ||
| 4616 | { | ||
| 4617 | dst += offset; | ||
| 4618 | dst_end += offset; | ||
| 4619 | } | ||
| 4620 | memcpy (dst, desig_buf, nbytes); | ||
| 4621 | dst += nbytes; | ||
| 4547 | /* We are sure that designation sequences are all ASCII bytes. */ | 4622 | /* We are sure that designation sequences are all ASCII bytes. */ |
| 4548 | produced_chars += dst - dst_prev; | 4623 | produced_chars += nbytes; |
| 4624 | bol_designation = 0; | ||
| 4625 | ASSURE_DESTINATION (safe_room); | ||
| 4549 | } | 4626 | } |
| 4550 | 4627 | ||
| 4551 | c = *charbuf++; | 4628 | c = *charbuf++; |
| @@ -4616,12 +4693,17 @@ encode_coding_iso_2022 (coding) | |||
| 4616 | 4693 | ||
| 4617 | if (preferred_charset_id >= 0) | 4694 | if (preferred_charset_id >= 0) |
| 4618 | { | 4695 | { |
| 4696 | int result; | ||
| 4697 | |||
| 4619 | charset = CHARSET_FROM_ID (preferred_charset_id); | 4698 | charset = CHARSET_FROM_ID (preferred_charset_id); |
| 4620 | if (! CHAR_CHARSET_P (c, charset)) | 4699 | CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); |
| 4621 | charset = char_charset (c, charset_list, NULL); | 4700 | if (! result) |
| 4701 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, | ||
| 4702 | NULL, charset); | ||
| 4622 | } | 4703 | } |
| 4623 | else | 4704 | else |
| 4624 | charset = char_charset (c, charset_list, NULL); | 4705 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, |
| 4706 | NULL, charset); | ||
| 4625 | if (!charset) | 4707 | if (!charset) |
| 4626 | { | 4708 | { |
| 4627 | if (coding->mode & CODING_MODE_SAFE_ENCODING) | 4709 | if (coding->mode & CODING_MODE_SAFE_ENCODING) |
| @@ -4632,7 +4714,8 @@ encode_coding_iso_2022 (coding) | |||
| 4632 | else | 4714 | else |
| 4633 | { | 4715 | { |
| 4634 | c = coding->default_char; | 4716 | c = coding->default_char; |
| 4635 | charset = char_charset (c, charset_list, NULL); | 4717 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, |
| 4718 | charset_list, NULL, charset); | ||
| 4636 | } | 4719 | } |
| 4637 | } | 4720 | } |
| 4638 | ENCODE_ISO_CHARACTER (charset, c); | 4721 | ENCODE_ISO_CHARACTER (charset, c); |
| @@ -5064,7 +5147,9 @@ encode_coding_sjis (coding) | |||
| 5064 | else | 5147 | else |
| 5065 | { | 5148 | { |
| 5066 | unsigned code; | 5149 | unsigned code; |
| 5067 | struct charset *charset = char_charset (c, charset_list, &code); | 5150 | struct charset *charset; |
| 5151 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, | ||
| 5152 | &code, charset); | ||
| 5068 | 5153 | ||
| 5069 | if (!charset) | 5154 | if (!charset) |
| 5070 | { | 5155 | { |
| @@ -5076,7 +5161,8 @@ encode_coding_sjis (coding) | |||
| 5076 | else | 5161 | else |
| 5077 | { | 5162 | { |
| 5078 | c = coding->default_char; | 5163 | c = coding->default_char; |
| 5079 | charset = char_charset (c, charset_list, &code); | 5164 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, |
| 5165 | charset_list, &code, charset); | ||
| 5080 | } | 5166 | } |
| 5081 | } | 5167 | } |
| 5082 | if (code == CHARSET_INVALID_CODE (charset)) | 5168 | if (code == CHARSET_INVALID_CODE (charset)) |
| @@ -5153,7 +5239,9 @@ encode_coding_big5 (coding) | |||
| 5153 | else | 5239 | else |
| 5154 | { | 5240 | { |
| 5155 | unsigned code; | 5241 | unsigned code; |
| 5156 | struct charset *charset = char_charset (c, charset_list, &code); | 5242 | struct charset *charset; |
| 5243 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, | ||
| 5244 | &code, charset); | ||
| 5157 | 5245 | ||
| 5158 | if (! charset) | 5246 | if (! charset) |
| 5159 | { | 5247 | { |
| @@ -5165,7 +5253,8 @@ encode_coding_big5 (coding) | |||
| 5165 | else | 5253 | else |
| 5166 | { | 5254 | { |
| 5167 | c = coding->default_char; | 5255 | c = coding->default_char; |
| 5168 | charset = char_charset (c, charset_list, &code); | 5256 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, |
| 5257 | charset_list, &code, charset); | ||
| 5169 | } | 5258 | } |
| 5170 | } | 5259 | } |
| 5171 | if (code == CHARSET_INVALID_CODE (charset)) | 5260 | if (code == CHARSET_INVALID_CODE (charset)) |
| @@ -5747,7 +5836,9 @@ encode_coding_charset (coding) | |||
| 5747 | } | 5836 | } |
| 5748 | else | 5837 | else |
| 5749 | { | 5838 | { |
| 5750 | charset = char_charset (c, charset_list, &code); | 5839 | CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, |
| 5840 | &code, charset); | ||
| 5841 | |||
| 5751 | if (charset) | 5842 | if (charset) |
| 5752 | { | 5843 | { |
| 5753 | if (CHARSET_DIMENSION (charset) == 1) | 5844 | if (CHARSET_DIMENSION (charset) == 1) |