aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c171
1 files changed, 131 insertions, 40 deletions
diff --git a/src/coding.c b/src/coding.c
index fbb028f658c..9a2c1f9c3f2 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -936,17 +936,16 @@ static int encode_coding_ccl P_ ((struct coding_system *));
936static void decode_coding_raw_text P_ ((struct coding_system *)); 936static void decode_coding_raw_text P_ ((struct coding_system *));
937static int encode_coding_raw_text P_ ((struct coding_system *)); 937static int encode_coding_raw_text P_ ((struct coding_system *));
938 938
939static void coding_set_source P_ ((struct coding_system *)); 939static EMACS_INT coding_set_source P_ ((struct coding_system *));
940static void coding_set_destination P_ ((struct coding_system *)); 940static EMACS_INT coding_set_destination P_ ((struct coding_system *));
941static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT)); 941static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
942static void coding_alloc_by_making_gap P_ ((struct coding_system *, 942static void coding_alloc_by_making_gap P_ ((struct coding_system *,
943 EMACS_INT, EMACS_INT)); 943 EMACS_INT, EMACS_INT));
944static unsigned char *alloc_destination P_ ((struct coding_system *, 944static unsigned char *alloc_destination P_ ((struct coding_system *,
945 EMACS_INT, unsigned char *)); 945 EMACS_INT, unsigned char *));
946static void setup_iso_safe_charsets P_ ((Lisp_Object)); 946static void setup_iso_safe_charsets P_ ((Lisp_Object));
947static unsigned char *encode_designation_at_bol P_ ((struct coding_system *, 947static int encode_designation_at_bol P_ ((struct coding_system *,
948 int *, int *, 948 int *, int *, unsigned char *));
949 unsigned char *));
950static int detect_eol P_ ((const unsigned char *, 949static int detect_eol P_ ((const unsigned char *,
951 EMACS_INT, enum coding_category)); 950 EMACS_INT, enum coding_category));
952static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); 951static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
@@ -1005,27 +1004,68 @@ record_conversion_result (struct coding_system *coding,
1005 } 1004 }
1006} 1005}
1007 1006
1008/* This wrapper macro is used to preserve validity of pointers into 1007/* These wrapper macros are used to preserve validity of pointers into
1009 buffer text across calls to decode_char, which could cause 1008 buffer text across calls to decode_char, encode_char, etc., which
1010 relocation of buffers if it loads a charset map, because loading a 1009 could cause relocation of buffers if they load a charset map,
1011 charset map allocates large structures. */ 1010 because loading a charset map allocates large structures. */
1011
1012#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ 1012#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1013 do { \ 1013 do { \
1014 EMACS_INT offset; \
1015 \
1014 charset_map_loaded = 0; \ 1016 charset_map_loaded = 0; \
1015 c = DECODE_CHAR (charset, code); \ 1017 c = DECODE_CHAR (charset, code); \
1016 if (charset_map_loaded) \ 1018 if (charset_map_loaded \
1019 && (offset = coding_set_source (coding))) \
1017 { \ 1020 { \
1018 const unsigned char *orig = coding->source; \
1019 EMACS_INT offset; \
1020 \
1021 coding_set_source (coding); \
1022 offset = coding->source - orig; \
1023 src += offset; \ 1021 src += offset; \
1024 src_base += offset; \ 1022 src_base += offset; \
1025 src_end += offset; \ 1023 src_end += offset; \
1026 } \ 1024 } \
1027 } while (0) 1025 } while (0)
1028 1026
1027#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \
1028 do { \
1029 EMACS_INT offset; \
1030 \
1031 charset_map_loaded = 0; \
1032 code = ENCODE_CHAR (charset, c); \
1033 if (charset_map_loaded \
1034 && (offset = coding_set_destination (coding))) \
1035 { \
1036 dst += offset; \
1037 dst_end += offset; \
1038 } \
1039 } while (0)
1040
1041#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
1042 do { \
1043 EMACS_INT offset; \
1044 \
1045 charset_map_loaded = 0; \
1046 charset = char_charset (c, charset_list, code_return); \
1047 if (charset_map_loaded \
1048 && (offset = coding_set_destination (coding))) \
1049 { \
1050 dst += offset; \
1051 dst_end += offset; \
1052 } \
1053 } while (0)
1054
1055#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
1056 do { \
1057 EMACS_INT offset; \
1058 \
1059 charset_map_loaded = 0; \
1060 result = CHAR_CHARSET_P (c, charset); \
1061 if (charset_map_loaded \
1062 && (offset = coding_set_destination (coding))) \
1063 { \
1064 dst += offset; \
1065 dst_end += offset; \
1066 } \
1067 } while (0)
1068
1029 1069
1030/* If there are at least BYTES length of room at dst, allocate memory 1070/* If there are at least BYTES length of room at dst, allocate memory
1031 for coding->destination and update dst and dst_end. We don't have 1071 for coding->destination and update dst and dst_end. We don't have
@@ -1105,10 +1145,15 @@ record_conversion_result (struct coding_system *coding,
1105 | ((p)[-1] & 0x3F)))) 1145 | ((p)[-1] & 0x3F))))
1106 1146
1107 1147
1108static void 1148/* Update coding->source from coding->src_object, and return the
1149 number of bytes by which coding->source changed. */
1150
1151static EMACS_INT
1109coding_set_source (coding) 1152coding_set_source (coding)
1110 struct coding_system *coding; 1153 struct coding_system *coding;
1111{ 1154{
1155 const unsigned char *orig = coding->source;
1156
1112 if (BUFFERP (coding->src_object)) 1157 if (BUFFERP (coding->src_object))
1113 { 1158 {
1114 struct buffer *buf = XBUFFER (coding->src_object); 1159 struct buffer *buf = XBUFFER (coding->src_object);
@@ -1126,12 +1171,19 @@ coding_set_source (coding)
1126 /* Otherwise, the source is C string and is never relocated 1171 /* Otherwise, the source is C string and is never relocated
1127 automatically. Thus we don't have to update anything. */ 1172 automatically. Thus we don't have to update anything. */
1128 ; 1173 ;
1174
1175 return coding->source - orig;
1129} 1176}
1130 1177
1131static void 1178/* Update coding->destination from coding->dst_object, and return the
1179 number of bytes by which coding->destination changed. */
1180
1181static EMACS_INT
1132coding_set_destination (coding) 1182coding_set_destination (coding)
1133 struct coding_system *coding; 1183 struct coding_system *coding;
1134{ 1184{
1185 const unsigned char *orig = coding->destination;
1186
1135 if (BUFFERP (coding->dst_object)) 1187 if (BUFFERP (coding->dst_object))
1136 { 1188 {
1137 if (coding->src_pos < 0) 1189 if (coding->src_pos < 0)
@@ -1155,6 +1207,8 @@ coding_set_destination (coding)
1155 /* Otherwise, the destination is C string and is never relocated 1207 /* Otherwise, the destination is C string and is never relocated
1156 automatically. Thus we don't have to update anything. */ 1208 automatically. Thus we don't have to update anything. */
1157 ; 1209 ;
1210
1211 return coding->destination - orig;
1158} 1212}
1159 1213
1160 1214
@@ -2778,14 +2832,19 @@ encode_coding_emacs_mule (coding)
2778 2832
2779 if (preferred_charset_id >= 0) 2833 if (preferred_charset_id >= 0)
2780 { 2834 {
2835 int result;
2836
2781 charset = CHARSET_FROM_ID (preferred_charset_id); 2837 charset = CHARSET_FROM_ID (preferred_charset_id);
2782 if (CHAR_CHARSET_P (c, charset)) 2838 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
2839 if (result)
2783 code = ENCODE_CHAR (charset, c); 2840 code = ENCODE_CHAR (charset, c);
2784 else 2841 else
2785 charset = char_charset (c, charset_list, &code); 2842 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2843 &code, charset);
2786 } 2844 }
2787 else 2845 else
2788 charset = char_charset (c, charset_list, &code); 2846 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2847 &code, charset);
2789 if (! charset) 2848 if (! charset)
2790 { 2849 {
2791 c = coding->default_char; 2850 c = coding->default_char;
@@ -2794,7 +2853,8 @@ encode_coding_emacs_mule (coding)
2794 EMIT_ONE_ASCII_BYTE (c); 2853 EMIT_ONE_ASCII_BYTE (c);
2795 continue; 2854 continue;
2796 } 2855 }
2797 charset = char_charset (c, charset_list, &code); 2856 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2857 &code, charset);
2798 } 2858 }
2799 dimension = CHARSET_DIMENSION (charset); 2859 dimension = CHARSET_DIMENSION (charset);
2800 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); 2860 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
@@ -4317,7 +4377,8 @@ decode_coding_iso_2022 (coding)
4317 4377
4318#define ENCODE_ISO_CHARACTER(charset, c) \ 4378#define ENCODE_ISO_CHARACTER(charset, c) \
4319 do { \ 4379 do { \
4320 int code = ENCODE_CHAR ((charset),(c)); \ 4380 int code; \
4381 CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \
4321 \ 4382 \
4322 if (CHARSET_DIMENSION (charset) == 1) \ 4383 if (CHARSET_DIMENSION (charset) == 1) \
4323 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ 4384 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
@@ -4441,17 +4502,20 @@ encode_invocation_designation (charset, coding, dst, p_nchars)
4441 4502
4442 4503
4443/* Produce designation sequences of charsets in the line started from 4504/* Produce designation sequences of charsets in the line started from
4444 SRC to a place pointed by DST, and return updated DST. 4505 CHARBUF to the place pointed to by DST, and return the number of
4506 produced bytes. DST should not point directly into a buffer text
4507 area, which may be relocated by a char_charset call.
4445 4508
4446 If the current block ends before any end-of-line, we may fail to 4509 If the current block ends before any end-of-line, we may fail to
4447 find all the necessary designations. */ 4510 find all the necessary designations. */
4448 4511
4449static unsigned char * 4512static int
4450encode_designation_at_bol (coding, charbuf, charbuf_end, dst) 4513encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4451 struct coding_system *coding; 4514 struct coding_system *coding;
4452 int *charbuf, *charbuf_end; 4515 int *charbuf, *charbuf_end;
4453 unsigned char *dst; 4516 unsigned char *dst;
4454{ 4517{
4518 unsigned char *orig;
4455 struct charset *charset; 4519 struct charset *charset;
4456 /* Table of charsets to be designated to each graphic register. */ 4520 /* Table of charsets to be designated to each graphic register. */
4457 int r[4]; 4521 int r[4];
@@ -4469,7 +4533,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4469 for (reg = 0; reg < 4; reg++) 4533 for (reg = 0; reg < 4; reg++)
4470 r[reg] = -1; 4534 r[reg] = -1;
4471 4535
4472 while (found < 4) 4536 while (charbuf < charbuf_end && found < 4)
4473 { 4537 {
4474 int id; 4538 int id;
4475 4539
@@ -4494,7 +4558,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4494 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); 4558 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4495 } 4559 }
4496 4560
4497 return dst; 4561 return dst - orig;
4498} 4562}
4499 4563
4500/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ 4564/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
@@ -4539,13 +4603,26 @@ encode_coding_iso_2022 (coding)
4539 4603
4540 if (bol_designation) 4604 if (bol_designation)
4541 { 4605 {
4542 unsigned char *dst_prev = dst;
4543
4544 /* We have to produce designation sequences if any now. */ 4606 /* We have to produce designation sequences if any now. */
4545 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); 4607 unsigned char desig_buf[16];
4546 bol_designation = 0; 4608 int nbytes;
4609 EMACS_INT offset;
4610
4611 charset_map_loaded = 0;
4612 nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4613 desig_buf);
4614 if (charset_map_loaded
4615 && (offset = coding_set_destination (coding)))
4616 {
4617 dst += offset;
4618 dst_end += offset;
4619 }
4620 memcpy (dst, desig_buf, nbytes);
4621 dst += nbytes;
4547 /* We are sure that designation sequences are all ASCII bytes. */ 4622 /* We are sure that designation sequences are all ASCII bytes. */
4548 produced_chars += dst - dst_prev; 4623 produced_chars += nbytes;
4624 bol_designation = 0;
4625 ASSURE_DESTINATION (safe_room);
4549 } 4626 }
4550 4627
4551 c = *charbuf++; 4628 c = *charbuf++;
@@ -4616,12 +4693,17 @@ encode_coding_iso_2022 (coding)
4616 4693
4617 if (preferred_charset_id >= 0) 4694 if (preferred_charset_id >= 0)
4618 { 4695 {
4696 int result;
4697
4619 charset = CHARSET_FROM_ID (preferred_charset_id); 4698 charset = CHARSET_FROM_ID (preferred_charset_id);
4620 if (! CHAR_CHARSET_P (c, charset)) 4699 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4621 charset = char_charset (c, charset_list, NULL); 4700 if (! result)
4701 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4702 NULL, charset);
4622 } 4703 }
4623 else 4704 else
4624 charset = char_charset (c, charset_list, NULL); 4705 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4706 NULL, charset);
4625 if (!charset) 4707 if (!charset)
4626 { 4708 {
4627 if (coding->mode & CODING_MODE_SAFE_ENCODING) 4709 if (coding->mode & CODING_MODE_SAFE_ENCODING)
@@ -4632,7 +4714,8 @@ encode_coding_iso_2022 (coding)
4632 else 4714 else
4633 { 4715 {
4634 c = coding->default_char; 4716 c = coding->default_char;
4635 charset = char_charset (c, charset_list, NULL); 4717 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4718 charset_list, NULL, charset);
4636 } 4719 }
4637 } 4720 }
4638 ENCODE_ISO_CHARACTER (charset, c); 4721 ENCODE_ISO_CHARACTER (charset, c);
@@ -5064,7 +5147,9 @@ encode_coding_sjis (coding)
5064 else 5147 else
5065 { 5148 {
5066 unsigned code; 5149 unsigned code;
5067 struct charset *charset = char_charset (c, charset_list, &code); 5150 struct charset *charset;
5151 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5152 &code, charset);
5068 5153
5069 if (!charset) 5154 if (!charset)
5070 { 5155 {
@@ -5076,7 +5161,8 @@ encode_coding_sjis (coding)
5076 else 5161 else
5077 { 5162 {
5078 c = coding->default_char; 5163 c = coding->default_char;
5079 charset = char_charset (c, charset_list, &code); 5164 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5165 charset_list, &code, charset);
5080 } 5166 }
5081 } 5167 }
5082 if (code == CHARSET_INVALID_CODE (charset)) 5168 if (code == CHARSET_INVALID_CODE (charset))
@@ -5153,7 +5239,9 @@ encode_coding_big5 (coding)
5153 else 5239 else
5154 { 5240 {
5155 unsigned code; 5241 unsigned code;
5156 struct charset *charset = char_charset (c, charset_list, &code); 5242 struct charset *charset;
5243 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5244 &code, charset);
5157 5245
5158 if (! charset) 5246 if (! charset)
5159 { 5247 {
@@ -5165,7 +5253,8 @@ encode_coding_big5 (coding)
5165 else 5253 else
5166 { 5254 {
5167 c = coding->default_char; 5255 c = coding->default_char;
5168 charset = char_charset (c, charset_list, &code); 5256 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5257 charset_list, &code, charset);
5169 } 5258 }
5170 } 5259 }
5171 if (code == CHARSET_INVALID_CODE (charset)) 5260 if (code == CHARSET_INVALID_CODE (charset))
@@ -5747,7 +5836,9 @@ encode_coding_charset (coding)
5747 } 5836 }
5748 else 5837 else
5749 { 5838 {
5750 charset = char_charset (c, charset_list, &code); 5839 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5840 &code, charset);
5841
5751 if (charset) 5842 if (charset)
5752 { 5843 {
5753 if (CHARSET_DIMENSION (charset) == 1) 5844 if (CHARSET_DIMENSION (charset) == 1)