about | summary | refs | log | tree | commit | diff | stats
path: root/src/coding.c
diff options
context:
space:
mode:
author Paul Eggert 2011-12-10 16:56:50 -0800
committer Paul Eggert 2011-12-10 16:56:50 -0800
commit 8f50130c565eaf0ad7c49e4ad044c3291ecdfa71 (patch)
tree c8129448cbbf387fe82667ccac02983592c688f1 /src/coding.c
parent 85a83e2e2585a1906dec5168ed96ad521b5849ed (diff)
parent 7b9d523a07395ecea505be88f45c33d73aea7038 (diff)
download emacs-8f50130c565eaf0ad7c49e4ad044c3291ecdfa71.tar.gz
emacs-8f50130c565eaf0ad7c49e4ad044c3291ecdfa71.zip
Merge from trunk.
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 177
1 files changed, 135 insertions, 42 deletions
diff --git a/src/coding.c b/src/coding.c
index b0dfc498add..74cf232cfb2 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -847,16 +847,16 @@ static int encode_coding_ccl (struct coding_system *);
847static void decode_coding_raw_text (struct coding_system *); 847static void decode_coding_raw_text (struct coding_system *);
848static int encode_coding_raw_text (struct coding_system *); 848static int encode_coding_raw_text (struct coding_system *);
849 849
850static void coding_set_source (struct coding_system *); 850static ptrdiff_t coding_set_source (struct coding_system *);
851static void coding_set_destination (struct coding_system *); 851static ptrdiff_t coding_set_destination (struct coding_system *);
852static void coding_alloc_by_realloc (struct coding_system *, ptrdiff_t); 852static void coding_alloc_by_realloc (struct coding_system *, ptrdiff_t);
853static void coding_alloc_by_making_gap (struct coding_system *, 853static void coding_alloc_by_making_gap (struct coding_system *,
854 ptrdiff_t, ptrdiff_t); 854 ptrdiff_t, ptrdiff_t);
855static unsigned char *alloc_destination (struct coding_system *, 855static unsigned char *alloc_destination (struct coding_system *,
856 ptrdiff_t, unsigned char *); 856 ptrdiff_t, unsigned char *);
857static void setup_iso_safe_charsets (Lisp_Object); 857static void setup_iso_safe_charsets (Lisp_Object);
858static unsigned char *encode_designation_at_bol (struct coding_system *, 858static int encode_designation_at_bol (struct coding_system *,
859 int *, unsigned char *); 859 int *, int *, unsigned char *);
860static int detect_eol (const unsigned char *, 860static int detect_eol (const unsigned char *,
861 ptrdiff_t, enum coding_category); 861 ptrdiff_t, enum coding_category);
862static Lisp_Object adjust_coding_eol_type (struct coding_system *, int); 862static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
@@ -915,27 +915,68 @@ record_conversion_result (struct coding_system *coding,
915 } 915 }
916} 916}
917 917
918/* This wrapper macro is used to preserve validity of pointers into 918/* These wrapper macros are used to preserve validity of pointers into
919 buffer text across calls to decode_char, which could cause 919 buffer text across calls to decode_char, encode_char, etc, which
920 relocation of buffers if it loads a charset map, because loading a 920 could cause relocation of buffers if it loads a charset map,
921 charset map allocates large structures. */ 921 because loading a charset map allocates large structures. */
922
922#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ 923#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
923 do { \ 924 do { \
925 ptrdiff_t offset; \
926 \
924 charset_map_loaded = 0; \ 927 charset_map_loaded = 0; \
925 c = DECODE_CHAR (charset, code); \ 928 c = DECODE_CHAR (charset, code); \
926 if (charset_map_loaded) \ 929 if (charset_map_loaded \
930 && (offset = coding_set_source (coding))) \
927 { \ 931 { \
928 const unsigned char *orig = coding->source; \
929 ptrdiff_t offset; \
930 \
931 coding_set_source (coding); \
932 offset = coding->source - orig; \
933 src += offset; \ 932 src += offset; \
934 src_base += offset; \ 933 src_base += offset; \
935 src_end += offset; \ 934 src_end += offset; \
936 } \ 935 } \
937 } while (0) 936 } while (0)
938 937
938#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \
939 do { \
940 ptrdiff_t offset; \
941 \
942 charset_map_loaded = 0; \
943 code = ENCODE_CHAR (charset, c); \
944 if (charset_map_loaded \
945 && (offset = coding_set_destination (coding))) \
946 { \
947 dst += offset; \
948 dst_end += offset; \
949 } \
950 } while (0)
951
952#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
953 do { \
954 ptrdiff_t offset; \
955 \
956 charset_map_loaded = 0; \
957 charset = char_charset (c, charset_list, code_return); \
958 if (charset_map_loaded \
959 && (offset = coding_set_destination (coding))) \
960 { \
961 dst += offset; \
962 dst_end += offset; \
963 } \
964 } while (0)
965
966#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
967 do { \
968 ptrdiff_t offset; \
969 \
970 charset_map_loaded = 0; \
971 result = CHAR_CHARSET_P (c, charset); \
972 if (charset_map_loaded \
973 && (offset = coding_set_destination (coding))) \
974 { \
975 dst += offset; \
976 dst_end += offset; \
977 } \
978 } while (0)
979
939 980
940/* If there are at least BYTES length of room at dst, allocate memory 981/* If there are at least BYTES length of room at dst, allocate memory
941 for coding->destination and update dst and dst_end. We don't have 982 for coding->destination and update dst and dst_end. We don't have
@@ -1015,9 +1056,14 @@ record_conversion_result (struct coding_system *coding,
1015 | ((p)[-1] & 0x3F)))) 1056 | ((p)[-1] & 0x3F))))
1016 1057
1017 1058
1018static void 1059/* Update coding->source from coding->src_object, and return how many
1060 bytes coding->source was changed. */
1061
1062static ptrdiff_t
1019coding_set_source (struct coding_system *coding) 1063coding_set_source (struct coding_system *coding)
1020{ 1064{
1065 const unsigned char *orig = coding->source;
1066
1021 if (BUFFERP (coding->src_object)) 1067 if (BUFFERP (coding->src_object))
1022 { 1068 {
1023 struct buffer *buf = XBUFFER (coding->src_object); 1069 struct buffer *buf = XBUFFER (coding->src_object);
@@ -1036,11 +1082,18 @@ coding_set_source (struct coding_system *coding)
1036 /* Otherwise, the source is C string and is never relocated 1082 /* Otherwise, the source is C string and is never relocated
1037 automatically. Thus we don't have to update anything. */ 1083 automatically. Thus we don't have to update anything. */
1038 } 1084 }
1085 return coding->source - orig;
1039} 1086}
1040 1087
1041static void 1088
1089/* Update coding->destination from coding->dst_object, and return how
1090 many bytes coding->destination was changed. */
1091
1092static ptrdiff_t
1042coding_set_destination (struct coding_system *coding) 1093coding_set_destination (struct coding_system *coding)
1043{ 1094{
1095 const unsigned char *orig = coding->destination;
1096
1044 if (BUFFERP (coding->dst_object)) 1097 if (BUFFERP (coding->dst_object))
1045 { 1098 {
1046 if (BUFFERP (coding->src_object) && coding->src_pos < 0) 1099 if (BUFFERP (coding->src_object) && coding->src_pos < 0)
@@ -1065,6 +1118,7 @@ coding_set_destination (struct coding_system *coding)
1065 /* Otherwise, the destination is C string and is never relocated 1118 /* Otherwise, the destination is C string and is never relocated
1066 automatically. Thus we don't have to update anything. */ 1119 automatically. Thus we don't have to update anything. */
1067 } 1120 }
1121 return coding->destination - orig;
1068} 1122}
1069 1123
1070 1124
@@ -2650,14 +2704,19 @@ encode_coding_emacs_mule (struct coding_system *coding)
2650 2704
2651 if (preferred_charset_id >= 0) 2705 if (preferred_charset_id >= 0)
2652 { 2706 {
2707 int result;
2708
2653 charset = CHARSET_FROM_ID (preferred_charset_id); 2709 charset = CHARSET_FROM_ID (preferred_charset_id);
2654 if (CHAR_CHARSET_P (c, charset)) 2710 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
2711 if (result)
2655 code = ENCODE_CHAR (charset, c); 2712 code = ENCODE_CHAR (charset, c);
2656 else 2713 else
2657 charset = char_charset (c, charset_list, &code); 2714 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2715 &code, charset);
2658 } 2716 }
2659 else 2717 else
2660 charset = char_charset (c, charset_list, &code); 2718 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2719 &code, charset);
2661 if (! charset) 2720 if (! charset)
2662 { 2721 {
2663 c = coding->default_char; 2722 c = coding->default_char;
@@ -2666,7 +2725,8 @@ encode_coding_emacs_mule (struct coding_system *coding)
2666 EMIT_ONE_ASCII_BYTE (c); 2725 EMIT_ONE_ASCII_BYTE (c);
2667 continue; 2726 continue;
2668 } 2727 }
2669 charset = char_charset (c, charset_list, &code); 2728 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2729 &code, charset);
2670 } 2730 }
2671 dimension = CHARSET_DIMENSION (charset); 2731 dimension = CHARSET_DIMENSION (charset);
2672 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); 2732 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
@@ -4185,7 +4245,8 @@ decode_coding_iso_2022 (struct coding_system *coding)
4185 4245
4186#define ENCODE_ISO_CHARACTER(charset, c) \ 4246#define ENCODE_ISO_CHARACTER(charset, c) \
4187 do { \ 4247 do { \
4188 unsigned code = ENCODE_CHAR ((charset), (c)); \ 4248 unsigned code; \
4249 CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \
4189 \ 4250 \
4190 if (CHARSET_DIMENSION (charset) == 1) \ 4251 if (CHARSET_DIMENSION (charset) == 1) \
4191 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ 4252 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
@@ -4283,15 +4344,19 @@ encode_invocation_designation (struct charset *charset,
4283 4344
4284 4345
4285/* Produce designation sequences of charsets in the line started from 4346/* Produce designation sequences of charsets in the line started from
4286 SRC to a place pointed by DST, and return updated DST. 4347 CHARBUF to a place pointed by DST, and return the number of
4348 produced bytes. DST should not directly point a buffer text area
4349 which may be relocated by char_charset call.
4287 4350
4288 If the current block ends before any end-of-line, we may fail to 4351 If the current block ends before any end-of-line, we may fail to
4289 find all the necessary designations. */ 4352 find all the necessary designations. */
4290 4353
4291static unsigned char * 4354static int
4292encode_designation_at_bol (struct coding_system *coding, int *charbuf, 4355encode_designation_at_bol (struct coding_system *coding,
4356 int *charbuf, int *charbuf_end,
4293 unsigned char *dst) 4357 unsigned char *dst)
4294{ 4358{
4359 unsigned char *orig = dst;
4295 struct charset *charset; 4360 struct charset *charset;
4296 /* Table of charsets to be designated to each graphic register. */ 4361 /* Table of charsets to be designated to each graphic register. */
4297 int r[4]; 4362 int r[4];
@@ -4309,7 +4374,7 @@ encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4309 for (reg = 0; reg < 4; reg++) 4374 for (reg = 0; reg < 4; reg++)
4310 r[reg] = -1; 4375 r[reg] = -1;
4311 4376
4312 while (found < 4) 4377 while (charbuf < charbuf_end && found < 4)
4313 { 4378 {
4314 int id; 4379 int id;
4315 4380
@@ -4334,7 +4399,7 @@ encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4334 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); 4399 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4335 } 4400 }
4336 4401
4337 return dst; 4402 return dst - orig;
4338} 4403}
4339 4404
4340/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ 4405/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
@@ -4378,13 +4443,26 @@ encode_coding_iso_2022 (struct coding_system *coding)
4378 4443
4379 if (bol_designation) 4444 if (bol_designation)
4380 { 4445 {
4381 unsigned char *dst_prev = dst;
4382
4383 /* We have to produce designation sequences if any now. */ 4446 /* We have to produce designation sequences if any now. */
4384 dst = encode_designation_at_bol (coding, charbuf, dst); 4447 unsigned char desig_buf[16];
4385 bol_designation = 0; 4448 int nbytes;
4449 ptrdiff_t offset;
4450
4451 charset_map_loaded = 0;
4452 nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4453 desig_buf);
4454 if (charset_map_loaded
4455 && (offset = coding_set_destination (coding)))
4456 {
4457 dst += offset;
4458 dst_end += offset;
4459 }
4460 memcpy (dst, desig_buf, nbytes);
4461 dst += nbytes;
4386 /* We are sure that designation sequences are all ASCII bytes. */ 4462 /* We are sure that designation sequences are all ASCII bytes. */
4387 produced_chars += dst - dst_prev; 4463 produced_chars += nbytes;
4464 bol_designation = 0;
4465 ASSURE_DESTINATION (safe_room);
4388 } 4466 }
4389 4467
4390 c = *charbuf++; 4468 c = *charbuf++;
@@ -4455,12 +4533,17 @@ encode_coding_iso_2022 (struct coding_system *coding)
4455 4533
4456 if (preferred_charset_id >= 0) 4534 if (preferred_charset_id >= 0)
4457 { 4535 {
4536 int result;
4537
4458 charset = CHARSET_FROM_ID (preferred_charset_id); 4538 charset = CHARSET_FROM_ID (preferred_charset_id);
4459 if (! CHAR_CHARSET_P (c, charset)) 4539 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4460 charset = char_charset (c, charset_list, NULL); 4540 if (! result)
4541 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4542 NULL, charset);
4461 } 4543 }
4462 else 4544 else
4463 charset = char_charset (c, charset_list, NULL); 4545 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4546 NULL, charset);
4464 if (!charset) 4547 if (!charset)
4465 { 4548 {
4466 if (coding->mode & CODING_MODE_SAFE_ENCODING) 4549 if (coding->mode & CODING_MODE_SAFE_ENCODING)
@@ -4471,7 +4554,8 @@ encode_coding_iso_2022 (struct coding_system *coding)
4471 else 4554 else
4472 { 4555 {
4473 c = coding->default_char; 4556 c = coding->default_char;
4474 charset = char_charset (c, charset_list, NULL); 4557 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4558 charset_list, NULL, charset);
4475 } 4559 }
4476 } 4560 }
4477 ENCODE_ISO_CHARACTER (charset, c); 4561 ENCODE_ISO_CHARACTER (charset, c);
@@ -4897,7 +4981,9 @@ encode_coding_sjis (struct coding_system *coding)
4897 else 4981 else
4898 { 4982 {
4899 unsigned code; 4983 unsigned code;
4900 struct charset *charset = char_charset (c, charset_list, &code); 4984 struct charset *charset;
4985 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4986 &code, charset);
4901 4987
4902 if (!charset) 4988 if (!charset)
4903 { 4989 {
@@ -4909,7 +4995,8 @@ encode_coding_sjis (struct coding_system *coding)
4909 else 4995 else
4910 { 4996 {
4911 c = coding->default_char; 4997 c = coding->default_char;
4912 charset = char_charset (c, charset_list, &code); 4998 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4999 charset_list, &code, charset);
4913 } 5000 }
4914 } 5001 }
4915 if (code == CHARSET_INVALID_CODE (charset)) 5002 if (code == CHARSET_INVALID_CODE (charset))
@@ -4984,7 +5071,9 @@ encode_coding_big5 (struct coding_system *coding)
4984 else 5071 else
4985 { 5072 {
4986 unsigned code; 5073 unsigned code;
4987 struct charset *charset = char_charset (c, charset_list, &code); 5074 struct charset *charset;
5075 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5076 &code, charset);
4988 5077
4989 if (! charset) 5078 if (! charset)
4990 { 5079 {
@@ -4996,7 +5085,8 @@ encode_coding_big5 (struct coding_system *coding)
4996 else 5085 else
4997 { 5086 {
4998 c = coding->default_char; 5087 c = coding->default_char;
4999 charset = char_charset (c, charset_list, &code); 5088 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5089 charset_list, &code, charset);
5000 } 5090 }
5001 } 5091 }
5002 if (code == CHARSET_INVALID_CODE (charset)) 5092 if (code == CHARSET_INVALID_CODE (charset))
@@ -5154,7 +5244,7 @@ encode_coding_ccl (struct coding_system *coding)
5154 && coding->mode & CODING_MODE_LAST_BLOCK) 5244 && coding->mode & CODING_MODE_LAST_BLOCK)
5155 ccl->last_block = 1; 5245 ccl->last_block = 1;
5156 5246
5157 while (charbuf < charbuf_end) 5247 do
5158 { 5248 {
5159 ccl_driver (ccl, charbuf, destination_charbuf, 5249 ccl_driver (ccl, charbuf, destination_charbuf,
5160 charbuf_end - charbuf, 1024, charset_list); 5250 charbuf_end - charbuf, 1024, charset_list);
@@ -5176,6 +5266,7 @@ encode_coding_ccl (struct coding_system *coding)
5176 || ccl->status == CCL_STAT_INVALID_CMD) 5266 || ccl->status == CCL_STAT_INVALID_CMD)
5177 break; 5267 break;
5178 } 5268 }
5269 while (charbuf < charbuf_end);
5179 5270
5180 switch (ccl->status) 5271 switch (ccl->status)
5181 { 5272 {
@@ -5572,7 +5663,9 @@ encode_coding_charset (struct coding_system *coding)
5572 } 5663 }
5573 else 5664 else
5574 { 5665 {
5575 charset = char_charset (c, charset_list, &code); 5666 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5667 &code, charset);
5668
5576 if (charset) 5669 if (charset)
5577 { 5670 {
5578 if (CHARSET_DIMENSION (charset) == 1) 5671 if (CHARSET_DIMENSION (charset) == 1)
@@ -9207,7 +9300,7 @@ frame's terminal device. */)
9207 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1)); 9300 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9208 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id); 9301 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9209 9302
9210 /* For backward compatibility, return nil if it is `undecided'. */ 9303 /* For backward compatibility, return nil if it is `undecided'. */
9211 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil); 9304 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9212} 9305}
9213 9306