diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 304 |
1 files changed, 224 insertions, 80 deletions
diff --git a/src/coding.c b/src/coding.c index 56202e4861d..cb81375a043 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Coding system handler (conversion, detection, etc). | 1 | /* Coding system handler (conversion, detection, etc). |
| 2 | Copyright (C) 2001-2012 Free Software Foundation, Inc. | 2 | Copyright (C) 2001-2013 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) | 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| @@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding) | |||
| 286 | #include <config.h> | 286 | #include <config.h> |
| 287 | #include <stdio.h> | 287 | #include <stdio.h> |
| 288 | 288 | ||
| 289 | #ifdef HAVE_WCHAR_H | ||
| 290 | #include <wchar.h> | ||
| 291 | #endif /* HAVE_WCHAR_H */ | ||
| 292 | |||
| 289 | #include "lisp.h" | 293 | #include "lisp.h" |
| 290 | #include "character.h" | 294 | #include "character.h" |
| 291 | #include "buffer.h" | 295 | #include "buffer.h" |
| @@ -302,6 +306,7 @@ Lisp_Object Vcoding_system_hash_table; | |||
| 302 | static Lisp_Object Qcoding_system, Qeol_type; | 306 | static Lisp_Object Qcoding_system, Qeol_type; |
| 303 | static Lisp_Object Qcoding_aliases; | 307 | static Lisp_Object Qcoding_aliases; |
| 304 | Lisp_Object Qunix, Qdos; | 308 | Lisp_Object Qunix, Qdos; |
| 309 | static Lisp_Object Qmac; | ||
| 305 | Lisp_Object Qbuffer_file_coding_system; | 310 | Lisp_Object Qbuffer_file_coding_system; |
| 306 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 311 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 307 | static Lisp_Object Qdefault_char; | 312 | static Lisp_Object Qdefault_char; |
| @@ -321,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region; | |||
| 321 | Lisp_Object Qstart_process, Qopen_network_stream; | 326 | Lisp_Object Qstart_process, Qopen_network_stream; |
| 322 | static Lisp_Object Qtarget_idx; | 327 | static Lisp_Object Qtarget_idx; |
| 323 | 328 | ||
| 324 | static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; | 329 | static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted; |
| 325 | static Lisp_Object Qinterrupted, Qinsufficient_memory; | ||
| 326 | 330 | ||
| 327 | /* If a symbol has this property, evaluate the value to define the | 331 | /* If a symbol has this property, evaluate the value to define the |
| 328 | symbol as a coding system. */ | 332 | symbol as a coding system. */ |
| @@ -819,18 +823,12 @@ record_conversion_result (struct coding_system *coding, | |||
| 819 | case CODING_RESULT_INSUFFICIENT_SRC: | 823 | case CODING_RESULT_INSUFFICIENT_SRC: |
| 820 | Vlast_code_conversion_error = Qinsufficient_source; | 824 | Vlast_code_conversion_error = Qinsufficient_source; |
| 821 | break; | 825 | break; |
| 822 | case CODING_RESULT_INCONSISTENT_EOL: | ||
| 823 | Vlast_code_conversion_error = Qinconsistent_eol; | ||
| 824 | break; | ||
| 825 | case CODING_RESULT_INVALID_SRC: | 826 | case CODING_RESULT_INVALID_SRC: |
| 826 | Vlast_code_conversion_error = Qinvalid_source; | 827 | Vlast_code_conversion_error = Qinvalid_source; |
| 827 | break; | 828 | break; |
| 828 | case CODING_RESULT_INTERRUPT: | 829 | case CODING_RESULT_INTERRUPT: |
| 829 | Vlast_code_conversion_error = Qinterrupted; | 830 | Vlast_code_conversion_error = Qinterrupted; |
| 830 | break; | 831 | break; |
| 831 | case CODING_RESULT_INSUFFICIENT_MEM: | ||
| 832 | Vlast_code_conversion_error = Qinsufficient_memory; | ||
| 833 | break; | ||
| 834 | case CODING_RESULT_INSUFFICIENT_DST: | 832 | case CODING_RESULT_INSUFFICIENT_DST: |
| 835 | /* Don't record this error in Vlast_code_conversion_error | 833 | /* Don't record this error in Vlast_code_conversion_error |
| 836 | because it happens just temporarily and is resolved when the | 834 | because it happens just temporarily and is resolved when the |
| @@ -1048,14 +1046,7 @@ coding_alloc_by_making_gap (struct coding_system *coding, | |||
| 1048 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; | 1046 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; |
| 1049 | } | 1047 | } |
| 1050 | else | 1048 | else |
| 1051 | { | 1049 | make_gap_1 (XBUFFER (coding->dst_object), bytes); |
| 1052 | Lisp_Object this_buffer; | ||
| 1053 | |||
| 1054 | this_buffer = Fcurrent_buffer (); | ||
| 1055 | set_buffer_internal (XBUFFER (coding->dst_object)); | ||
| 1056 | make_gap (bytes); | ||
| 1057 | set_buffer_internal (XBUFFER (this_buffer)); | ||
| 1058 | } | ||
| 1059 | } | 1050 | } |
| 1060 | 1051 | ||
| 1061 | 1052 | ||
| @@ -3063,20 +3054,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3063 | } | 3054 | } |
| 3064 | if (single_shifting) | 3055 | if (single_shifting) |
| 3065 | break; | 3056 | break; |
| 3066 | check_extra_latin: | 3057 | goto check_extra_latin; |
| 3067 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3068 | || NILP (AREF (Vlatin_extra_code_table, c))) | ||
| 3069 | { | ||
| 3070 | rejected = CATEGORY_MASK_ISO; | ||
| 3071 | break; | ||
| 3072 | } | ||
| 3073 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3074 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3075 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3076 | else | ||
| 3077 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3078 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3079 | break; | ||
| 3080 | 3058 | ||
| 3081 | default: | 3059 | default: |
| 3082 | if (c < 0) | 3060 | if (c < 0) |
| @@ -3127,6 +3105,20 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3127 | } | 3105 | } |
| 3128 | break; | 3106 | break; |
| 3129 | } | 3107 | } |
| 3108 | check_extra_latin: | ||
| 3109 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3110 | || NILP (AREF (Vlatin_extra_code_table, c))) | ||
| 3111 | { | ||
| 3112 | rejected = CATEGORY_MASK_ISO; | ||
| 3113 | break; | ||
| 3114 | } | ||
| 3115 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3116 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3117 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3118 | else | ||
| 3119 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3120 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3121 | break; | ||
| 3130 | } | 3122 | } |
| 3131 | } | 3123 | } |
| 3132 | detect_info->rejected |= CATEGORY_MASK_ISO; | 3124 | detect_info->rejected |= CATEGORY_MASK_ISO; |
| @@ -6079,6 +6071,93 @@ complement_process_encoding_system (Lisp_Object coding_system) | |||
| 6079 | #define EOL_SEEN_CR 2 | 6071 | #define EOL_SEEN_CR 2 |
| 6080 | #define EOL_SEEN_CRLF 4 | 6072 | #define EOL_SEEN_CRLF 4 |
| 6081 | 6073 | ||
| 6074 | |||
| 6075 | static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, int eol_seen); | ||
| 6076 | |||
| 6077 | |||
| 6078 | /* Scan the source of CODING; return true iff every source byte is ASCII. | ||
| 6079 | As a side effect, set coding->head_ascii to the number of leading ASCII | ||
| 6080 | bytes and coding->eol_seen to a combination (bitwise OR) of EOL_SEEN_LF, | ||
| 6081 | EOL_SEEN_CR, and EOL_SEEN_CRLF; that value is reliable only when | ||
| 6082 | all the source bytes are ASCII. */ | ||
| 6083 | ||
| 6084 | static bool | ||
| 6085 | detect_ascii (struct coding_system *coding) | ||
| 6086 | { | ||
| 6087 | const unsigned char *src, *end; | ||
| 6088 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6089 | int eol_seen; | ||
| 6090 | ||
| 6091 | eol_seen = (VECTORP (eol_type) ? EOL_SEEN_NONE /* vector: nothing preset, scan below */ | ||
| 6092 | : EQ (eol_type, Qunix) ? EOL_SEEN_LF | ||
| 6093 | : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF | ||
| 6094 | : EOL_SEEN_CR); /* any other non-vector value is treated as CR */ | ||
| 6095 | coding_set_source (coding); | ||
| 6096 | src = coding->source; | ||
| 6097 | end = src + coding->src_bytes; | ||
| 6098 | ||
| 6099 | if (inhibit_eol_conversion) | ||
| 6100 | { | ||
| 6101 | /* EOL conversion is inhibited: only skip the ASCII prefix, then report LF. */ | ||
| 6102 | while (src < end && !( *src & 0x80)) src++; | ||
| 6103 | eol_seen = EOL_SEEN_LF; | ||
| 6104 | adjust_coding_eol_type (coding, eol_seen); | ||
| 6105 | } | ||
| 6106 | else if (eol_seen != EOL_SEEN_NONE) | ||
| 6107 | { | ||
| 6108 | /* eol_type is not a vector, so eol_seen was preset above: only skip the ASCII prefix. */ | ||
| 6109 | while (src < end && !(*src & 0x80)) src++; | ||
| 6110 | } | ||
| 6111 | else | ||
| 6112 | { | ||
| 6113 | end--; /* Stop one byte early: the loop peeks at the byte after a CR. */ | ||
| 6114 | while (src < end) | ||
| 6115 | { | ||
| 6116 | int c = *src; | ||
| 6117 | ||
| 6118 | if (c & 0x80) /* first non-ASCII byte: stop without consuming it */ | ||
| 6119 | break; | ||
| 6120 | src++; | ||
| 6121 | if (c < 0x20) | ||
| 6122 | { | ||
| 6123 | if (c == '\r') | ||
| 6124 | { | ||
| 6125 | if (*src == '\n') | ||
| 6126 | { | ||
| 6127 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6128 | src++; /* consume the LF of the CR LF pair as well */ | ||
| 6129 | } | ||
| 6130 | else | ||
| 6131 | eol_seen |= EOL_SEEN_CR; | ||
| 6132 | } | ||
| 6133 | else if (c == '\n') | ||
| 6134 | eol_seen |= EOL_SEEN_LF; | ||
| 6135 | } | ||
| 6136 | } | ||
| 6137 | if (src > end) | ||
| 6138 | /* SRC stepped past the shortened END: the loop consumed a final | ||
| 6139 | CR LF pair (or the source is empty), so all bytes are scanned. */ | ||
| 6140 | end++; | ||
| 6141 | else if (src == end) /* exactly the last byte is still unscanned */ | ||
| 6142 | { | ||
| 6143 | end++; | ||
| 6144 | if (! (*src & 0x80)) | ||
| 6145 | { | ||
| 6146 | if (*src == '\r') | ||
| 6147 | eol_seen |= EOL_SEEN_CR; | ||
| 6148 | else if (*src == '\n') | ||
| 6149 | eol_seen |= EOL_SEEN_LF; | ||
| 6150 | src++; | ||
| 6151 | } | ||
| 6152 | } | ||
| 6153 | adjust_coding_eol_type (coding, eol_seen); | ||
| 6154 | } | ||
| 6155 | coding->head_ascii = src - coding->source; | ||
| 6156 | coding->eol_seen = eol_seen; | ||
| 6157 | return (src == end); /* true iff the scan consumed every source byte */ | ||
| 6158 | } | ||
| 6159 | |||
| 6160 | |||
| 6082 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by | 6161 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| 6083 | SOURCE is encoded. If CATEGORY is one of | 6162 | SOURCE is encoded. If CATEGORY is one of |
| 6084 | coding_category_utf_16_XXXX, assume that CR and LF are encoded by | 6163 | coding_category_utf_16_XXXX, assume that CR and LF are encoded by |
| @@ -6223,7 +6302,6 @@ detect_coding (struct coding_system *coding) | |||
| 6223 | coding_set_source (coding); | 6302 | coding_set_source (coding); |
| 6224 | 6303 | ||
| 6225 | src_end = coding->source + coding->src_bytes; | 6304 | src_end = coding->source + coding->src_bytes; |
| 6226 | coding->head_ascii = 0; | ||
| 6227 | 6305 | ||
| 6228 | /* If we have not yet decided the text encoding type, detect it | 6306 | /* If we have not yet decided the text encoding type, detect it |
| 6229 | now. */ | 6307 | now. */ |
| @@ -6233,6 +6311,8 @@ detect_coding (struct coding_system *coding) | |||
| 6233 | struct coding_detection_info detect_info; | 6311 | struct coding_detection_info detect_info; |
| 6234 | bool null_byte_found = 0, eight_bit_found = 0; | 6312 | bool null_byte_found = 0, eight_bit_found = 0; |
| 6235 | 6313 | ||
| 6314 | coding->head_ascii = 0; | ||
| 6315 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6236 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6316 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 6237 | for (src = coding->source; src < src_end; src++) | 6317 | for (src = coding->source; src < src_end; src++) |
| 6238 | { | 6318 | { |
| @@ -6271,6 +6351,26 @@ detect_coding (struct coding_system *coding) | |||
| 6271 | if (eight_bit_found) | 6351 | if (eight_bit_found) |
| 6272 | break; | 6352 | break; |
| 6273 | } | 6353 | } |
| 6354 | else if (! disable_ascii_optimization | ||
| 6355 | && ! inhibit_eol_conversion) | ||
| 6356 | { | ||
| 6357 | if (c == '\r') | ||
| 6358 | { | ||
| 6359 | if (src < src_end && src[1] == '\n') | ||
| 6360 | { | ||
| 6361 | coding->eol_seen |= EOL_SEEN_CRLF; | ||
| 6362 | src++; | ||
| 6363 | coding->head_ascii++; | ||
| 6364 | } | ||
| 6365 | else | ||
| 6366 | coding->eol_seen |= EOL_SEEN_CR; | ||
| 6367 | } | ||
| 6368 | else if (c == '\n') | ||
| 6369 | { | ||
| 6370 | coding->eol_seen |= EOL_SEEN_LF; | ||
| 6371 | } | ||
| 6372 | } | ||
| 6373 | |||
| 6274 | if (! eight_bit_found) | 6374 | if (! eight_bit_found) |
| 6275 | coding->head_ascii++; | 6375 | coding->head_ascii++; |
| 6276 | } | 6376 | } |
| @@ -6361,14 +6461,20 @@ detect_coding (struct coding_system *coding) | |||
| 6361 | coding_systems | 6461 | coding_systems |
| 6362 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); | 6462 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); |
| 6363 | detect_info.found = detect_info.rejected = 0; | 6463 | detect_info.found = detect_info.rejected = 0; |
| 6364 | coding->head_ascii = 0; | 6464 | if (detect_ascii (coding)) |
| 6365 | if (CONSP (coding_systems) | ||
| 6366 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6367 | { | 6465 | { |
| 6368 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | 6466 | setup_coding_system (XCDR (coding_systems), coding); |
| 6369 | setup_coding_system (XCAR (coding_systems), coding); | 6467 | } |
| 6370 | else | 6468 | else |
| 6371 | setup_coding_system (XCDR (coding_systems), coding); | 6469 | { |
| 6470 | if (CONSP (coding_systems) | ||
| 6471 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6472 | { | ||
| 6473 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6474 | setup_coding_system (XCAR (coding_systems), coding); | ||
| 6475 | else | ||
| 6476 | setup_coding_system (XCDR (coding_systems), coding); | ||
| 6477 | } | ||
| 6372 | } | 6478 | } |
| 6373 | } | 6479 | } |
| 6374 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) | 6480 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
| @@ -6381,6 +6487,7 @@ detect_coding (struct coding_system *coding) | |||
| 6381 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); | 6487 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); |
| 6382 | detect_info.found = detect_info.rejected = 0; | 6488 | detect_info.found = detect_info.rejected = 0; |
| 6383 | coding->head_ascii = 0; | 6489 | coding->head_ascii = 0; |
| 6490 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6384 | if (CONSP (coding_systems) | 6491 | if (CONSP (coding_systems) |
| 6385 | && detect_coding_utf_16 (coding, &detect_info)) | 6492 | && detect_coding_utf_16 (coding, &detect_info)) |
| 6386 | { | 6493 | { |
| @@ -6818,7 +6925,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6818 | 6925 | ||
| 6819 | produced = dst - (coding->destination + coding->produced); | 6926 | produced = dst - (coding->destination + coding->produced); |
| 6820 | if (BUFFERP (coding->dst_object) && produced_chars > 0) | 6927 | if (BUFFERP (coding->dst_object) && produced_chars > 0) |
| 6821 | insert_from_gap (produced_chars, produced); | 6928 | insert_from_gap (produced_chars, produced, 0); |
| 6822 | coding->produced += produced; | 6929 | coding->produced += produced; |
| 6823 | coding->produced_char += produced_chars; | 6930 | coding->produced_char += produced_chars; |
| 6824 | return carryover; | 6931 | return carryover; |
| @@ -6889,22 +6996,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | |||
| 6889 | 6996 | ||
| 6890 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ | 6997 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ |
| 6891 | do { \ | 6998 | do { \ |
| 6892 | int size = CHARBUF_SIZE; \ | 6999 | coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ |
| 6893 | \ | 7000 | coding->charbuf_size = CHARBUF_SIZE; \ |
| 6894 | coding->charbuf = NULL; \ | ||
| 6895 | while (size > 1024) \ | ||
| 6896 | { \ | ||
| 6897 | coding->charbuf = alloca (sizeof (int) * size); \ | ||
| 6898 | if (coding->charbuf) \ | ||
| 6899 | break; \ | ||
| 6900 | size >>= 1; \ | ||
| 6901 | } \ | ||
| 6902 | if (! coding->charbuf) \ | ||
| 6903 | { \ | ||
| 6904 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ | ||
| 6905 | return; \ | ||
| 6906 | } \ | ||
| 6907 | coding->charbuf_size = size; \ | ||
| 6908 | } while (0) | 7001 | } while (0) |
| 6909 | 7002 | ||
| 6910 | 7003 | ||
| @@ -6973,6 +7066,8 @@ decode_coding (struct coding_system *coding) | |||
| 6973 | int carryover; | 7066 | int carryover; |
| 6974 | int i; | 7067 | int i; |
| 6975 | 7068 | ||
| 7069 | USE_SAFE_ALLOCA; | ||
| 7070 | |||
| 6976 | if (BUFFERP (coding->src_object) | 7071 | if (BUFFERP (coding->src_object) |
| 6977 | && coding->src_pos > 0 | 7072 | && coding->src_pos > 0 |
| 6978 | && coding->src_pos < GPT | 7073 | && coding->src_pos < GPT |
| @@ -7095,6 +7190,8 @@ decode_coding (struct coding_system *coding) | |||
| 7095 | bset_undo_list (current_buffer, undo_list); | 7190 | bset_undo_list (current_buffer, undo_list); |
| 7096 | record_insert (coding->dst_pos, coding->produced_char); | 7191 | record_insert (coding->dst_pos, coding->produced_char); |
| 7097 | } | 7192 | } |
| 7193 | |||
| 7194 | SAFE_FREE (); | ||
| 7098 | } | 7195 | } |
| 7099 | 7196 | ||
| 7100 | 7197 | ||
| @@ -7378,6 +7475,8 @@ encode_coding (struct coding_system *coding) | |||
| 7378 | int max_lookup; | 7475 | int max_lookup; |
| 7379 | struct ccl_spec cclspec; | 7476 | struct ccl_spec cclspec; |
| 7380 | 7477 | ||
| 7478 | USE_SAFE_ALLOCA; | ||
| 7479 | |||
| 7381 | attrs = CODING_ID_ATTRS (coding->id); | 7480 | attrs = CODING_ID_ATTRS (coding->id); |
| 7382 | if (coding->encoder == encode_coding_raw_text) | 7481 | if (coding->encoder == encode_coding_raw_text) |
| 7383 | translation_table = Qnil, max_lookup = 0; | 7482 | translation_table = Qnil, max_lookup = 0; |
| @@ -7411,7 +7510,9 @@ encode_coding (struct coding_system *coding) | |||
| 7411 | } while (coding->consumed_char < coding->src_chars); | 7510 | } while (coding->consumed_char < coding->src_chars); |
| 7412 | 7511 | ||
| 7413 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) | 7512 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) |
| 7414 | insert_from_gap (coding->produced_char, coding->produced); | 7513 | insert_from_gap (coding->produced_char, coding->produced, 0); |
| 7514 | |||
| 7515 | SAFE_FREE (); | ||
| 7415 | } | 7516 | } |
| 7416 | 7517 | ||
| 7417 | 7518 | ||
| @@ -7505,8 +7606,6 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7505 | ptrdiff_t count = SPECPDL_INDEX (); | 7606 | ptrdiff_t count = SPECPDL_INDEX (); |
| 7506 | Lisp_Object attrs; | 7607 | Lisp_Object attrs; |
| 7507 | 7608 | ||
| 7508 | code_conversion_save (0, 0); | ||
| 7509 | |||
| 7510 | coding->src_object = Fcurrent_buffer (); | 7609 | coding->src_object = Fcurrent_buffer (); |
| 7511 | coding->src_chars = chars; | 7610 | coding->src_chars = chars; |
| 7512 | coding->src_bytes = bytes; | 7611 | coding->src_bytes = bytes; |
| @@ -7520,13 +7619,53 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7520 | 7619 | ||
| 7521 | if (CODING_REQUIRE_DETECTION (coding)) | 7620 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7522 | detect_coding (coding); | 7621 | detect_coding (coding); |
| 7622 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7623 | if (! disable_ascii_optimization) | ||
| 7624 | { | ||
| 7625 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | ||
| 7626 | && NILP (CODING_ATTR_POST_READ (attrs)) | ||
| 7627 | && NILP (get_translation_table (attrs, 0, NULL)) | ||
| 7628 | && (coding->head_ascii >= 0 /* We've already called detect_coding */ | ||
| 7629 | ? coding->head_ascii == bytes | ||
| 7630 | : detect_ascii (coding))) | ||
| 7631 | { | ||
| 7632 | if (coding->eol_seen == EOL_SEEN_CR) | ||
| 7633 | { | ||
| 7634 | unsigned char *src_end = GAP_END_ADDR; | ||
| 7635 | unsigned char *src = src_end - coding->src_bytes; | ||
| 7636 | |||
| 7637 | while (src < src_end) | ||
| 7638 | { | ||
| 7639 | if (*src++ == '\r') | ||
| 7640 | src[-1] = '\n'; | ||
| 7641 | } | ||
| 7642 | } | ||
| 7643 | else if (coding->eol_seen == EOL_SEEN_CRLF) | ||
| 7644 | { | ||
| 7645 | unsigned char *src = GAP_END_ADDR; | ||
| 7646 | unsigned char *src_beg = src - coding->src_bytes; | ||
| 7647 | unsigned char *dst = src; | ||
| 7648 | |||
| 7649 | while (src_beg < src) | ||
| 7650 | { | ||
| 7651 | *--dst = *--src; | ||
| 7652 | if (*src == '\n') | ||
| 7653 | src--; | ||
| 7654 | } | ||
| 7655 | bytes -= dst - src; | ||
| 7656 | } | ||
| 7657 | coding->produced_char = coding->produced = bytes; | ||
| 7658 | insert_from_gap (bytes, bytes, 1); | ||
| 7659 | return; | ||
| 7660 | } | ||
| 7661 | } | ||
| 7662 | code_conversion_save (0, 0); | ||
| 7523 | 7663 | ||
| 7524 | coding->mode |= CODING_MODE_LAST_BLOCK; | 7664 | coding->mode |= CODING_MODE_LAST_BLOCK; |
| 7525 | current_buffer->text->inhibit_shrinking = 1; | 7665 | current_buffer->text->inhibit_shrinking = 1; |
| 7526 | decode_coding (coding); | 7666 | decode_coding (coding); |
| 7527 | current_buffer->text->inhibit_shrinking = 0; | 7667 | current_buffer->text->inhibit_shrinking = 0; |
| 7528 | 7668 | ||
| 7529 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7530 | if (! NILP (CODING_ATTR_POST_READ (attrs))) | 7669 | if (! NILP (CODING_ATTR_POST_READ (attrs))) |
| 7531 | { | 7670 | { |
| 7532 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; | 7671 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; |
| @@ -7700,14 +7839,8 @@ decode_coding_object (struct coding_system *coding, | |||
| 7700 | set_buffer_internal (XBUFFER (coding->dst_object)); | 7839 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7701 | if (dst_bytes < coding->produced) | 7840 | if (dst_bytes < coding->produced) |
| 7702 | { | 7841 | { |
| 7842 | eassert (coding->produced > 0); | ||
| 7703 | destination = xrealloc (destination, coding->produced); | 7843 | destination = xrealloc (destination, coding->produced); |
| 7704 | if (! destination) | ||
| 7705 | { | ||
| 7706 | record_conversion_result (coding, | ||
| 7707 | CODING_RESULT_INSUFFICIENT_MEM); | ||
| 7708 | unbind_to (count, Qnil); | ||
| 7709 | return; | ||
| 7710 | } | ||
| 7711 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) | 7844 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) |
| 7712 | move_gap_both (BEGV, BEGV_BYTE); | 7845 | move_gap_both (BEGV, BEGV_BYTE); |
| 7713 | memcpy (destination, BEGV_ADDR, coding->produced); | 7846 | memcpy (destination, BEGV_ADDR, coding->produced); |
| @@ -7990,11 +8123,21 @@ from_unicode (Lisp_Object str) | |||
| 7990 | return code_convert_string_norecord (str, Qutf_16le, 0); | 8123 | return code_convert_string_norecord (str, Qutf_16le, 0); |
| 7991 | } | 8124 | } |
| 7992 | 8125 | ||
| 8126 | Lisp_Object /* Wrap the null-terminated wide string WSTR in a unibyte string and return from_unicode of it. */ | ||
| 8127 | from_unicode_buffer (const wchar_t* wstr) | ||
| 8128 | { | ||
| 8129 | return from_unicode ( | ||
| 8130 | make_unibyte_string ( | ||
| 8131 | (char*) wstr, | ||
| 8132 | /* Byte length of WSTR plus one: we get one of the two final 0 bytes for free. */ | ||
| 8133 | 1 + sizeof (wchar_t) * wcslen (wstr))); | ||
| 8134 | } | ||
| 8135 | |||
| 7993 | wchar_t * | 8136 | wchar_t * |
| 7994 | to_unicode (Lisp_Object str, Lisp_Object *buf) | 8137 | to_unicode (Lisp_Object str, Lisp_Object *buf) |
| 7995 | { | 8138 | { |
| 7996 | *buf = code_convert_string_norecord (str, Qutf_16le, 1); | 8139 | *buf = code_convert_string_norecord (str, Qutf_16le, 1); |
| 7997 | /* We need to make a another copy (in addition to the one made by | 8140 | /* We need to make another copy (in addition to the one made by |
| 7998 | code_convert_string_norecord) to ensure that the final string is | 8141 | code_convert_string_norecord) to ensure that the final string is |
| 7999 | _doubly_ zero terminated --- that is, that the string is | 8142 | _doubly_ zero terminated --- that is, that the string is |
| 8000 | terminated by two zero bytes and one utf-16le null character. | 8143 | terminated by two zero bytes and one utf-16le null character. |
| @@ -8426,9 +8569,6 @@ highest priority. */) | |||
| 8426 | ptrdiff_t from, to; | 8569 | ptrdiff_t from, to; |
| 8427 | ptrdiff_t from_byte, to_byte; | 8570 | ptrdiff_t from_byte, to_byte; |
| 8428 | 8571 | ||
| 8429 | CHECK_NUMBER_COERCE_MARKER (start); | ||
| 8430 | CHECK_NUMBER_COERCE_MARKER (end); | ||
| 8431 | |||
| 8432 | validate_region (&start, &end); | 8572 | validate_region (&start, &end); |
| 8433 | from = XINT (start), to = XINT (end); | 8573 | from = XINT (start), to = XINT (end); |
| 8434 | from_byte = CHAR_TO_BYTE (from); | 8574 | from_byte = CHAR_TO_BYTE (from); |
| @@ -8872,8 +9012,6 @@ code_convert_region (Lisp_Object start, Lisp_Object end, | |||
| 8872 | ptrdiff_t from, from_byte, to, to_byte; | 9012 | ptrdiff_t from, from_byte, to, to_byte; |
| 8873 | Lisp_Object src_object; | 9013 | Lisp_Object src_object; |
| 8874 | 9014 | ||
| 8875 | CHECK_NUMBER_COERCE_MARKER (start); | ||
| 8876 | CHECK_NUMBER_COERCE_MARKER (end); | ||
| 8877 | if (NILP (coding_system)) | 9015 | if (NILP (coding_system)) |
| 8878 | coding_system = Qno_conversion; | 9016 | coding_system = Qno_conversion; |
| 8879 | else | 9017 | else |
| @@ -9493,7 +9631,7 @@ make_subsidiaries (Lisp_Object base) | |||
| 9493 | int i; | 9631 | int i; |
| 9494 | 9632 | ||
| 9495 | memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); | 9633 | memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); |
| 9496 | subsidiaries = Fmake_vector (make_number (3), Qnil); | 9634 | subsidiaries = make_uninit_vector (3); |
| 9497 | for (i = 0; i < 3; i++) | 9635 | for (i = 0; i < 3; i++) |
| 9498 | { | 9636 | { |
| 9499 | strcpy (buf + base_name_len, suffixes[i]); | 9637 | strcpy (buf + base_name_len, suffixes[i]); |
| @@ -9793,7 +9931,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9793 | CHECK_VECTOR (initial); | 9931 | CHECK_VECTOR (initial); |
| 9794 | for (i = 0; i < 4; i++) | 9932 | for (i = 0; i < 4; i++) |
| 9795 | { | 9933 | { |
| 9796 | val = Faref (initial, make_number (i)); | 9934 | val = AREF (initial, i); |
| 9797 | if (! NILP (val)) | 9935 | if (! NILP (val)) |
| 9798 | { | 9936 | { |
| 9799 | struct charset *charset; | 9937 | struct charset *charset; |
| @@ -9998,7 +10136,8 @@ usage: (define-coding-system-internal ...) */) | |||
| 9998 | this_name = AREF (eol_type, i); | 10136 | this_name = AREF (eol_type, i); |
| 9999 | this_aliases = Fcons (this_name, Qnil); | 10137 | this_aliases = Fcons (this_name, Qnil); |
| 10000 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); | 10138 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); |
| 10001 | this_spec = Fmake_vector (make_number (3), attrs); | 10139 | this_spec = make_uninit_vector (3); |
| 10140 | ASET (this_spec, 0, attrs); | ||
| 10002 | ASET (this_spec, 1, this_aliases); | 10141 | ASET (this_spec, 1, this_aliases); |
| 10003 | ASET (this_spec, 2, this_eol_type); | 10142 | ASET (this_spec, 2, this_eol_type); |
| 10004 | Fputhash (this_name, this_spec, Vcoding_system_hash_table); | 10143 | Fputhash (this_name, this_spec, Vcoding_system_hash_table); |
| @@ -10011,7 +10150,8 @@ usage: (define-coding-system-internal ...) */) | |||
| 10011 | } | 10150 | } |
| 10012 | } | 10151 | } |
| 10013 | 10152 | ||
| 10014 | spec_vec = Fmake_vector (make_number (3), attrs); | 10153 | spec_vec = make_uninit_vector (3); |
| 10154 | ASET (spec_vec, 0, attrs); | ||
| 10015 | ASET (spec_vec, 1, aliases); | 10155 | ASET (spec_vec, 1, aliases); |
| 10016 | ASET (spec_vec, 2, eol_type); | 10156 | ASET (spec_vec, 2, eol_type); |
| 10017 | 10157 | ||
| @@ -10308,6 +10448,7 @@ syms_of_coding (void) | |||
| 10308 | DEFSYM (Qeol_type, "eol-type"); | 10448 | DEFSYM (Qeol_type, "eol-type"); |
| 10309 | DEFSYM (Qunix, "unix"); | 10449 | DEFSYM (Qunix, "unix"); |
| 10310 | DEFSYM (Qdos, "dos"); | 10450 | DEFSYM (Qdos, "dos"); |
| 10451 | DEFSYM (Qmac, "mac"); | ||
| 10311 | 10452 | ||
| 10312 | DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); | 10453 | DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); |
| 10313 | DEFSYM (Qpost_read_conversion, "post-read-conversion"); | 10454 | DEFSYM (Qpost_read_conversion, "post-read-conversion"); |
| @@ -10415,10 +10556,8 @@ syms_of_coding (void) | |||
| 10415 | intern_c_string ("coding-category-undecided")); | 10556 | intern_c_string ("coding-category-undecided")); |
| 10416 | 10557 | ||
| 10417 | DEFSYM (Qinsufficient_source, "insufficient-source"); | 10558 | DEFSYM (Qinsufficient_source, "insufficient-source"); |
| 10418 | DEFSYM (Qinconsistent_eol, "inconsistent-eol"); | ||
| 10419 | DEFSYM (Qinvalid_source, "invalid-source"); | 10559 | DEFSYM (Qinvalid_source, "invalid-source"); |
| 10420 | DEFSYM (Qinterrupted, "interrupted"); | 10560 | DEFSYM (Qinterrupted, "interrupted"); |
| 10421 | DEFSYM (Qinsufficient_memory, "insufficient-memory"); | ||
| 10422 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); | 10561 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); |
| 10423 | 10562 | ||
| 10424 | defsubr (&Scoding_system_p); | 10563 | defsubr (&Scoding_system_p); |
| @@ -10719,7 +10858,7 @@ reading if you suppress escape sequence detection. | |||
| 10719 | 10858 | ||
| 10720 | The other way to read escape sequences in a file without decoding is | 10859 | The other way to read escape sequences in a file without decoding is |
| 10721 | to explicitly specify some coding system that doesn't use ISO-2022 | 10860 | to explicitly specify some coding system that doesn't use ISO-2022 |
| 10722 | escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); | 10861 | escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */); |
| 10723 | inhibit_iso_escape_detection = 0; | 10862 | inhibit_iso_escape_detection = 0; |
| 10724 | 10863 | ||
| 10725 | DEFVAR_BOOL ("inhibit-null-byte-detection", | 10864 | DEFVAR_BOOL ("inhibit-null-byte-detection", |
| @@ -10735,6 +10874,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and | |||
| 10735 | decode text as usual. */); | 10874 | decode text as usual. */); |
| 10736 | inhibit_null_byte_detection = 0; | 10875 | inhibit_null_byte_detection = 0; |
| 10737 | 10876 | ||
| 10877 | DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, | ||
| 10878 | doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. | ||
| 10879 | Internal use only. Removed after the experimental optimizer gets stable. */); | ||
| 10880 | disable_ascii_optimization = 1; | ||
| 10881 | |||
| 10738 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, | 10882 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, |
| 10739 | doc: /* Char table for translating self-inserting characters. | 10883 | doc: /* Char table for translating self-inserting characters. |
| 10740 | This is applied to the result of input methods, not their input. | 10884 | This is applied to the result of input methods, not their input. |