diff options
| author | Stephen Berman | 2013-06-14 22:07:55 +0200 |
|---|---|---|
| committer | Stephen Berman | 2013-06-14 22:07:55 +0200 |
| commit | bd358779861f265a7acff31ead40172735af693e (patch) | |
| tree | 345217a9889dbd29b09bdc80a94265c17719d41f /src/coding.c | |
| parent | 2a97b47f0878cbda86cb6ba0e7e744924810b70e (diff) | |
| parent | f7394b12358ae453a0c8b85fc307afc1b740010d (diff) | |
| download | emacs-bd358779861f265a7acff31ead40172735af693e.tar.gz emacs-bd358779861f265a7acff31ead40172735af693e.zip | |
Merge from trunk.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 724 |
1 files changed, 549 insertions, 175 deletions
diff --git a/src/coding.c b/src/coding.c index 94a2d9fea80..42fd81b6322 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Coding system handler (conversion, detection, etc). | 1 | /* Coding system handler (conversion, detection, etc). |
| 2 | Copyright (C) 2001-2012 Free Software Foundation, Inc. | 2 | Copyright (C) 2001-2013 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) | 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| @@ -285,7 +285,10 @@ encode_coding_XXX (struct coding_system *coding) | |||
| 285 | 285 | ||
| 286 | #include <config.h> | 286 | #include <config.h> |
| 287 | #include <stdio.h> | 287 | #include <stdio.h> |
| 288 | #include <setjmp.h> | 288 | |
| 289 | #ifdef HAVE_WCHAR_H | ||
| 290 | #include <wchar.h> | ||
| 291 | #endif /* HAVE_WCHAR_H */ | ||
| 289 | 292 | ||
| 290 | #include "lisp.h" | 293 | #include "lisp.h" |
| 291 | #include "character.h" | 294 | #include "character.h" |
| @@ -303,6 +306,7 @@ Lisp_Object Vcoding_system_hash_table; | |||
| 303 | static Lisp_Object Qcoding_system, Qeol_type; | 306 | static Lisp_Object Qcoding_system, Qeol_type; |
| 304 | static Lisp_Object Qcoding_aliases; | 307 | static Lisp_Object Qcoding_aliases; |
| 305 | Lisp_Object Qunix, Qdos; | 308 | Lisp_Object Qunix, Qdos; |
| 309 | static Lisp_Object Qmac; | ||
| 306 | Lisp_Object Qbuffer_file_coding_system; | 310 | Lisp_Object Qbuffer_file_coding_system; |
| 307 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 311 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 308 | static Lisp_Object Qdefault_char; | 312 | static Lisp_Object Qdefault_char; |
| @@ -322,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region; | |||
| 322 | Lisp_Object Qstart_process, Qopen_network_stream; | 326 | Lisp_Object Qstart_process, Qopen_network_stream; |
| 323 | static Lisp_Object Qtarget_idx; | 327 | static Lisp_Object Qtarget_idx; |
| 324 | 328 | ||
| 325 | static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; | 329 | static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted; |
| 326 | static Lisp_Object Qinterrupted, Qinsufficient_memory; | ||
| 327 | 330 | ||
| 328 | /* If a symbol has this property, evaluate the value to define the | 331 | /* If a symbol has this property, evaluate the value to define the |
| 329 | symbol as a coding system. */ | 332 | symbol as a coding system. */ |
| @@ -344,6 +347,10 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error; | |||
| 344 | Lisp_Object Qemacs_mule, Qraw_text; | 347 | Lisp_Object Qemacs_mule, Qraw_text; |
| 345 | Lisp_Object Qutf_8_emacs; | 348 | Lisp_Object Qutf_8_emacs; |
| 346 | 349 | ||
| 350 | #if defined (WINDOWSNT) || defined (CYGWIN) | ||
| 351 | static Lisp_Object Qutf_16le; | ||
| 352 | #endif | ||
| 353 | |||
| 347 | /* Coding-systems are handed between Emacs Lisp programs and C internal | 354 | /* Coding-systems are handed between Emacs Lisp programs and C internal |
| 348 | routines by the following three variables. */ | 355 | routines by the following three variables. */ |
| 349 | /* Coding system to be used to encode text for terminal display when | 356 | /* Coding system to be used to encode text for terminal display when |
| @@ -416,7 +423,7 @@ enum iso_code_class_type | |||
| 416 | ISO_shift_out, /* ISO_CODE_SO (0x0E) */ | 423 | ISO_shift_out, /* ISO_CODE_SO (0x0E) */ |
| 417 | ISO_shift_in, /* ISO_CODE_SI (0x0F) */ | 424 | ISO_shift_in, /* ISO_CODE_SI (0x0F) */ |
| 418 | ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ | 425 | ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ |
| 419 | ISO_escape, /* ISO_CODE_SO (0x1B) */ | 426 | ISO_escape, /* ISO_CODE_ESC (0x1B) */ |
| 420 | ISO_control_1, /* Control codes in the range | 427 | ISO_control_1, /* Control codes in the range |
| 421 | 0x80..0x9F, except for the | 428 | 0x80..0x9F, except for the |
| 422 | following 3 codes. */ | 429 | following 3 codes. */ |
| @@ -816,18 +823,12 @@ record_conversion_result (struct coding_system *coding, | |||
| 816 | case CODING_RESULT_INSUFFICIENT_SRC: | 823 | case CODING_RESULT_INSUFFICIENT_SRC: |
| 817 | Vlast_code_conversion_error = Qinsufficient_source; | 824 | Vlast_code_conversion_error = Qinsufficient_source; |
| 818 | break; | 825 | break; |
| 819 | case CODING_RESULT_INCONSISTENT_EOL: | ||
| 820 | Vlast_code_conversion_error = Qinconsistent_eol; | ||
| 821 | break; | ||
| 822 | case CODING_RESULT_INVALID_SRC: | 826 | case CODING_RESULT_INVALID_SRC: |
| 823 | Vlast_code_conversion_error = Qinvalid_source; | 827 | Vlast_code_conversion_error = Qinvalid_source; |
| 824 | break; | 828 | break; |
| 825 | case CODING_RESULT_INTERRUPT: | 829 | case CODING_RESULT_INTERRUPT: |
| 826 | Vlast_code_conversion_error = Qinterrupted; | 830 | Vlast_code_conversion_error = Qinterrupted; |
| 827 | break; | 831 | break; |
| 828 | case CODING_RESULT_INSUFFICIENT_MEM: | ||
| 829 | Vlast_code_conversion_error = Qinsufficient_memory; | ||
| 830 | break; | ||
| 831 | case CODING_RESULT_INSUFFICIENT_DST: | 832 | case CODING_RESULT_INSUFFICIENT_DST: |
| 832 | /* Don't record this error in Vlast_code_conversion_error | 833 | /* Don't record this error in Vlast_code_conversion_error |
| 833 | because it happens just temporarily and is resolved when the | 834 | because it happens just temporarily and is resolved when the |
| @@ -921,65 +922,18 @@ record_conversion_result (struct coding_system *coding, | |||
| 921 | 922 | ||
| 922 | 923 | ||
| 923 | /* Store multibyte form of the character C in P, and advance P to the | 924 | /* Store multibyte form of the character C in P, and advance P to the |
| 924 | end of the multibyte form. This is like CHAR_STRING_ADVANCE but it | 925 | end of the multibyte form. This used to be like CHAR_STRING_ADVANCE |
| 925 | never calls MAYBE_UNIFY_CHAR. */ | 926 | without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call |
| 926 | 927 | MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE. */ | |
| 927 | #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \ | ||
| 928 | do { \ | ||
| 929 | if ((c) <= MAX_1_BYTE_CHAR) \ | ||
| 930 | *(p)++ = (c); \ | ||
| 931 | else if ((c) <= MAX_2_BYTE_CHAR) \ | ||
| 932 | *(p)++ = (0xC0 | ((c) >> 6)), \ | ||
| 933 | *(p)++ = (0x80 | ((c) & 0x3F)); \ | ||
| 934 | else if ((c) <= MAX_3_BYTE_CHAR) \ | ||
| 935 | *(p)++ = (0xE0 | ((c) >> 12)), \ | ||
| 936 | *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \ | ||
| 937 | *(p)++ = (0x80 | ((c) & 0x3F)); \ | ||
| 938 | else if ((c) <= MAX_4_BYTE_CHAR) \ | ||
| 939 | *(p)++ = (0xF0 | (c >> 18)), \ | ||
| 940 | *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | ||
| 941 | *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | ||
| 942 | *(p)++ = (0x80 | (c & 0x3F)); \ | ||
| 943 | else if ((c) <= MAX_5_BYTE_CHAR) \ | ||
| 944 | *(p)++ = 0xF8, \ | ||
| 945 | *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \ | ||
| 946 | *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | ||
| 947 | *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | ||
| 948 | *(p)++ = (0x80 | (c & 0x3F)); \ | ||
| 949 | else \ | ||
| 950 | (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \ | ||
| 951 | } while (0) | ||
| 952 | 928 | ||
| 929 | #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) CHAR_STRING_ADVANCE(c, p) | ||
| 953 | 930 | ||
| 954 | /* Return the character code of character whose multibyte form is at | 931 | /* Return the character code of character whose multibyte form is at |
| 955 | P, and advance P to the end of the multibyte form. This is like | 932 | P, and advance P to the end of the multibyte form. This used to be |
| 956 | STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ | 933 | like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but |
| 957 | 934 | nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR. */ | |
| 958 | #define STRING_CHAR_ADVANCE_NO_UNIFY(p) \ | ||
| 959 | (!((p)[0] & 0x80) \ | ||
| 960 | ? *(p)++ \ | ||
| 961 | : ! ((p)[0] & 0x20) \ | ||
| 962 | ? ((p) += 2, \ | ||
| 963 | ((((p)[-2] & 0x1F) << 6) \ | ||
| 964 | | ((p)[-1] & 0x3F) \ | ||
| 965 | | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ | ||
| 966 | : ! ((p)[0] & 0x10) \ | ||
| 967 | ? ((p) += 3, \ | ||
| 968 | ((((p)[-3] & 0x0F) << 12) \ | ||
| 969 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 970 | | ((p)[-1] & 0x3F))) \ | ||
| 971 | : ! ((p)[0] & 0x08) \ | ||
| 972 | ? ((p) += 4, \ | ||
| 973 | ((((p)[-4] & 0xF) << 18) \ | ||
| 974 | | (((p)[-3] & 0x3F) << 12) \ | ||
| 975 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 976 | | ((p)[-1] & 0x3F))) \ | ||
| 977 | : ((p) += 5, \ | ||
| 978 | ((((p)[-4] & 0x3F) << 18) \ | ||
| 979 | | (((p)[-3] & 0x3F) << 12) \ | ||
| 980 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 981 | | ((p)[-1] & 0x3F)))) | ||
| 982 | 935 | ||
| 936 | #define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p) | ||
| 983 | 937 | ||
| 984 | /* Set coding->source from coding->src_object. */ | 938 | /* Set coding->source from coding->src_object. */ |
| 985 | 939 | ||
| @@ -1092,14 +1046,7 @@ coding_alloc_by_making_gap (struct coding_system *coding, | |||
| 1092 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; | 1046 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; |
| 1093 | } | 1047 | } |
| 1094 | else | 1048 | else |
| 1095 | { | 1049 | make_gap_1 (XBUFFER (coding->dst_object), bytes); |
| 1096 | Lisp_Object this_buffer; | ||
| 1097 | |||
| 1098 | this_buffer = Fcurrent_buffer (); | ||
| 1099 | set_buffer_internal (XBUFFER (coding->dst_object)); | ||
| 1100 | make_gap (bytes); | ||
| 1101 | set_buffer_internal (XBUFFER (this_buffer)); | ||
| 1102 | } | ||
| 1103 | } | 1050 | } |
| 1104 | 1051 | ||
| 1105 | 1052 | ||
| @@ -1178,6 +1125,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1178 | *buf++ = id; \ | 1125 | *buf++ = id; \ |
| 1179 | } while (0) | 1126 | } while (0) |
| 1180 | 1127 | ||
| 1128 | |||
| 1129 | /* Bitmasks for coding->eol_seen. */ | ||
| 1130 | |||
| 1131 | #define EOL_SEEN_NONE 0 | ||
| 1132 | #define EOL_SEEN_LF 1 | ||
| 1133 | #define EOL_SEEN_CR 2 | ||
| 1134 | #define EOL_SEEN_CRLF 4 | ||
| 1135 | |||
| 1181 | 1136 | ||
| 1182 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ | 1137 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ |
| 1183 | 1138 | ||
| @@ -1200,6 +1155,9 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1200 | #define UTF_8_BOM_2 0xBB | 1155 | #define UTF_8_BOM_2 0xBB |
| 1201 | #define UTF_8_BOM_3 0xBF | 1156 | #define UTF_8_BOM_3 0xBF |
| 1202 | 1157 | ||
| 1158 | /* Unlike the other detect_coding_XXX functions, this one counts the | ||
| 1159 | number of characters and checks the EOL format. */ | ||
| 1160 | |||
| 1203 | static bool | 1161 | static bool |
| 1204 | detect_coding_utf_8 (struct coding_system *coding, | 1162 | detect_coding_utf_8 (struct coding_system *coding, |
| 1205 | struct coding_detection_info *detect_info) | 1163 | struct coding_detection_info *detect_info) |
| @@ -1209,11 +1167,23 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1209 | bool multibytep = coding->src_multibyte; | 1167 | bool multibytep = coding->src_multibyte; |
| 1210 | ptrdiff_t consumed_chars = 0; | 1168 | ptrdiff_t consumed_chars = 0; |
| 1211 | bool bom_found = 0; | 1169 | bool bom_found = 0; |
| 1212 | bool found = 0; | 1170 | int nchars = coding->head_ascii; |
| 1171 | int eol_seen = coding->eol_seen; | ||
| 1213 | 1172 | ||
| 1214 | detect_info->checked |= CATEGORY_MASK_UTF_8; | 1173 | detect_info->checked |= CATEGORY_MASK_UTF_8; |
| 1215 | /* A coding system of this category is always ASCII compatible. */ | 1174 | /* A coding system of this category is always ASCII compatible. */ |
| 1216 | src += coding->head_ascii; | 1175 | src += nchars; |
| 1176 | |||
| 1177 | if (src == coding->source /* BOM should be at the head. */ | ||
| 1178 | && src + 3 < src_end /* BOM is 3-byte long. */ | ||
| 1179 | && src[0] == UTF_8_BOM_1 | ||
| 1180 | && src[1] == UTF_8_BOM_2 | ||
| 1181 | && src[2] == UTF_8_BOM_3) | ||
| 1182 | { | ||
| 1183 | bom_found = 1; | ||
| 1184 | src += 3; | ||
| 1185 | nchars++; | ||
| 1186 | } | ||
| 1217 | 1187 | ||
| 1218 | while (1) | 1188 | while (1) |
| 1219 | { | 1189 | { |
| @@ -1222,13 +1192,29 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1222 | src_base = src; | 1192 | src_base = src; |
| 1223 | ONE_MORE_BYTE (c); | 1193 | ONE_MORE_BYTE (c); |
| 1224 | if (c < 0 || UTF_8_1_OCTET_P (c)) | 1194 | if (c < 0 || UTF_8_1_OCTET_P (c)) |
| 1225 | continue; | 1195 | { |
| 1196 | nchars++; | ||
| 1197 | if (c == '\r') | ||
| 1198 | { | ||
| 1199 | if (src < src_end && *src == '\n') | ||
| 1200 | { | ||
| 1201 | eol_seen |= EOL_SEEN_CRLF; | ||
| 1202 | src++; | ||
| 1203 | nchars++; | ||
| 1204 | } | ||
| 1205 | else | ||
| 1206 | eol_seen |= EOL_SEEN_CR; | ||
| 1207 | } | ||
| 1208 | else if (c == '\n') | ||
| 1209 | eol_seen |= EOL_SEEN_LF; | ||
| 1210 | continue; | ||
| 1211 | } | ||
| 1226 | ONE_MORE_BYTE (c1); | 1212 | ONE_MORE_BYTE (c1); |
| 1227 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) | 1213 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) |
| 1228 | break; | 1214 | break; |
| 1229 | if (UTF_8_2_OCTET_LEADING_P (c)) | 1215 | if (UTF_8_2_OCTET_LEADING_P (c)) |
| 1230 | { | 1216 | { |
| 1231 | found = 1; | 1217 | nchars++; |
| 1232 | continue; | 1218 | continue; |
| 1233 | } | 1219 | } |
| 1234 | ONE_MORE_BYTE (c2); | 1220 | ONE_MORE_BYTE (c2); |
| @@ -1236,10 +1222,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1236 | break; | 1222 | break; |
| 1237 | if (UTF_8_3_OCTET_LEADING_P (c)) | 1223 | if (UTF_8_3_OCTET_LEADING_P (c)) |
| 1238 | { | 1224 | { |
| 1239 | found = 1; | 1225 | nchars++; |
| 1240 | if (src_base == coding->source | ||
| 1241 | && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3) | ||
| 1242 | bom_found = 1; | ||
| 1243 | continue; | 1226 | continue; |
| 1244 | } | 1227 | } |
| 1245 | ONE_MORE_BYTE (c3); | 1228 | ONE_MORE_BYTE (c3); |
| @@ -1247,7 +1230,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1247 | break; | 1230 | break; |
| 1248 | if (UTF_8_4_OCTET_LEADING_P (c)) | 1231 | if (UTF_8_4_OCTET_LEADING_P (c)) |
| 1249 | { | 1232 | { |
| 1250 | found = 1; | 1233 | nchars++; |
| 1251 | continue; | 1234 | continue; |
| 1252 | } | 1235 | } |
| 1253 | ONE_MORE_BYTE (c4); | 1236 | ONE_MORE_BYTE (c4); |
| @@ -1255,7 +1238,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1255 | break; | 1238 | break; |
| 1256 | if (UTF_8_5_OCTET_LEADING_P (c)) | 1239 | if (UTF_8_5_OCTET_LEADING_P (c)) |
| 1257 | { | 1240 | { |
| 1258 | found = 1; | 1241 | nchars++; |
| 1259 | continue; | 1242 | continue; |
| 1260 | } | 1243 | } |
| 1261 | break; | 1244 | break; |
| @@ -1272,14 +1255,17 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1272 | if (bom_found) | 1255 | if (bom_found) |
| 1273 | { | 1256 | { |
| 1274 | /* The first character 0xFEFF doesn't necessarily mean a BOM. */ | 1257 | /* The first character 0xFEFF doesn't necessarily mean a BOM. */ |
| 1275 | detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; | 1258 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; |
| 1276 | } | 1259 | } |
| 1277 | else | 1260 | else |
| 1278 | { | 1261 | { |
| 1279 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; | 1262 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; |
| 1280 | if (found) | 1263 | if (nchars < src_end - coding->source) |
| 1281 | detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG; | 1264 | /* The found characters are less than source bytes, which |
| 1265 | means that we found a valid non-ASCII characters. */ | ||
| 1266 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; | ||
| 1282 | } | 1267 | } |
| 1268 | coding->detected_utf8_chars = nchars; | ||
| 1283 | return 1; | 1269 | return 1; |
| 1284 | } | 1270 | } |
| 1285 | 1271 | ||
| @@ -3107,20 +3093,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3107 | } | 3093 | } |
| 3108 | if (single_shifting) | 3094 | if (single_shifting) |
| 3109 | break; | 3095 | break; |
| 3110 | check_extra_latin: | 3096 | goto check_extra_latin; |
| 3111 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3112 | || NILP (AREF (Vlatin_extra_code_table, c))) | ||
| 3113 | { | ||
| 3114 | rejected = CATEGORY_MASK_ISO; | ||
| 3115 | break; | ||
| 3116 | } | ||
| 3117 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3118 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3119 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3120 | else | ||
| 3121 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3122 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3123 | break; | ||
| 3124 | 3097 | ||
| 3125 | default: | 3098 | default: |
| 3126 | if (c < 0) | 3099 | if (c < 0) |
| @@ -3171,6 +3144,20 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3171 | } | 3144 | } |
| 3172 | break; | 3145 | break; |
| 3173 | } | 3146 | } |
| 3147 | check_extra_latin: | ||
| 3148 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3149 | || NILP (AREF (Vlatin_extra_code_table, c))) | ||
| 3150 | { | ||
| 3151 | rejected = CATEGORY_MASK_ISO; | ||
| 3152 | break; | ||
| 3153 | } | ||
| 3154 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3155 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3156 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3157 | else | ||
| 3158 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3159 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3160 | break; | ||
| 3174 | } | 3161 | } |
| 3175 | } | 3162 | } |
| 3176 | detect_info->rejected |= CATEGORY_MASK_ISO; | 3163 | detect_info->rejected |= CATEGORY_MASK_ISO; |
| @@ -3939,6 +3926,14 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3939 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 3926 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 3940 | char_offset++; | 3927 | char_offset++; |
| 3941 | coding->errors++; | 3928 | coding->errors++; |
| 3929 | /* Reset the invocation and designation status to the safest | ||
| 3930 | one; i.e. designate ASCII to the graphic register 0, and | ||
| 3931 | invoke that register to the graphic plane 0. This typically | ||
| 3932 | helps the case that a designation sequence for ASCII "ESC ( | ||
| 3933 | B" is somehow broken (e.g. broken by a newline). */ | ||
| 3934 | CODING_ISO_INVOCATION (coding, 0) = 0; | ||
| 3935 | CODING_ISO_DESIGNATION (coding, 0) = charset_ascii; | ||
| 3936 | charset_id_0 = charset_ascii; | ||
| 3942 | continue; | 3937 | continue; |
| 3943 | 3938 | ||
| 3944 | break_loop: | 3939 | break_loop: |
| @@ -5107,6 +5102,7 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5107 | while (1) | 5102 | while (1) |
| 5108 | { | 5103 | { |
| 5109 | const unsigned char *p = src; | 5104 | const unsigned char *p = src; |
| 5105 | ptrdiff_t offset; | ||
| 5110 | int i = 0; | 5106 | int i = 0; |
| 5111 | 5107 | ||
| 5112 | if (multibytep) | 5108 | if (multibytep) |
| @@ -5124,8 +5120,17 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5124 | 5120 | ||
| 5125 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) | 5121 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) |
| 5126 | ccl->last_block = 1; | 5122 | ccl->last_block = 1; |
| 5123 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ | ||
| 5124 | charset_map_loaded = 0; | ||
| 5127 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, | 5125 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, |
| 5128 | charset_list); | 5126 | charset_list); |
| 5127 | if (charset_map_loaded | ||
| 5128 | && (offset = coding_change_source (coding))) | ||
| 5129 | { | ||
| 5130 | p += offset; | ||
| 5131 | src += offset; | ||
| 5132 | src_end += offset; | ||
| 5133 | } | ||
| 5129 | charbuf += ccl->produced; | 5134 | charbuf += ccl->produced; |
| 5130 | if (multibytep) | 5135 | if (multibytep) |
| 5131 | src += source_byteidx[ccl->consumed]; | 5136 | src += source_byteidx[ccl->consumed]; |
| @@ -5178,8 +5183,15 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5178 | 5183 | ||
| 5179 | do | 5184 | do |
| 5180 | { | 5185 | { |
| 5186 | ptrdiff_t offset; | ||
| 5187 | |||
| 5188 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ | ||
| 5189 | charset_map_loaded = 0; | ||
| 5181 | ccl_driver (ccl, charbuf, destination_charbuf, | 5190 | ccl_driver (ccl, charbuf, destination_charbuf, |
| 5182 | charbuf_end - charbuf, 1024, charset_list); | 5191 | charbuf_end - charbuf, 1024, charset_list); |
| 5192 | if (charset_map_loaded | ||
| 5193 | && (offset = coding_change_destination (coding))) | ||
| 5194 | dst += offset; | ||
| 5183 | if (multibytep) | 5195 | if (multibytep) |
| 5184 | { | 5196 | { |
| 5185 | ASSURE_DESTINATION (ccl->produced * 2); | 5197 | ASSURE_DESTINATION (ccl->produced * 2); |
| @@ -5649,7 +5661,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5649 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); | 5661 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); |
| 5650 | 5662 | ||
| 5651 | coding->mode = 0; | 5663 | coding->mode = 0; |
| 5652 | coding->head_ascii = -1; | ||
| 5653 | if (VECTORP (eol_type)) | 5664 | if (VECTORP (eol_type)) |
| 5654 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK | 5665 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK |
| 5655 | | CODING_REQUIRE_DETECTION_MASK); | 5666 | | CODING_REQUIRE_DETECTION_MASK); |
| @@ -6101,10 +6112,181 @@ complement_process_encoding_system (Lisp_Object coding_system) | |||
| 6101 | 6112 | ||
| 6102 | */ | 6113 | */ |
| 6103 | 6114 | ||
| 6104 | #define EOL_SEEN_NONE 0 | 6115 | static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, |
| 6105 | #define EOL_SEEN_LF 1 | 6116 | int eol_seen); |
| 6106 | #define EOL_SEEN_CR 2 | 6117 | |
| 6107 | #define EOL_SEEN_CRLF 4 | 6118 | |
| 6119 | /* Return the number of ASCII characters at the head of the source. | ||
| 6120 | By side effects, set coding->head_ascii and update | ||
| 6121 | coding->eol_seen. The value of coding->eol_seen is "logical or" of | ||
| 6122 | EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is | ||
| 6123 | reliable only when all the source bytes are ASCII. */ | ||
| 6124 | |||
| 6125 | static int | ||
| 6126 | check_ascii (struct coding_system *coding) | ||
| 6127 | { | ||
| 6128 | const unsigned char *src, *end; | ||
| 6129 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6130 | int eol_seen = coding->eol_seen; | ||
| 6131 | |||
| 6132 | coding_set_source (coding); | ||
| 6133 | src = coding->source; | ||
| 6134 | end = src + coding->src_bytes; | ||
| 6135 | |||
| 6136 | if (inhibit_eol_conversion | ||
| 6137 | || SYMBOLP (eol_type)) | ||
| 6138 | { | ||
| 6139 | /* We don't have to check EOL format. */ | ||
| 6140 | while (src < end && !( *src & 0x80)) | ||
| 6141 | { | ||
| 6142 | if (*src++ == '\n') | ||
| 6143 | eol_seen |= EOL_SEEN_LF; | ||
| 6144 | } | ||
| 6145 | } | ||
| 6146 | else | ||
| 6147 | { | ||
| 6148 | end--; /* We look ahead one byte for "CR LF". */ | ||
| 6149 | while (src < end) | ||
| 6150 | { | ||
| 6151 | int c = *src; | ||
| 6152 | |||
| 6153 | if (c & 0x80) | ||
| 6154 | break; | ||
| 6155 | src++; | ||
| 6156 | if (c == '\r') | ||
| 6157 | { | ||
| 6158 | if (*src == '\n') | ||
| 6159 | { | ||
| 6160 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6161 | src++; | ||
| 6162 | } | ||
| 6163 | else | ||
| 6164 | eol_seen |= EOL_SEEN_CR; | ||
| 6165 | } | ||
| 6166 | else if (c == '\n') | ||
| 6167 | eol_seen |= EOL_SEEN_LF; | ||
| 6168 | } | ||
| 6169 | if (src == end) | ||
| 6170 | { | ||
| 6171 | int c = *src; | ||
| 6172 | |||
| 6173 | /* All bytes but the last one C are ASCII. */ | ||
| 6174 | if (! (c & 0x80)) | ||
| 6175 | { | ||
| 6176 | if (c == '\r') | ||
| 6177 | eol_seen |= EOL_SEEN_CR; | ||
| 6178 | else if (c == '\n') | ||
| 6179 | eol_seen |= EOL_SEEN_LF; | ||
| 6180 | src++; | ||
| 6181 | } | ||
| 6182 | } | ||
| 6183 | } | ||
| 6184 | coding->head_ascii = src - coding->source; | ||
| 6185 | coding->eol_seen = eol_seen; | ||
| 6186 | return (coding->head_ascii); | ||
| 6187 | } | ||
| 6188 | |||
| 6189 | |||
| 6190 | /* Return the number of characters at the source if all the bytes are | ||
| 6191 | valid UTF-8 (of Unicode range). Otherwise, return -1. By side | ||
| 6192 | effects, update coding->eol_seen. The value of coding->eol_seen is | ||
| 6193 | "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but | ||
| 6194 | the value is reliable only when all the source bytes are valid | ||
| 6195 | UTF-8. */ | ||
| 6196 | |||
| 6197 | static int | ||
| 6198 | check_utf_8 (struct coding_system *coding) | ||
| 6199 | { | ||
| 6200 | const unsigned char *src, *end; | ||
| 6201 | int eol_seen; | ||
| 6202 | int nchars = coding->head_ascii; | ||
| 6203 | |||
| 6204 | if (coding->head_ascii < 0) | ||
| 6205 | check_ascii (coding); | ||
| 6206 | else | ||
| 6207 | coding_set_source (coding); | ||
| 6208 | src = coding->source + coding->head_ascii; | ||
| 6209 | /* We look ahead one byte for CR LF. */ | ||
| 6210 | end = coding->source + coding->src_bytes - 1; | ||
| 6211 | eol_seen = coding->eol_seen; | ||
| 6212 | while (src < end) | ||
| 6213 | { | ||
| 6214 | int c = *src; | ||
| 6215 | |||
| 6216 | if (UTF_8_1_OCTET_P (*src)) | ||
| 6217 | { | ||
| 6218 | src++; | ||
| 6219 | if (c < 0x20) | ||
| 6220 | { | ||
| 6221 | if (c == '\r') | ||
| 6222 | { | ||
| 6223 | if (*src == '\n') | ||
| 6224 | { | ||
| 6225 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6226 | src++; | ||
| 6227 | nchars++; | ||
| 6228 | } | ||
| 6229 | else | ||
| 6230 | eol_seen |= EOL_SEEN_CR; | ||
| 6231 | } | ||
| 6232 | else if (c == '\n') | ||
| 6233 | eol_seen |= EOL_SEEN_LF; | ||
| 6234 | } | ||
| 6235 | } | ||
| 6236 | else if (UTF_8_2_OCTET_LEADING_P (c)) | ||
| 6237 | { | ||
| 6238 | if (c < 0xC2 /* overlong sequence */ | ||
| 6239 | || src + 1 >= end | ||
| 6240 | || ! UTF_8_EXTRA_OCTET_P (src[1])) | ||
| 6241 | return -1; | ||
| 6242 | src += 2; | ||
| 6243 | } | ||
| 6244 | else if (UTF_8_3_OCTET_LEADING_P (c)) | ||
| 6245 | { | ||
| 6246 | if (src + 2 >= end | ||
| 6247 | || ! (UTF_8_EXTRA_OCTET_P (src[1]) | ||
| 6248 | && UTF_8_EXTRA_OCTET_P (src[2]))) | ||
| 6249 | return -1; | ||
| 6250 | c = (((c & 0xF) << 12) | ||
| 6251 | | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); | ||
| 6252 | if (c < 0x800 /* overlong sequence */ | ||
| 6253 | || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */ | ||
| 6254 | return -1; | ||
| 6255 | src += 3; | ||
| 6256 | } | ||
| 6257 | else if (UTF_8_4_OCTET_LEADING_P (c)) | ||
| 6258 | { | ||
| 6259 | if (src + 3 >= end | ||
| 6260 | || ! (UTF_8_EXTRA_OCTET_P (src[1]) | ||
| 6261 | && UTF_8_EXTRA_OCTET_P (src[2]) | ||
| 6262 | && UTF_8_EXTRA_OCTET_P (src[3]))) | ||
| 6263 | return -1; | ||
| 6264 | c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12) | ||
| 6265 | | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); | ||
| 6266 | if (c < 0x10000 /* overlong sequence */ | ||
| 6267 | || c >= 0x110000) /* non-Unicode character */ | ||
| 6268 | return -1; | ||
| 6269 | src += 4; | ||
| 6270 | } | ||
| 6271 | else | ||
| 6272 | return -1; | ||
| 6273 | nchars++; | ||
| 6274 | } | ||
| 6275 | |||
| 6276 | if (src == end) | ||
| 6277 | { | ||
| 6278 | if (! UTF_8_1_OCTET_P (*src)) | ||
| 6279 | return -1; | ||
| 6280 | nchars++; | ||
| 6281 | if (*src == '\r') | ||
| 6282 | eol_seen |= EOL_SEEN_CR; | ||
| 6283 | else if (*src == '\n') | ||
| 6284 | eol_seen |= EOL_SEEN_LF; | ||
| 6285 | } | ||
| 6286 | coding->eol_seen = eol_seen; | ||
| 6287 | return nchars; | ||
| 6288 | } | ||
| 6289 | |||
| 6108 | 6290 | ||
| 6109 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by | 6291 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| 6110 | SOURCE is encoded. If CATEGORY is one of | 6292 | SOURCE is encoded. If CATEGORY is one of |
| @@ -6217,6 +6399,9 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen) | |||
| 6217 | Lisp_Object eol_type; | 6399 | Lisp_Object eol_type; |
| 6218 | 6400 | ||
| 6219 | eol_type = CODING_ID_EOL_TYPE (coding->id); | 6401 | eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 6402 | if (! VECTORP (eol_type)) | ||
| 6403 | /* Already adjusted. */ | ||
| 6404 | return eol_type; | ||
| 6220 | if (eol_seen & EOL_SEEN_LF) | 6405 | if (eol_seen & EOL_SEEN_LF) |
| 6221 | { | 6406 | { |
| 6222 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | 6407 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); |
| @@ -6244,14 +6429,16 @@ detect_coding (struct coding_system *coding) | |||
| 6244 | { | 6429 | { |
| 6245 | const unsigned char *src, *src_end; | 6430 | const unsigned char *src, *src_end; |
| 6246 | unsigned int saved_mode = coding->mode; | 6431 | unsigned int saved_mode = coding->mode; |
| 6432 | Lisp_Object found = Qnil; | ||
| 6433 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6247 | 6434 | ||
| 6248 | coding->consumed = coding->consumed_char = 0; | 6435 | coding->consumed = coding->consumed_char = 0; |
| 6249 | coding->produced = coding->produced_char = 0; | 6436 | coding->produced = coding->produced_char = 0; |
| 6250 | coding_set_source (coding); | 6437 | coding_set_source (coding); |
| 6251 | 6438 | ||
| 6252 | src_end = coding->source + coding->src_bytes; | 6439 | src_end = coding->source + coding->src_bytes; |
| 6253 | coding->head_ascii = 0; | ||
| 6254 | 6440 | ||
| 6441 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6255 | /* If we have not yet decided the text encoding type, detect it | 6442 | /* If we have not yet decided the text encoding type, detect it |
| 6256 | now. */ | 6443 | now. */ |
| 6257 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 6444 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
| @@ -6260,6 +6447,7 @@ detect_coding (struct coding_system *coding) | |||
| 6260 | struct coding_detection_info detect_info; | 6447 | struct coding_detection_info detect_info; |
| 6261 | bool null_byte_found = 0, eight_bit_found = 0; | 6448 | bool null_byte_found = 0, eight_bit_found = 0; |
| 6262 | 6449 | ||
| 6450 | coding->head_ascii = 0; | ||
| 6263 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6451 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 6264 | for (src = coding->source; src < src_end; src++) | 6452 | for (src = coding->source; src < src_end; src++) |
| 6265 | { | 6453 | { |
| @@ -6298,6 +6486,27 @@ detect_coding (struct coding_system *coding) | |||
| 6298 | if (eight_bit_found) | 6486 | if (eight_bit_found) |
| 6299 | break; | 6487 | break; |
| 6300 | } | 6488 | } |
| 6489 | else if (! disable_ascii_optimization | ||
| 6490 | && ! inhibit_eol_conversion) | ||
| 6491 | { | ||
| 6492 | if (c == '\r') | ||
| 6493 | { | ||
| 6494 | if (src < src_end && src[1] == '\n') | ||
| 6495 | { | ||
| 6496 | coding->eol_seen |= EOL_SEEN_CRLF; | ||
| 6497 | src++; | ||
| 6498 | if (! eight_bit_found) | ||
| 6499 | coding->head_ascii++; | ||
| 6500 | } | ||
| 6501 | else | ||
| 6502 | coding->eol_seen |= EOL_SEEN_CR; | ||
| 6503 | } | ||
| 6504 | else if (c == '\n') | ||
| 6505 | { | ||
| 6506 | coding->eol_seen |= EOL_SEEN_LF; | ||
| 6507 | } | ||
| 6508 | } | ||
| 6509 | |||
| 6301 | if (! eight_bit_found) | 6510 | if (! eight_bit_found) |
| 6302 | coding->head_ascii++; | 6511 | coding->head_ascii++; |
| 6303 | } | 6512 | } |
| @@ -6332,6 +6541,9 @@ detect_coding (struct coding_system *coding) | |||
| 6332 | { | 6541 | { |
| 6333 | category = coding_priorities[i]; | 6542 | category = coding_priorities[i]; |
| 6334 | this = coding_categories + category; | 6543 | this = coding_categories + category; |
| 6544 | /* Some of this->detector (e.g. detect_coding_sjis) | ||
| 6545 | require this information. */ | ||
| 6546 | coding->id = this->id; | ||
| 6335 | if (this->id < 0) | 6547 | if (this->id < 0) |
| 6336 | { | 6548 | { |
| 6337 | /* No coding system of this category is defined. */ | 6549 | /* No coding system of this category is defined. */ |
| @@ -6346,32 +6558,58 @@ detect_coding (struct coding_system *coding) | |||
| 6346 | } | 6558 | } |
| 6347 | else if ((*(this->detector)) (coding, &detect_info) | 6559 | else if ((*(this->detector)) (coding, &detect_info) |
| 6348 | && detect_info.found & (1 << category)) | 6560 | && detect_info.found & (1 << category)) |
| 6349 | { | 6561 | break; |
| 6350 | if (category == coding_category_utf_16_auto) | ||
| 6351 | { | ||
| 6352 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6353 | category = coding_category_utf_16_le; | ||
| 6354 | else | ||
| 6355 | category = coding_category_utf_16_be; | ||
| 6356 | } | ||
| 6357 | break; | ||
| 6358 | } | ||
| 6359 | } | 6562 | } |
| 6360 | } | 6563 | } |
| 6361 | 6564 | ||
| 6362 | if (i < coding_category_raw_text) | 6565 | if (i < coding_category_raw_text) |
| 6363 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6566 | { |
| 6567 | if (category == coding_category_utf_8_auto) | ||
| 6568 | { | ||
| 6569 | Lisp_Object coding_systems; | ||
| 6570 | |||
| 6571 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6572 | coding_attr_utf_bom); | ||
| 6573 | if (CONSP (coding_systems)) | ||
| 6574 | { | ||
| 6575 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6576 | found = XCAR (coding_systems); | ||
| 6577 | else | ||
| 6578 | found = XCDR (coding_systems); | ||
| 6579 | } | ||
| 6580 | else | ||
| 6581 | found = CODING_ID_NAME (this->id); | ||
| 6582 | } | ||
| 6583 | else if (category == coding_category_utf_16_auto) | ||
| 6584 | { | ||
| 6585 | Lisp_Object coding_systems; | ||
| 6586 | |||
| 6587 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6588 | coding_attr_utf_bom); | ||
| 6589 | if (CONSP (coding_systems)) | ||
| 6590 | { | ||
| 6591 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6592 | found = XCAR (coding_systems); | ||
| 6593 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | ||
| 6594 | found = XCDR (coding_systems); | ||
| 6595 | } | ||
| 6596 | else | ||
| 6597 | found = CODING_ID_NAME (this->id); | ||
| 6598 | } | ||
| 6599 | else | ||
| 6600 | found = CODING_ID_NAME (this->id); | ||
| 6601 | } | ||
| 6364 | else if (null_byte_found) | 6602 | else if (null_byte_found) |
| 6365 | setup_coding_system (Qno_conversion, coding); | 6603 | found = Qno_conversion; |
| 6366 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) | 6604 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) |
| 6367 | == CATEGORY_MASK_ANY) | 6605 | == CATEGORY_MASK_ANY) |
| 6368 | setup_coding_system (Qraw_text, coding); | 6606 | found = Qraw_text; |
| 6369 | else if (detect_info.rejected) | 6607 | else if (detect_info.rejected) |
| 6370 | for (i = 0; i < coding_category_raw_text; i++) | 6608 | for (i = 0; i < coding_category_raw_text; i++) |
| 6371 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) | 6609 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) |
| 6372 | { | 6610 | { |
| 6373 | this = coding_categories + coding_priorities[i]; | 6611 | this = coding_categories + coding_priorities[i]; |
| 6374 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6612 | found = CODING_ID_NAME (this->id); |
| 6375 | break; | 6613 | break; |
| 6376 | } | 6614 | } |
| 6377 | } | 6615 | } |
| @@ -6385,14 +6623,21 @@ detect_coding (struct coding_system *coding) | |||
| 6385 | coding_systems | 6623 | coding_systems |
| 6386 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); | 6624 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); |
| 6387 | detect_info.found = detect_info.rejected = 0; | 6625 | detect_info.found = detect_info.rejected = 0; |
| 6388 | coding->head_ascii = 0; | 6626 | if (check_ascii (coding) == coding->src_bytes) |
| 6389 | if (CONSP (coding_systems) | ||
| 6390 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6391 | { | 6627 | { |
| 6392 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | 6628 | if (CONSP (coding_systems)) |
| 6393 | setup_coding_system (XCAR (coding_systems), coding); | 6629 | found = XCDR (coding_systems); |
| 6394 | else | 6630 | } |
| 6395 | setup_coding_system (XCDR (coding_systems), coding); | 6631 | else |
| 6632 | { | ||
| 6633 | if (CONSP (coding_systems) | ||
| 6634 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6635 | { | ||
| 6636 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6637 | found = XCAR (coding_systems); | ||
| 6638 | else | ||
| 6639 | found = XCDR (coding_systems); | ||
| 6640 | } | ||
| 6396 | } | 6641 | } |
| 6397 | } | 6642 | } |
| 6398 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) | 6643 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
| @@ -6409,11 +6654,24 @@ detect_coding (struct coding_system *coding) | |||
| 6409 | && detect_coding_utf_16 (coding, &detect_info)) | 6654 | && detect_coding_utf_16 (coding, &detect_info)) |
| 6410 | { | 6655 | { |
| 6411 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 6656 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
| 6412 | setup_coding_system (XCAR (coding_systems), coding); | 6657 | found = XCAR (coding_systems); |
| 6413 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | 6658 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) |
| 6414 | setup_coding_system (XCDR (coding_systems), coding); | 6659 | found = XCDR (coding_systems); |
| 6415 | } | 6660 | } |
| 6416 | } | 6661 | } |
| 6662 | |||
| 6663 | if (! NILP (found)) | ||
| 6664 | { | ||
| 6665 | int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE | ||
| 6666 | : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF | ||
| 6667 | : EQ (eol_type, Qmac) ? EOL_SEEN_CR | ||
| 6668 | : EOL_SEEN_LF); | ||
| 6669 | |||
| 6670 | setup_coding_system (found, coding); | ||
| 6671 | if (specified_eol != EOL_SEEN_NONE) | ||
| 6672 | adjust_coding_eol_type (coding, specified_eol); | ||
| 6673 | } | ||
| 6674 | |||
| 6417 | coding->mode = saved_mode; | 6675 | coding->mode = saved_mode; |
| 6418 | } | 6676 | } |
| 6419 | 6677 | ||
| @@ -6842,7 +7100,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6842 | 7100 | ||
| 6843 | produced = dst - (coding->destination + coding->produced); | 7101 | produced = dst - (coding->destination + coding->produced); |
| 6844 | if (BUFFERP (coding->dst_object) && produced_chars > 0) | 7102 | if (BUFFERP (coding->dst_object) && produced_chars > 0) |
| 6845 | insert_from_gap (produced_chars, produced); | 7103 | insert_from_gap (produced_chars, produced, 0); |
| 6846 | coding->produced += produced; | 7104 | coding->produced += produced; |
| 6847 | coding->produced_char += produced_chars; | 7105 | coding->produced_char += produced_chars; |
| 6848 | return carryover; | 7106 | return carryover; |
| @@ -6853,7 +7111,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6853 | [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] | 7111 | [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] |
| 6854 | */ | 7112 | */ |
| 6855 | 7113 | ||
| 6856 | static inline void | 7114 | static void |
| 6857 | produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | 7115 | produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) |
| 6858 | { | 7116 | { |
| 6859 | int len; | 7117 | int len; |
| @@ -6897,7 +7155,7 @@ produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | |||
| 6897 | [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] | 7155 | [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] |
| 6898 | */ | 7156 | */ |
| 6899 | 7157 | ||
| 6900 | static inline void | 7158 | static void |
| 6901 | produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | 7159 | produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) |
| 6902 | { | 7160 | { |
| 6903 | ptrdiff_t from = pos - charbuf[2]; | 7161 | ptrdiff_t from = pos - charbuf[2]; |
| @@ -6913,22 +7171,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | |||
| 6913 | 7171 | ||
| 6914 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ | 7172 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ |
| 6915 | do { \ | 7173 | do { \ |
| 6916 | int size = CHARBUF_SIZE; \ | 7174 | coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ |
| 6917 | \ | 7175 | coding->charbuf_size = CHARBUF_SIZE; \ |
| 6918 | coding->charbuf = NULL; \ | ||
| 6919 | while (size > 1024) \ | ||
| 6920 | { \ | ||
| 6921 | coding->charbuf = alloca (sizeof (int) * size); \ | ||
| 6922 | if (coding->charbuf) \ | ||
| 6923 | break; \ | ||
| 6924 | size >>= 1; \ | ||
| 6925 | } \ | ||
| 6926 | if (! coding->charbuf) \ | ||
| 6927 | { \ | ||
| 6928 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ | ||
| 6929 | return; \ | ||
| 6930 | } \ | ||
| 6931 | coding->charbuf_size = size; \ | ||
| 6932 | } while (0) | 7176 | } while (0) |
| 6933 | 7177 | ||
| 6934 | 7178 | ||
| @@ -6997,6 +7241,8 @@ decode_coding (struct coding_system *coding) | |||
| 6997 | int carryover; | 7241 | int carryover; |
| 6998 | int i; | 7242 | int i; |
| 6999 | 7243 | ||
| 7244 | USE_SAFE_ALLOCA; | ||
| 7245 | |||
| 7000 | if (BUFFERP (coding->src_object) | 7246 | if (BUFFERP (coding->src_object) |
| 7001 | && coding->src_pos > 0 | 7247 | && coding->src_pos > 0 |
| 7002 | && coding->src_pos < GPT | 7248 | && coding->src_pos < GPT |
| @@ -7119,6 +7365,8 @@ decode_coding (struct coding_system *coding) | |||
| 7119 | bset_undo_list (current_buffer, undo_list); | 7365 | bset_undo_list (current_buffer, undo_list); |
| 7120 | record_insert (coding->dst_pos, coding->produced_char); | 7366 | record_insert (coding->dst_pos, coding->produced_char); |
| 7121 | } | 7367 | } |
| 7368 | |||
| 7369 | SAFE_FREE (); | ||
| 7122 | } | 7370 | } |
| 7123 | 7371 | ||
| 7124 | 7372 | ||
| @@ -7132,7 +7380,7 @@ decode_coding (struct coding_system *coding) | |||
| 7132 | position of a composition after POS (if any) or to LIMIT, and | 7380 | position of a composition after POS (if any) or to LIMIT, and |
| 7133 | return BUF. */ | 7381 | return BUF. */ |
| 7134 | 7382 | ||
| 7135 | static inline int * | 7383 | static int * |
| 7136 | handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | 7384 | handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, |
| 7137 | struct coding_system *coding, int *buf, | 7385 | struct coding_system *coding, int *buf, |
| 7138 | ptrdiff_t *stop) | 7386 | ptrdiff_t *stop) |
| @@ -7215,7 +7463,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | |||
| 7215 | If the property value is nil, set *STOP to the position where the | 7463 | If the property value is nil, set *STOP to the position where the |
| 7216 | property value is non-nil (limiting by LIMIT), and return BUF. */ | 7464 | property value is non-nil (limiting by LIMIT), and return BUF. */ |
| 7217 | 7465 | ||
| 7218 | static inline int * | 7466 | static int * |
| 7219 | handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit, | 7467 | handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit, |
| 7220 | struct coding_system *coding, int *buf, | 7468 | struct coding_system *coding, int *buf, |
| 7221 | ptrdiff_t *stop) | 7469 | ptrdiff_t *stop) |
| @@ -7402,6 +7650,8 @@ encode_coding (struct coding_system *coding) | |||
| 7402 | int max_lookup; | 7650 | int max_lookup; |
| 7403 | struct ccl_spec cclspec; | 7651 | struct ccl_spec cclspec; |
| 7404 | 7652 | ||
| 7653 | USE_SAFE_ALLOCA; | ||
| 7654 | |||
| 7405 | attrs = CODING_ID_ATTRS (coding->id); | 7655 | attrs = CODING_ID_ATTRS (coding->id); |
| 7406 | if (coding->encoder == encode_coding_raw_text) | 7656 | if (coding->encoder == encode_coding_raw_text) |
| 7407 | translation_table = Qnil, max_lookup = 0; | 7657 | translation_table = Qnil, max_lookup = 0; |
| @@ -7435,7 +7685,9 @@ encode_coding (struct coding_system *coding) | |||
| 7435 | } while (coding->consumed_char < coding->src_chars); | 7685 | } while (coding->consumed_char < coding->src_chars); |
| 7436 | 7686 | ||
| 7437 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) | 7687 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) |
| 7438 | insert_from_gap (coding->produced_char, coding->produced); | 7688 | insert_from_gap (coding->produced_char, coding->produced, 0); |
| 7689 | |||
| 7690 | SAFE_FREE (); | ||
| 7439 | } | 7691 | } |
| 7440 | 7692 | ||
| 7441 | 7693 | ||
| @@ -7529,8 +7781,6 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7529 | ptrdiff_t count = SPECPDL_INDEX (); | 7781 | ptrdiff_t count = SPECPDL_INDEX (); |
| 7530 | Lisp_Object attrs; | 7782 | Lisp_Object attrs; |
| 7531 | 7783 | ||
| 7532 | code_conversion_save (0, 0); | ||
| 7533 | |||
| 7534 | coding->src_object = Fcurrent_buffer (); | 7784 | coding->src_object = Fcurrent_buffer (); |
| 7535 | coding->src_chars = chars; | 7785 | coding->src_chars = chars; |
| 7536 | coding->src_bytes = bytes; | 7786 | coding->src_bytes = bytes; |
| @@ -7542,15 +7792,95 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7542 | coding->dst_pos_byte = PT_BYTE; | 7792 | coding->dst_pos_byte = PT_BYTE; |
| 7543 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 7793 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7544 | 7794 | ||
| 7795 | coding->head_ascii = -1; | ||
| 7796 | coding->detected_utf8_chars = -1; | ||
| 7797 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 7545 | if (CODING_REQUIRE_DETECTION (coding)) | 7798 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7546 | detect_coding (coding); | 7799 | detect_coding (coding); |
| 7800 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7801 | if (! disable_ascii_optimization | ||
| 7802 | && ! coding->src_multibyte | ||
| 7803 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | ||
| 7804 | && NILP (CODING_ATTR_POST_READ (attrs)) | ||
| 7805 | && NILP (get_translation_table (attrs, 0, NULL))) | ||
| 7806 | { | ||
| 7807 | chars = coding->head_ascii; | ||
| 7808 | if (chars < 0) | ||
| 7809 | chars = check_ascii (coding); | ||
| 7810 | if (chars != bytes) | ||
| 7811 | { | ||
| 7812 | /* There exists a non-ASCII byte. */ | ||
| 7813 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) | ||
| 7814 | { | ||
| 7815 | if (coding->detected_utf8_chars >= 0) | ||
| 7816 | chars = coding->detected_utf8_chars; | ||
| 7817 | else | ||
| 7818 | chars = check_utf_8 (coding); | ||
| 7819 | if (CODING_UTF_8_BOM (coding) != utf_without_bom | ||
| 7820 | && coding->head_ascii == 0 | ||
| 7821 | && coding->source[0] == UTF_8_BOM_1 | ||
| 7822 | && coding->source[1] == UTF_8_BOM_2 | ||
| 7823 | && coding->source[2] == UTF_8_BOM_3) | ||
| 7824 | { | ||
| 7825 | chars--; | ||
| 7826 | bytes -= 3; | ||
| 7827 | coding->src_bytes -= 3; | ||
| 7828 | } | ||
| 7829 | } | ||
| 7830 | else | ||
| 7831 | chars = -1; | ||
| 7832 | } | ||
| 7833 | if (chars >= 0) | ||
| 7834 | { | ||
| 7835 | Lisp_Object eol_type; | ||
| 7836 | |||
| 7837 | eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 7838 | if (VECTORP (eol_type)) | ||
| 7839 | { | ||
| 7840 | if (coding->eol_seen != EOL_SEEN_NONE) | ||
| 7841 | eol_type = adjust_coding_eol_type (coding, coding->eol_seen); | ||
| 7842 | } | ||
| 7843 | if (EQ (eol_type, Qmac)) | ||
| 7844 | { | ||
| 7845 | unsigned char *src_end = GAP_END_ADDR; | ||
| 7846 | unsigned char *src = src_end - coding->src_bytes; | ||
| 7847 | |||
| 7848 | while (src < src_end) | ||
| 7849 | { | ||
| 7850 | if (*src++ == '\r') | ||
| 7851 | src[-1] = '\n'; | ||
| 7852 | } | ||
| 7853 | } | ||
| 7854 | else if (EQ (eol_type, Qdos)) | ||
| 7855 | { | ||
| 7856 | unsigned char *src = GAP_END_ADDR; | ||
| 7857 | unsigned char *src_beg = src - coding->src_bytes; | ||
| 7858 | unsigned char *dst = src; | ||
| 7859 | ptrdiff_t diff; | ||
| 7860 | |||
| 7861 | while (src_beg < src) | ||
| 7862 | { | ||
| 7863 | *--dst = *--src; | ||
| 7864 | if (*src == '\n' && src > src_beg && src[-1] == '\r') | ||
| 7865 | src--; | ||
| 7866 | } | ||
| 7867 | diff = dst - src; | ||
| 7868 | bytes -= diff; | ||
| 7869 | chars -= diff; | ||
| 7870 | } | ||
| 7871 | coding->produced = bytes; | ||
| 7872 | coding->produced_char = chars; | ||
| 7873 | insert_from_gap (chars, bytes, 1); | ||
| 7874 | return; | ||
| 7875 | } | ||
| 7876 | } | ||
| 7877 | code_conversion_save (0, 0); | ||
| 7547 | 7878 | ||
| 7548 | coding->mode |= CODING_MODE_LAST_BLOCK; | 7879 | coding->mode |= CODING_MODE_LAST_BLOCK; |
| 7549 | current_buffer->text->inhibit_shrinking = 1; | 7880 | current_buffer->text->inhibit_shrinking = 1; |
| 7550 | decode_coding (coding); | 7881 | decode_coding (coding); |
| 7551 | current_buffer->text->inhibit_shrinking = 0; | 7882 | current_buffer->text->inhibit_shrinking = 0; |
| 7552 | 7883 | ||
| 7553 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7554 | if (! NILP (CODING_ATTR_POST_READ (attrs))) | 7884 | if (! NILP (CODING_ATTR_POST_READ (attrs))) |
| 7555 | { | 7885 | { |
| 7556 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; | 7886 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; |
| @@ -7724,14 +8054,8 @@ decode_coding_object (struct coding_system *coding, | |||
| 7724 | set_buffer_internal (XBUFFER (coding->dst_object)); | 8054 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7725 | if (dst_bytes < coding->produced) | 8055 | if (dst_bytes < coding->produced) |
| 7726 | { | 8056 | { |
| 8057 | eassert (coding->produced > 0); | ||
| 7727 | destination = xrealloc (destination, coding->produced); | 8058 | destination = xrealloc (destination, coding->produced); |
| 7728 | if (! destination) | ||
| 7729 | { | ||
| 7730 | record_conversion_result (coding, | ||
| 7731 | CODING_RESULT_INSUFFICIENT_MEM); | ||
| 7732 | unbind_to (count, Qnil); | ||
| 7733 | return; | ||
| 7734 | } | ||
| 7735 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) | 8059 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) |
| 7736 | move_gap_both (BEGV, BEGV_BYTE); | 8060 | move_gap_both (BEGV, BEGV_BYTE); |
| 7737 | memcpy (destination, BEGV_ADDR, coding->produced); | 8061 | memcpy (destination, BEGV_ADDR, coding->produced); |
| @@ -7999,6 +8323,50 @@ preferred_coding_system (void) | |||
| 7999 | return CODING_ID_NAME (id); | 8323 | return CODING_ID_NAME (id); |
| 8000 | } | 8324 | } |
| 8001 | 8325 | ||
| 8326 | #if defined (WINDOWSNT) || defined (CYGWIN) | ||
| 8327 | |||
| 8328 | Lisp_Object | ||
| 8329 | from_unicode (Lisp_Object str) | ||
| 8330 | { | ||
| 8331 | CHECK_STRING (str); | ||
| 8332 | if (!STRING_MULTIBYTE (str) && | ||
| 8333 | SBYTES (str) & 1) | ||
| 8334 | { | ||
| 8335 | str = Fsubstring (str, make_number (0), make_number (-1)); | ||
| 8336 | } | ||
| 8337 | |||
| 8338 | return code_convert_string_norecord (str, Qutf_16le, 0); | ||
| 8339 | } | ||
| 8340 | |||
| 8341 | Lisp_Object | ||
| 8342 | from_unicode_buffer (const wchar_t* wstr) | ||
| 8343 | { | ||
| 8344 | return from_unicode ( | ||
| 8345 | make_unibyte_string ( | ||
| 8346 | (char*) wstr, | ||
| 8347 | /* we get one of the two final 0 bytes for free. */ | ||
| 8348 | 1 + sizeof (wchar_t) * wcslen (wstr))); | ||
| 8349 | } | ||
| 8350 | |||
| 8351 | wchar_t * | ||
| 8352 | to_unicode (Lisp_Object str, Lisp_Object *buf) | ||
| 8353 | { | ||
| 8354 | *buf = code_convert_string_norecord (str, Qutf_16le, 1); | ||
| 8355 | /* We need to make another copy (in addition to the one made by | ||
| 8356 | code_convert_string_norecord) to ensure that the final string is | ||
| 8357 | _doubly_ zero terminated --- that is, that the string is | ||
| 8358 | terminated by two zero bytes and one utf-16le null character. | ||
| 8359 | Because strings are already terminated with a single zero byte, | ||
| 8360 | we just add one additional zero. */ | ||
| 8361 | str = make_uninit_string (SBYTES (*buf) + 1); | ||
| 8362 | memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf)); | ||
| 8363 | SDATA (str) [SBYTES (*buf)] = '\0'; | ||
| 8364 | *buf = str; | ||
| 8365 | return WCSDATA (*buf); | ||
| 8366 | } | ||
| 8367 | |||
| 8368 | #endif /* WINDOWSNT || CYGWIN */ | ||
| 8369 | |||
| 8002 | 8370 | ||
| 8003 | #ifdef emacs | 8371 | #ifdef emacs |
| 8004 | /*** 8. Emacs Lisp library functions ***/ | 8372 | /*** 8. Emacs Lisp library functions ***/ |
| @@ -8416,9 +8784,6 @@ highest priority. */) | |||
| 8416 | ptrdiff_t from, to; | 8784 | ptrdiff_t from, to; |
| 8417 | ptrdiff_t from_byte, to_byte; | 8785 | ptrdiff_t from_byte, to_byte; |
| 8418 | 8786 | ||
| 8419 | CHECK_NUMBER_COERCE_MARKER (start); | ||
| 8420 | CHECK_NUMBER_COERCE_MARKER (end); | ||
| 8421 | |||
| 8422 | validate_region (&start, &end); | 8787 | validate_region (&start, &end); |
| 8423 | from = XINT (start), to = XINT (end); | 8788 | from = XINT (start), to = XINT (end); |
| 8424 | from_byte = CHAR_TO_BYTE (from); | 8789 | from_byte = CHAR_TO_BYTE (from); |
| @@ -8460,7 +8825,7 @@ highest priority. */) | |||
| 8460 | } | 8825 | } |
| 8461 | 8826 | ||
| 8462 | 8827 | ||
| 8463 | static inline bool | 8828 | static bool |
| 8464 | char_encodable_p (int c, Lisp_Object attrs) | 8829 | char_encodable_p (int c, Lisp_Object attrs) |
| 8465 | { | 8830 | { |
| 8466 | Lisp_Object tail; | 8831 | Lisp_Object tail; |
| @@ -8862,8 +9227,6 @@ code_convert_region (Lisp_Object start, Lisp_Object end, | |||
| 8862 | ptrdiff_t from, from_byte, to, to_byte; | 9227 | ptrdiff_t from, from_byte, to, to_byte; |
| 8863 | Lisp_Object src_object; | 9228 | Lisp_Object src_object; |
| 8864 | 9229 | ||
| 8865 | CHECK_NUMBER_COERCE_MARKER (start); | ||
| 8866 | CHECK_NUMBER_COERCE_MARKER (end); | ||
| 8867 | if (NILP (coding_system)) | 9230 | if (NILP (coding_system)) |
| 8868 | coding_system = Qno_conversion; | 9231 | coding_system = Qno_conversion; |
| 8869 | else | 9232 | else |
| @@ -9483,7 +9846,7 @@ make_subsidiaries (Lisp_Object base) | |||
| 9483 | int i; | 9846 | int i; |
| 9484 | 9847 | ||
| 9485 | memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); | 9848 | memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); |
| 9486 | subsidiaries = Fmake_vector (make_number (3), Qnil); | 9849 | subsidiaries = make_uninit_vector (3); |
| 9487 | for (i = 0; i < 3; i++) | 9850 | for (i = 0; i < 3; i++) |
| 9488 | { | 9851 | { |
| 9489 | strcpy (buf + base_name_len, suffixes[i]); | 9852 | strcpy (buf + base_name_len, suffixes[i]); |
| @@ -9783,7 +10146,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9783 | CHECK_VECTOR (initial); | 10146 | CHECK_VECTOR (initial); |
| 9784 | for (i = 0; i < 4; i++) | 10147 | for (i = 0; i < 4; i++) |
| 9785 | { | 10148 | { |
| 9786 | val = Faref (initial, make_number (i)); | 10149 | val = AREF (initial, i); |
| 9787 | if (! NILP (val)) | 10150 | if (! NILP (val)) |
| 9788 | { | 10151 | { |
| 9789 | struct charset *charset; | 10152 | struct charset *charset; |
| @@ -9988,7 +10351,8 @@ usage: (define-coding-system-internal ...) */) | |||
| 9988 | this_name = AREF (eol_type, i); | 10351 | this_name = AREF (eol_type, i); |
| 9989 | this_aliases = Fcons (this_name, Qnil); | 10352 | this_aliases = Fcons (this_name, Qnil); |
| 9990 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); | 10353 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); |
| 9991 | this_spec = Fmake_vector (make_number (3), attrs); | 10354 | this_spec = make_uninit_vector (3); |
| 10355 | ASET (this_spec, 0, attrs); | ||
| 9992 | ASET (this_spec, 1, this_aliases); | 10356 | ASET (this_spec, 1, this_aliases); |
| 9993 | ASET (this_spec, 2, this_eol_type); | 10357 | ASET (this_spec, 2, this_eol_type); |
| 9994 | Fputhash (this_name, this_spec, Vcoding_system_hash_table); | 10358 | Fputhash (this_name, this_spec, Vcoding_system_hash_table); |
| @@ -10001,7 +10365,8 @@ usage: (define-coding-system-internal ...) */) | |||
| 10001 | } | 10365 | } |
| 10002 | } | 10366 | } |
| 10003 | 10367 | ||
| 10004 | spec_vec = Fmake_vector (make_number (3), attrs); | 10368 | spec_vec = make_uninit_vector (3); |
| 10369 | ASET (spec_vec, 0, attrs); | ||
| 10005 | ASET (spec_vec, 1, aliases); | 10370 | ASET (spec_vec, 1, aliases); |
| 10006 | ASET (spec_vec, 2, eol_type); | 10371 | ASET (spec_vec, 2, eol_type); |
| 10007 | 10372 | ||
| @@ -10298,6 +10663,7 @@ syms_of_coding (void) | |||
| 10298 | DEFSYM (Qeol_type, "eol-type"); | 10663 | DEFSYM (Qeol_type, "eol-type"); |
| 10299 | DEFSYM (Qunix, "unix"); | 10664 | DEFSYM (Qunix, "unix"); |
| 10300 | DEFSYM (Qdos, "dos"); | 10665 | DEFSYM (Qdos, "dos"); |
| 10666 | DEFSYM (Qmac, "mac"); | ||
| 10301 | 10667 | ||
| 10302 | DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); | 10668 | DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); |
| 10303 | DEFSYM (Qpost_read_conversion, "post-read-conversion"); | 10669 | DEFSYM (Qpost_read_conversion, "post-read-conversion"); |
| @@ -10312,6 +10678,11 @@ syms_of_coding (void) | |||
| 10312 | DEFSYM (Qutf_8, "utf-8"); | 10678 | DEFSYM (Qutf_8, "utf-8"); |
| 10313 | DEFSYM (Qutf_8_emacs, "utf-8-emacs"); | 10679 | DEFSYM (Qutf_8_emacs, "utf-8-emacs"); |
| 10314 | 10680 | ||
| 10681 | #if defined (WINDOWSNT) || defined (CYGWIN) | ||
| 10682 | /* No, not utf-16-le: that one has a BOM. */ | ||
| 10683 | DEFSYM (Qutf_16le, "utf-16le"); | ||
| 10684 | #endif | ||
| 10685 | |||
| 10315 | DEFSYM (Qutf_16, "utf-16"); | 10686 | DEFSYM (Qutf_16, "utf-16"); |
| 10316 | DEFSYM (Qbig, "big"); | 10687 | DEFSYM (Qbig, "big"); |
| 10317 | DEFSYM (Qlittle, "little"); | 10688 | DEFSYM (Qlittle, "little"); |
| @@ -10400,10 +10771,8 @@ syms_of_coding (void) | |||
| 10400 | intern_c_string ("coding-category-undecided")); | 10771 | intern_c_string ("coding-category-undecided")); |
| 10401 | 10772 | ||
| 10402 | DEFSYM (Qinsufficient_source, "insufficient-source"); | 10773 | DEFSYM (Qinsufficient_source, "insufficient-source"); |
| 10403 | DEFSYM (Qinconsistent_eol, "inconsistent-eol"); | ||
| 10404 | DEFSYM (Qinvalid_source, "invalid-source"); | 10774 | DEFSYM (Qinvalid_source, "invalid-source"); |
| 10405 | DEFSYM (Qinterrupted, "interrupted"); | 10775 | DEFSYM (Qinterrupted, "interrupted"); |
| 10406 | DEFSYM (Qinsufficient_memory, "insufficient-memory"); | ||
| 10407 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); | 10776 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); |
| 10408 | 10777 | ||
| 10409 | defsubr (&Scoding_system_p); | 10778 | defsubr (&Scoding_system_p); |
| @@ -10704,7 +11073,7 @@ reading if you suppress escape sequence detection. | |||
| 10704 | 11073 | ||
| 10705 | The other way to read escape sequences in a file without decoding is | 11074 | The other way to read escape sequences in a file without decoding is |
| 10706 | to explicitly specify some coding system that doesn't use ISO-2022 | 11075 | to explicitly specify some coding system that doesn't use ISO-2022 |
| 10707 | escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); | 11076 | escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */); |
| 10708 | inhibit_iso_escape_detection = 0; | 11077 | inhibit_iso_escape_detection = 0; |
| 10709 | 11078 | ||
| 10710 | DEFVAR_BOOL ("inhibit-null-byte-detection", | 11079 | DEFVAR_BOOL ("inhibit-null-byte-detection", |
| @@ -10720,6 +11089,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and | |||
| 10720 | decode text as usual. */); | 11089 | decode text as usual. */); |
| 10721 | inhibit_null_byte_detection = 0; | 11090 | inhibit_null_byte_detection = 0; |
| 10722 | 11091 | ||
| 11092 | DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, | ||
| 11093 | doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. | ||
| 11094 | Internal use only. Removed after the experimental optimizer gets stable. */); | ||
| 11095 | disable_ascii_optimization = 0; | ||
| 11096 | |||
| 10723 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, | 11097 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, |
| 10724 | doc: /* Char table for translating self-inserting characters. | 11098 | doc: /* Char table for translating self-inserting characters. |
| 10725 | This is applied to the result of input methods, not their input. | 11099 | This is applied to the result of input methods, not their input. |