diff options
| author | Kenichi Handa | 2013-05-22 23:53:21 +0900 |
|---|---|---|
| committer | Kenichi Handa | 2013-05-22 23:53:21 +0900 |
| commit | e6d2f1553635a746396f2f4261dde31e03e0fdd1 (patch) | |
| tree | 00882ebfc0d82b37593f64bee4aee51c49b5f19b /src/coding.c | |
| parent | 59c886717271b57d661027685d203a3dd5cfafa7 (diff) | |
| download | emacs-e6d2f1553635a746396f2f4261dde31e03e0fdd1.tar.gz emacs-e6d2f1553635a746396f2f4261dde31e03e0fdd1.zip | |
Fix the setting of buffer-file-coding-system on, for instance, C-x RET c unix RET _FILE_OF_DOS_EOL_TYPE_ RET.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 221 |
1 file changed, 154 insertions, 67 deletions
diff --git a/src/coding.c b/src/coding.c index f6664e179b7..42fd81b6322 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1125,6 +1125,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1125 | *buf++ = id; \ | 1125 | *buf++ = id; \ |
| 1126 | } while (0) | 1126 | } while (0) |
| 1127 | 1127 | ||
| 1128 | |||
| 1129 | /* Bitmasks for coding->eol_seen. */ | ||
| 1130 | |||
| 1131 | #define EOL_SEEN_NONE 0 | ||
| 1132 | #define EOL_SEEN_LF 1 | ||
| 1133 | #define EOL_SEEN_CR 2 | ||
| 1134 | #define EOL_SEEN_CRLF 4 | ||
| 1135 | |||
| 1128 | 1136 | ||
| 1129 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ | 1137 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ |
| 1130 | 1138 | ||
| @@ -1147,6 +1155,9 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1147 | #define UTF_8_BOM_2 0xBB | 1155 | #define UTF_8_BOM_2 0xBB |
| 1148 | #define UTF_8_BOM_3 0xBF | 1156 | #define UTF_8_BOM_3 0xBF |
| 1149 | 1157 | ||
| 1158 | /* Unlike the other detect_coding_XXX, this function counts number of | ||
| 1159 | characters and check EOL format. */ | ||
| 1160 | |||
| 1150 | static bool | 1161 | static bool |
| 1151 | detect_coding_utf_8 (struct coding_system *coding, | 1162 | detect_coding_utf_8 (struct coding_system *coding, |
| 1152 | struct coding_detection_info *detect_info) | 1163 | struct coding_detection_info *detect_info) |
| @@ -1156,11 +1167,23 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1156 | bool multibytep = coding->src_multibyte; | 1167 | bool multibytep = coding->src_multibyte; |
| 1157 | ptrdiff_t consumed_chars = 0; | 1168 | ptrdiff_t consumed_chars = 0; |
| 1158 | bool bom_found = 0; | 1169 | bool bom_found = 0; |
| 1159 | bool found = 0; | 1170 | int nchars = coding->head_ascii; |
| 1171 | int eol_seen = coding->eol_seen; | ||
| 1160 | 1172 | ||
| 1161 | detect_info->checked |= CATEGORY_MASK_UTF_8; | 1173 | detect_info->checked |= CATEGORY_MASK_UTF_8; |
| 1162 | /* A coding system of this category is always ASCII compatible. */ | 1174 | /* A coding system of this category is always ASCII compatible. */ |
| 1163 | src += coding->head_ascii; | 1175 | src += nchars; |
| 1176 | |||
| 1177 | if (src == coding->source /* BOM should be at the head. */ | ||
| 1178 | && src + 3 < src_end /* BOM is 3-byte long. */ | ||
| 1179 | && src[0] == UTF_8_BOM_1 | ||
| 1180 | && src[1] == UTF_8_BOM_2 | ||
| 1181 | && src[2] == UTF_8_BOM_3) | ||
| 1182 | { | ||
| 1183 | bom_found = 1; | ||
| 1184 | src += 3; | ||
| 1185 | nchars++; | ||
| 1186 | } | ||
| 1164 | 1187 | ||
| 1165 | while (1) | 1188 | while (1) |
| 1166 | { | 1189 | { |
| @@ -1169,13 +1192,29 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1169 | src_base = src; | 1192 | src_base = src; |
| 1170 | ONE_MORE_BYTE (c); | 1193 | ONE_MORE_BYTE (c); |
| 1171 | if (c < 0 || UTF_8_1_OCTET_P (c)) | 1194 | if (c < 0 || UTF_8_1_OCTET_P (c)) |
| 1172 | continue; | 1195 | { |
| 1196 | nchars++; | ||
| 1197 | if (c == '\r') | ||
| 1198 | { | ||
| 1199 | if (src < src_end && *src == '\n') | ||
| 1200 | { | ||
| 1201 | eol_seen |= EOL_SEEN_CRLF; | ||
| 1202 | src++; | ||
| 1203 | nchars++; | ||
| 1204 | } | ||
| 1205 | else | ||
| 1206 | eol_seen |= EOL_SEEN_CR; | ||
| 1207 | } | ||
| 1208 | else if (c == '\n') | ||
| 1209 | eol_seen |= EOL_SEEN_LF; | ||
| 1210 | continue; | ||
| 1211 | } | ||
| 1173 | ONE_MORE_BYTE (c1); | 1212 | ONE_MORE_BYTE (c1); |
| 1174 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) | 1213 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) |
| 1175 | break; | 1214 | break; |
| 1176 | if (UTF_8_2_OCTET_LEADING_P (c)) | 1215 | if (UTF_8_2_OCTET_LEADING_P (c)) |
| 1177 | { | 1216 | { |
| 1178 | found = 1; | 1217 | nchars++; |
| 1179 | continue; | 1218 | continue; |
| 1180 | } | 1219 | } |
| 1181 | ONE_MORE_BYTE (c2); | 1220 | ONE_MORE_BYTE (c2); |
| @@ -1183,10 +1222,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1183 | break; | 1222 | break; |
| 1184 | if (UTF_8_3_OCTET_LEADING_P (c)) | 1223 | if (UTF_8_3_OCTET_LEADING_P (c)) |
| 1185 | { | 1224 | { |
| 1186 | found = 1; | 1225 | nchars++; |
| 1187 | if (src_base == coding->source | ||
| 1188 | && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3) | ||
| 1189 | bom_found = 1; | ||
| 1190 | continue; | 1226 | continue; |
| 1191 | } | 1227 | } |
| 1192 | ONE_MORE_BYTE (c3); | 1228 | ONE_MORE_BYTE (c3); |
| @@ -1194,7 +1230,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1194 | break; | 1230 | break; |
| 1195 | if (UTF_8_4_OCTET_LEADING_P (c)) | 1231 | if (UTF_8_4_OCTET_LEADING_P (c)) |
| 1196 | { | 1232 | { |
| 1197 | found = 1; | 1233 | nchars++; |
| 1198 | continue; | 1234 | continue; |
| 1199 | } | 1235 | } |
| 1200 | ONE_MORE_BYTE (c4); | 1236 | ONE_MORE_BYTE (c4); |
| @@ -1202,7 +1238,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1202 | break; | 1238 | break; |
| 1203 | if (UTF_8_5_OCTET_LEADING_P (c)) | 1239 | if (UTF_8_5_OCTET_LEADING_P (c)) |
| 1204 | { | 1240 | { |
| 1205 | found = 1; | 1241 | nchars++; |
| 1206 | continue; | 1242 | continue; |
| 1207 | } | 1243 | } |
| 1208 | break; | 1244 | break; |
| @@ -1219,14 +1255,17 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1219 | if (bom_found) | 1255 | if (bom_found) |
| 1220 | { | 1256 | { |
| 1221 | /* The first character 0xFFFE doesn't necessarily mean a BOM. */ | 1257 | /* The first character 0xFFFE doesn't necessarily mean a BOM. */ |
| 1222 | detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; | 1258 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; |
| 1223 | } | 1259 | } |
| 1224 | else | 1260 | else |
| 1225 | { | 1261 | { |
| 1226 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; | 1262 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; |
| 1227 | if (found) | 1263 | if (nchars < src_end - coding->source) |
| 1228 | detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG; | 1264 | /* The found characters are less than source bytes, which |
| 1265 | means that we found a valid non-ASCII characters. */ | ||
| 1266 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; | ||
| 1229 | } | 1267 | } |
| 1268 | coding->detected_utf8_chars = nchars; | ||
| 1230 | return 1; | 1269 | return 1; |
| 1231 | } | 1270 | } |
| 1232 | 1271 | ||
| @@ -5622,7 +5661,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5622 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); | 5661 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); |
| 5623 | 5662 | ||
| 5624 | coding->mode = 0; | 5663 | coding->mode = 0; |
| 5625 | coding->head_ascii = -1; | ||
| 5626 | if (VECTORP (eol_type)) | 5664 | if (VECTORP (eol_type)) |
| 5627 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK | 5665 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK |
| 5628 | | CODING_REQUIRE_DETECTION_MASK); | 5666 | | CODING_REQUIRE_DETECTION_MASK); |
| @@ -6074,46 +6112,35 @@ complement_process_encoding_system (Lisp_Object coding_system) | |||
| 6074 | 6112 | ||
| 6075 | */ | 6113 | */ |
| 6076 | 6114 | ||
| 6077 | #define EOL_SEEN_NONE 0 | ||
| 6078 | #define EOL_SEEN_LF 1 | ||
| 6079 | #define EOL_SEEN_CR 2 | ||
| 6080 | #define EOL_SEEN_CRLF 4 | ||
| 6081 | |||
| 6082 | |||
| 6083 | static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, | 6115 | static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, |
| 6084 | int eol_seen); | 6116 | int eol_seen); |
| 6085 | 6117 | ||
| 6086 | 6118 | ||
| 6087 | /* Return the number of ASCII characters at the head of the source. | 6119 | /* Return the number of ASCII characters at the head of the source. |
| 6088 | By side effects, set coding->head_ascii and coding->eol_seen. The | 6120 | By side effects, set coding->head_ascii and update |
| 6089 | value of coding->eol_seen is "logical or" of EOL_SEEN_LF, | 6121 | coding->eol_seen. The value of coding->eol_seen is "logical or" of |
| 6090 | EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is reliable only when | 6122 | EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is |
| 6091 | all the source bytes are ASCII. */ | 6123 | reliable only when all the source bytes are ASCII. */ |
| 6092 | 6124 | ||
| 6093 | static int | 6125 | static int |
| 6094 | check_ascii (struct coding_system *coding) | 6126 | check_ascii (struct coding_system *coding) |
| 6095 | { | 6127 | { |
| 6096 | const unsigned char *src, *end; | 6128 | const unsigned char *src, *end; |
| 6097 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | 6129 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 6098 | int eol_seen; | 6130 | int eol_seen = coding->eol_seen; |
| 6099 | 6131 | ||
| 6100 | eol_seen = (VECTORP (eol_type) ? EOL_SEEN_NONE | ||
| 6101 | : EQ (eol_type, Qunix) ? EOL_SEEN_LF | ||
| 6102 | : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF | ||
| 6103 | : EOL_SEEN_CR); | ||
| 6104 | coding_set_source (coding); | 6132 | coding_set_source (coding); |
| 6105 | src = coding->source; | 6133 | src = coding->source; |
| 6106 | end = src + coding->src_bytes; | 6134 | end = src + coding->src_bytes; |
| 6107 | 6135 | ||
| 6108 | if (inhibit_eol_conversion | 6136 | if (inhibit_eol_conversion |
| 6109 | || eol_seen != EOL_SEEN_NONE) | 6137 | || SYMBOLP (eol_type)) |
| 6110 | { | 6138 | { |
| 6111 | /* We don't have to check EOL format. */ | 6139 | /* We don't have to check EOL format. */ |
| 6112 | while (src < end && !( *src & 0x80)) src++; | 6140 | while (src < end && !( *src & 0x80)) |
| 6113 | if (inhibit_eol_conversion) | ||
| 6114 | { | 6141 | { |
| 6115 | eol_seen = EOL_SEEN_LF; | 6142 | if (*src++ == '\n') |
| 6116 | adjust_coding_eol_type (coding, eol_seen); | 6143 | eol_seen |= EOL_SEEN_LF; |
| 6117 | } | 6144 | } |
| 6118 | } | 6145 | } |
| 6119 | else | 6146 | else |
| @@ -6171,7 +6198,7 @@ static int | |||
| 6171 | check_utf_8 (struct coding_system *coding) | 6198 | check_utf_8 (struct coding_system *coding) |
| 6172 | { | 6199 | { |
| 6173 | const unsigned char *src, *end; | 6200 | const unsigned char *src, *end; |
| 6174 | int eol_seen = coding->eol_seen; | 6201 | int eol_seen; |
| 6175 | int nchars = coding->head_ascii; | 6202 | int nchars = coding->head_ascii; |
| 6176 | 6203 | ||
| 6177 | if (coding->head_ascii < 0) | 6204 | if (coding->head_ascii < 0) |
| @@ -6181,7 +6208,7 @@ check_utf_8 (struct coding_system *coding) | |||
| 6181 | src = coding->source + coding->head_ascii; | 6208 | src = coding->source + coding->head_ascii; |
| 6182 | /* We look ahead one byte for CR LF. */ | 6209 | /* We look ahead one byte for CR LF. */ |
| 6183 | end = coding->source + coding->src_bytes - 1; | 6210 | end = coding->source + coding->src_bytes - 1; |
| 6184 | 6211 | eol_seen = coding->eol_seen; | |
| 6185 | while (src < end) | 6212 | while (src < end) |
| 6186 | { | 6213 | { |
| 6187 | int c = *src; | 6214 | int c = *src; |
| @@ -6402,6 +6429,8 @@ detect_coding (struct coding_system *coding) | |||
| 6402 | { | 6429 | { |
| 6403 | const unsigned char *src, *src_end; | 6430 | const unsigned char *src, *src_end; |
| 6404 | unsigned int saved_mode = coding->mode; | 6431 | unsigned int saved_mode = coding->mode; |
| 6432 | Lisp_Object found = Qnil; | ||
| 6433 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6405 | 6434 | ||
| 6406 | coding->consumed = coding->consumed_char = 0; | 6435 | coding->consumed = coding->consumed_char = 0; |
| 6407 | coding->produced = coding->produced_char = 0; | 6436 | coding->produced = coding->produced_char = 0; |
| @@ -6409,6 +6438,7 @@ detect_coding (struct coding_system *coding) | |||
| 6409 | 6438 | ||
| 6410 | src_end = coding->source + coding->src_bytes; | 6439 | src_end = coding->source + coding->src_bytes; |
| 6411 | 6440 | ||
| 6441 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6412 | /* If we have not yet decided the text encoding type, detect it | 6442 | /* If we have not yet decided the text encoding type, detect it |
| 6413 | now. */ | 6443 | now. */ |
| 6414 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 6444 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
| @@ -6418,7 +6448,6 @@ detect_coding (struct coding_system *coding) | |||
| 6418 | bool null_byte_found = 0, eight_bit_found = 0; | 6448 | bool null_byte_found = 0, eight_bit_found = 0; |
| 6419 | 6449 | ||
| 6420 | coding->head_ascii = 0; | 6450 | coding->head_ascii = 0; |
| 6421 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6422 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6451 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 6423 | for (src = coding->source; src < src_end; src++) | 6452 | for (src = coding->source; src < src_end; src++) |
| 6424 | { | 6453 | { |
| @@ -6529,32 +6558,58 @@ detect_coding (struct coding_system *coding) | |||
| 6529 | } | 6558 | } |
| 6530 | else if ((*(this->detector)) (coding, &detect_info) | 6559 | else if ((*(this->detector)) (coding, &detect_info) |
| 6531 | && detect_info.found & (1 << category)) | 6560 | && detect_info.found & (1 << category)) |
| 6532 | { | 6561 | break; |
| 6533 | if (category == coding_category_utf_16_auto) | ||
| 6534 | { | ||
| 6535 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6536 | category = coding_category_utf_16_le; | ||
| 6537 | else | ||
| 6538 | category = coding_category_utf_16_be; | ||
| 6539 | } | ||
| 6540 | break; | ||
| 6541 | } | ||
| 6542 | } | 6562 | } |
| 6543 | } | 6563 | } |
| 6544 | 6564 | ||
| 6545 | if (i < coding_category_raw_text) | 6565 | if (i < coding_category_raw_text) |
| 6546 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6566 | { |
| 6567 | if (category == coding_category_utf_8_auto) | ||
| 6568 | { | ||
| 6569 | Lisp_Object coding_systems; | ||
| 6570 | |||
| 6571 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6572 | coding_attr_utf_bom); | ||
| 6573 | if (CONSP (coding_systems)) | ||
| 6574 | { | ||
| 6575 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6576 | found = XCAR (coding_systems); | ||
| 6577 | else | ||
| 6578 | found = XCDR (coding_systems); | ||
| 6579 | } | ||
| 6580 | else | ||
| 6581 | found = CODING_ID_NAME (this->id); | ||
| 6582 | } | ||
| 6583 | else if (category == coding_category_utf_16_auto) | ||
| 6584 | { | ||
| 6585 | Lisp_Object coding_systems; | ||
| 6586 | |||
| 6587 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6588 | coding_attr_utf_bom); | ||
| 6589 | if (CONSP (coding_systems)) | ||
| 6590 | { | ||
| 6591 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6592 | found = XCAR (coding_systems); | ||
| 6593 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | ||
| 6594 | found = XCDR (coding_systems); | ||
| 6595 | } | ||
| 6596 | else | ||
| 6597 | found = CODING_ID_NAME (this->id); | ||
| 6598 | } | ||
| 6599 | else | ||
| 6600 | found = CODING_ID_NAME (this->id); | ||
| 6601 | } | ||
| 6547 | else if (null_byte_found) | 6602 | else if (null_byte_found) |
| 6548 | setup_coding_system (Qno_conversion, coding); | 6603 | found = Qno_conversion; |
| 6549 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) | 6604 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) |
| 6550 | == CATEGORY_MASK_ANY) | 6605 | == CATEGORY_MASK_ANY) |
| 6551 | setup_coding_system (Qraw_text, coding); | 6606 | found = Qraw_text; |
| 6552 | else if (detect_info.rejected) | 6607 | else if (detect_info.rejected) |
| 6553 | for (i = 0; i < coding_category_raw_text; i++) | 6608 | for (i = 0; i < coding_category_raw_text; i++) |
| 6554 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) | 6609 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) |
| 6555 | { | 6610 | { |
| 6556 | this = coding_categories + coding_priorities[i]; | 6611 | this = coding_categories + coding_priorities[i]; |
| 6557 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6612 | found = CODING_ID_NAME (this->id); |
| 6558 | break; | 6613 | break; |
| 6559 | } | 6614 | } |
| 6560 | } | 6615 | } |
| @@ -6570,12 +6625,8 @@ detect_coding (struct coding_system *coding) | |||
| 6570 | detect_info.found = detect_info.rejected = 0; | 6625 | detect_info.found = detect_info.rejected = 0; |
| 6571 | if (check_ascii (coding) == coding->src_bytes) | 6626 | if (check_ascii (coding) == coding->src_bytes) |
| 6572 | { | 6627 | { |
| 6573 | int head_ascii = coding->head_ascii; | 6628 | if (CONSP (coding_systems)) |
| 6574 | 6629 | found = XCDR (coding_systems); | |
| 6575 | if (coding->eol_seen != EOL_SEEN_NONE) | ||
| 6576 | adjust_coding_eol_type (coding, coding->eol_seen); | ||
| 6577 | setup_coding_system (XCDR (coding_systems), coding); | ||
| 6578 | coding->head_ascii = head_ascii; | ||
| 6579 | } | 6630 | } |
| 6580 | else | 6631 | else |
| 6581 | { | 6632 | { |
| @@ -6583,9 +6634,9 @@ detect_coding (struct coding_system *coding) | |||
| 6583 | && detect_coding_utf_8 (coding, &detect_info)) | 6634 | && detect_coding_utf_8 (coding, &detect_info)) |
| 6584 | { | 6635 | { |
| 6585 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | 6636 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) |
| 6586 | setup_coding_system (XCAR (coding_systems), coding); | 6637 | found = XCAR (coding_systems); |
| 6587 | else | 6638 | else |
| 6588 | setup_coding_system (XCDR (coding_systems), coding); | 6639 | found = XCDR (coding_systems); |
| 6589 | } | 6640 | } |
| 6590 | } | 6641 | } |
| 6591 | } | 6642 | } |
| @@ -6599,16 +6650,28 @@ detect_coding (struct coding_system *coding) | |||
| 6599 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); | 6650 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); |
| 6600 | detect_info.found = detect_info.rejected = 0; | 6651 | detect_info.found = detect_info.rejected = 0; |
| 6601 | coding->head_ascii = 0; | 6652 | coding->head_ascii = 0; |
| 6602 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6603 | if (CONSP (coding_systems) | 6653 | if (CONSP (coding_systems) |
| 6604 | && detect_coding_utf_16 (coding, &detect_info)) | 6654 | && detect_coding_utf_16 (coding, &detect_info)) |
| 6605 | { | 6655 | { |
| 6606 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 6656 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
| 6607 | setup_coding_system (XCAR (coding_systems), coding); | 6657 | found = XCAR (coding_systems); |
| 6608 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | 6658 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) |
| 6609 | setup_coding_system (XCDR (coding_systems), coding); | 6659 | found = XCDR (coding_systems); |
| 6610 | } | 6660 | } |
| 6611 | } | 6661 | } |
| 6662 | |||
| 6663 | if (! NILP (found)) | ||
| 6664 | { | ||
| 6665 | int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE | ||
| 6666 | : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF | ||
| 6667 | : EQ (eol_type, Qmac) ? EOL_SEEN_CR | ||
| 6668 | : EOL_SEEN_LF); | ||
| 6669 | |||
| 6670 | setup_coding_system (found, coding); | ||
| 6671 | if (specified_eol != EOL_SEEN_NONE) | ||
| 6672 | adjust_coding_eol_type (coding, specified_eol); | ||
| 6673 | } | ||
| 6674 | |||
| 6612 | coding->mode = saved_mode; | 6675 | coding->mode = saved_mode; |
| 6613 | } | 6676 | } |
| 6614 | 6677 | ||
| @@ -7729,6 +7792,9 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7729 | coding->dst_pos_byte = PT_BYTE; | 7792 | coding->dst_pos_byte = PT_BYTE; |
| 7730 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 7793 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7731 | 7794 | ||
| 7795 | coding->head_ascii = -1; | ||
| 7796 | coding->detected_utf8_chars = -1; | ||
| 7797 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 7732 | if (CODING_REQUIRE_DETECTION (coding)) | 7798 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7733 | detect_coding (coding); | 7799 | detect_coding (coding); |
| 7734 | attrs = CODING_ID_ATTRS (coding->id); | 7800 | attrs = CODING_ID_ATTRS (coding->id); |
| @@ -7743,17 +7809,38 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7743 | chars = check_ascii (coding); | 7809 | chars = check_ascii (coding); |
| 7744 | if (chars != bytes) | 7810 | if (chars != bytes) |
| 7745 | { | 7811 | { |
| 7812 | /* There exists a non-ASCII byte. */ | ||
| 7746 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) | 7813 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) |
| 7747 | chars = check_utf_8 (coding); | 7814 | { |
| 7815 | if (coding->detected_utf8_chars >= 0) | ||
| 7816 | chars = coding->detected_utf8_chars; | ||
| 7817 | else | ||
| 7818 | chars = check_utf_8 (coding); | ||
| 7819 | if (CODING_UTF_8_BOM (coding) != utf_without_bom | ||
| 7820 | && coding->head_ascii == 0 | ||
| 7821 | && coding->source[0] == UTF_8_BOM_1 | ||
| 7822 | && coding->source[1] == UTF_8_BOM_2 | ||
| 7823 | && coding->source[2] == UTF_8_BOM_3) | ||
| 7824 | { | ||
| 7825 | chars--; | ||
| 7826 | bytes -= 3; | ||
| 7827 | coding->src_bytes -= 3; | ||
| 7828 | } | ||
| 7829 | } | ||
| 7748 | else | 7830 | else |
| 7749 | chars = -1; | 7831 | chars = -1; |
| 7750 | } | 7832 | } |
| 7751 | if (chars >= 0) | 7833 | if (chars >= 0) |
| 7752 | { | 7834 | { |
| 7753 | if (coding->eol_seen != EOL_SEEN_NONE) | 7835 | Lisp_Object eol_type; |
| 7754 | adjust_coding_eol_type (coding, coding->eol_seen); | ||
| 7755 | 7836 | ||
| 7756 | if (coding->eol_seen == EOL_SEEN_CR) | 7837 | eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 7838 | if (VECTORP (eol_type)) | ||
| 7839 | { | ||
| 7840 | if (coding->eol_seen != EOL_SEEN_NONE) | ||
| 7841 | eol_type = adjust_coding_eol_type (coding, coding->eol_seen); | ||
| 7842 | } | ||
| 7843 | if (EQ (eol_type, Qmac)) | ||
| 7757 | { | 7844 | { |
| 7758 | unsigned char *src_end = GAP_END_ADDR; | 7845 | unsigned char *src_end = GAP_END_ADDR; |
| 7759 | unsigned char *src = src_end - coding->src_bytes; | 7846 | unsigned char *src = src_end - coding->src_bytes; |
| @@ -7764,7 +7851,7 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7764 | src[-1] = '\n'; | 7851 | src[-1] = '\n'; |
| 7765 | } | 7852 | } |
| 7766 | } | 7853 | } |
| 7767 | else if (coding->eol_seen == EOL_SEEN_CRLF) | 7854 | else if (EQ (eol_type, Qdos)) |
| 7768 | { | 7855 | { |
| 7769 | unsigned char *src = GAP_END_ADDR; | 7856 | unsigned char *src = GAP_END_ADDR; |
| 7770 | unsigned char *src_beg = src - coding->src_bytes; | 7857 | unsigned char *src_beg = src - coding->src_bytes; |