diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 197 |
1 files changed, 113 insertions, 84 deletions
diff --git a/src/coding.c b/src/coding.c index d618f4d4497..0761425dcd8 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1418,26 +1418,6 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1418 | } | 1418 | } |
| 1419 | else if (c1 >= 0 && c2 >= 0) | 1419 | else if (c1 >= 0 && c2 >= 0) |
| 1420 | { | 1420 | { |
| 1421 | unsigned char b1[256], b2[256]; | ||
| 1422 | int b1_variants = 1, b2_variants = 1; | ||
| 1423 | int n; | ||
| 1424 | |||
| 1425 | bzero (b1, 256), bzero (b2, 256); | ||
| 1426 | b1[c1]++, b2[c2]++; | ||
| 1427 | for (n = 0; n < 256 && src < src_end; n++) | ||
| 1428 | { | ||
| 1429 | src_base = src; | ||
| 1430 | ONE_MORE_BYTE (c1); | ||
| 1431 | ONE_MORE_BYTE (c2); | ||
| 1432 | if (c1 < 0 || c2 < 0) | ||
| 1433 | break; | ||
| 1434 | if (! b1[c1++]) b1_variants++; | ||
| 1435 | if (! b2[c2++]) b2_variants++; | ||
| 1436 | } | ||
| 1437 | if (b1_variants < b2_variants) | ||
| 1438 | detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG; | ||
| 1439 | else | ||
| 1440 | detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG; | ||
| 1441 | detect_info->rejected | 1421 | detect_info->rejected |
| 1442 | |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); | 1422 | |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); |
| 1443 | } | 1423 | } |
| @@ -5421,53 +5401,78 @@ detect_coding (coding) | |||
| 5421 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 5401 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
| 5422 | { | 5402 | { |
| 5423 | int c, i; | 5403 | int c, i; |
| 5404 | struct coding_detection_info detect_info; | ||
| 5424 | 5405 | ||
| 5406 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | ||
| 5425 | for (i = 0, src = coding->source; src < src_end; i++, src++) | 5407 | for (i = 0, src = coding->source; src < src_end; i++, src++) |
| 5426 | { | 5408 | { |
| 5427 | c = *src; | 5409 | c = *src; |
| 5428 | if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC | 5410 | if (c & 0x80) |
| 5429 | || c == ISO_CODE_SI | ||
| 5430 | || c == ISO_CODE_SO))) | ||
| 5431 | break; | 5411 | break; |
| 5412 | if (c < 0x20 | ||
| 5413 | && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | ||
| 5414 | && ! inhibit_iso_escape_detection | ||
| 5415 | && ! detect_info.checked) | ||
| 5416 | { | ||
| 5417 | coding->head_ascii = src - (coding->source + coding->consumed); | ||
| 5418 | if (detect_coding_iso_2022 (coding, &detect_info)) | ||
| 5419 | { | ||
| 5420 | /* We have scanned the whole data. */ | ||
| 5421 | if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) | ||
| 5422 | /* We didn't find an 8-bit code. */ | ||
| 5423 | src = src_end; | ||
| 5424 | break; | ||
| 5425 | } | ||
| 5426 | } | ||
| 5432 | } | 5427 | } |
| 5433 | coding->head_ascii = src - (coding->source + coding->consumed); | 5428 | coding->head_ascii = src - (coding->source + coding->consumed); |
| 5434 | 5429 | ||
| 5435 | if (coding->head_ascii < coding->src_bytes) | 5430 | if (coding->head_ascii == coding->src_bytes |
| 5431 | || detect_info.found) | ||
| 5436 | { | 5432 | { |
| 5437 | struct coding_detection_info detect_info; | ||
| 5438 | enum coding_category category; | 5433 | enum coding_category category; |
| 5439 | struct coding_system *this; | 5434 | struct coding_system *this; |
| 5440 | 5435 | ||
| 5441 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 5436 | if (coding->head_ascii == coding->src_bytes) |
| 5442 | for (i = 0; i < coding_category_raw_text; i++) | 5437 | /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */ |
| 5443 | { | 5438 | for (i = 0; i < coding_category_raw_text; i++) |
| 5444 | category = coding_priorities[i]; | 5439 | { |
| 5445 | this = coding_categories + category; | 5440 | category = coding_priorities[i]; |
| 5446 | if (this->id < 0) | 5441 | this = coding_categories + category; |
| 5447 | { | 5442 | if (detect_info.found & (1 << category)) |
| 5448 | /* No coding system of this category is defined. */ | ||
| 5449 | detect_info.rejected |= (1 << category); | ||
| 5450 | } | ||
| 5451 | else if (category >= coding_category_raw_text) | ||
| 5452 | continue; | ||
| 5453 | else if (detect_info.checked & (1 << category)) | ||
| 5454 | { | ||
| 5455 | if (detect_info.found & (1 << category)) | ||
| 5456 | break; | ||
| 5457 | } | ||
| 5458 | else if ((*(this->detector)) (coding, &detect_info) | ||
| 5459 | && detect_info.found & (1 << category)) | ||
| 5460 | { | ||
| 5461 | if (category == coding_category_utf_16_auto) | ||
| 5462 | { | ||
| 5463 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 5464 | category = coding_category_utf_16_le; | ||
| 5465 | else | ||
| 5466 | category = coding_category_utf_16_be; | ||
| 5467 | } | ||
| 5468 | break; | 5443 | break; |
| 5469 | } | 5444 | } |
| 5470 | } | 5445 | else |
| 5446 | for (i = 0; i < coding_category_raw_text; i++) | ||
| 5447 | { | ||
| 5448 | category = coding_priorities[i]; | ||
| 5449 | this = coding_categories + category; | ||
| 5450 | if (this->id < 0) | ||
| 5451 | { | ||
| 5452 | /* No coding system of this category is defined. */ | ||
| 5453 | detect_info.rejected |= (1 << category); | ||
| 5454 | } | ||
| 5455 | else if (category >= coding_category_raw_text) | ||
| 5456 | continue; | ||
| 5457 | else if (detect_info.checked & (1 << category)) | ||
| 5458 | { | ||
| 5459 | if (detect_info.found & (1 << category)) | ||
| 5460 | break; | ||
| 5461 | } | ||
| 5462 | else if ((*(this->detector)) (coding, &detect_info) | ||
| 5463 | && detect_info.found & (1 << category)) | ||
| 5464 | { | ||
| 5465 | if (category == coding_category_utf_16_auto) | ||
| 5466 | { | ||
| 5467 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 5468 | category = coding_category_utf_16_le; | ||
| 5469 | else | ||
| 5470 | category = coding_category_utf_16_be; | ||
| 5471 | } | ||
| 5472 | break; | ||
| 5473 | } | ||
| 5474 | } | ||
| 5475 | |||
| 5471 | if (i < coding_category_raw_text) | 5476 | if (i < coding_category_raw_text) |
| 5472 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 5477 | setup_coding_system (CODING_ID_NAME (this->id), coding); |
| 5473 | else if (detect_info.rejected == CATEGORY_MASK_ANY) | 5478 | else if (detect_info.rejected == CATEGORY_MASK_ANY) |
| @@ -7120,49 +7125,73 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, | |||
| 7120 | for (i = 0; src < src_end; i++, src++) | 7125 | for (i = 0; src < src_end; i++, src++) |
| 7121 | { | 7126 | { |
| 7122 | c = *src; | 7127 | c = *src; |
| 7123 | if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC | 7128 | if (c & 0x80) |
| 7124 | || c == ISO_CODE_SI | ||
| 7125 | || c == ISO_CODE_SO))) | ||
| 7126 | break; | 7129 | break; |
| 7130 | if (c < 0x20 | ||
| 7131 | && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | ||
| 7132 | && inhibit_iso_escape_detection) | ||
| 7133 | { | ||
| 7134 | coding.head_ascii = src - coding.source; | ||
| 7135 | if (detect_coding_iso_2022 (&coding, &detect_info)) | ||
| 7136 | { | ||
| 7137 | /* We have scanned the whole data. */ | ||
| 7138 | if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) | ||
| 7139 | /* We didn't find an 8-bit code. */ | ||
| 7140 | src = src_end; | ||
| 7141 | break; | ||
| 7142 | } | ||
| 7143 | } | ||
| 7127 | } | 7144 | } |
| 7128 | coding.head_ascii = src - coding.source; | 7145 | coding.head_ascii = src - coding.source; |
| 7129 | 7146 | ||
| 7130 | if (src < src_end) | 7147 | if (src < src_end |
| 7131 | for (i = 0; i < coding_category_raw_text; i++) | 7148 | || detect_info.found) |
| 7132 | { | 7149 | { |
| 7133 | category = coding_priorities[i]; | 7150 | if (src == src_end) |
| 7134 | this = coding_categories + category; | 7151 | /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */ |
| 7135 | 7152 | for (i = 0; i < coding_category_raw_text; i++) | |
| 7136 | if (this->id < 0) | ||
| 7137 | { | ||
| 7138 | /* No coding system of this category is defined. */ | ||
| 7139 | detect_info.rejected |= (1 << category); | ||
| 7140 | } | ||
| 7141 | else if (category >= coding_category_raw_text) | ||
| 7142 | continue; | ||
| 7143 | else if (detect_info.checked & (1 << category)) | ||
| 7144 | { | 7153 | { |
| 7145 | if (highest | 7154 | category = coding_priorities[i]; |
| 7146 | && (detect_info.found & (1 << category))) | 7155 | if (detect_info.found & (1 << category)) |
| 7147 | break; | 7156 | break; |
| 7148 | } | 7157 | } |
| 7149 | else | 7158 | else |
| 7159 | for (i = 0; i < coding_category_raw_text; i++) | ||
| 7150 | { | 7160 | { |
| 7151 | if ((*(this->detector)) (&coding, &detect_info) | 7161 | category = coding_priorities[i]; |
| 7152 | && highest | 7162 | this = coding_categories + category; |
| 7153 | && (detect_info.found & (1 << category))) | 7163 | |
| 7164 | if (this->id < 0) | ||
| 7154 | { | 7165 | { |
| 7155 | if (category == coding_category_utf_16_auto) | 7166 | /* No coding system of this category is defined. */ |
| 7167 | detect_info.rejected |= (1 << category); | ||
| 7168 | } | ||
| 7169 | else if (category >= coding_category_raw_text) | ||
| 7170 | continue; | ||
| 7171 | else if (detect_info.checked & (1 << category)) | ||
| 7172 | { | ||
| 7173 | if (highest | ||
| 7174 | && (detect_info.found & (1 << category))) | ||
| 7175 | break; | ||
| 7176 | } | ||
| 7177 | else | ||
| 7178 | { | ||
| 7179 | if ((*(this->detector)) (&coding, &detect_info) | ||
| 7180 | && highest | ||
| 7181 | && (detect_info.found & (1 << category))) | ||
| 7156 | { | 7182 | { |
| 7157 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 7183 | if (category == coding_category_utf_16_auto) |
| 7158 | category = coding_category_utf_16_le; | 7184 | { |
| 7159 | else | 7185 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
| 7160 | category = coding_category_utf_16_be; | 7186 | category = coding_category_utf_16_le; |
| 7187 | else | ||
| 7188 | category = coding_category_utf_16_be; | ||
| 7189 | } | ||
| 7190 | break; | ||
| 7161 | } | 7191 | } |
| 7162 | break; | ||
| 7163 | } | 7192 | } |
| 7164 | } | 7193 | } |
| 7165 | } | 7194 | } |
| 7166 | 7195 | ||
| 7167 | if (detect_info.rejected == CATEGORY_MASK_ANY) | 7196 | if (detect_info.rejected == CATEGORY_MASK_ANY) |
| 7168 | { | 7197 | { |