diff options
| author | Kenichi Handa | 2008-06-04 07:53:59 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2008-06-04 07:53:59 +0000 |
| commit | c0e16b143680b9ecb51122add128577925d34159 (patch) | |
| tree | 284bf20b8647d2707c76dac8de91d2d9315d01dc /src/coding.c | |
| parent | 07e810660ae7d7899ddfd367a24da788c1920838 (diff) | |
| download | emacs-c0e16b143680b9ecb51122add128577925d34159.tar.gz emacs-c0e16b143680b9ecb51122add128577925d34159.zip | |
(detect_coding): Fix handling of coding->head_ascii.
Be sure to call setup_coding_system when a proper coding system is
found.
(detect_coding_system): Fix handling of coding->head_ascii.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 82 |
1 files changed, 43 insertions, 39 deletions
diff --git a/src/coding.c b/src/coding.c index 7f9dc42ffa8..3ee2a7248cf 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -5782,6 +5782,7 @@ detect_coding (coding) | |||
| 5782 | coding_set_source (coding); | 5782 | coding_set_source (coding); |
| 5783 | 5783 | ||
| 5784 | src_end = coding->source + coding->src_bytes; | 5784 | src_end = coding->source + coding->src_bytes; |
| 5785 | coding->head_ascii = 0; | ||
| 5785 | 5786 | ||
| 5786 | /* If we have not yet decided the text encoding type, detect it | 5787 | /* If we have not yet decided the text encoding type, detect it |
| 5787 | now. */ | 5788 | now. */ |
| @@ -5792,15 +5793,12 @@ detect_coding (coding) | |||
| 5792 | int null_byte_found = 0, eight_bit_found = 0; | 5793 | int null_byte_found = 0, eight_bit_found = 0; |
| 5793 | 5794 | ||
| 5794 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 5795 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 5795 | coding->head_ascii = -1; | ||
| 5796 | for (src = coding->source; src < src_end; src++) | 5796 | for (src = coding->source; src < src_end; src++) |
| 5797 | { | 5797 | { |
| 5798 | c = *src; | 5798 | c = *src; |
| 5799 | if (c & 0x80) | 5799 | if (c & 0x80) |
| 5800 | { | 5800 | { |
| 5801 | eight_bit_found = 1; | 5801 | eight_bit_found = 1; |
| 5802 | if (coding->head_ascii < 0) | ||
| 5803 | coding->head_ascii = src - coding->source; | ||
| 5804 | if (null_byte_found) | 5802 | if (null_byte_found) |
| 5805 | break; | 5803 | break; |
| 5806 | } | 5804 | } |
| @@ -5810,16 +5808,19 @@ detect_coding (coding) | |||
| 5810 | && ! inhibit_iso_escape_detection | 5808 | && ! inhibit_iso_escape_detection |
| 5811 | && ! detect_info.checked) | 5809 | && ! detect_info.checked) |
| 5812 | { | 5810 | { |
| 5813 | if (coding->head_ascii < 0) | ||
| 5814 | coding->head_ascii = src - coding->source; | ||
| 5815 | if (detect_coding_iso_2022 (coding, &detect_info)) | 5811 | if (detect_coding_iso_2022 (coding, &detect_info)) |
| 5816 | { | 5812 | { |
| 5817 | /* We have scanned the whole data. */ | 5813 | /* We have scanned the whole data. */ |
| 5818 | if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) | 5814 | if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) |
| 5819 | /* We didn't find an 8-bit code. We may have | 5815 | { |
| 5820 | found a null-byte, but it's very rare that | 5816 | /* We didn't find an 8-bit code. We may |
| 5821 | a binary file confirm to ISO-2022. */ | 5817 | have found a null-byte, but it's very |
| 5822 | src = src_end; | 5818 | rare that a binary file confirm to |
| 5819 | ISO-2022. */ | ||
| 5820 | src = src_end; | ||
| 5821 | coding->head_ascii = src - coding->source; | ||
| 5822 | } | ||
| 5823 | detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; | ||
| 5823 | break; | 5824 | break; |
| 5824 | } | 5825 | } |
| 5825 | } | 5826 | } |
| @@ -5829,10 +5830,11 @@ detect_coding (coding) | |||
| 5829 | if (eight_bit_found) | 5830 | if (eight_bit_found) |
| 5830 | break; | 5831 | break; |
| 5831 | } | 5832 | } |
| 5833 | coding->head_ascii++; | ||
| 5832 | } | 5834 | } |
| 5835 | else | ||
| 5836 | coding->head_ascii++; | ||
| 5833 | } | 5837 | } |
| 5834 | if (coding->head_ascii < 0) | ||
| 5835 | coding->head_ascii = src - coding->source; | ||
| 5836 | 5838 | ||
| 5837 | if (null_byte_found || eight_bit_found | 5839 | if (null_byte_found || eight_bit_found |
| 5838 | || coding->head_ascii < coding->src_bytes | 5840 | || coding->head_ascii < coding->src_bytes |
| @@ -5886,23 +5888,23 @@ detect_coding (coding) | |||
| 5886 | break; | 5888 | break; |
| 5887 | } | 5889 | } |
| 5888 | } | 5890 | } |
| 5889 | |||
| 5890 | if (i < coding_category_raw_text) | ||
| 5891 | setup_coding_system (CODING_ID_NAME (this->id), coding); | ||
| 5892 | else if (null_byte_found) | ||
| 5893 | setup_coding_system (Qno_conversion, coding); | ||
| 5894 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) | ||
| 5895 | == CATEGORY_MASK_ANY) | ||
| 5896 | setup_coding_system (Qraw_text, coding); | ||
| 5897 | else if (detect_info.rejected) | ||
| 5898 | for (i = 0; i < coding_category_raw_text; i++) | ||
| 5899 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) | ||
| 5900 | { | ||
| 5901 | this = coding_categories + coding_priorities[i]; | ||
| 5902 | setup_coding_system (CODING_ID_NAME (this->id), coding); | ||
| 5903 | break; | ||
| 5904 | } | ||
| 5905 | } | 5891 | } |
| 5892 | |||
| 5893 | if (i < coding_category_raw_text) | ||
| 5894 | setup_coding_system (CODING_ID_NAME (this->id), coding); | ||
| 5895 | else if (null_byte_found) | ||
| 5896 | setup_coding_system (Qno_conversion, coding); | ||
| 5897 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) | ||
| 5898 | == CATEGORY_MASK_ANY) | ||
| 5899 | setup_coding_system (Qraw_text, coding); | ||
| 5900 | else if (detect_info.rejected) | ||
| 5901 | for (i = 0; i < coding_category_raw_text; i++) | ||
| 5902 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) | ||
| 5903 | { | ||
| 5904 | this = coding_categories + coding_priorities[i]; | ||
| 5905 | setup_coding_system (CODING_ID_NAME (this->id), coding); | ||
| 5906 | break; | ||
| 5907 | } | ||
| 5906 | } | 5908 | } |
| 5907 | } | 5909 | } |
| 5908 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) | 5910 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
| @@ -7655,6 +7657,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, | |||
| 7655 | coding.src_multibyte = multibytep; | 7657 | coding.src_multibyte = multibytep; |
| 7656 | coding.consumed = 0; | 7658 | coding.consumed = 0; |
| 7657 | coding.mode |= CODING_MODE_LAST_BLOCK; | 7659 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 7660 | coding.head_ascii = 0; | ||
| 7658 | 7661 | ||
| 7659 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 7662 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 7660 | 7663 | ||
| @@ -7666,7 +7669,6 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, | |||
| 7666 | struct coding_system *this; | 7669 | struct coding_system *this; |
| 7667 | int c, i; | 7670 | int c, i; |
| 7668 | 7671 | ||
| 7669 | coding.head_ascii = -1; | ||
| 7670 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ | 7672 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
| 7671 | for (; src < src_end; src++) | 7673 | for (; src < src_end; src++) |
| 7672 | { | 7674 | { |
| @@ -7674,27 +7676,28 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, | |||
| 7674 | if (c & 0x80) | 7676 | if (c & 0x80) |
| 7675 | { | 7677 | { |
| 7676 | eight_bit_found = 1; | 7678 | eight_bit_found = 1; |
| 7677 | if (coding.head_ascii < 0) | ||
| 7678 | coding.head_ascii = src - coding.source; | ||
| 7679 | if (null_byte_found) | 7679 | if (null_byte_found) |
| 7680 | break; | 7680 | break; |
| 7681 | } | 7681 | } |
| 7682 | if (c < 0x20) | 7682 | else if (c < 0x20) |
| 7683 | { | 7683 | { |
| 7684 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 7684 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
| 7685 | && ! inhibit_iso_escape_detection | 7685 | && ! inhibit_iso_escape_detection |
| 7686 | && ! detect_info.checked) | 7686 | && ! detect_info.checked) |
| 7687 | { | 7687 | { |
| 7688 | if (coding.head_ascii < 0) | ||
| 7689 | coding.head_ascii = src - coding.source; | ||
| 7690 | if (detect_coding_iso_2022 (&coding, &detect_info)) | 7688 | if (detect_coding_iso_2022 (&coding, &detect_info)) |
| 7691 | { | 7689 | { |
| 7692 | /* We have scanned the whole data. */ | 7690 | /* We have scanned the whole data. */ |
| 7693 | if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) | 7691 | if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) |
| 7694 | /* We didn't find an 8-bit code. We may have | 7692 | { |
| 7695 | found a null-byte, but it's very rare that | 7693 | /* We didn't find an 8-bit code. We may |
| 7696 | a binary file confirm to ISO-2022. */ | 7694 | have found a null-byte, but it's very |
| 7697 | src = src_end; | 7695 | rare that a binary file confirm to |
| 7696 | ISO-2022. */ | ||
| 7697 | src = src_end; | ||
| 7698 | coding.head_ascii = src - coding.source; | ||
| 7699 | } | ||
| 7700 | detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; | ||
| 7698 | break; | 7701 | break; |
| 7699 | } | 7702 | } |
| 7700 | } | 7703 | } |
| @@ -7704,10 +7707,11 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, | |||
| 7704 | if (eight_bit_found) | 7707 | if (eight_bit_found) |
| 7705 | break; | 7708 | break; |
| 7706 | } | 7709 | } |
| 7710 | coding.head_ascii++; | ||
| 7707 | } | 7711 | } |
| 7712 | else | ||
| 7713 | coding.head_ascii++; | ||
| 7708 | } | 7714 | } |
| 7709 | if (coding.head_ascii < 0) | ||
| 7710 | coding.head_ascii = src - coding.source; | ||
| 7711 | 7715 | ||
| 7712 | if (null_byte_found || eight_bit_found | 7716 | if (null_byte_found || eight_bit_found |
| 7713 | || coding.head_ascii < coding.src_bytes | 7717 | || coding.head_ascii < coding.src_bytes |