aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
author: Kenichi Handa, 2008-06-04 07:53:59 +0000
committer: Kenichi Handa, 2008-06-04 07:53:59 +0000
commit: c0e16b143680b9ecb51122add128577925d34159 (patch)
tree: 284bf20b8647d2707c76dac8de91d2d9315d01dc /src/coding.c
parent: 07e810660ae7d7899ddfd367a24da788c1920838 (diff)
download: emacs-c0e16b143680b9ecb51122add128577925d34159.tar.gz
emacs-c0e16b143680b9ecb51122add128577925d34159.zip
(detect_coding): Fix handling of coding->head_ascii.
Be sure to call setup_coding_system when a proper coding system is found.
(detect_coding_system): Fix handling of coding->head_ascii.
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 82
1 files changed, 43 insertions, 39 deletions
diff --git a/src/coding.c b/src/coding.c
index 7f9dc42ffa8..3ee2a7248cf 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -5782,6 +5782,7 @@ detect_coding (coding)
5782 coding_set_source (coding); 5782 coding_set_source (coding);
5783 5783
5784 src_end = coding->source + coding->src_bytes; 5784 src_end = coding->source + coding->src_bytes;
5785 coding->head_ascii = 0;
5785 5786
5786 /* If we have not yet decided the text encoding type, detect it 5787 /* If we have not yet decided the text encoding type, detect it
5787 now. */ 5788 now. */
@@ -5792,15 +5793,12 @@ detect_coding (coding)
5792 int null_byte_found = 0, eight_bit_found = 0; 5793 int null_byte_found = 0, eight_bit_found = 0;
5793 5794
5794 detect_info.checked = detect_info.found = detect_info.rejected = 0; 5795 detect_info.checked = detect_info.found = detect_info.rejected = 0;
5795 coding->head_ascii = -1;
5796 for (src = coding->source; src < src_end; src++) 5796 for (src = coding->source; src < src_end; src++)
5797 { 5797 {
5798 c = *src; 5798 c = *src;
5799 if (c & 0x80) 5799 if (c & 0x80)
5800 { 5800 {
5801 eight_bit_found = 1; 5801 eight_bit_found = 1;
5802 if (coding->head_ascii < 0)
5803 coding->head_ascii = src - coding->source;
5804 if (null_byte_found) 5802 if (null_byte_found)
5805 break; 5803 break;
5806 } 5804 }
@@ -5810,16 +5808,19 @@ detect_coding (coding)
5810 && ! inhibit_iso_escape_detection 5808 && ! inhibit_iso_escape_detection
5811 && ! detect_info.checked) 5809 && ! detect_info.checked)
5812 { 5810 {
5813 if (coding->head_ascii < 0)
5814 coding->head_ascii = src - coding->source;
5815 if (detect_coding_iso_2022 (coding, &detect_info)) 5811 if (detect_coding_iso_2022 (coding, &detect_info))
5816 { 5812 {
5817 /* We have scanned the whole data. */ 5813 /* We have scanned the whole data. */
5818 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) 5814 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5819 /* We didn't find an 8-bit code. We may have 5815 {
5820 found a null-byte, but it's very rare that 5816 /* We didn't find an 8-bit code. We may
5821 a binary file confirm to ISO-2022. */ 5817 have found a null-byte, but it's very
5822 src = src_end; 5818 rare that a binary file confirm to
5819 ISO-2022. */
5820 src = src_end;
5821 coding->head_ascii = src - coding->source;
5822 }
5823 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5823 break; 5824 break;
5824 } 5825 }
5825 } 5826 }
@@ -5829,10 +5830,11 @@ detect_coding (coding)
5829 if (eight_bit_found) 5830 if (eight_bit_found)
5830 break; 5831 break;
5831 } 5832 }
5833 coding->head_ascii++;
5832 } 5834 }
5835 else
5836 coding->head_ascii++;
5833 } 5837 }
5834 if (coding->head_ascii < 0)
5835 coding->head_ascii = src - coding->source;
5836 5838
5837 if (null_byte_found || eight_bit_found 5839 if (null_byte_found || eight_bit_found
5838 || coding->head_ascii < coding->src_bytes 5840 || coding->head_ascii < coding->src_bytes
@@ -5886,23 +5888,23 @@ detect_coding (coding)
5886 break; 5888 break;
5887 } 5889 }
5888 } 5890 }
5889
5890 if (i < coding_category_raw_text)
5891 setup_coding_system (CODING_ID_NAME (this->id), coding);
5892 else if (null_byte_found)
5893 setup_coding_system (Qno_conversion, coding);
5894 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5895 == CATEGORY_MASK_ANY)
5896 setup_coding_system (Qraw_text, coding);
5897 else if (detect_info.rejected)
5898 for (i = 0; i < coding_category_raw_text; i++)
5899 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5900 {
5901 this = coding_categories + coding_priorities[i];
5902 setup_coding_system (CODING_ID_NAME (this->id), coding);
5903 break;
5904 }
5905 } 5891 }
5892
5893 if (i < coding_category_raw_text)
5894 setup_coding_system (CODING_ID_NAME (this->id), coding);
5895 else if (null_byte_found)
5896 setup_coding_system (Qno_conversion, coding);
5897 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5898 == CATEGORY_MASK_ANY)
5899 setup_coding_system (Qraw_text, coding);
5900 else if (detect_info.rejected)
5901 for (i = 0; i < coding_category_raw_text; i++)
5902 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5903 {
5904 this = coding_categories + coding_priorities[i];
5905 setup_coding_system (CODING_ID_NAME (this->id), coding);
5906 break;
5907 }
5906 } 5908 }
5907 } 5909 }
5908 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) 5910 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -7655,6 +7657,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7655 coding.src_multibyte = multibytep; 7657 coding.src_multibyte = multibytep;
7656 coding.consumed = 0; 7658 coding.consumed = 0;
7657 coding.mode |= CODING_MODE_LAST_BLOCK; 7659 coding.mode |= CODING_MODE_LAST_BLOCK;
7660 coding.head_ascii = 0;
7658 7661
7659 detect_info.checked = detect_info.found = detect_info.rejected = 0; 7662 detect_info.checked = detect_info.found = detect_info.rejected = 0;
7660 7663
@@ -7666,7 +7669,6 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7666 struct coding_system *this; 7669 struct coding_system *this;
7667 int c, i; 7670 int c, i;
7668 7671
7669 coding.head_ascii = -1;
7670 /* Skip all ASCII bytes except for a few ISO2022 controls. */ 7672 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7671 for (; src < src_end; src++) 7673 for (; src < src_end; src++)
7672 { 7674 {
@@ -7674,27 +7676,28 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7674 if (c & 0x80) 7676 if (c & 0x80)
7675 { 7677 {
7676 eight_bit_found = 1; 7678 eight_bit_found = 1;
7677 if (coding.head_ascii < 0)
7678 coding.head_ascii = src - coding.source;
7679 if (null_byte_found) 7679 if (null_byte_found)
7680 break; 7680 break;
7681 } 7681 }
7682 if (c < 0x20) 7682 else if (c < 0x20)
7683 { 7683 {
7684 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 7684 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7685 && ! inhibit_iso_escape_detection 7685 && ! inhibit_iso_escape_detection
7686 && ! detect_info.checked) 7686 && ! detect_info.checked)
7687 { 7687 {
7688 if (coding.head_ascii < 0)
7689 coding.head_ascii = src - coding.source;
7690 if (detect_coding_iso_2022 (&coding, &detect_info)) 7688 if (detect_coding_iso_2022 (&coding, &detect_info))
7691 { 7689 {
7692 /* We have scanned the whole data. */ 7690 /* We have scanned the whole data. */
7693 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) 7691 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7694 /* We didn't find an 8-bit code. We may have 7692 {
7695 found a null-byte, but it's very rare that 7693 /* We didn't find an 8-bit code. We may
7696 a binary file confirm to ISO-2022. */ 7694 have found a null-byte, but it's very
7697 src = src_end; 7695 rare that a binary file confirm to
7696 ISO-2022. */
7697 src = src_end;
7698 coding.head_ascii = src - coding.source;
7699 }
7700 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7698 break; 7701 break;
7699 } 7702 }
7700 } 7703 }
@@ -7704,10 +7707,11 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7704 if (eight_bit_found) 7707 if (eight_bit_found)
7705 break; 7708 break;
7706 } 7709 }
7710 coding.head_ascii++;
7707 } 7711 }
7712 else
7713 coding.head_ascii++;
7708 } 7714 }
7709 if (coding.head_ascii < 0)
7710 coding.head_ascii = src - coding.source;
7711 7715
7712 if (null_byte_found || eight_bit_found 7716 if (null_byte_found || eight_bit_found
7713 || coding.head_ascii < coding.src_bytes 7717 || coding.head_ascii < coding.src_bytes