aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Paul Eggert, 2022-05-31 01:19:32 -0700
committer: Paul Eggert, 2022-05-31 01:26:47 -0700
commit: 82c05c034e1ecec49e4e8916b2cb6163d7a5bb74 (patch)
tree: 8be231eb91fb2245f2f108f484fe2838f514564b /src
parent: 877be9098ee3ecc041216d39dbb20d0d044a46c0 (diff)
download: emacs-82c05c034e1ecec49e4e8916b2cb6163d7a5bb74.tar.gz
emacs-82c05c034e1ecec49e4e8916b2cb6163d7a5bb74.zip
Avoid undefined behavior in detect_coding routines
* src/coding.c (detect_coding): Always initialize all components of detect_info, so that detect_coding_utf_8 etc. do not have undefined behavior when they read detect_info.checked. This bug is not likely to cause problems on real systems. Problem found by GCC 12 -fanalyzer.
(detect_coding_system): Use consistent style with detect_coding initialization.
Diffstat (limited to 'src')
-rw-r--r-- src/coding.c 35
1 files changed, 14 insertions, 21 deletions
diff --git a/src/coding.c b/src/coding.c
index 2bed293d571..aa32efc3f61 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -6528,7 +6528,7 @@ detect_coding (struct coding_system *coding)
6528 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) 6528 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6529 { 6529 {
6530 int c, i; 6530 int c, i;
6531 struct coding_detection_info detect_info; 6531 struct coding_detection_info detect_info = {0};
6532 bool null_byte_found = 0, eight_bit_found = 0; 6532 bool null_byte_found = 0, eight_bit_found = 0;
6533 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd, 6533 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
6534 inhibit_null_byte_detection); 6534 inhibit_null_byte_detection);
@@ -6537,7 +6537,6 @@ detect_coding (struct coding_system *coding)
6537 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8; 6537 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
6538 6538
6539 coding->head_ascii = 0; 6539 coding->head_ascii = 0;
6540 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6541 for (src = coding->source; src < src_end; src++) 6540 for (src = coding->source; src < src_end; src++)
6542 { 6541 {
6543 c = *src; 6542 c = *src;
@@ -6712,12 +6711,8 @@ detect_coding (struct coding_system *coding)
6712 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) 6711 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6713 == coding_category_utf_8_auto) 6712 == coding_category_utf_8_auto)
6714 { 6713 {
6715 Lisp_Object coding_systems; 6714 Lisp_Object coding_systems
6716 struct coding_detection_info detect_info;
6717
6718 coding_systems
6719 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); 6715 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6720 detect_info.found = detect_info.rejected = 0;
6721 if (check_ascii (coding) == coding->src_bytes) 6716 if (check_ascii (coding) == coding->src_bytes)
6722 { 6717 {
6723 if (CONSP (coding_systems)) 6718 if (CONSP (coding_systems))
@@ -6725,6 +6720,7 @@ detect_coding (struct coding_system *coding)
6725 } 6720 }
6726 else 6721 else
6727 { 6722 {
6723 struct coding_detection_info detect_info = {0};
6728 if (CONSP (coding_systems) 6724 if (CONSP (coding_systems)
6729 && detect_coding_utf_8 (coding, &detect_info)) 6725 && detect_coding_utf_8 (coding, &detect_info))
6730 { 6726 {
@@ -6738,20 +6734,19 @@ detect_coding (struct coding_system *coding)
6738 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) 6734 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6739 == coding_category_utf_16_auto) 6735 == coding_category_utf_16_auto)
6740 { 6736 {
6741 Lisp_Object coding_systems; 6737 Lisp_Object coding_systems
6742 struct coding_detection_info detect_info;
6743
6744 coding_systems
6745 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); 6738 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6746 detect_info.found = detect_info.rejected = 0;
6747 coding->head_ascii = 0; 6739 coding->head_ascii = 0;
6748 if (CONSP (coding_systems) 6740 if (CONSP (coding_systems))
6749 && detect_coding_utf_16 (coding, &detect_info))
6750 { 6741 {
6751 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) 6742 struct coding_detection_info detect_info = {0};
6752 found = XCAR (coding_systems); 6743 if (detect_coding_utf_16 (coding, &detect_info))
6753 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) 6744 {
6754 found = XCDR (coding_systems); 6745 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6746 found = XCAR (coding_systems);
6747 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6748 found = XCDR (coding_systems);
6749 }
6755 } 6750 }
6756 } 6751 }
6757 6752
@@ -8639,7 +8634,7 @@ detect_coding_system (const unsigned char *src,
8639 Lisp_Object val = Qnil; 8634 Lisp_Object val = Qnil;
8640 struct coding_system coding; 8635 struct coding_system coding;
8641 ptrdiff_t id; 8636 ptrdiff_t id;
8642 struct coding_detection_info detect_info; 8637 struct coding_detection_info detect_info = {0};
8643 enum coding_category base_category; 8638 enum coding_category base_category;
8644 bool null_byte_found = 0, eight_bit_found = 0; 8639 bool null_byte_found = 0, eight_bit_found = 0;
8645 8640
@@ -8658,8 +8653,6 @@ detect_coding_system (const unsigned char *src,
8658 coding.mode |= CODING_MODE_LAST_BLOCK; 8653 coding.mode |= CODING_MODE_LAST_BLOCK;
8659 coding.head_ascii = 0; 8654 coding.head_ascii = 0;
8660 8655
8661 detect_info.checked = detect_info.found = detect_info.rejected = 0;
8662
8663 /* At first, detect text-format if necessary. */ 8656 /* At first, detect text-format if necessary. */
8664 base_category = XFIXNUM (CODING_ATTR_CATEGORY (attrs)); 8657 base_category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
8665 if (base_category == coding_category_undecided) 8658 if (base_category == coding_category_undecided)