about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Paul Eggert 2011-09-24 18:22:30 -0700
committer Paul Eggert 2011-09-24 18:22:30 -0700
commit 3c7649c1859d6252444044fd64c7b27d8e487f68 (patch)
tree c36db71bf71b503000c025d9586fb962e88cecb9 /src
parent 7c85f529fd12a8cb12eb85944877d0a52b6380a7 (diff)
download emacs-3c7649c1859d6252444044fd64c7b27d8e487f68.tar.gz
emacs-3c7649c1859d6252444044fd64c7b27d8e487f68.zip
* charset.c: Integer overflow fixes.
Don't rely on undefined behavior with signed left shift overflow. Don't assume unsigned int fits into fixnum, or that fixnum fits into unsigned int. Don't require max_code to be a valid fixnum; that's not true for gb18030 4-byte on a 32-bit host. Allow invalid_code to be a cons, for the same reason. Require code_offset to be a character. Avoid int overflow if max_char is close to INT_MAX. (CODE_POINT_TO_INDEX): On 32-bit hosts, return int, not unsigned; this is intended anyway and avoids some undefined behavior. (load_charset_map): Pass unsigned, not int, as 2nd arg of INDEX_TO_CODE_POINT, as that's what it expects. (Funify_charset, encode_char): Don't stuff unsigned vals into int vars.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 16
-rw-r--r-- src/charset.c 61
2 files changed, 45 insertions, 32 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index f67d1b72bf2..7973cc277e2 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,4 +1,4 @@
1 2011-09-21 Paul Eggert <eggert@cs.ucla.edu> 1 2011-09-25 Paul Eggert <eggert@cs.ucla.edu>
2 2
3 * alloc.c (pure_bytes_used_lisp, pure_bytes_used_non_lisp): 3 * alloc.c (pure_bytes_used_lisp, pure_bytes_used_non_lisp):
4 (allocate_vectorlike, buffer_memory_full, struct sdata, SDATA_SIZE) 4 (allocate_vectorlike, buffer_memory_full, struct sdata, SDATA_SIZE)
@@ -103,10 +103,22 @@
103 Use ptrdiff_t, not int, to avoid needless 32-bit limit on 64-bit hosts. 103 Use ptrdiff_t, not int, to avoid needless 32-bit limit on 64-bit hosts.
104 (load_charset_map_from_file): Redo idx calculation to avoid overflow. 104 (load_charset_map_from_file): Redo idx calculation to avoid overflow.
105 (load_charset_map_from_vector, Fdefine_charset_internal): 105 (load_charset_map_from_vector, Fdefine_charset_internal):
106 Don't assume fixnum fits in int or unsigned int. 106 Don't assume fixnum fits in int.
107 (load_charset_map_from_vector, Fmap_charset_chars): 107 (load_charset_map_from_vector, Fmap_charset_chars):
108 Remove now-unnecessary CHECK_NATNUMs. 108 Remove now-unnecessary CHECK_NATNUMs.
109 (Fdefine_charset_internal): Check ranges here, more carefully. 109 (Fdefine_charset_internal): Check ranges here, more carefully.
110 Don't rely on undefined behavior with signed left shift overflow.
111 Don't assume unsigned int fits into fixnum, or that fixnum fits
112 into unsigned int. Don't require max_code to be a valid fixnum;
113 that's not true for gb18030 4-byte on a 32-bit host. Allow
114 invalid_code to be a cons, for the same reason. Require code_offset
115 to be a character. Avoid int overflow if max_char is close
116 to INT_MAX.
117 (CODE_POINT_TO_INDEX): On 32-bit hosts, return int, not unsigned;
118 this is intended anyway and avoids some undefined behavior.
119 (load_charset_map): Pass unsigned, not int, as 2nd arg of
120 INDEX_TO_CODE_POINT, as that's what it expects.
121 (Funify_charset, encode_char): Don't stuff unsigned vals into int vars.
110 * chartab.c (Fmake_char_table, Fset_char_table_range) 122 * chartab.c (Fmake_char_table, Fset_char_table_range)
111 (uniprop_get_decoder, uniprop_get_encoder): 123 (uniprop_get_decoder, uniprop_get_encoder):
112 Don't assume fixnum fits in int. 124 Don't assume fixnum fits in int.
diff --git a/src/charset.c b/src/charset.c
index 9d58d29d05c..2451c55e92a 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -118,24 +118,25 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
118 118
119#define CODE_POINT_TO_INDEX(charset, code) \ 119#define CODE_POINT_TO_INDEX(charset, code) \
120 ((charset)->code_linear_p \ 120 ((charset)->code_linear_p \
121 ? (code) - (charset)->min_code \ 121 ? (int) ((code) - (charset)->min_code) \
122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ 122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ 123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ 124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ 125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
126 ? (((((code) >> 24) - (charset)->code_space[12]) \ 126 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
127 * (charset)->code_space[11]) \ 127 * (charset)->code_space[11]) \
128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ 128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
129 * (charset)->code_space[7]) \ 129 * (charset)->code_space[7]) \
130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ 130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
131 * (charset)->code_space[3]) \ 131 * (charset)->code_space[3]) \
132 + (((code) & 0xFF) - (charset)->code_space[0]) \ 132 + (((code) & 0xFF) - (charset)->code_space[0]) \
133 - ((charset)->char_index_offset)) \ 133 - ((charset)->char_index_offset)) \
134 : -1) 134 : -1)
135 135
136 136
137/* Convert the character index IDX to code-point CODE for CHARSET. 137/* Return the code-point for the character index IDX in CHARSET.
138 It is assumed that IDX is in a valid range. */ 138 IDX should be an unsigned int variable in a valid range (which is
139 always in nonnegative int range too). IDX contains garbage afterwards. */
139 140
140#define INDEX_TO_CODE_POINT(charset, idx) \ 141#define INDEX_TO_CODE_POINT(charset, idx) \
141 ((charset)->code_linear_p \ 142 ((charset)->code_linear_p \
@@ -363,7 +364,8 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
363 && CHARSET_COMPACT_CODES_P (charset)) 364 && CHARSET_COMPACT_CODES_P (charset))
364 for (; from_index < lim_index; from_index++, from_c++) 365 for (; from_index < lim_index; from_index++, from_c++)
365 { 366 {
366 unsigned code = INDEX_TO_CODE_POINT (charset, from_index); 367 unsigned code = from_index;
368 code = INDEX_TO_CODE_POINT (charset, code);
367 369
368 if (NILP (CHAR_TABLE_REF (table, from_c))) 370 if (NILP (CHAR_TABLE_REF (table, from_c)))
369 CHAR_TABLE_SET (table, from_c, make_number (code)); 371 CHAR_TABLE_SET (table, from_c, make_number (code));
@@ -923,11 +925,11 @@ usage: (define-charset-internal ...) */)
923 charset.min_code = (charset.code_space[0] 925 charset.min_code = (charset.code_space[0]
924 | (charset.code_space[4] << 8) 926 | (charset.code_space[4] << 8)
925 | (charset.code_space[8] << 16) 927 | (charset.code_space[8] << 16)
926 | (charset.code_space[12] << 24)); 928 | ((unsigned) charset.code_space[12] << 24));
927 charset.max_code = (charset.code_space[1] 929 charset.max_code = (charset.code_space[1]
928 | (charset.code_space[5] << 8) 930 | (charset.code_space[5] << 8)
929 | (charset.code_space[9] << 16) 931 | (charset.code_space[9] << 16)
930 | (charset.code_space[13] << 24)); 932 | ((unsigned) charset.code_space[13] << 24));
931 charset.char_index_offset = 0; 933 charset.char_index_offset = 0;
932 934
933 val = args[charset_arg_min_code]; 935 val = args[charset_arg_min_code];
@@ -937,8 +939,8 @@ usage: (define-charset-internal ...) */)
937 939
938 if (code < charset.min_code 940 if (code < charset.min_code
939 || code > charset.max_code) 941 || code > charset.max_code)
940 args_out_of_range_3 (make_number (charset.min_code), 942 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
941 make_number (charset.max_code), val); 943 make_fixnum_or_float (charset.max_code), val);
942 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); 944 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
943 charset.min_code = code; 945 charset.min_code = code;
944 } 946 }
@@ -950,8 +952,8 @@ usage: (define-charset-internal ...) */)
950 952
951 if (code < charset.min_code 953 if (code < charset.min_code
952 || code > charset.max_code) 954 || code > charset.max_code)
953 args_out_of_range_3 (make_number (charset.min_code), 955 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
954 make_number (charset.max_code), val); 956 make_fixnum_or_float (charset.max_code), val);
955 charset.max_code = code; 957 charset.max_code = code;
956 } 958 }
957 959
@@ -964,14 +966,14 @@ usage: (define-charset-internal ...) */)
964 charset.invalid_code = 0; 966 charset.invalid_code = 0;
965 else 967 else
966 { 968 {
967 if (charset.max_code < min (UINT_MAX, MOST_POSITIVE_FIXNUM)) 969 if (charset.max_code < UINT_MAX)
968 charset.invalid_code = charset.max_code + 1; 970 charset.invalid_code = charset.max_code + 1;
969 else 971 else
970 error ("Attribute :invalid-code must be specified"); 972 error ("Attribute :invalid-code must be specified");
971 } 973 }
972 } 974 }
973 else 975 else
974 charset.invalid_code = XFASTINT (val); 976 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
975 977
976 val = args[charset_arg_iso_final]; 978 val = args[charset_arg_iso_final];
977 if (NILP (val)) 979 if (NILP (val))
@@ -1015,17 +1017,17 @@ usage: (define-charset-internal ...) */)
1015 if (! NILP (args[charset_arg_code_offset])) 1017 if (! NILP (args[charset_arg_code_offset]))
1016 { 1018 {
1017 val = args[charset_arg_code_offset]; 1019 val = args[charset_arg_code_offset];
1018 CHECK_TYPE_RANGED_INTEGER (int, val); 1020 CHECK_CHARACTER (val);
1019 1021
1020 charset.method = CHARSET_METHOD_OFFSET; 1022 charset.method = CHARSET_METHOD_OFFSET;
1021 charset.code_offset = XINT (val); 1023 charset.code_offset = XINT (val);
1022 1024
1023 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1024 charset.min_char = i + charset.code_offset;
1025 i = CODE_POINT_TO_INDEX (&charset, charset.max_code); 1025 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1026 charset.max_char = i + charset.code_offset; 1026 if (MAX_CHAR - charset.code_offset < i)
1027 if (charset.max_char > MAX_CHAR)
1028 error ("Unsupported max char: %d", charset.max_char); 1027 error ("Unsupported max char: %d", charset.max_char);
1028 charset.max_char = i + charset.code_offset;
1029 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1030 charset.min_char = i + charset.code_offset;
1029 1031
1030 i = (charset.min_char >> 7) << 7; 1032 i = (charset.min_char >> 7) << 7;
1031 for (; i < 0x10000 && i <= charset.max_char; i += 128) 1033 for (; i < 0x10000 && i <= charset.max_char; i += 128)
@@ -1385,8 +1387,8 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1385 } 1387 }
1386 else if (CHAR_TABLE_P (Vchar_unify_table)) 1388 else if (CHAR_TABLE_P (Vchar_unify_table))
1387 { 1389 {
1388 int min_code = CHARSET_MIN_CODE (cs); 1390 unsigned min_code = CHARSET_MIN_CODE (cs);
1389 int max_code = CHARSET_MAX_CODE (cs); 1391 unsigned max_code = CHARSET_MAX_CODE (cs);
1390 int min_char = DECODE_CHAR (cs, min_code); 1392 int min_char = DECODE_CHAR (cs, min_code);
1391 int max_char = DECODE_CHAR (cs, max_code); 1393 int max_char = DECODE_CHAR (cs, max_code);
1392 1394
@@ -1830,9 +1832,8 @@ encode_char (struct charset *charset, int c)
1830 } 1832 }
1831 else /* method == CHARSET_METHOD_OFFSET */ 1833 else /* method == CHARSET_METHOD_OFFSET */
1832 { 1834 {
1833 int code_index = c - CHARSET_CODE_OFFSET (charset); 1835 code = c - CHARSET_CODE_OFFSET (charset);
1834 1836 code = INDEX_TO_CODE_POINT (charset, code);
1835 code = INDEX_TO_CODE_POINT (charset, code_index);
1836 } 1837 }
1837 1838
1838 return code; 1839 return code;