diff options
| author | Paul Eggert | 2011-09-24 18:22:30 -0700 |
|---|---|---|
| committer | Paul Eggert | 2011-09-24 18:22:30 -0700 |
| commit | 3c7649c1859d6252444044fd64c7b27d8e487f68 (patch) | |
| tree | c36db71bf71b503000c025d9586fb962e88cecb9 /src | |
| parent | 7c85f529fd12a8cb12eb85944877d0a52b6380a7 (diff) | |
| download | emacs-3c7649c1859d6252444044fd64c7b27d8e487f68.tar.gz emacs-3c7649c1859d6252444044fd64c7b27d8e487f68.zip | |
* charset.c: Integer overflow fixes.
Don't rely on undefined behavior with signed left shift overflow.
Don't assume unsigned int fits into fixnum, or that fixnum fits
into unsigned int. Don't require max_code to be a valid fixnum;
that's not true for gb18030 4-byte on a 32-bit host. Allow
invalid_code to be a cons, for the same reason. Require code_offset
to be a character. Avoid int overflow if max_char is close
to INT_MAX.
(CODE_POINT_TO_INDEX): On 32-bit hosts, return int, not unsigned;
this is intended anyway and avoids some undefined behavior.
(load_charset_map): Pass unsigned, not int, as 2nd arg of
INDEX_TO_CODE_POINT, as that's what it expects.
(Funify_charset, encode_char): Don't stuff unsigned vals into int vars.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 16 | ||||
| -rw-r--r-- | src/charset.c | 61 |
2 files changed, 45 insertions, 32 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index f67d1b72bf2..7973cc277e2 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | 2011-09-21 Paul Eggert <eggert@cs.ucla.edu> | 1 | 2011-09-25 Paul Eggert <eggert@cs.ucla.edu> |
| 2 | 2 | ||
| 3 | * alloc.c (pure_bytes_used_lisp, pure_bytes_used_non_lisp): | 3 | * alloc.c (pure_bytes_used_lisp, pure_bytes_used_non_lisp): |
| 4 | (allocate_vectorlike, buffer_memory_full, struct sdata, SDATA_SIZE) | 4 | (allocate_vectorlike, buffer_memory_full, struct sdata, SDATA_SIZE) |
| @@ -103,10 +103,22 @@ | |||
| 103 | Use ptrdiff_t, not int, to avoid needless 32-bit limit on 64-bit hosts. | 103 | Use ptrdiff_t, not int, to avoid needless 32-bit limit on 64-bit hosts. |
| 104 | (load_charset_map_from_file): Redo idx calculation to avoid overflow. | 104 | (load_charset_map_from_file): Redo idx calculation to avoid overflow. |
| 105 | (load_charset_map_from_vector, Fdefine_charset_internal): | 105 | (load_charset_map_from_vector, Fdefine_charset_internal): |
| 106 | Don't assume fixnum fits in int or unsigned int. | 106 | Don't assume fixnum fits in int. |
| 107 | (load_charset_map_from_vector, Fmap_charset_chars): | 107 | (load_charset_map_from_vector, Fmap_charset_chars): |
| 108 | Remove now-unnecessary CHECK_NATNUMs. | 108 | Remove now-unnecessary CHECK_NATNUMs. |
| 109 | (Fdefine_charset_internal): Check ranges here, more carefully. | 109 | (Fdefine_charset_internal): Check ranges here, more carefully. |
| 110 | Don't rely on undefined behavior with signed left shift overflow. | ||
| 111 | Don't assume unsigned int fits into fixnum, or that fixnum fits | ||
| 112 | into unsigned int. Don't require max_code to be a valid fixnum; | ||
| 113 | that's not true for gb18030 4-byte on a 32-bit host. Allow | |
| 114 | invalid_code to be a cons, for the same reason. Require code_offset | ||
| 115 | to be a character. Avoid int overflow if max_char is close | ||
| 116 | to INT_MAX. | ||
| 117 | (CODE_POINT_TO_INDEX): On 32-bit hosts, return int, not unsigned; | ||
| 118 | this is intended anyway and avoids some undefined behavior. | ||
| 119 | (load_charset_map): Pass unsigned, not int, as 2nd arg of | ||
| 120 | INDEX_TO_CODE_POINT, as that's what it expects. | ||
| 121 | (Funify_charset, encode_char): Don't stuff unsigned vals into int vars. | ||
| 110 | * chartab.c (Fmake_char_table, Fset_char_table_range) | 122 | * chartab.c (Fmake_char_table, Fset_char_table_range) |
| 111 | (uniprop_get_decoder, uniprop_get_encoder): | 123 | (uniprop_get_decoder, uniprop_get_encoder): |
| 112 | Don't assume fixnum fits in int. | 124 | Don't assume fixnum fits in int. |
diff --git a/src/charset.c b/src/charset.c index 9d58d29d05c..2451c55e92a 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -118,24 +118,25 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; | |||
| 118 | 118 | ||
| 119 | #define CODE_POINT_TO_INDEX(charset, code) \ | 119 | #define CODE_POINT_TO_INDEX(charset, code) \ |
| 120 | ((charset)->code_linear_p \ | 120 | ((charset)->code_linear_p \ |
| 121 | ? (code) - (charset)->min_code \ | 121 | ? (int) ((code) - (charset)->min_code) \ |
| 122 | : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ | 122 | : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ |
| 123 | && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ | 123 | && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ |
| 124 | && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ | 124 | && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ |
| 125 | && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ | 125 | && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ |
| 126 | ? (((((code) >> 24) - (charset)->code_space[12]) \ | 126 | ? (int) (((((code) >> 24) - (charset)->code_space[12]) \ |
| 127 | * (charset)->code_space[11]) \ | 127 | * (charset)->code_space[11]) \ |
| 128 | + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ | 128 | + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ |
| 129 | * (charset)->code_space[7]) \ | 129 | * (charset)->code_space[7]) \ |
| 130 | + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ | 130 | + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ |
| 131 | * (charset)->code_space[3]) \ | 131 | * (charset)->code_space[3]) \ |
| 132 | + (((code) & 0xFF) - (charset)->code_space[0]) \ | 132 | + (((code) & 0xFF) - (charset)->code_space[0]) \ |
| 133 | - ((charset)->char_index_offset)) \ | 133 | - ((charset)->char_index_offset)) \ |
| 134 | : -1) | 134 | : -1) |
| 135 | 135 | ||
| 136 | 136 | ||
| 137 | /* Convert the character index IDX to code-point CODE for CHARSET. | 137 | /* Return the code-point for the character index IDX in CHARSET. |
| 138 | It is assumed that IDX is in a valid range. */ | 138 | IDX should be an unsigned int variable in a valid range (which is |
| 139 | always in nonnegative int range too). IDX contains garbage afterwards. */ | ||
| 139 | 140 | ||
| 140 | #define INDEX_TO_CODE_POINT(charset, idx) \ | 141 | #define INDEX_TO_CODE_POINT(charset, idx) \ |
| 141 | ((charset)->code_linear_p \ | 142 | ((charset)->code_linear_p \ |
| @@ -363,7 +364,8 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, | |||
| 363 | && CHARSET_COMPACT_CODES_P (charset)) | 364 | && CHARSET_COMPACT_CODES_P (charset)) |
| 364 | for (; from_index < lim_index; from_index++, from_c++) | 365 | for (; from_index < lim_index; from_index++, from_c++) |
| 365 | { | 366 | { |
| 366 | unsigned code = INDEX_TO_CODE_POINT (charset, from_index); | 367 | unsigned code = from_index; |
| 368 | code = INDEX_TO_CODE_POINT (charset, code); | ||
| 367 | 369 | ||
| 368 | if (NILP (CHAR_TABLE_REF (table, from_c))) | 370 | if (NILP (CHAR_TABLE_REF (table, from_c))) |
| 369 | CHAR_TABLE_SET (table, from_c, make_number (code)); | 371 | CHAR_TABLE_SET (table, from_c, make_number (code)); |
| @@ -923,11 +925,11 @@ usage: (define-charset-internal ...) */) | |||
| 923 | charset.min_code = (charset.code_space[0] | 925 | charset.min_code = (charset.code_space[0] |
| 924 | | (charset.code_space[4] << 8) | 926 | | (charset.code_space[4] << 8) |
| 925 | | (charset.code_space[8] << 16) | 927 | | (charset.code_space[8] << 16) |
| 926 | | (charset.code_space[12] << 24)); | 928 | | ((unsigned) charset.code_space[12] << 24)); |
| 927 | charset.max_code = (charset.code_space[1] | 929 | charset.max_code = (charset.code_space[1] |
| 928 | | (charset.code_space[5] << 8) | 930 | | (charset.code_space[5] << 8) |
| 929 | | (charset.code_space[9] << 16) | 931 | | (charset.code_space[9] << 16) |
| 930 | | (charset.code_space[13] << 24)); | 932 | | ((unsigned) charset.code_space[13] << 24)); |
| 931 | charset.char_index_offset = 0; | 933 | charset.char_index_offset = 0; |
| 932 | 934 | ||
| 933 | val = args[charset_arg_min_code]; | 935 | val = args[charset_arg_min_code]; |
| @@ -937,8 +939,8 @@ usage: (define-charset-internal ...) */) | |||
| 937 | 939 | ||
| 938 | if (code < charset.min_code | 940 | if (code < charset.min_code |
| 939 | || code > charset.max_code) | 941 | || code > charset.max_code) |
| 940 | args_out_of_range_3 (make_number (charset.min_code), | 942 | args_out_of_range_3 (make_fixnum_or_float (charset.min_code), |
| 941 | make_number (charset.max_code), val); | 943 | make_fixnum_or_float (charset.max_code), val); |
| 942 | charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); | 944 | charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); |
| 943 | charset.min_code = code; | 945 | charset.min_code = code; |
| 944 | } | 946 | } |
| @@ -950,8 +952,8 @@ usage: (define-charset-internal ...) */) | |||
| 950 | 952 | ||
| 951 | if (code < charset.min_code | 953 | if (code < charset.min_code |
| 952 | || code > charset.max_code) | 954 | || code > charset.max_code) |
| 953 | args_out_of_range_3 (make_number (charset.min_code), | 955 | args_out_of_range_3 (make_fixnum_or_float (charset.min_code), |
| 954 | make_number (charset.max_code), val); | 956 | make_fixnum_or_float (charset.max_code), val); |
| 955 | charset.max_code = code; | 957 | charset.max_code = code; |
| 956 | } | 958 | } |
| 957 | 959 | ||
| @@ -964,14 +966,14 @@ usage: (define-charset-internal ...) */) | |||
| 964 | charset.invalid_code = 0; | 966 | charset.invalid_code = 0; |
| 965 | else | 967 | else |
| 966 | { | 968 | { |
| 967 | if (charset.max_code < min (UINT_MAX, MOST_POSITIVE_FIXNUM)) | 969 | if (charset.max_code < UINT_MAX) |
| 968 | charset.invalid_code = charset.max_code + 1; | 970 | charset.invalid_code = charset.max_code + 1; |
| 969 | else | 971 | else |
| 970 | error ("Attribute :invalid-code must be specified"); | 972 | error ("Attribute :invalid-code must be specified"); |
| 971 | } | 973 | } |
| 972 | } | 974 | } |
| 973 | else | 975 | else |
| 974 | charset.invalid_code = XFASTINT (val); | 976 | charset.invalid_code = cons_to_unsigned (val, UINT_MAX); |
| 975 | 977 | ||
| 976 | val = args[charset_arg_iso_final]; | 978 | val = args[charset_arg_iso_final]; |
| 977 | if (NILP (val)) | 979 | if (NILP (val)) |
| @@ -1015,17 +1017,17 @@ usage: (define-charset-internal ...) */) | |||
| 1015 | if (! NILP (args[charset_arg_code_offset])) | 1017 | if (! NILP (args[charset_arg_code_offset])) |
| 1016 | { | 1018 | { |
| 1017 | val = args[charset_arg_code_offset]; | 1019 | val = args[charset_arg_code_offset]; |
| 1018 | CHECK_TYPE_RANGED_INTEGER (int, val); | 1020 | CHECK_CHARACTER (val); |
| 1019 | 1021 | ||
| 1020 | charset.method = CHARSET_METHOD_OFFSET; | 1022 | charset.method = CHARSET_METHOD_OFFSET; |
| 1021 | charset.code_offset = XINT (val); | 1023 | charset.code_offset = XINT (val); |
| 1022 | 1024 | ||
| 1023 | i = CODE_POINT_TO_INDEX (&charset, charset.min_code); | ||
| 1024 | charset.min_char = i + charset.code_offset; | ||
| 1025 | i = CODE_POINT_TO_INDEX (&charset, charset.max_code); | 1025 | i = CODE_POINT_TO_INDEX (&charset, charset.max_code); |
| 1026 | charset.max_char = i + charset.code_offset; | 1026 | if (MAX_CHAR - charset.code_offset < i) |
| 1027 | if (charset.max_char > MAX_CHAR) | ||
| 1028 | error ("Unsupported max char: %d", charset.max_char); | 1027 | error ("Unsupported max char: %d", charset.max_char); |
| 1028 | charset.max_char = i + charset.code_offset; | ||
| 1029 | i = CODE_POINT_TO_INDEX (&charset, charset.min_code); | ||
| 1030 | charset.min_char = i + charset.code_offset; | ||
| 1029 | 1031 | ||
| 1030 | i = (charset.min_char >> 7) << 7; | 1032 | i = (charset.min_char >> 7) << 7; |
| 1031 | for (; i < 0x10000 && i <= charset.max_char; i += 128) | 1033 | for (; i < 0x10000 && i <= charset.max_char; i += 128) |
| @@ -1385,8 +1387,8 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) | |||
| 1385 | } | 1387 | } |
| 1386 | else if (CHAR_TABLE_P (Vchar_unify_table)) | 1388 | else if (CHAR_TABLE_P (Vchar_unify_table)) |
| 1387 | { | 1389 | { |
| 1388 | int min_code = CHARSET_MIN_CODE (cs); | 1390 | unsigned min_code = CHARSET_MIN_CODE (cs); |
| 1389 | int max_code = CHARSET_MAX_CODE (cs); | 1391 | unsigned max_code = CHARSET_MAX_CODE (cs); |
| 1390 | int min_char = DECODE_CHAR (cs, min_code); | 1392 | int min_char = DECODE_CHAR (cs, min_code); |
| 1391 | int max_char = DECODE_CHAR (cs, max_code); | 1393 | int max_char = DECODE_CHAR (cs, max_code); |
| 1392 | 1394 | ||
| @@ -1830,9 +1832,8 @@ encode_char (struct charset *charset, int c) | |||
| 1830 | } | 1832 | } |
| 1831 | else /* method == CHARSET_METHOD_OFFSET */ | 1833 | else /* method == CHARSET_METHOD_OFFSET */ |
| 1832 | { | 1834 | { |
| 1833 | int code_index = c - CHARSET_CODE_OFFSET (charset); | 1835 | code = c - CHARSET_CODE_OFFSET (charset); |
| 1834 | 1836 | code = INDEX_TO_CODE_POINT (charset, code); | |
| 1835 | code = INDEX_TO_CODE_POINT (charset, code_index); | ||
| 1836 | } | 1837 | } |
| 1837 | 1838 | ||
| 1838 | return code; | 1839 | return code; |