diff options
| author | Paul Eggert | 2018-10-10 23:17:18 -0700 |
|---|---|---|
| committer | Paul Eggert | 2018-10-10 23:19:42 -0700 |
| commit | 5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d (patch) | |
| tree | c51b2a470f62679f5e5c3f6f55d90aa0082d0133 /src/lread.c | |
| parent | fd3a48fcd8bb212ec12b9b10a79de0ae605ee93b (diff) | |
| download | emacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.tar.gz emacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.zip | |
Fix mishandling of symbols that look like numbers
* src/bignum.c (make_neg_biguint): New function.
* src/lread.c (read1): Do not mishandle an unquoted symbol
with name equal to something like "1\0x", i.e., a string
of numeric form followed by a NUL byte.
Formerly these symbols were misread as numbers.
(string_to_number): Change last argument from an integer flag
to a pointer to the length. This lets the caller figure out
how much of the prefix was used. All callers changed.
Add a fast path if the integer (sans sign) fits in uintmax_t.
Update comments and simplify now that bignums are present.
* src/print.c (print_object): Fix quoting of symbols that look
like numbers, by relying on string_to_number for the tricky
cases rather than trying to redo its logic, incorrectly. For
example, (read (prin1-to-string '\1e+NaN)) formerly misbehaved because
prin1-to-string returned "1e+NaN", which was wrong: a backslash is
needed in the output to prevent it from being read as a NaN. Escape NO_BREAK_SPACE
too, since lread.c treats it like SPACE.
* test/src/print-tests.el (print-read-roundtrip):
Add tests illustrating the above-mentioned bugs.
Diffstat (limited to 'src/lread.c')
| -rw-r--r-- | src/lread.c | 100 |
1 files changed, 50 insertions, 50 deletions
diff --git a/src/lread.c b/src/lread.c index 73e38d89954..62616cb6819 100644 --- a/src/lread.c +++ b/src/lread.c | |||
| @@ -2354,12 +2354,14 @@ character_name_to_code (char const *name, ptrdiff_t name_len) | |||
| 2354 | { | 2354 | { |
| 2355 | /* For "U+XXXX", pass the leading '+' to string_to_number to reject | 2355 | /* For "U+XXXX", pass the leading '+' to string_to_number to reject |
| 2356 | monstrosities like "U+-0000". */ | 2356 | monstrosities like "U+-0000". */ |
| 2357 | ptrdiff_t len = name_len - 1; | ||
| 2357 | Lisp_Object code | 2358 | Lisp_Object code |
| 2358 | = (name[0] == 'U' && name[1] == '+' | 2359 | = (name[0] == 'U' && name[1] == '+' |
| 2359 | ? string_to_number (name + 1, 16, 0) | 2360 | ? string_to_number (name + 1, 16, &len) |
| 2360 | : call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt)); | 2361 | : call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt)); |
| 2361 | 2362 | ||
| 2362 | if (! RANGED_FIXNUMP (0, code, MAX_UNICODE_CHAR) | 2363 | if (! RANGED_FIXNUMP (0, code, MAX_UNICODE_CHAR) |
| 2364 | || len != name_len - 1 | ||
| 2363 | || char_surrogate_p (XFIXNUM (code))) | 2365 | || char_surrogate_p (XFIXNUM (code))) |
| 2364 | { | 2366 | { |
| 2365 | AUTO_STRING (format, "\\N{%s}"); | 2367 | AUTO_STRING (format, "\\N{%s}"); |
| @@ -3531,12 +3533,14 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) | |||
| 3531 | || strchr ("\"';()[]#`,", c) == NULL)); | 3533 | || strchr ("\"';()[]#`,", c) == NULL)); |
| 3532 | 3534 | ||
| 3533 | *p = 0; | 3535 | *p = 0; |
| 3536 | ptrdiff_t nbytes = p - read_buffer; | ||
| 3534 | UNREAD (c); | 3537 | UNREAD (c); |
| 3535 | 3538 | ||
| 3536 | if (!quoted && !uninterned_symbol) | 3539 | if (!quoted && !uninterned_symbol) |
| 3537 | { | 3540 | { |
| 3538 | Lisp_Object result = string_to_number (read_buffer, 10, 0); | 3541 | ptrdiff_t len; |
| 3539 | if (! NILP (result)) | 3542 | Lisp_Object result = string_to_number (read_buffer, 10, &len); |
| 3543 | if (! NILP (result) && len == nbytes) | ||
| 3540 | return unbind_to (count, result); | 3544 | return unbind_to (count, result); |
| 3541 | } | 3545 | } |
| 3542 | if (!quoted && multibyte) | 3546 | if (!quoted && multibyte) |
| @@ -3548,7 +3552,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) | |||
| 3548 | } | 3552 | } |
| 3549 | { | 3553 | { |
| 3550 | Lisp_Object result; | 3554 | Lisp_Object result; |
| 3551 | ptrdiff_t nbytes = p - read_buffer; | ||
| 3552 | ptrdiff_t nchars | 3555 | ptrdiff_t nchars |
| 3553 | = (multibyte | 3556 | = (multibyte |
| 3554 | ? multibyte_chars_in_text ((unsigned char *) read_buffer, | 3557 | ? multibyte_chars_in_text ((unsigned char *) read_buffer, |
| @@ -3700,18 +3703,18 @@ substitute_in_interval (INTERVAL interval, void *arg) | |||
| 3700 | } | 3703 | } |
| 3701 | 3704 | ||
| 3702 | 3705 | ||
| 3703 | /* Convert STRING to a number, assuming base BASE. When STRING has | 3706 | /* Convert the initial prefix of STRING to a number, assuming base BASE. |
| 3704 | floating point syntax and BASE is 10, return a nearest float. When | 3707 | If the prefix has floating point syntax and BASE is 10, return a |
| 3705 | STRING has integer syntax, return a fixnum if the integer fits, or | 3708 | nearest float; otherwise, if the prefix has integer syntax, return |
| 3706 | else a bignum. Otherwise, return nil. If FLAGS & | 3709 | the integer; otherwise, return nil. If PLEN, set *PLEN to the |
| 3707 | S2N_IGNORE_TRAILING is nonzero, consider just the longest prefix of | 3710 | length of the numeric prefix if there is one, otherwise *PLEN is |
| 3708 | STRING that has valid syntax. */ | 3711 | unspecified. */ |
| 3709 | 3712 | ||
| 3710 | Lisp_Object | 3713 | Lisp_Object |
| 3711 | string_to_number (char const *string, int base, int flags) | 3714 | string_to_number (char const *string, int base, ptrdiff_t *plen) |
| 3712 | { | 3715 | { |
| 3713 | char const *cp = string; | 3716 | char const *cp = string; |
| 3714 | bool float_syntax = 0; | 3717 | bool float_syntax = false; |
| 3715 | double value = 0; | 3718 | double value = 0; |
| 3716 | 3719 | ||
| 3717 | /* Negate the value ourselves. This treats 0, NaNs, and infinity properly on | 3720 | /* Negate the value ourselves. This treats 0, NaNs, and infinity properly on |
| @@ -3797,49 +3800,46 @@ string_to_number (char const *string, int base, int flags) | |||
| 3797 | || (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP)); | 3800 | || (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP)); |
| 3798 | } | 3801 | } |
| 3799 | 3802 | ||
| 3800 | /* Return nil if the number uses invalid syntax. If FLAGS & | 3803 | if (plen) |
| 3801 | S2N_IGNORE_TRAILING, accept any prefix that matches. Otherwise, | 3804 | *plen = cp - string; |
| 3802 | the entire string must match. */ | ||
| 3803 | if (! (flags & S2N_IGNORE_TRAILING | ||
| 3804 | ? ((state & LEAD_INT) != 0 || float_syntax) | ||
| 3805 | : (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT | ||
| 3806 | || float_syntax)))) | ||
| 3807 | return Qnil; | ||
| 3808 | 3805 | ||
| 3809 | /* If the number uses integer and not float syntax, and is in C-language | 3806 | /* Return a float if the number uses float syntax. */ |
| 3810 | range, use its value, preferably as a fixnum. */ | 3807 | if (float_syntax) |
| 3811 | if (leading_digit >= 0 && ! float_syntax) | ||
| 3812 | { | 3808 | { |
| 3813 | if ((state & INTOVERFLOW) == 0 | 3809 | /* Convert to floating point, unless the value is already known |
| 3814 | && n <= (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM)) | 3810 | because it is infinite or a NaN. */ |
| 3815 | { | 3811 | if (! value) |
| 3816 | EMACS_INT signed_n = n; | 3812 | value = atof (string + signedp); |
| 3817 | return make_fixnum (negative ? -signed_n : signed_n); | 3813 | return make_float (negative ? -value : value); |
| 3818 | } | ||
| 3819 | |||
| 3820 | /* Trim any leading "+" and trailing nondigits, then convert to | ||
| 3821 | bignum. */ | ||
| 3822 | string += positive; | ||
| 3823 | if (!*after_digits) | ||
| 3824 | return make_bignum_str (string, base); | ||
| 3825 | ptrdiff_t trimmed_len = after_digits - string; | ||
| 3826 | USE_SAFE_ALLOCA; | ||
| 3827 | char *trimmed = SAFE_ALLOCA (trimmed_len + 1); | ||
| 3828 | memcpy (trimmed, string, trimmed_len); | ||
| 3829 | trimmed[trimmed_len] = '\0'; | ||
| 3830 | Lisp_Object result = make_bignum_str (trimmed, base); | ||
| 3831 | SAFE_FREE (); | ||
| 3832 | return result; | ||
| 3833 | } | 3814 | } |
| 3834 | 3815 | ||
| 3835 | /* Either the number uses float syntax, or it does not fit into a fixnum. | 3816 | /* Return nil if the number uses invalid syntax. */ |
| 3836 | Convert it from string to floating point, unless the value is already | 3817 | if (! (state & LEAD_INT)) |
| 3837 | known because it is an infinity, a NAN, or its absolute value fits in | 3818 | return Qnil; |
| 3838 | uintmax_t. */ | 3819 | |
| 3839 | if (! value) | 3820 | /* Fast path if the integer (sans sign) fits in uintmax_t. */ |
| 3840 | value = atof (string + signedp); | 3821 | if (! (state & INTOVERFLOW)) |
| 3822 | { | ||
| 3823 | if (!negative) | ||
| 3824 | return make_uint (n); | ||
| 3825 | if (-MOST_NEGATIVE_FIXNUM < n) | ||
| 3826 | return make_neg_biguint (n); | ||
| 3827 | EMACS_INT signed_n = n; | ||
| 3828 | return make_fixnum (-signed_n); | ||
| 3829 | } | ||
| 3841 | 3830 | ||
| 3842 | return make_float (negative ? -value : value); | 3831 | /* Trim any leading "+" and trailing nondigits, then return a bignum. */ |
| 3832 | string += positive; | ||
| 3833 | if (!*after_digits) | ||
| 3834 | return make_bignum_str (string, base); | ||
| 3835 | ptrdiff_t trimmed_len = after_digits - string; | ||
| 3836 | USE_SAFE_ALLOCA; | ||
| 3837 | char *trimmed = SAFE_ALLOCA (trimmed_len + 1); | ||
| 3838 | memcpy (trimmed, string, trimmed_len); | ||
| 3839 | trimmed[trimmed_len] = '\0'; | ||
| 3840 | Lisp_Object result = make_bignum_str (trimmed, base); | ||
| 3841 | SAFE_FREE (); | ||
| 3842 | return result; | ||
| 3843 | } | 3843 | } |
| 3844 | 3844 | ||
| 3845 | 3845 | ||