aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Eggert2011-04-20 23:03:09 -0700
committerPaul Eggert2011-04-20 23:03:09 -0700
commit452f4150134e4ba7bbd2bad9ce87d19c200505de (patch)
tree776249edb45be8b6811ab6577a4038058a87f315
parent6703b2e490339a624bb83c9543f1e51ede26b52b (diff)
downloademacs-452f4150134e4ba7bbd2bad9ce87d19c200505de.tar.gz
emacs-452f4150134e4ba7bbd2bad9ce87d19c200505de.zip
Treat large integers as floats in the Lisp reader and in string-to-number.
-rw-r--r--src/ChangeLog52
-rw-r--r--src/data.c38
-rw-r--r--src/lisp.h2
-rw-r--r--src/lread.c226
4 files changed, 169 insertions, 149 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 7eaa153f79a..2b9978f3d6a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
12011-04-21 Paul Eggert <eggert@cs.ucla.edu>
2
3 Make the Lisp reader and string-to-float more consistent.
4 * data.c (atof): Remove decl; no longer used or needed.
5 (digit_to_number): Move to lread.c.
6 (Fstring_to_number): Use new string_to_number function, to be
7 consistent with how the Lisp reader treats infinities and NaNs.
8 Do not assume that floating-point numbers represent EMACS_INT
9 without losing information; this is not true on most 64-bit hosts.
10 Avoid double-rounding errors, by insisting on integers when
11 parsing non-base-10 numbers, as the documentation specifies.
12 * lisp.h (string_to_number): New decl, replacing ...
13 (isfloat_string): Remove.
14 * lread.c (read1): Do not accept +. and -. as integers; this
15 appears to have been a coding error. Similarly, do not accept
16 strings like +-1e0 as floating point numbers. Do not report
17 overflow for integer overflows unless the base is not 10 which
18 means we have no simple and reliable way to continue.
19 Break out the floating-point parsing into a new
20 function string_to_number, so that Fstring_to_number parses
21 floating point numbers consistently with the Lisp reader.
22 (digit_to_number): Moved here from data.c. Make it static inline.
23 (E_CHAR, EXP_INT): Remove, replacing with ...
24 (E_EXP): New macro, to solve the "1.0e+" problem mentioned below.
25 (string_to_number): New function, replacing isfloat_string.
26 This function checks for valid syntax and produces the resulting
27 Lisp float number too. Rework it so that string-to-number
28 no longer mishandles examples like "1.0e+".
29
12011-04-20 Paul Eggert <eggert@cs.ucla.edu> 302011-04-20 Paul Eggert <eggert@cs.ucla.edu>
2 31
3 * textprop.c (set_text_properties_1): Rewrite for clarity, 32 * textprop.c (set_text_properties_1): Rewrite for clarity,
@@ -15,29 +44,6 @@
15 * alloc.c (overrun_check_malloc, overrun_check_realloc): Now static. 44 * alloc.c (overrun_check_malloc, overrun_check_realloc): Now static.
16 (overrun_check_free): Likewise. 45 (overrun_check_free): Likewise.
17 46
18 Make the Lisp reader and string-to-float more consistent.
19 * data.c (atof): Remove decl; no longer used or needed.
20 (Fstring_to_number): Use new string_to_float function, to be
21 consistent with how the Lisp reader treats infinities and NaNs.
22 Do not assume that floating-point numbers represent EMACS_INT
23 without losing information; this is not true on most 64-bit hosts.
24 Avoid double-rounding errors, by insisting on integers when
25 parsing non-base-10 numbers, as the documentation specifies.
26 Report integer overflow instead of silently converting to
27 integers.
28 * lisp.h (string_to_float): New decl, replacing ...
29 (isfloat_string): Remove.
30 * lread.c (read1): Do not accept +. and -. as integers; this
31 appears to have been a coding error. Similarly, do not accept
32 strings like +-1e0 as floating point numbers. Do not report
33 overflow for some integer overflows and not others; instead,
34 report them all. Break out the floating-point parsing into a new
35 function string_to_float, so that Fstring_to_number parses
36 floating point numbers consistently with the Lisp reader.
37 (string_to_float): New function, replacing isfloat_string.
38 This function checks for valid syntax and produces the resulting
39 Lisp float number too.
40
41 * alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check 47 * alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check
42 in the common case where SDATA_DATA_OFFSET is a multiple of Emacs 48 in the common case where SDATA_DATA_OFFSET is a multiple of Emacs
43 word size. 49 word size.
diff --git a/src/data.c b/src/data.c
index 486816cac70..4e81c80d0ed 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number. */)
2374 return build_string (buffer); 2374 return build_string (buffer);
2375} 2375}
2376 2376
2377INLINE static int
2378digit_to_number (int character, int base)
2379{
2380 int digit;
2381
2382 if (character >= '0' && character <= '9')
2383 digit = character - '0';
2384 else if (character >= 'a' && character <= 'z')
2385 digit = character - 'a' + 10;
2386 else if (character >= 'A' && character <= 'Z')
2387 digit = character - 'A' + 10;
2388 else
2389 return -1;
2390
2391 if (digit >= base)
2392 return -1;
2393 else
2394 return digit;
2395}
2396
2397DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0, 2377DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0,
2398 doc: /* Parse STRING as a decimal number and return the number. 2378 doc: /* Parse STRING as a decimal number and return the number.
2399This parses both integers and floating point numbers. 2379This parses both integers and floating point numbers.
@@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer. */)
2406{ 2386{
2407 register char *p; 2387 register char *p;
2408 register int b; 2388 register int b;
2409 EMACS_INT n; 2389 Lisp_Object val;
2410 2390
2411 CHECK_STRING (string); 2391 CHECK_STRING (string);
2412 2392
@@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer. */)
2420 xsignal1 (Qargs_out_of_range, base); 2400 xsignal1 (Qargs_out_of_range, base);
2421 } 2401 }
2422 2402
2423 /* Skip any whitespace at the front of the number. Typically strtol does
2424 this anyway, so we might as well be consistent. */
2425 p = SSDATA (string); 2403 p = SSDATA (string);
2426 while (*p == ' ' || *p == '\t') 2404 while (*p == ' ' || *p == '\t')
2427 p++; 2405 p++;
2428 2406
2429 if (b == 10) 2407 val = string_to_number (p, b, 1);
2430 { 2408 return NILP (val) ? make_number (0) : val;
2431 Lisp_Object val = string_to_float (p, 1);
2432 if (FLOATP (val))
2433 return val;
2434 }
2435
2436 n = strtol (p, NULL, b);
2437 if (FIXNUM_OVERFLOW_P (n))
2438 xsignal (Qoverflow_error, list1 (string));
2439 return make_number (n);
2440} 2409}
2441
2442 2410
2443enum arithop 2411enum arithop
2444 { 2412 {
diff --git a/src/lisp.h b/src/lisp.h
index 8d333a3999f..5bace90e53e 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
2782 } while (0) 2782 } while (0)
2783extern int openp (Lisp_Object, Lisp_Object, Lisp_Object, 2783extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
2784 Lisp_Object *, Lisp_Object); 2784 Lisp_Object *, Lisp_Object);
2785Lisp_Object string_to_float (char const *, int); 2785Lisp_Object string_to_number (char const *, int, int);
2786extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object), 2786extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
2787 Lisp_Object); 2787 Lisp_Object);
2788extern void dir_warning (const char *, Lisp_Object); 2788extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c
index a872929e08f..390c57d1678 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)
3005 3005
3006 if (!quoted && !uninterned_symbol) 3006 if (!quoted && !uninterned_symbol)
3007 { 3007 {
3008 register char *p1; 3008 Lisp_Object result = string_to_number (read_buffer, 10, 0);
3009 Lisp_Object result; 3009 if (! NILP (result))
3010 p1 = read_buffer;
3011 if (*p1 == '+' || *p1 == '-') p1++;
3012 /* Is it an integer? */
3013 if ('0' <= *p1 && *p1 <= '9')
3014 {
3015 do
3016 p1++;
3017 while ('0' <= *p1 && *p1 <= '9');
3018
3019 /* Integers can have trailing decimal points. */
3020 p1 += (*p1 == '.');
3021 if (p1 == p)
3022 {
3023 /* It is an integer. */
3024 EMACS_INT n = strtol (read_buffer, NULL, 10);
3025 if (FIXNUM_OVERFLOW_P (n))
3026 xsignal (Qoverflow_error,
3027 list1 (build_string (read_buffer)));
3028 return make_number (n);
3029 }
3030 }
3031
3032 result = string_to_float (read_buffer, 0);
3033 if (FLOATP (result))
3034 return result; 3010 return result;
3035 } 3011 }
3036 { 3012 {
@@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
3189} 3165}
3190 3166
3191 3167
3168static inline int
3169digit_to_number (int character, int base)
3170{
3171 int digit;
3172
3173 if ('0' <= character && character <= '9')
3174 digit = character - '0';
3175 else if ('a' <= character && character <= 'z')
3176 digit = character - 'a' + 10;
3177 else if ('A' <= character && character <= 'Z')
3178 digit = character - 'A' + 10;
3179 else
3180 return -1;
3181
3182 return digit < base ? digit : -1;
3183}
3184
3192#define LEAD_INT 1 3185#define LEAD_INT 1
3193#define DOT_CHAR 2 3186#define DOT_CHAR 2
3194#define TRAIL_INT 4 3187#define TRAIL_INT 4
3195#define E_CHAR 8 3188#define E_EXP 16
3196#define EXP_INT 16
3197 3189
3198 3190
3199/* Convert CP to a floating point number. Return a non-float value if CP does 3191/* Convert STRING to a number, assuming base BASE. Return a fixnum if CP has
3200 not have valid floating point syntax. If IGNORE_TRAILING is nonzero, 3192 integer syntax and fits in a fixnum, else return the nearest float if CP has
3201 consider just the longest prefix of CP that has valid floating point 3193 either floating point or integer syntax and BASE is 10, else return nil. If
3202 syntax. */ 3194 IGNORE_TRAILING is nonzero, consider just the longest prefix of CP that has
3195 valid floating point syntax. Signal an overflow if BASE is not 10 and the
3196 number has integer syntax but does not fit. */
3203 3197
3204Lisp_Object 3198Lisp_Object
3205string_to_float (char const *cp, int ignore_trailing) 3199string_to_number (char const *string, int base, int ignore_trailing)
3206{ 3200{
3207 int state; 3201 int state;
3208 const char *start = cp; 3202 char const *cp = string;
3203 int leading_digit;
3204 int float_syntax = 0;
3205 double value = 0;
3209 3206
3210 /* Compute NaN and infinities using a variable, to cope with compilers that 3207 /* Compute NaN and infinities using a variable, to cope with compilers that
3211 think they are smarter than we are. */ 3208 think they are smarter than we are. */
@@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing)
3216 atof ("-0.0") drops the sign. */ 3213 atof ("-0.0") drops the sign. */
3217 int negative = *cp == '-'; 3214 int negative = *cp == '-';
3218 3215
3219 double value = 0; 3216 int signedp = negative || *cp == '+';
3217 cp += signedp;
3220 3218
3221 state = 0; 3219 state = 0;
3222 if (negative || *cp == '+')
3223 cp++;
3224 3220
3225 if (*cp >= '0' && *cp <= '9') 3221 leading_digit = digit_to_number (*cp, base);
3222 if (0 <= leading_digit)
3226 { 3223 {
3227 state |= LEAD_INT; 3224 state |= LEAD_INT;
3228 while (*cp >= '0' && *cp <= '9') 3225 do
3229 cp++; 3226 ++cp;
3227 while (0 <= digit_to_number (*cp, base));
3230 } 3228 }
3229
3231 if (*cp == '.') 3230 if (*cp == '.')
3232 { 3231 {
3233 state |= DOT_CHAR; 3232 state |= DOT_CHAR;
3234 cp++; 3233 cp++;
3235 } 3234 }
3236 if (*cp >= '0' && *cp <= '9')
3237 {
3238 state |= TRAIL_INT;
3239 while (*cp >= '0' && *cp <= '9')
3240 cp++;
3241 }
3242 if (*cp == 'e' || *cp == 'E')
3243 {
3244 state |= E_CHAR;
3245 cp++;
3246 if (*cp == '+' || *cp == '-')
3247 cp++;
3248 }
3249 3235
3250 if (*cp >= '0' && *cp <= '9') 3236 if (base == 10)
3251 { 3237 {
3252 state |= EXP_INT; 3238 if ('0' <= *cp && *cp <= '9')
3253 while (*cp >= '0' && *cp <= '9') 3239 {
3254 cp++; 3240 state |= TRAIL_INT;
3255 } 3241 do
3256 else if (cp == start) 3242 cp++;
3257 ; 3243 while ('0' <= *cp && *cp <= '9');
3258 else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F') 3244 }
3259 { 3245 if (*cp == 'e' || *cp == 'E')
3260 state |= EXP_INT; 3246 {
3261 cp += 3; 3247 char const *ecp = cp;
3262 value = 1.0 / zero; 3248 cp++;
3249 if (*cp == '+' || *cp == '-')
3250 cp++;
3251 if ('0' <= *cp && *cp <= '9')
3252 {
3253 state |= E_EXP;
3254 do
3255 cp++;
3256 while ('0' <= *cp && *cp <= '9');
3257 }
3258 else if (cp[-1] == '+'
3259 && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
3260 {
3261 state |= E_EXP;
3262 cp += 3;
3263 value = 1.0 / zero;
3264 }
3265 else if (cp[-1] == '+'
3266 && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
3267 {
3268 state |= E_EXP;
3269 cp += 3;
3270 value = zero / zero;
3271
3272 /* If that made a "negative" NaN, negate it. */
3273 {
3274 int i;
3275 union { double d; char c[sizeof (double)]; }
3276 u_data, u_minus_zero;
3277 u_data.d = value;
3278 u_minus_zero.d = -0.0;
3279 for (i = 0; i < sizeof (double); i++)
3280 if (u_data.c[i] & u_minus_zero.c[i])
3281 {
3282 value = -value;
3283 break;
3284 }
3285 }
3286 /* Now VALUE is a positive NaN. */
3287 }
3288 else
3289 cp = ecp;
3290 }
3291
3292 float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
3293 || state == (LEAD_INT|E_EXP));
3263 } 3294 }
3264 else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
3265 {
3266 state |= EXP_INT;
3267 cp += 3;
3268 value = zero / zero;
3269 3295
3270 /* If that made a "negative" NaN, negate it. */ 3296 /* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept
3271 { 3297 any prefix that matches. Otherwise, the entire string must match. */
3272 int i; 3298 if (! (ignore_trailing
3273 union { double d; char c[sizeof (double)]; } u_data, u_minus_zero; 3299 ? ((state & LEAD_INT) != 0 || float_syntax)
3300 : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
3301 return Qnil;
3274 3302
3275 u_data.d = value; 3303 /* If the number does not use float syntax, and fits into a fixnum, return
3276 u_minus_zero.d = - 0.0; 3304 the fixnum. */
3277 for (i = 0; i < sizeof (double); i++) 3305 if (0 <= leading_digit && ! float_syntax)
3278 if (u_data.c[i] & u_minus_zero.c[i]) 3306 {
3307 /* Convert string to EMACS_INT. Do not use strtol, to avoid assuming
3308 that EMACS_INT is no wider than 'long', and because when BASE is 16
3309 strtol might accept numbers like "0x1" that are not allowed here. */
3310 EMACS_INT n = leading_digit;
3311 EMACS_INT abs_bound =
3312 (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM);
3313 EMACS_INT abs_bound_over_base = abs_bound / base;
3314
3315 for (cp = string + signedp + 1; ; cp++)
3316 {
3317 int d = digit_to_number (*cp, base);
3318 if (d < 0)
3279 { 3319 {
3280 value = - value; 3320 if (n <= abs_bound)
3321 return make_number (negative ? -n : n);
3281 break; 3322 break;
3282 } 3323 }
3283 } 3324 if (abs_bound_over_base < n)
3284 /* Now VALUE is a positive NaN. */ 3325 break;
3285 } 3326 n = base * n + d;
3327 }
3286 3328
3287 if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT) 3329 /* Unfortunately there's no simple and reliable way to convert
3288 || state == (DOT_CHAR|TRAIL_INT) 3330 non-base-10 to floating point. */
3289 || state == (LEAD_INT|E_CHAR|EXP_INT) 3331 if (base != 10)
3290 || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT) 3332 xsignal (Qoverflow_error, list1 (build_string (string)));
3291 || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT))) 3333 }
3292 return make_number (0); /* Any non-float value will do. */
3293 3334
3335 /* Either the number uses float syntax, or it does not fit into a fixnum.
3336 Convert it from string to floating point, unless the value is already
3337 known because it is an infinity or a NAN. */
3294 if (! value) 3338 if (! value)
3295 value = atof (start + negative); 3339 value = atof (string + signedp);
3340
3296 if (negative) 3341 if (negative)
3297 value = - value; 3342 value = -value;
3298 return make_float (value); 3343 return make_float (value);
3299} 3344}
3300 3345
3346
3301 3347
3302static Lisp_Object 3348static Lisp_Object
3303read_vector (Lisp_Object readcharfun, int bytecodeflag) 3349read_vector (Lisp_Object readcharfun, int bytecodeflag)