Treat large integers as floats in the Lisp reader and in string-to-number.

author: Paul Eggert 2011-04-20 23:03:09 -0700
committer: Paul Eggert 2011-04-20 23:03:09 -0700
commit: 452f4150134e4ba7bbd2bad9ce87d19c200505de (patch)
tree: 776249edb45be8b6811ab6577a4038058a87f315
parent: 6703b2e490339a624bb83c9543f1e51ede26b52b (diff)
download: emacs-452f4150134e4ba7bbd2bad9ce87d19c200505de.tar.gz
emacs-452f4150134e4ba7bbd2bad9ce87d19c200505de.zip
4 files changed, 169 insertions, 149 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 7eaa153f79a..2b9978f3d6a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
+2011-04-21  Paul Eggert  <eggert@cs.ucla.edu>
+        Make the Lisp reader and string-to-float more consistent.
+        * data.c (atof): Remove decl; no longer used or needed.
+        (digit_to_number): Move to lread.c.
+        (Fstring_to_number): Use new string_to_number function, to be
+        consistent with how the Lisp reader treats infinities and NaNs.
+        Do not assume that floating-point numbers represent EMACS_INT
+        without losing information; this is not true on most 64-bit hosts.
+        Avoid double-rounding errors, by insisting on integers when
+        parsing non-base-10 numbers, as the documentation specifies.
+        * lisp.h (string_to_number): New decl, replacing ...
+        (isfloat_string): Remove.
+        * lread.c (read1): Do not accept +. and -. as integers; this
+        appears to have been a coding error.  Similarly, do not accept
+        strings like +-1e0 as floating point numbers.  Do not report
+        overflow for integer overflows unless the base is not 10, which
+        means we have no simple and reliable way to continue.
+        Break out the floating-point parsing into a new
+        function string_to_number, so that Fstring_to_number parses
+        floating point numbers consistently with the Lisp reader.
+        (digit_to_number): Moved here from data.c.  Make it static inline.
+        (E_CHAR, EXP_INT): Remove, replacing with ...
+        (E_EXP): New macro, to solve the "1.0e+" problem mentioned below.
+        (string_to_number): New function, replacing isfloat_string.
+        This function checks for valid syntax and produces the resulting
+        Lisp float number too.  Rework it so that string-to-number
+        no longer mishandles examples like "1.0e+".
 2011-04-20  Paul Eggert  <eggert@cs.ucla.edu>
        * textprop.c (set_text_properties_1): Rewrite for clarity,
@@ -15,29 +44,6 @@
        * alloc.c (overrun_check_malloc, overrun_check_realloc): Now static.
        (overrun_check_free): Likewise.
-        Make the Lisp reader and string-to-float more consistent.
-        * data.c (atof): Remove decl; no longer used or needed.
-        (Fstring_to_number): Use new string_to_float function, to be
-        consistent with how the Lisp reader treats infinities and NaNs.
-        Do not assume that floating-point numbers represent EMACS_INT
-        without losing information; this is not true on most 64-bit hosts.
-        Avoid double-rounding errors, by insisting on integers when
-        parsing non-base-10 numbers, as the documentation specifies.
-        Report integer overflow instead of silently converting to
-        integers.
-        * lisp.h (string_to_float): New decl, replacing ...
-        (isfloat_string): Remove.
-        * lread.c (read1): Do not accept +. and -. as integers; this
-        appears to have been a coding error.  Similarly, do not accept
-        strings like +-1e0 as floating point numbers.  Do not report
-        overflow for some integer overflows and not others; instead,
-        report them all.  Break out the floating-point parsing into a new
-        function string_to_float, so that Fstring_to_number parses
-        floating point numbers consistently with the Lisp reader.
-        (string_to_float): New function, replacing isfloat_string.
-        This function checks for valid syntax and produces the resulting
-        Lisp float number too.
        * alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check
        in the common case where SDATA_DATA_OFFSET is a multiple of Emacs
        word size.
diff --git a/src/data.c b/src/data.c
index 486816cac70..4e81c80d0ed 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number.  */)
  return build_string (buffer);
 }
-INLINE static int
-digit_to_number (int character, int base)
-{
-  int digit;
-  if (character >= '0' && character <= '9')
-    digit = character - '0';
-  else if (character >= 'a' && character <= 'z')
-    digit = character - 'a' + 10;
-  else if (character >= 'A' && character <= 'Z')
-    digit = character - 'A' + 10;
-  else
-    return -1;
-  if (digit >= base)
-    return -1;
-  else
-    return digit;
-}
 DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0,
       doc: /* Parse STRING as a decimal number and return the number.
 This parses both integers and floating point numbers.
@@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 {
  register char *p;
  register int b;
-  EMACS_INT n;
+  Lisp_Object val;
  CHECK_STRING (string);
@@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer.  */)
        xsignal1 (Qargs_out_of_range, base);
    }
-  /* Skip any whitespace at the front of the number.  Typically strtol does
-     this anyway, so we might as well be consistent.  */
  p = SSDATA (string);
  while (*p == ' ' || *p == '\t')
    p++;
-  if (b == 10)
+  val = string_to_number (p, b, 1);
-    {
+  return NILP (val) ? make_number (0) : val;
-      Lisp_Object val = string_to_float (p, 1);
-      if (FLOATP (val))
-        return val;
-    }
-  n = strtol (p, NULL, b);
-  if (FIXNUM_OVERFLOW_P (n))
-    xsignal (Qoverflow_error, list1 (string));
-  return make_number (n);
 }
 enum arithop
  {
diff --git a/src/lisp.h b/src/lisp.h
index 8d333a3999f..5bace90e53e 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
  } while (0)
 extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                  Lisp_Object *, Lisp_Object);
-Lisp_Object string_to_float (char const *, int);
+Lisp_Object string_to_number (char const *, int, int);
 extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                         Lisp_Object);
 extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c
index a872929e08f..390c57d1678 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)
        if (!quoted && !uninterned_symbol)
          {
-            register char *p1;
+            Lisp_Object result = string_to_number (read_buffer, 10, 0);
-            Lisp_Object result;
+            if (! NILP (result))
-            p1 = read_buffer;
-            if (*p1 == '+' || *p1 == '-') p1++;
-            /* Is it an integer? */
-            if ('0' <= *p1 && *p1 <= '9')
-              {
-                do
-                  p1++;
-                while ('0' <= *p1 && *p1 <= '9');
-                /* Integers can have trailing decimal points.  */
-                p1 += (*p1 == '.');
-                if (p1 == p)
-                  {
-                    /* It is an integer. */
-                    EMACS_INT n = strtol (read_buffer, NULL, 10);
-                    if (FIXNUM_OVERFLOW_P (n))
-                      xsignal (Qoverflow_error,
-                               list1 (build_string (read_buffer)));
-                    return make_number (n);
-                  }
-              }
-            result = string_to_float (read_buffer, 0);
-            if (FLOATP (result))
              return result;
          }
        {
@@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
 }
+static inline int
+digit_to_number (int character, int base)
+{
+  int digit;
+  if ('0' <= character && character <= '9')
+    digit = character - '0';
+  else if ('a' <= character && character <= 'z')
+    digit = character - 'a' + 10;
+  else if ('A' <= character && character <= 'Z')
+    digit = character - 'A' + 10;
+  else
+    return -1;
+  return digit < base ? digit : -1;
+}
 #define LEAD_INT 1
 #define DOT_CHAR 2
 #define TRAIL_INT 4
-#define E_CHAR 8
+#define E_EXP 16
-#define EXP_INT 16
-/* Convert CP to a floating point number.  Return a non-float value if CP does
+/* Convert STRING to a number, assuming base BASE.  Return a fixnum if STRING
-   not have valid floating point syntax.  If IGNORE_TRAILING is nonzero,
+   has integer syntax and fits in a fixnum, else return the nearest float if
-   consider just the longest prefix of CP that has valid floating point
+   STRING has either floating point or integer syntax and BASE is 10, else
-   syntax.  */
+   return nil.  If IGNORE_TRAILING is nonzero, consider just the longest
+   prefix of STRING that has valid integer or floating point syntax.  Signal
+   an overflow if BASE is not 10 and the number has integer syntax but does
+   not fit.  */
 Lisp_Object
-string_to_float (char const *cp, int ignore_trailing)
+string_to_number (char const *string, int base, int ignore_trailing)
 {
  int state;
-  const char *start = cp;
+  char const *cp = string;
+  int leading_digit;
+  int float_syntax = 0;
+  double value = 0;
  /* Compute NaN and infinities using a variable, to cope with compilers that
     think they are smarter than we are.  */
@@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing)
     atof ("-0.0") drops the sign.  */
  int negative = *cp == '-';
-  double value = 0;
+  int signedp = negative || *cp == '+';
+  cp += signedp;
  state = 0;
-  if (negative || *cp == '+')
-    cp++;
-  if (*cp >= '0' && *cp <= '9')
+  leading_digit = digit_to_number (*cp, base);
+  if (0 <= leading_digit)
    {
      state |= LEAD_INT;
-      while (*cp >= '0' && *cp <= '9')
+      do
-        cp++;
+        ++cp;
+      while (0 <= digit_to_number (*cp, base));
    }
  if (*cp == '.')
    {
      state |= DOT_CHAR;
      cp++;
    }
-  if (*cp >= '0' && *cp <= '9')
-    {
-      state |= TRAIL_INT;
-      while (*cp >= '0' && *cp <= '9')
-        cp++;
-    }
-  if (*cp == 'e' || *cp == 'E')
-    {
-      state |= E_CHAR;
-      cp++;
-      if (*cp == '+' || *cp == '-')
-        cp++;
-    }
-  if (*cp >= '0' && *cp <= '9')
+  if (base == 10)
    {
-      state |= EXP_INT;
+      if ('0' <= *cp && *cp <= '9')
-      while (*cp >= '0' && *cp <= '9')
+        {
-        cp++;
+          state |= TRAIL_INT;
-    }
+          do
-  else if (cp == start)
+            cp++;
-    ;
+          while ('0' <= *cp && *cp <= '9');
-  else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
+        }
-    {
+      if (*cp == 'e' || *cp == 'E')
-      state |= EXP_INT;
+        {
-      cp += 3;
+          char const *ecp = cp;
-      value = 1.0 / zero;
+          cp++;
+          if (*cp == '+' || *cp == '-')
+            cp++;
+          if ('0' <= *cp && *cp <= '9')
+            {
+              state |= E_EXP;
+              do
+                cp++;
+              while ('0' <= *cp && *cp <= '9');
+            }
+          else if (cp[-1] == '+'
+                   && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
+            {
+              state |= E_EXP;
+              cp += 3;
+              value = 1.0 / zero;
+            }
+          else if (cp[-1] == '+'
+                   && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
+            {
+              state |= E_EXP;
+              cp += 3;
+              value = zero / zero;
+              /* If that made a "negative" NaN, negate it.  */
+              {
+                int i;
+                union { double d; char c[sizeof (double)]; }
+                  u_data, u_minus_zero;
+                u_data.d = value;
+                u_minus_zero.d = -0.0;
+                for (i = 0; i < sizeof (double); i++)
+                  if (u_data.c[i] & u_minus_zero.c[i])
+                    {
+                      value = -value;
+                      break;
+                    }
+              }
+              /* Now VALUE is a positive NaN.  */
+            }
+          else
+            cp = ecp;
+        }
+      float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
+                      || state == (LEAD_INT|E_EXP));
    }
-  else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = zero / zero;
-      /* If that made a "negative" NaN, negate it.  */
+  /* Return nil if the number uses invalid syntax.  If IGNORE_TRAILING, accept
-      {
+     any prefix that matches.  Otherwise, the entire string must match.  */
-        int i;
+  if (! (ignore_trailing
-        union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
+         ? ((state & LEAD_INT) != 0 || float_syntax)
+         : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
+    return Qnil;
-        u_data.d = value;
+  /* If the number does not use float syntax, and fits into a fixnum, return
-        u_minus_zero.d = - 0.0;
+     the fixnum.  */
-        for (i = 0; i < sizeof (double); i++)
+  if (0 <= leading_digit && ! float_syntax)
-          if (u_data.c[i] & u_minus_zero.c[i])
+    {
+      /* Convert string to EMACS_INT.  Do not use strtol, to avoid assuming
+         that EMACS_INT is no wider than 'long', and because when BASE is 16
+         strtol might accept numbers like "0x1" that are not allowed here.  */
+      EMACS_INT n = leading_digit;
+      EMACS_INT abs_bound =
+        (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM);
+      EMACS_INT abs_bound_over_base = abs_bound / base;
+      for (cp = string + signedp + 1; ; cp++)
+        {
+          int d = digit_to_number (*cp, base);
+          if (d < 0)
            {
-              value = - value;
+              if (n <= abs_bound)
+                return make_number (negative ? -n : n);
              break;
            }
-      }
+          if (abs_bound_over_base < n)
-      /* Now VALUE is a positive NaN.  */
+            break;
-    }
+          n = base * n + d;
+        }
-  if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
+      /* Unfortunately there's no simple and reliable way to convert
-         || state == (DOT_CHAR|TRAIL_INT)
+         non-base-10 to floating point.  */
-         || state == (LEAD_INT|E_CHAR|EXP_INT)
+      if (base != 10)
-         || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
+        xsignal (Qoverflow_error, list1 (build_string (string)));
-         || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
+    }
-    return make_number (0); /* Any non-float value will do.  */
+  /* Either the number uses float syntax, or it does not fit into a fixnum.
+     Convert it from string to floating point, unless the value is already
+     known because it is an infinity or a NaN.  */
  if (! value)
-    value = atof (start + negative);
+    value = atof (string + signedp);
  if (negative)
-    value = - value;
+    value = -value;
  return make_float (value);
 }
 static Lisp_Object
 read_vector (Lisp_Object readcharfun, int bytecodeflag)
author	Paul Eggert	2011-04-20 23:03:09 -0700
committer	Paul Eggert	2011-04-20 23:03:09 -0700
commit	452f4150134e4ba7bbd2bad9ce87d19c200505de (patch)
tree	776249edb45be8b6811ab6577a4038058a87f315
parent	6703b2e490339a624bb83c9543f1e51ede26b52b (diff)
download	emacs-452f4150134e4ba7bbd2bad9ce87d19c200505de.tar.gz emacs-452f4150134e4ba7bbd2bad9ce87d19c200505de.zip