Improve performance by avoiding strtoumax

This made (string-to-number "10") 20% faster on my old desktop,
an AMD Phenom II X4 910e running Fedora 25 x86-64.
* admin/merge-gnulib (GNULIB_MODULES): Remove strtoumax.
* lib/gnulib.mk.in, m4/gnulib-comp.m4: Regenerate.
* lib/strtoul.c, lib/strtoull.c, lib/strtoumax.c, m4/strtoull.m4:
* m4/strtoumax.m4: Remove.
* src/editfns.c (str2num): New function.
(styled_format): Use it instead of strtoumax.
Use ptrdiff_t instead of uintmax_t. Check for integer overflow.
* src/lread.c (LEAD_INT, DOT_CHAR, TRAIL_INT, E_EXP):
Move to private scope and make them enums.
(string_to_number): Compute integer value directly during
first pass instead of revisiting it with strtoumax later.
author: Paul Eggert 2017-06-01 16:03:12 -0700
committer: Paul Eggert 2017-06-01 16:06:38 -0700
commit: 178d0cb5f530e6d7eb36eb9987ff405c854ccdb3 (patch)
tree: d5c8c63dc97ed4635b354bb16803cbfd1d953470 /src/lread.c
parent: 53247108411a1e9d1aa5352c231fa049f3f918aa (diff)
download: emacs-178d0cb5f530e6d7eb36eb9987ff405c854ccdb3.tar.gz
emacs-178d0cb5f530e6d7eb36eb9987ff405c854ccdb3.zip
1 file changed, 25 insertions, 34 deletions
diff --git a/src/lread.c b/src/lread.c
index 368b86e8189..f8493982c67 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3495,25 +3495,18 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
 }
-#define LEAD_INT 1
-#define DOT_CHAR 2
-#define TRAIL_INT 4
-#define E_EXP 16
-/* Convert STRING to a number, assuming base BASE.  Return a fixnum if CP has
-   integer syntax and fits in a fixnum, else return the nearest float if CP has
-   either floating point or integer syntax and BASE is 10, else return nil.  If
-   IGNORE_TRAILING, consider just the longest prefix of CP that has
-   valid floating point syntax.  Signal an overflow if BASE is not 10 and the
-   number has integer syntax but does not fit.  */
+/* Convert STRING to a number, assuming base BASE.  Return a fixnum if
+   STRING has integer syntax and fits in a fixnum, else return the
+   nearest float if STRING has either floating point or integer syntax
+   and BASE is 10, else return nil.  If IGNORE_TRAILING, consider just
+   the longest prefix of STRING that has valid floating point syntax.
+   Signal an overflow if BASE is not 10 and the number has integer
+   syntax but does not fit.  */
 Lisp_Object
 string_to_number (char const *string, int base, bool ignore_trailing)
 {
-  int state;
  char const *cp = string;
-  int leading_digit;
  bool float_syntax = 0;
  double value = 0;
@@ -3525,15 +3518,23 @@ string_to_number (char const *string, int base, bool ignore_trailing)
  bool signedp = negative || *cp == '+';
  cp += signedp;
-  state = 0;
-  leading_digit = digit_to_number (*cp, base);
+  enum { INTOVERFLOW = 1, LEAD_INT = 2, DOT_CHAR = 4, TRAIL_INT = 8,
+         E_EXP = 16 };
+  int state = 0;
+  int leading_digit = digit_to_number (*cp, base);
+  uintmax_t n = leading_digit;
  if (leading_digit >= 0)
    {
      state |= LEAD_INT;
-      do
-        ++cp;
-      while (digit_to_number (*cp, base) >= 0);
+      for (int digit; 0 <= (digit = digit_to_number (*++cp, base)); )
+        {
+          if (INT_MULTIPLY_OVERFLOW (n, base))
+            state |= INTOVERFLOW;
+          n *= base;
+          if (INT_ADD_OVERFLOW (n, digit))
+            state |= INTOVERFLOW;
+          n += digit;
+        }
    }
  if (*cp == '.')
    {
@@ -3583,32 +3584,22 @@ string_to_number (char const *string, int base, bool ignore_trailing)
        }
      float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
-                      || state == (LEAD_INT|E_EXP));
+                      || (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP));
    }
  /* Return nil if the number uses invalid syntax.  If IGNORE_TRAILING, accept
     any prefix that matches.  Otherwise, the entire string must match.  */
  if (! (ignore_trailing
         ? ((state & LEAD_INT) != 0 || float_syntax)
-         : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
+         : (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT
+                     || float_syntax))))
    return Qnil;
  /* If the number uses integer and not float syntax, and is in C-language
     range, use its value, preferably as a fixnum.  */
  if (leading_digit >= 0 && ! float_syntax)
    {
-      uintmax_t n;
-      /* Fast special case for single-digit integers.  This also avoids a
-         glitch when BASE is 16 and IGNORE_TRAILING, because in that
-         case some versions of strtoumax accept numbers like "0x1" that Emacs
-         does not allow.  */
-      if (digit_to_number (string[signedp + 1], base) < 0)
-        return make_number (negative ? -leading_digit : leading_digit);
-      errno = 0;
-      n = strtoumax (string + signedp, NULL, base);
-      if (errno == ERANGE)
+      if (state & INTOVERFLOW)
        {
          /* Unfortunately there's no simple and accurate way to convert
             non-base-10 numbers that are out of C-language range.  */
author	Paul Eggert	2017-06-01 16:03:12 -0700
committer	Paul Eggert	2017-06-01 16:06:38 -0700
commit	178d0cb5f530e6d7eb36eb9987ff405c854ccdb3 (patch)
tree	d5c8c63dc97ed4635b354bb16803cbfd1d953470 /src/lread.c
parent	53247108411a1e9d1aa5352c231fa049f3f918aa (diff)
download	emacs-178d0cb5f530e6d7eb36eb9987ff405c854ccdb3.tar.gz emacs-178d0cb5f530e6d7eb36eb9987ff405c854ccdb3.zip

diff --git a/src/lread.c b/src/lread.c index 368b86e8189..f8493982c67 100644 --- a/src/lread.c +++ b/src/lread.c
@@ -3495,25 +3495,18 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
3495	}	3495	}
3496		3496
3497		3497
3498	#define LEAD_INT 1	3498	/* Convert STRING to a number, assuming base BASE. Return a fixnum if
3499	#define DOT_CHAR 2	3499	STRING has integer syntax and fits in a fixnum, else return the
3500	#define TRAIL_INT 4	3500	nearest float if STRING has either floating point or integer syntax
3501	#define E_EXP 16	3501	and BASE is 10, else return nil. If IGNORE_TRAILING, consider just
3502		3502	the longest prefix of STRING that has valid floating point syntax.
3503		3503	Signal an overflow if BASE is not 10 and the number has integer
3504	/* Convert STRING to a number, assuming base BASE. Return a fixnum if CP has	3504	syntax but does not fit. */
3505	integer syntax and fits in a fixnum, else return the nearest float if CP has
3506	either floating point or integer syntax and BASE is 10, else return nil. If
3507	IGNORE_TRAILING, consider just the longest prefix of CP that has
3508	valid floating point syntax. Signal an overflow if BASE is not 10 and the
3509	number has integer syntax but does not fit. */
3510		3505
3511	Lisp_Object	3506	Lisp_Object
3512	string_to_number (char const *string, int base, bool ignore_trailing)	3507	string_to_number (char const *string, int base, bool ignore_trailing)
3513	{	3508	{
3514	int state;
3515	char const *cp = string;	3509	char const *cp = string;
3516	int leading_digit;
3517	bool float_syntax = 0;	3510	bool float_syntax = 0;
3518	double value = 0;	3511	double value = 0;
3519		3512
@@ -3525,15 +3518,23 @@ string_to_number (char const *string, int base, bool ignore_trailing)
3525	bool signedp = negative \|\| *cp == '+';	3518	bool signedp = negative \|\| *cp == '+';
3526	cp += signedp;	3519	cp += signedp;
3527		3520
3528	state = 0;	3521	enum { INTOVERFLOW = 1, LEAD_INT = 2, DOT_CHAR = 4, TRAIL_INT = 8,
3529		3522	E_EXP = 16 };
3530	leading_digit = digit_to_number (*cp, base);	3523	int state = 0;
		3524	int leading_digit = digit_to_number (*cp, base);
		3525	uintmax_t n = leading_digit;
3531	if (leading_digit >= 0)	3526	if (leading_digit >= 0)
3532	{	3527	{
3533	state \|= LEAD_INT;	3528	state \|= LEAD_INT;
3534	do	3529	for (int digit; 0 <= (digit = digit_to_number (*++cp, base)); )
3535	++cp;	3530	{
3536	while (digit_to_number (*cp, base) >= 0);	3531	if (INT_MULTIPLY_OVERFLOW (n, base))
		3532	state \|= INTOVERFLOW;
		3533	n *= base;
		3534	if (INT_ADD_OVERFLOW (n, digit))
		3535	state \|= INTOVERFLOW;
		3536	n += digit;
		3537	}
3537	}	3538	}
3538	if (*cp == '.')	3539	if (*cp == '.')
3539	{	3540	{
@@ -3583,32 +3584,22 @@ string_to_number (char const *string, int base, bool ignore_trailing)
3583	}	3584	}
3584		3585
3585	float_syntax = ((state & (DOT_CHAR\|TRAIL_INT)) == (DOT_CHAR\|TRAIL_INT)	3586	float_syntax = ((state & (DOT_CHAR\|TRAIL_INT)) == (DOT_CHAR\|TRAIL_INT)
3586	\|\| state == (LEAD_INT\|E_EXP));	3587	\|\| (state & ~INTOVERFLOW) == (LEAD_INT\|E_EXP));
3587	}	3588	}
3588		3589
3589	/* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept	3590	/* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept
3590	any prefix that matches. Otherwise, the entire string must match. */	3591	any prefix that matches. Otherwise, the entire string must match. */
3591	if (! (ignore_trailing	3592	if (! (ignore_trailing
3592	? ((state & LEAD_INT) != 0 \|\| float_syntax)	3593	? ((state & LEAD_INT) != 0 \|\| float_syntax)
3593	: (!*cp && ((state & ~DOT_CHAR) == LEAD_INT \|\| float_syntax))))	3594	: (!*cp && ((state & ~(INTOVERFLOW \| DOT_CHAR)) == LEAD_INT
		3595	\|\| float_syntax))))
3594	return Qnil;	3596	return Qnil;
3595		3597
3596	/* If the number uses integer and not float syntax, and is in C-language	3598	/* If the number uses integer and not float syntax, and is in C-language
3597	range, use its value, preferably as a fixnum. */	3599	range, use its value, preferably as a fixnum. */
3598	if (leading_digit >= 0 && ! float_syntax)	3600	if (leading_digit >= 0 && ! float_syntax)
3599	{	3601	{
3600	uintmax_t n;	3602	if (state & INTOVERFLOW)
3601
3602	/* Fast special case for single-digit integers. This also avoids a
3603	glitch when BASE is 16 and IGNORE_TRAILING, because in that
3604	case some versions of strtoumax accept numbers like "0x1" that Emacs
3605	does not allow. */
3606	if (digit_to_number (string[signedp + 1], base) < 0)
3607	return make_number (negative ? -leading_digit : leading_digit);
3608
3609	errno = 0;
3610	n = strtoumax (string + signedp, NULL, base);
3611	if (errno == ERANGE)
3612	{	3603	{
3613	/* Unfortunately there's no simple and accurate way to convert	3604	/* Unfortunately there's no simple and accurate way to convert
3614	non-base-10 numbers that are out of C-language range. */	3605	non-base-10 numbers that are out of C-language range. */