Fix mishandling of symbols that look like numbers

* src/bignum.c (make_neg_biguint): New function.
* src/lread.c (read1): Do not mishandle an unquoted symbol with name equal to something like "1\0x", i.e., a string of numeric form followed by a NUL byte. Formerly these symbols were misread as numbers.
(string_to_number): Change last argument from an integer flag to a pointer to the length. This lets the caller figure out how much of the prefix was used. All callers changed. Add a fast path if the integer (sans sign) fits in uintmax_t. Update comments and simplify now that bignums are present.
* src/print.c (print_object): Fix quoting of symbols that look like numbers, by relying on string_to_number for the tricky cases rather than trying to redo its logic, incorrectly. For example, (read (prin1-to-string '\1e+NaN)) formerly returned "1e+NaN", which was wrong: a backslash is needed in the output to prevent it from being read as a NaN. Escape NO_BREAK_SPACE too, since lread.c treats it like SPACE.
* test/src/print-tests.el (print-read-roundtrip): Add tests illustrating the abovementioned bugs.
author: Paul Eggert 2018-10-10 23:17:18 -0700
committer: Paul Eggert 2018-10-10 23:19:42 -0700
commit: 5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d (patch)
tree: c51b2a470f62679f5e5c3f6f55d90aa0082d0133 /src/print.c
parent: fd3a48fcd8bb212ec12b9b10a79de0ae605ee93b (diff)
download: emacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.tar.gz
emacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.zip
1 files changed, 15 insertions, 34 deletions
diff --git a/src/print.c b/src/print.c
index c0c90bc7e9a..d15ff97b00c 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1993,39 +1993,17 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
    case Lisp_Symbol:
      {
-        bool confusing;
+        Lisp_Object name = SYMBOL_NAME (obj);
-        unsigned char *p = SDATA (SYMBOL_NAME (obj));
+        ptrdiff_t size_byte = SBYTES (name);
-        unsigned char *end = p + SBYTES (SYMBOL_NAME (obj));
-        int c;
+        /* Set CONFUSING if NAME looks like a number, calling
-        ptrdiff_t i, i_byte;
+           string_to_number for non-obvious cases.  */
-        ptrdiff_t size_byte;
+        char *p = SSDATA (name);
-        Lisp_Object name;
+        bool signedp = *p == '-' || *p == '+';
+        ptrdiff_t len;
-        name = SYMBOL_NAME (obj);
+        bool confusing = ((c_isdigit (p[signedp]) || p[signedp] == '.')
+                          && !NILP (string_to_number (p, 10, &len))
-        if (p != end && (*p == '-' || *p == '+')) p++;
+                          && len == size_byte);
-        if (p == end)
-          confusing = 0;
-        /* If symbol name begins with a digit, and ends with a digit,
-           and contains nothing but digits and `e', it could be treated
-           as a number.  So set CONFUSING.
-           Symbols that contain periods could also be taken as numbers,
-           but periods are always escaped, so we don't have to worry
-           about them here.  */
-        else if (*p >= '0' && *p <= '9'
-                 && end[-1] >= '0' && end[-1] <= '9')
-          {
-            while (p != end && ((*p >= '0' && *p <= '9')
-                                /* Needed for \2e10.  */
-                                || *p == 'e' || *p == 'E'))
-              p++;
-            confusing = (end == p);
-          }
-        else
-          confusing = 0;
-        size_byte = SBYTES (name);
        if (! NILP (Vprint_gensym)
            && !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))
@@ -2036,10 +2014,12 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
            break;
          }
-        for (i = 0, i_byte = 0; i_byte < size_byte;)
+        ptrdiff_t i = 0;
+        for (ptrdiff_t i_byte = 0; i_byte < size_byte; )
          {
            /* Here, we must convert each multi-byte form to the
               corresponding character code before handing it to PRINTCHAR.  */
+            int c;
            FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);
            maybe_quit ();
@@ -2049,6 +2029,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
                    || c == ';' || c == '#' || c == '(' || c == ')'
                    || c == ',' || c == '.' || c == '`'
                    || c == '[' || c == ']' || c == '?' || c <= 040
+                    || c == NO_BREAK_SPACE
                    || confusing
                    || (i == 1 && confusable_symbol_character_p (c)))
                  {
author	Paul Eggert	2018-10-10 23:17:18 -0700
committer	Paul Eggert	2018-10-10 23:19:42 -0700
commit	5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d (patch)
tree	c51b2a470f62679f5e5c3f6f55d90aa0082d0133 /src/print.c
parent	fd3a48fcd8bb212ec12b9b10a79de0ae605ee93b (diff)
download	emacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.tar.gz emacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.zip

diff --git a/src/print.c b/src/print.c
index c0c90bc7e9a..d15ff97b00c 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1993,39 +1993,17 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
1993		1993
1994	case Lisp_Symbol:	1994	case Lisp_Symbol:
1995	{	1995	{
1996	bool confusing;	1996	Lisp_Object name = SYMBOL_NAME (obj);
1997	unsigned char *p = SDATA (SYMBOL_NAME (obj));	1997	ptrdiff_t size_byte = SBYTES (name);
1998	unsigned char *end = p + SBYTES (SYMBOL_NAME (obj));	1998
1999	int c;	1999	/* Set CONFUSING if NAME looks like a number, calling
2000	ptrdiff_t i, i_byte;	2000	string_to_number for non-obvious cases. */
2001	ptrdiff_t size_byte;	2001	char *p = SSDATA (name);
2002	Lisp_Object name;	2002	bool signedp = *p == '-' \|\| *p == '+';
2003		2003	ptrdiff_t len;
2004	name = SYMBOL_NAME (obj);	2004	bool confusing = ((c_isdigit (p[signedp]) \|\| p[signedp] == '.')
2005		2005	&& !NILP (string_to_number (p, 10, &len))
2006	if (p != end && (*p == '-' \|\| *p == '+')) p++;	2006	&& len == size_byte);
2007	if (p == end)
2008	confusing = 0;
2009	/* If symbol name begins with a digit, and ends with a digit,
2010	and contains nothing but digits and `e', it could be treated
2011	as a number. So set CONFUSING.
2012
2013	Symbols that contain periods could also be taken as numbers,
2014	but periods are always escaped, so we don't have to worry
2015	about them here. */
2016	else if (*p >= '0' && *p <= '9'
2017	&& end[-1] >= '0' && end[-1] <= '9')
2018	{
2019	while (p != end && ((*p >= '0' && *p <= '9')
2020	/* Needed for \2e10. */
2021	\|\| *p == 'e' \|\| *p == 'E'))
2022	p++;
2023	confusing = (end == p);
2024	}
2025	else
2026	confusing = 0;
2027
2028	size_byte = SBYTES (name);
2029		2007
2030	if (! NILP (Vprint_gensym)	2008	if (! NILP (Vprint_gensym)
2031	&& !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))	2009	&& !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))
@@ -2036,10 +2014,12 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
2036	break;	2014	break;
2037	}	2015	}
2038		2016
2039	for (i = 0, i_byte = 0; i_byte < size_byte;)	2017	ptrdiff_t i = 0;
		2018	for (ptrdiff_t i_byte = 0; i_byte < size_byte; )
2040	{	2019	{
2041	/* Here, we must convert each multi-byte form to the	2020	/* Here, we must convert each multi-byte form to the
2042	corresponding character code before handing it to PRINTCHAR. */	2021	corresponding character code before handing it to PRINTCHAR. */
		2022	int c;
2043	FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);	2023	FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);
2044	maybe_quit ();	2024	maybe_quit ();
2045		2025
@@ -2049,6 +2029,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
2049	\|\| c == ';' \|\| c == '#' \|\| c == '(' \|\| c == ')'	2029	\|\| c == ';' \|\| c == '#' \|\| c == '(' \|\| c == ')'
2050	\|\| c == ',' \|\| c == '.' \|\| c == '`'	2030	\|\| c == ',' \|\| c == '.' \|\| c == '`'
2051	\|\| c == '[' \|\| c == ']' \|\| c == '?' \|\| c <= 040	2031	\|\| c == '[' \|\| c == ']' \|\| c == '?' \|\| c <= 040
		2032	\|\| c == NO_BREAK_SPACE
2052	\|\| confusing	2033	\|\| confusing
2053	\|\| (i == 1 && confusable_symbol_character_p (c)))	2034	\|\| (i == 1 && confusable_symbol_character_p (c)))
2054	{	2035	{