aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Mattias Engdegård 2025-08-06 15:29:58 +0200
committer Mattias Engdegård 2025-08-21 16:42:45 +0200
commit c04553f655a05810f02dd77dac4f544018158e94 (patch)
tree 331c4e5d5b9b3f89e1cc1e97b0125f1c4b803970 /src
parent 3b80b706e552732825f80594c8459935a940a353 (diff)
download emacs-c04553f655a05810f02dd77dac4f544018158e94.tar.gz
emacs-c04553f655a05810f02dd77dac4f544018158e94.zip
Speed up JSON parsing by not maintaining line and column (bug#79192)
We use the current parsing position instead. The line and column in the error weren't used (nor very accurate to begin with) and the user can easily compute them when needed. The line number calculation is kept just in case but deprecated, for removal in Emacs 32.

* src/json.c (struct json_parser, json_parser_init): Update parser state.
(json_signal_error): New position computation.
(json_skip_whitespace_internal): Remove.
(is_json_whitespace): New.
(json_skip_whitespace, json_skip_whitespace_if_possible) (json_parse_unicode, json_parse_string, json_parse_number) (json_parse_value): Simplify and rewrite for efficiency.
(count_chars, count_newlines) (string_byte_to_pos, string_byte_to_line) (buffer_byte_to_pos, buffer_byte_to_line): New.
(Fjson_parse_string, Fjson_parse_buffer): Adapt to new parser state.
* test/src/json-tests.el (json-tests--parse-string-error-pos) (json-tests--parse-buffer-error-pos, json-parse-error-position): New.
* etc/NEWS: Note deprecation of line and column.
Diffstat (limited to 'src')
-rw-r--r-- src/json.c 219
1 file changed, 131 insertions, 88 deletions
diff --git a/src/json.c b/src/json.c
index 44eae653eb5..30a22dc8038 100644
--- a/src/json.c
+++ b/src/json.c
@@ -684,10 +684,6 @@ struct json_parser
684 const unsigned char *secondary_input_begin; 684 const unsigned char *secondary_input_begin;
685 const unsigned char *secondary_input_end; 685 const unsigned char *secondary_input_end;
686 686
687 ptrdiff_t current_line;
688 ptrdiff_t current_column;
689 ptrdiff_t point_of_current_line;
690
691 /* The parser has a maximum allowed depth. available_depth 687 /* The parser has a maximum allowed depth. available_depth
692 decreases at each object/array begin. If reaches zero, then an 688 decreases at each object/array begin. If reaches zero, then an
693 error is generated */ 689 error is generated */
@@ -717,15 +713,22 @@ struct json_parser
717 unsigned char *byte_workspace; 713 unsigned char *byte_workspace;
718 unsigned char *byte_workspace_end; 714 unsigned char *byte_workspace_end;
719 unsigned char *byte_workspace_current; 715 unsigned char *byte_workspace_current;
716
717 Lisp_Object obj;
718 ptrdiff_t (*byte_to_pos) (Lisp_Object obj, ptrdiff_t byte);
719 ptrdiff_t (*byte_to_line) (Lisp_Object obj, ptrdiff_t byte);
720}; 720};
721 721
722static AVOID 722static AVOID
723json_signal_error (struct json_parser *parser, Lisp_Object error) 723json_signal_error (struct json_parser *p, Lisp_Object error)
724{ 724{
725 xsignal3 (error, INT_TO_INTEGER (parser->current_line), 725 ptrdiff_t byte = (p->input_current - p->input_begin
726 INT_TO_INTEGER (parser->current_column), 726 + p->additional_bytes_count);
727 INT_TO_INTEGER (parser->point_of_current_line 727 ptrdiff_t pos = p->byte_to_pos (p->obj, byte);
728 + parser->current_column)); 728 ptrdiff_t line = p->byte_to_line (p->obj, byte) + 1;
729 /* The line number here is deprecated and provided for compatibility only.
730 It is scheduled for removal in Emacs 32. */
731 xsignal3 (error, INT_TO_INTEGER (line), Qnil, INT_TO_INTEGER (pos));
729} 732}
730 733
731static void 734static void
@@ -734,7 +737,10 @@ json_parser_init (struct json_parser *parser,
734 const unsigned char *input, 737 const unsigned char *input,
735 const unsigned char *input_end, 738 const unsigned char *input_end,
736 const unsigned char *secondary_input, 739 const unsigned char *secondary_input,
737 const unsigned char *secondary_input_end) 740 const unsigned char *secondary_input_end,
741 ptrdiff_t (*byte_to_pos) (Lisp_Object, ptrdiff_t),
742 ptrdiff_t (*byte_to_line) (Lisp_Object, ptrdiff_t),
743 Lisp_Object obj)
738{ 744{
739 if (secondary_input >= secondary_input_end) 745 if (secondary_input >= secondary_input_end)
740 { 746 {
@@ -761,9 +767,6 @@ json_parser_init (struct json_parser *parser,
761 767
762 parser->input_current = parser->input_begin; 768 parser->input_current = parser->input_begin;
763 769
764 parser->current_line = 1;
765 parser->current_column = 0;
766 parser->point_of_current_line = 0;
767 parser->available_depth = 10000; 770 parser->available_depth = 10000;
768 parser->conf = conf; 771 parser->conf = conf;
769 772
@@ -777,6 +780,9 @@ json_parser_init (struct json_parser *parser,
777 parser->byte_workspace = parser->internal_byte_workspace; 780 parser->byte_workspace = parser->internal_byte_workspace;
778 parser->byte_workspace_end = (parser->byte_workspace 781 parser->byte_workspace_end = (parser->byte_workspace
779 + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE); 782 + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE);
783 parser->byte_to_pos = byte_to_pos;
784 parser->byte_to_line = byte_to_line;
785 parser->obj = obj;
780} 786}
781 787
782static void 788static void
@@ -956,20 +962,9 @@ json_input_put_back (struct json_parser *parser)
956} 962}
957 963
958static bool 964static bool
959json_skip_whitespace_internal (struct json_parser *parser, int c) 965is_json_whitespace (int c)
960{ 966{
961 parser->current_column++; 967 return c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a;
962 if (c == 0x20 || c == 0x09 || c == 0x0d)
963 return false;
964 else if (c == 0x0a)
965 {
966 parser->current_line++;
967 parser->point_of_current_line += parser->current_column;
968 parser->current_column = 0;
969 return false;
970 }
971 else
972 return true;
973} 968}
974 969
975/* Skips JSON whitespace, and returns with the first non-whitespace 970/* Skips JSON whitespace, and returns with the first non-whitespace
@@ -980,7 +975,7 @@ json_skip_whitespace (struct json_parser *parser)
980 for (;;) 975 for (;;)
981 { 976 {
982 int c = json_input_get (parser); 977 int c = json_input_get (parser);
983 if (json_skip_whitespace_internal (parser, c)) 978 if (!is_json_whitespace (c))
984 return c; 979 return c;
985 } 980 }
986} 981}
@@ -994,9 +989,7 @@ json_skip_whitespace_if_possible (struct json_parser *parser)
994 for (;;) 989 for (;;)
995 { 990 {
996 int c = json_input_get_if_possible (parser); 991 int c = json_input_get_if_possible (parser);
997 if (c < 0) 992 if (!is_json_whitespace (c) || c < 0)
998 return c;
999 if (json_skip_whitespace_internal (parser, c))
1000 return c; 993 return c;
1001 } 994 }
1002} 995}
@@ -1022,7 +1015,6 @@ json_parse_unicode (struct json_parser *parser)
1022 for (int i = 0; i < 4; i++) 1015 for (int i = 0; i < 4; i++)
1023 { 1016 {
1024 int c = json_hex_value (json_input_get (parser)); 1017 int c = json_hex_value (json_input_get (parser));
1025 parser->current_column++;
1026 if (c < 0) 1018 if (c < 0)
1027 json_signal_error (parser, Qjson_escape_sequence_error); 1019 json_signal_error (parser, Qjson_escape_sequence_error);
1028 v[i] = c; 1020 v[i] = c;
@@ -1068,13 +1060,11 @@ json_parse_string (struct json_parser *parser, bool intern, bool leading_colon)
1068 json_byte_workspace_put (parser, c2); 1060 json_byte_workspace_put (parser, c2);
1069 json_byte_workspace_put (parser, c3); 1061 json_byte_workspace_put (parser, c3);
1070 parser->input_current += 4; 1062 parser->input_current += 4;
1071 parser->current_column += 4;
1072 continue; 1063 continue;
1073 } 1064 }
1074 } 1065 }
1075 1066
1076 int c = json_input_get (parser); 1067 int c = json_input_get (parser);
1077 parser->current_column++;
1078 if (json_plain_char[c]) 1068 if (json_plain_char[c])
1079 { 1069 {
1080 json_byte_workspace_put (parser, c); 1070 json_byte_workspace_put (parser, c);
@@ -1137,7 +1127,6 @@ json_parse_string (struct json_parser *parser, bool intern, bool leading_colon)
1137 { 1127 {
1138 /* Handle escape sequences */ 1128 /* Handle escape sequences */
1139 c = json_input_get (parser); 1129 c = json_input_get (parser);
1140 parser->current_column++;
1141 if (c == '"') 1130 if (c == '"')
1142 json_byte_workspace_put (parser, '"'); 1131 json_byte_workspace_put (parser, '"');
1143 else if (c == '\\') 1132 else if (c == '\\')
@@ -1160,11 +1149,9 @@ json_parse_string (struct json_parser *parser, bool intern, bool leading_colon)
1160 /* is the first half of the surrogate pair */ 1149 /* is the first half of the surrogate pair */
1161 if (num >= 0xd800 && num < 0xdc00) 1150 if (num >= 0xd800 && num < 0xdc00)
1162 { 1151 {
1163 parser->current_column++;
1164 if (json_input_get (parser) != '\\') 1152 if (json_input_get (parser) != '\\')
1165 json_signal_error (parser, 1153 json_signal_error (parser,
1166 Qjson_invalid_surrogate_error); 1154 Qjson_invalid_surrogate_error);
1167 parser->current_column++;
1168 if (json_input_get (parser) != 'u') 1155 if (json_input_get (parser) != 'u')
1169 json_signal_error (parser, 1156 json_signal_error (parser,
1170 Qjson_invalid_surrogate_error); 1157 Qjson_invalid_surrogate_error);
@@ -1285,7 +1272,6 @@ json_parse_number (struct json_parser *parser, int c)
1285 negative = true; 1272 negative = true;
1286 c = json_input_get (parser); 1273 c = json_input_get (parser);
1287 json_byte_workspace_put (parser, c); 1274 json_byte_workspace_put (parser, c);
1288 parser->current_column++;
1289 } 1275 }
1290 if (c < '0' || c > '9') 1276 if (c < '0' || c > '9')
1291 json_signal_error (parser, Qjson_parse_error); 1277 json_signal_error (parser, Qjson_parse_error);
@@ -1317,7 +1303,6 @@ json_parse_number (struct json_parser *parser, int c)
1317 if (c < '0' || c > '9') 1303 if (c < '0' || c > '9')
1318 break; 1304 break;
1319 json_byte_workspace_put (parser, c); 1305 json_byte_workspace_put (parser, c);
1320 parser->current_column++;
1321 1306
1322 integer_overflow |= ckd_mul (&integer, integer, 10); 1307 integer_overflow |= ckd_mul (&integer, integer, 10);
1323 integer_overflow |= ckd_add (&integer, integer, c - '0'); 1308 integer_overflow |= ckd_add (&integer, integer, c - '0');
@@ -1328,12 +1313,10 @@ json_parse_number (struct json_parser *parser, int c)
1328 if (c == '.') 1313 if (c == '.')
1329 { 1314 {
1330 json_byte_workspace_put (parser, c); 1315 json_byte_workspace_put (parser, c);
1331 parser->current_column++;
1332 1316
1333 is_float = true; 1317 is_float = true;
1334 c = json_input_get (parser); 1318 c = json_input_get (parser);
1335 json_byte_workspace_put (parser, c); 1319 json_byte_workspace_put (parser, c);
1336 parser->current_column++;
1337 if (c < '0' || c > '9') 1320 if (c < '0' || c > '9')
1338 json_signal_error (parser, Qjson_parse_error); 1321 json_signal_error (parser, Qjson_parse_error);
1339 for (;;) 1322 for (;;)
@@ -1344,23 +1327,19 @@ json_parse_number (struct json_parser *parser, int c)
1344 if (c < '0' || c > '9') 1327 if (c < '0' || c > '9')
1345 break; 1328 break;
1346 json_byte_workspace_put (parser, c); 1329 json_byte_workspace_put (parser, c);
1347 parser->current_column++;
1348 } 1330 }
1349 } 1331 }
1350 if (c == 'e' || c == 'E') 1332 if (c == 'e' || c == 'E')
1351 { 1333 {
1352 json_byte_workspace_put (parser, c); 1334 json_byte_workspace_put (parser, c);
1353 parser->current_column++;
1354 1335
1355 is_float = true; 1336 is_float = true;
1356 c = json_input_get (parser); 1337 c = json_input_get (parser);
1357 json_byte_workspace_put (parser, c); 1338 json_byte_workspace_put (parser, c);
1358 parser->current_column++;
1359 if (c == '-' || c == '+') 1339 if (c == '-' || c == '+')
1360 { 1340 {
1361 c = json_input_get (parser); 1341 c = json_input_get (parser);
1362 json_byte_workspace_put (parser, c); 1342 json_byte_workspace_put (parser, c);
1363 parser->current_column++;
1364 } 1343 }
1365 if (c < '0' || c > '9') 1344 if (c < '0' || c > '9')
1366 json_signal_error (parser, Qjson_parse_error); 1345 json_signal_error (parser, Qjson_parse_error);
@@ -1372,7 +1351,6 @@ json_parse_number (struct json_parser *parser, int c)
1372 if (c < '0' || c > '9') 1351 if (c < '0' || c > '9')
1373 break; 1352 break;
1374 json_byte_workspace_put (parser, c); 1353 json_byte_workspace_put (parser, c);
1375 parser->current_column++;
1376 } 1354 }
1377 } 1355 }
1378 1356
@@ -1605,57 +1583,67 @@ json_is_token_char (int c)
1605 || (c >= '0' && c <= '9') || (c == '-')); 1583 || (c >= '0' && c <= '9') || (c == '-'));
1606} 1584}
1607 1585
1608/* This is the entry point to the value parser, this parses a JSON 1586static Lisp_Object
1609 * value */
1610Lisp_Object
1611json_parse_value (struct json_parser *parser, int c) 1587json_parse_value (struct json_parser *parser, int c)
1612{ 1588{
1613 if (c == '{') 1589 switch (c)
1614 return json_parse_object (parser);
1615 else if (c == '[')
1616 return json_parse_array (parser);
1617 else if (c == '"')
1618 return json_parse_string (parser, false, false);
1619 else if ((c >= '0' && c <= '9') || (c == '-'))
1620 return json_parse_number (parser, c);
1621 else
1622 { 1590 {
1623 int c2 = json_input_get_if_possible (parser); 1591 case '{':
1624 int c3 = json_input_get_if_possible (parser); 1592 return json_parse_object (parser);
1625 int c4 = json_input_get_if_possible (parser); 1593 case '[':
1626 int c5 = json_input_get_if_possible (parser); 1594 return json_parse_array (parser);
1627 1595 case '"':
1628 if (c == 't' && c2 == 'r' && c3 == 'u' && c4 == 'e' 1596 return json_parse_string (parser, false, false);
1629 && (c5 < 0 || !json_is_token_char (c5))) 1597 case '0': case '1': case '2': case '3': case '4':
1598 case '5': case '6': case '7': case '8': case '9':
1599 case '-':
1600 return json_parse_number (parser, c);
1601 case 't':
1602 if (json_input_get_if_possible (parser) == 'r'
1603 && json_input_get_if_possible (parser) == 'u'
1604 && json_input_get_if_possible (parser) == 'e')
1630 { 1605 {
1631 if (c5 >= 0) 1606 int c2 = json_input_get_if_possible (parser);
1632 json_input_put_back (parser); 1607 if (!json_is_token_char (c2))
1633 parser->current_column += 3; 1608 {
1634 return Qt; 1609 if (c2 >= 0)
1610 json_input_put_back (parser);
1611 return Qt;
1612 }
1635 } 1613 }
1636 if (c == 'n' && c2 == 'u' && c3 == 'l' && c4 == 'l' 1614 break;
1637 && (c5 < 0 || !json_is_token_char (c5))) 1615 case 'f':
1616 if (json_input_get_if_possible (parser) == 'a'
1617 && json_input_get_if_possible (parser) == 'l'
1618 && json_input_get_if_possible (parser) == 's'
1619 && json_input_get_if_possible (parser) == 'e')
1638 { 1620 {
1639 if (c5 >= 0) 1621 int c2 = json_input_get_if_possible (parser);
1640 json_input_put_back (parser); 1622 if (!json_is_token_char (c2))
1641 parser->current_column += 3; 1623 {
1642 return parser->conf.null_object; 1624 if (c2 >= 0)
1625 json_input_put_back (parser);
1626 return parser->conf.false_object;
1627 }
1643 } 1628 }
1644 if (c == 'f' && c2 == 'a' && c3 == 'l' && c4 == 's' 1629 break;
1645 && c5 == 'e') 1630 case 'n':
1631 if (json_input_get_if_possible (parser) == 'u'
1632 && json_input_get_if_possible (parser) == 'l'
1633 && json_input_get_if_possible (parser) == 'l')
1646 { 1634 {
1647 int c6 = json_input_get_if_possible (parser); 1635 int c2 = json_input_get_if_possible (parser);
1648 if (c6 < 0 || !json_is_token_char (c6)) 1636 if (!json_is_token_char (c2))
1649 { 1637 {
1650 if (c6 >= 0) 1638 if (c2 >= 0)
1651 json_input_put_back (parser); 1639 json_input_put_back (parser);
1652 parser->current_column += 4; 1640 return parser->conf.null_object;
1653 return parser->conf.false_object;
1654 } 1641 }
1655 } 1642 }
1656 1643 break;
1657 json_signal_error (parser, Qjson_parse_error);
1658 } 1644 }
1645
1646 json_signal_error (parser, Qjson_parse_error);
1659} 1647}
1660 1648
1661static Lisp_Object 1649static Lisp_Object
@@ -1664,6 +1652,42 @@ json_parse (struct json_parser *parser)
1664 return json_parse_value (parser, json_skip_whitespace (parser)); 1652 return json_parse_value (parser, json_skip_whitespace (parser));
1665} 1653}
1666 1654
1655/* Count number of characters in the NBYTES bytes at S. */
1656static ptrdiff_t
1657count_chars (const unsigned char *s, ptrdiff_t nbytes)
1658{
1659 ptrdiff_t nchars = 0;
1660 for (ptrdiff_t i = 0; i < nbytes; i++)
1661 nchars += (s[i] & 0xc0) != 0x80;
1662 return nchars;
1663}
1664
1665/* Count number of newlines in the NBYTES bytes at S. */
1666static ptrdiff_t
1667count_newlines (const unsigned char *s, ptrdiff_t nbytes)
1668{
1669 ptrdiff_t nls = 0;
1670 for (ptrdiff_t i = 0; i < nbytes; i++)
1671 nls += (s[i] == '\n');
1672 return nls;
1673}
1674
1675static ptrdiff_t
1676string_byte_to_pos (Lisp_Object obj, ptrdiff_t byte)
1677{
1678 eassert (STRINGP (obj));
1679 eassert (byte <= SBYTES (obj));
1680 return STRING_MULTIBYTE (obj) ? count_chars (SDATA (obj), byte) : byte;
1681}
1682
1683static ptrdiff_t
1684string_byte_to_line (Lisp_Object obj, ptrdiff_t byte)
1685{
1686 eassert (STRINGP (obj));
1687 eassert (byte <= SBYTES (obj));
1688 return count_newlines (SDATA (obj), byte);
1689}
1690
1667DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY, 1691DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
1668 NULL, 1692 NULL,
1669 doc: /* Parse the JSON STRING into a Lisp value. 1693 doc: /* Parse the JSON STRING into a Lisp value.
@@ -1703,7 +1727,8 @@ usage: (json-parse-string STRING &rest ARGS) */)
1703 1727
1704 struct json_parser p; 1728 struct json_parser p;
1705 const unsigned char *begin = SDATA (string); 1729 const unsigned char *begin = SDATA (string);
1706 json_parser_init (&p, conf, begin, begin + SBYTES (string), NULL, NULL); 1730 json_parser_init (&p, conf, begin, begin + SBYTES (string), NULL, NULL,
1731 string_byte_to_pos, string_byte_to_line, string);
1707 record_unwind_protect_ptr (json_parser_done, &p); 1732 record_unwind_protect_ptr (json_parser_done, &p);
1708 Lisp_Object result = json_parse (&p); 1733 Lisp_Object result = json_parse (&p);
1709 1734
@@ -1713,6 +1738,24 @@ usage: (json-parse-string STRING &rest ARGS) */)
1713 return unbind_to (count, result); 1738 return unbind_to (count, result);
1714} 1739}
1715 1740
1741static ptrdiff_t
1742buffer_byte_to_pos (Lisp_Object obj, ptrdiff_t byte)
1743{
1744 /* The position from the start of the parse (for compatibility). */
1745 return BYTE_TO_CHAR (PT_BYTE + byte) - PT;
1746}
1747
1748static ptrdiff_t
1749buffer_byte_to_line (Lisp_Object obj, ptrdiff_t byte)
1750{
1751 /* Line from start of the parse (for compatibility). */
1752 ptrdiff_t to_gap = GPT_BYTE - PT_BYTE;
1753 return (to_gap > 0 && to_gap < byte
1754 ? (count_newlines (PT_ADDR, to_gap)
1755 + count_newlines (GAP_END_ADDR, byte - to_gap))
1756 : count_newlines (PT_ADDR, byte));
1757}
1758
1716DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer, 1759DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
1717 0, MANY, NULL, 1760 0, MANY, NULL,
1718 doc: /* Read a JSON value from current buffer starting at point. 1761 doc: /* Read a JSON value from current buffer starting at point.
@@ -1766,8 +1809,8 @@ usage: (json-parse-buffer &rest args) */)
1766 secondary_end = ZV_ADDR; 1809 secondary_end = ZV_ADDR;
1767 } 1810 }
1768 1811
1769 json_parser_init (&p, conf, begin, end, secondary_begin, 1812 json_parser_init (&p, conf, begin, end, secondary_begin, secondary_end,
1770 secondary_end); 1813 buffer_byte_to_pos, buffer_byte_to_line, Qnil);
1771 record_unwind_protect_ptr (json_parser_done, &p); 1814 record_unwind_protect_ptr (json_parser_done, &p);
1772 Lisp_Object result = json_parse (&p); 1815 Lisp_Object result = json_parse (&p);
1773 1816
@@ -1776,7 +1819,7 @@ usage: (json-parse-buffer &rest args) */)
1776 ptrdiff_t position = (NILP (BVAR (current_buffer, 1819 ptrdiff_t position = (NILP (BVAR (current_buffer,
1777 enable_multibyte_characters)) 1820 enable_multibyte_characters))
1778 ? byte 1821 ? byte
1779 : PT + p.point_of_current_line + p.current_column); 1822 : BYTE_TO_CHAR (byte));
1780 SET_PT_BOTH (position, byte); 1823 SET_PT_BOTH (position, byte);
1781 1824
1782 return unbind_to (count, result); 1825 return unbind_to (count, result);