aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/treesit.c267
-rw-r--r--src/treesit.h20
2 files changed, 194 insertions, 93 deletions
diff --git a/src/treesit.c b/src/treesit.c
index fc7e6c55932..2565464deac 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -89,6 +89,8 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
89 - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and 89 - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
90 node type. 90 node type.
91 91
92 Regarding signals: only raise signals in lisp functions.
93
92 We don't parse at every keystroke. Instead we only record the 94 We don't parse at every keystroke. Instead we only record the
93 changes at each keystroke, and only parse when requested. It is 95 changes at each keystroke, and only parse when requested. It is
94 possible that lazy parsing is worse: instead of dispersed little 96 possible that lazy parsing is worse: instead of dispersed little
@@ -196,10 +198,11 @@ ts_load_language_push_for_each_suffix
196 Qtreesit_load_language_error carries the error message from 198 Qtreesit_load_language_error carries the error message from
197 trying to load the library with each extension. 199 trying to load the library with each extension.
198 200
199 If SIGNAL is true, signal an error when failed to load LANGUAGE; if 201 If error occurs, return NULL and fill SIGNAL_SYMBOL and SIGNAL_DATA
200 false, return NULL when failed. */ 202 with values suitable for xsignal. */
201static TSLanguage * 203static TSLanguage *
202ts_load_language (Lisp_Object language_symbol, bool signal) 204ts_load_language (Lisp_Object language_symbol,
205 Lisp_Object *signal_symbol, Lisp_Object *signal_data)
203{ 206{
204 Lisp_Object symbol_name = Fsymbol_name (language_symbol); 207 Lisp_Object symbol_name = Fsymbol_name (language_symbol);
205 208
@@ -263,11 +266,9 @@ ts_load_language (Lisp_Object language_symbol, bool signal)
263 } 266 }
264 if (error != NULL) 267 if (error != NULL)
265 { 268 {
266 if (signal) 269 *signal_symbol = Qtreesit_load_language_error;
267 xsignal2 (Qtreesit_load_language_error, 270 *signal_data = list2 (symbol_name, Fnreverse (error_list));
268 symbol_name, Fnreverse (error_list)); 271 return NULL;
269 else
270 return NULL;
271 } 272 }
272 273
273 /* Load TSLanguage. */ 274 /* Load TSLanguage. */
@@ -277,11 +278,9 @@ ts_load_language (Lisp_Object language_symbol, bool signal)
277 error = dynlib_error (); 278 error = dynlib_error ();
278 if (error != NULL) 279 if (error != NULL)
279 { 280 {
280 if (signal) 281 *signal_symbol = Qtreesit_load_language_error;
281 xsignal1 (Qtreesit_load_language_error, 282 *signal_data = build_string (error);
282 build_string (error)); 283 return NULL;
283 else
284 return NULL;
285 } 284 }
286 TSLanguage *lang = (*langfn) (); 285 TSLanguage *lang = (*langfn) ();
287 286
@@ -291,12 +290,10 @@ ts_load_language (Lisp_Object language_symbol, bool signal)
291 ts_parser_delete (parser); 290 ts_parser_delete (parser);
292 if (!success) 291 if (!success)
293 { 292 {
294 if (signal) 293 *signal_symbol = Qtreesit_load_language_error;
295 xsignal2 (Qtreesit_load_language_error, 294 *signal_data = list2 (build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"),
296 build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"), 295 make_fixnum (ts_language_version (lang)));
297 make_fixnum (ts_language_version (lang))); 296 return NULL;
298 else
299 return NULL;
300 } 297 }
301 return lang; 298 return lang;
302} 299}
@@ -310,7 +307,9 @@ DEFUN ("treesit-language-available-p",
310{ 307{
311 CHECK_SYMBOL (language); 308 CHECK_SYMBOL (language);
312 ts_initialize (); 309 ts_initialize ();
313 if (ts_load_language(language, false) == NULL) 310 Lisp_Object signal_symbol = Qnil;
311 Lisp_Object signal_data = Qnil;
312 if (ts_load_language(language, &signal_symbol, &signal_data) == NULL)
314 return Qnil; 313 return Qnil;
315 else 314 else
316 return Qt; 315 return Qt;
@@ -634,29 +633,96 @@ make_ts_node (Lisp_Object parser, TSNode node)
634 return make_lisp_ptr (lisp_node, Lisp_Vectorlike); 633 return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
635} 634}
636 635
637/* Make a compiled query struct. Return NULL if error occurs. QUERY 636/* Make a compiled query. QUERY has to be either a cons or a
638 has to be either a cons or a string. */ 637 string. */
639static struct Lisp_TS_Query * 638static Lisp_Object
640make_ts_query (Lisp_Object query, const TSLanguage *language, 639make_ts_query (Lisp_Object query, Lisp_Object language)
641 uint32_t *error_offset, TSQueryError *error_type)
642{ 640{
643 if (CONSP (query))
644 query = Ftreesit_query_expand (query);
645 char *source = SSDATA (query);
646
647 TSQuery *ts_query = ts_query_new (language, source, strlen (source),
648 error_offset, error_type);
649 TSQueryCursor *ts_cursor = ts_query_cursor_new (); 641 TSQueryCursor *ts_cursor = ts_query_cursor_new ();
642 struct Lisp_TS_Query *lisp_query
643 = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Query, source,
644 PVEC_TS_COMPILED_QUERY);
650 645
651 if (ts_query == NULL) 646 lisp_query->language = language;
647 lisp_query->source = query;
648 lisp_query->query = NULL;
649 lisp_query->cursor = ts_cursor;
650 return make_lisp_ptr (lisp_query, Lisp_Vectorlike);
651}
652
653static const char*
654ts_query_error_to_string (TSQueryError error)
655{
656 switch (error)
657 {
658 case TSQueryErrorNone:
659 return "None";
660 case TSQueryErrorSyntax:
661 return "Syntax error at";
662 case TSQueryErrorNodeType:
663 return "Node type error at";
664 case TSQueryErrorField:
665 return "Field error at";
666 case TSQueryErrorCapture:
667 return "Capture error at";
668 case TSQueryErrorStructure:
669 return "Structure error at";
670 default:
671 return "Unknown error";
672 }
673}
674
675static Lisp_Object
676ts_compose_query_signal_data
677(uint32_t error_offset, TSQueryError error_type)
678{
679 return list3 (build_string
680 (ts_query_error_to_string (error_type)),
681 make_fixnum (error_offset + 1),
682 build_pure_c_string("Debug the query with `treesit-query-validate'"));
683}
684
685/* Ensure the QUERY is compiled. Return the TSQuery. It could be
686 NULL if an error occurs (the language fails to load or the query
687 fails to compile), in which case SIGNAL_SYMBOL and SIGNAL_DATA
688 are set to values suitable for xsignal. */
689static TSQuery *
690ts_ensure_query_compiled
691(Lisp_Object query, Lisp_Object *signal_symbol, Lisp_Object *signal_data)
692{
693 /* If query is already compiled (not null), return that, otherwise
694 compile and return it. */
695 TSQuery *ts_query = XTS_COMPILED_QUERY (query)->query;
696 if (ts_query != NULL)
697 return ts_query;
698
699 /* Get query source and TSLanguage ready. */
700 Lisp_Object source = XTS_COMPILED_QUERY (query)->source;
701 Lisp_Object language = XTS_COMPILED_QUERY (query)->language;
702 /* This is the main reason why we compile query lazily: to avoid
703 loading languages early. */
704 TSLanguage *ts_lang = ts_load_language (language, signal_symbol,
705 signal_data);
706 if (ts_lang == NULL)
652 return NULL; 707 return NULL;
653 708
654 struct Lisp_TS_Query *lisp_query 709 if (CONSP (source))
655 = ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_TS_Query, 710 source = Ftreesit_query_expand (source);
656 PVEC_TS_COMPILED_QUERY); 711
657 lisp_query->query = ts_query; 712 /* Create TSQuery. */
658 lisp_query->cursor = ts_cursor; 713 uint32_t error_offset;
659 return lisp_query; 714 TSQueryError error_type;
715 char *ts_source = SSDATA (source);
716 ts_query = ts_query_new (ts_lang, ts_source, strlen (ts_source),
717 &error_offset, &error_type);
718 if (ts_query == NULL)
719 {
720 *signal_symbol = Qtreesit_query_error;
721 *signal_data = ts_compose_query_signal_data
722 (error_offset, error_type);
723 }
724 XTS_COMPILED_QUERY (query)->query = ts_query;
725 return ts_query;
660} 726}
661 727
662DEFUN ("treesit-parser-p", 728DEFUN ("treesit-parser-p",
@@ -750,15 +816,23 @@ parser. If NO-REUSE is non-nil, always create a new parser. */)
750 } 816 }
751 } 817 }
752 818
819 /* Load language. */
820 Lisp_Object signal_symbol = Qnil;
821 Lisp_Object signal_data = Qnil;
753 TSParser *parser = ts_parser_new (); 822 TSParser *parser = ts_parser_new ();
754 TSLanguage *lang = ts_load_language (language, true); 823 TSLanguage *lang = ts_load_language (language, &signal_symbol,
824 &signal_data);
825 if (lang == NULL)
826 xsignal (signal_symbol, signal_data);
755 /* We check language version when loading a language, so this should 827 /* We check language version when loading a language, so this should
756 always succeed. */ 828 always succeed. */
757 ts_parser_set_language (parser, lang); 829 ts_parser_set_language (parser, lang);
758 830
831 /* Create parser. */
759 Lisp_Object lisp_parser 832 Lisp_Object lisp_parser
760 = make_ts_parser (Fcurrent_buffer (), parser, NULL, language); 833 = make_ts_parser (Fcurrent_buffer (), parser, NULL, language);
761 834
835 /* Update parser-list. */
762 BVAR (buf, ts_parser_list) 836 BVAR (buf, ts_parser_list)
763 = Fcons (lisp_parser, BVAR (buf, ts_parser_list)); 837 = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
764 838
@@ -1454,28 +1528,6 @@ explanation. */)
1454 query, build_pure_c_string (" ")); 1528 query, build_pure_c_string (" "));
1455} 1529}
1456 1530
1457static const char*
1458ts_query_error_to_string (TSQueryError error)
1459{
1460 switch (error)
1461 {
1462 case TSQueryErrorNone:
1463 return "None";
1464 case TSQueryErrorSyntax:
1465 return "Syntax error at";
1466 case TSQueryErrorNodeType:
1467 return "Node type error at";
1468 case TSQueryErrorField:
1469 return "Field error at";
1470 case TSQueryErrorCapture:
1471 return "Capture error at";
1472 case TSQueryErrorStructure:
1473 return "Structure error at";
1474 default:
1475 return "Unknown error";
1476 }
1477}
1478
1479/* This struct is used for passing captures to be checked against 1531/* This struct is used for passing captures to be checked against
1480 predicates. Captures we check for are the ones in START before 1532 predicates. Captures we check for are the ones in START before
1481 END. For example, if START and END are 1533 END. For example, if START and END are
@@ -1656,16 +1708,19 @@ ts_eval_predicates
1656 1708
1657DEFUN ("treesit-query-compile", 1709DEFUN ("treesit-query-compile",
1658 Ftreesit_query_compile, 1710 Ftreesit_query_compile,
1659 Streesit_query_compile, 2, 2, 0, 1711 Streesit_query_compile, 2, 3, 0,
1660 doc: /* Compile QUERY to a compiled query. 1712 doc: /* Compile QUERY to a compiled query.
1661 1713
1662Querying a compiled query is much faster than an uncompiled one. 1714Querying a compiled query is much faster than an uncompiled one.
1663LANGUAGE is the language this query is for. 1715LANGUAGE is the language this query is for.
1664 1716
1717If EAGER is non-nil, immediately load LANGUAGE and compile the query.
1718Otherwise defer until the query is first used.
1719
1665Signals treesit-query-error if QUERY is malformed or something else 1720Signals treesit-query-error if QUERY is malformed or something else
1666goes wrong. You can use `treesit-query-validate' to debug the 1721goes wrong. (This of course would only happen if EAGER is non-nil.)
1667query. */) 1722You can use `treesit-query-validate' to debug the query. */)
1668 (Lisp_Object language, Lisp_Object query) 1723 (Lisp_Object language, Lisp_Object query, Lisp_Object eager)
1669{ 1724{
1670 if (NILP (Ftreesit_query_p (query))) 1725 if (NILP (Ftreesit_query_p (query)))
1671 wrong_type_argument (Qtreesit_query_p, query); 1726 wrong_type_argument (Qtreesit_query_p, query);
@@ -1673,19 +1728,23 @@ query. */)
1673 if (TS_COMPILED_QUERY_P (query)) 1728 if (TS_COMPILED_QUERY_P (query))
1674 return query; 1729 return query;
1675 1730
1676 TSLanguage *ts_lang = ts_load_language (language, true); 1731 Lisp_Object lisp_query = make_ts_query (query, language);
1677 uint32_t error_offset;
1678 TSQueryError error_type;
1679 1732
1680 struct Lisp_TS_Query *lisp_query 1733 /* Maybe actually compile. */
1681 = make_ts_query (query, ts_lang, &error_offset, &error_type); 1734 if (NILP (eager))
1735 return lisp_query;
1736 else
1737 {
1738 Lisp_Object signal_symbol = Qnil;
1739 Lisp_Object signal_data = Qnil;
1740 TSQuery *ts_query = ts_ensure_query_compiled
1741 (lisp_query, &signal_symbol, &signal_data);
1682 1742
1683 if (lisp_query == NULL) 1743 if (ts_query == NULL)
1684 xsignal2 (Qtreesit_query_error, 1744 xsignal (signal_symbol, signal_data);
1685 build_string (ts_query_error_to_string (error_type)),
1686 make_fixnum (error_offset + 1));
1687 1745
1688 return make_lisp_ptr (lisp_query, Lisp_Vectorlike); 1746 return lisp_query;
1747 }
1689} 1748}
1690 1749
1691DEFUN ("treesit-query-capture", 1750DEFUN ("treesit-query-capture",
@@ -1725,7 +1784,7 @@ query. */)
1725 || CONSP (query) || STRINGP (query))) 1784 || CONSP (query) || STRINGP (query)))
1726 wrong_type_argument (Qtreesit_query_p, query); 1785 wrong_type_argument (Qtreesit_query_p, query);
1727 1786
1728 1787 /* Resolve NODE into an actual node. */
1729 Lisp_Object lisp_node; 1788 Lisp_Object lisp_node;
1730 if (TS_NODEP (node)) 1789 if (TS_NODEP (node))
1731 lisp_node = node; 1790 lisp_node = node;
@@ -1751,30 +1810,50 @@ query. */)
1751 const TSLanguage *lang = ts_parser_language 1810 const TSLanguage *lang = ts_parser_language
1752 (XTS_PARSER (lisp_parser)->parser); 1811 (XTS_PARSER (lisp_parser)->parser);
1753 1812
1754 /* Initialize query objects, and execute query. */ 1813 /* Initialize query objects. At the end of this block, we should
1755 struct Lisp_TS_Query *lisp_query; 1814 have a working TSQuery and a TSQueryCursor. */
1815 TSQuery *ts_query;
1816 TSQueryCursor *cursor;
1817 bool needs_to_free_query_and_cursor;
1756 if (TS_COMPILED_QUERY_P (query)) 1818 if (TS_COMPILED_QUERY_P (query))
1757 lisp_query = XTS_COMPILED_QUERY (query); 1819 {
1820 Lisp_Object signal_symbol = Qnil;
1821 Lisp_Object signal_data = Qnil;
1822 ts_query = ts_ensure_query_compiled
1823 (query, &signal_symbol, &signal_data);
1824 cursor = XTS_COMPILED_QUERY (query)->cursor;
1825 /* We don't need to free ts_query and cursor because they
1826 are stored in a lisp object, which is tracked by gc. */
1827 needs_to_free_query_and_cursor = false;
1828 if (ts_query == NULL)
1829 {
1830 xsignal (signal_symbol, signal_data);
1831 }
1832 }
1758 else 1833 else
1759 { 1834 {
1835 /* Since query is not TS_COMPILED_QUERY, it can only be a string
1836 or a cons. */
1837 if (CONSP (query))
1838 query = Ftreesit_query_expand (query);
1839 char *query_string = SSDATA (query);
1760 uint32_t error_offset; 1840 uint32_t error_offset;
1761 TSQueryError error_type; 1841 TSQueryError error_type;
1762 lisp_query = make_ts_query (query, lang, 1842 ts_query = ts_query_new (lang, query_string, strlen (query_string),
1763 &error_offset, &error_type); 1843 &error_offset, &error_type);
1764 if (lisp_query == NULL) 1844 if (ts_query == NULL)
1765 { 1845 {
1766 xsignal3 (Qtreesit_query_error, 1846 xsignal (Qtreesit_query_error, ts_compose_query_signal_data
1767 build_string 1847 (error_offset, error_type));
1768 (ts_query_error_to_string (error_type)),
1769 make_fixnum (error_offset + 1),
1770 build_pure_c_string("Debug the query with `treesit-query-validate'"));
1771 } 1848 }
1772 /* We don't need need to free TS_QUERY and CURSOR, they are stored 1849 cursor = ts_query_cursor_new ();
1773 in a lisp object, which is tracked by gc. */ 1850 needs_to_free_query_and_cursor = true;
1774 } 1851 }
1775 TSQuery *ts_query = lisp_query->query;
1776 TSQueryCursor *cursor = lisp_query->cursor;
1777 1852
1853 /* WARN: After this point, free ts_query and cursor before every
1854 signal and return. */
1855
1856 /* Set query range. */
1778 if (!NILP (beg) && !NILP (end)) 1857 if (!NILP (beg) && !NILP (end))
1779 { 1858 {
1780 EMACS_INT beg_byte = XFIXNUM (beg); 1859 EMACS_INT beg_byte = XFIXNUM (beg);
@@ -1784,6 +1863,7 @@ query. */)
1784 (uint32_t) end_byte - visible_beg); 1863 (uint32_t) end_byte - visible_beg);
1785 } 1864 }
1786 1865
1866 /* Execute query. */
1787 ts_query_cursor_exec (cursor, ts_query, ts_node); 1867 ts_query_cursor_exec (cursor, ts_query, ts_node);
1788 TSQueryMatch match; 1868 TSQueryMatch match;
1789 1869
@@ -1838,6 +1918,11 @@ query. */)
1838 result = prev_result; 1918 result = prev_result;
1839 } 1919 }
1840 } 1920 }
1921 if (needs_to_free_query_and_cursor)
1922 {
1923 ts_query_delete (ts_query);
1924 ts_query_cursor_delete (cursor);
1925 }
1841 return Fnreverse (result); 1926 return Fnreverse (result);
1842} 1927}
1843 1928
diff --git a/src/treesit.h b/src/treesit.h
index 0c043f7d250..20e7cd4107c 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -84,11 +84,27 @@ struct Lisp_TS_Node
84 ptrdiff_t timestamp; 84 ptrdiff_t timestamp;
85}; 85};
86 86
87/* A compiled tree-sitter query. */ 87/* A compiled tree-sitter query.
88
89 When we create a query object by treesit-query-compile, it is not
90 immediately compiled, because that would require the language
91 definition to be loaded. For example, python.el contains
92
93 (defvar xxx (treesit-query-compile ...))
94
95 and (require 'python) would then require python's language
96 definition to be available. In the case of python.el, Emacs
97 requires it when building, so that breaks the build. */
88struct Lisp_TS_Query 98struct Lisp_TS_Query
89{ 99{
90 union vectorlike_header header; 100 union vectorlike_header header;
91 /* Pointer to the query object. */ 101 /* Language symbol for the query. */
102 Lisp_Object language;
103 /* Source lisp (sexp or string) query. */
104 Lisp_Object source;
105 /* Pointer to the query object. This can be NULL, meaning this
106 query is not initialized/compiled. We compile the query when
107 it is used the first time (in treesit-query-capture). */
92 TSQuery *query; 108 TSQuery *query;
93 /* Pointer to a cursor. If we are storing the query object, we 109 /* Pointer to a cursor. If we are storing the query object, we
94 might as well store a cursor, too. */ 110 might as well store a cursor, too. */