diff options
| author | Yuan Fu | 2022-09-24 19:35:11 -0700 |
|---|---|---|
| committer | Yuan Fu | 2022-09-24 21:11:30 -0700 |
| commit | eba65824364474bde89bdce5f57a772d74a2c409 (patch) | |
| tree | db18cd4296be31f085e586923a7666c7100af3c9 /src | |
| parent | c957832cbf3e87e5a25f7c2bdb70abd959391d98 (diff) | |
| download | emacs-eba65824364474bde89bdce5f57a772d74a2c409.tar.gz emacs-eba65824364474bde89bdce5f57a772d74a2c409.zip | |
Add the treesit-search functions that supplant the removed ones
The signatures also changed.
treesit-traverse-depth-first -> treesit-search-subtree
treesit-traverse-breadth-first ->
treesit-traverse-forward -> treesit-search-forward
treesit-search-forward -> treesit-search-forward-goto
treesit-search-beginning/end -> treesit-search-forward-goto
-> treesit-induce-sparse-tree
* doc/lispref/parsing.texi (Retrieving Node): Add relevant manual
sections.
* lisp/treesit.el (treesit-search-forward-goto): New function.
* src/treesit.c (ts_traverse_sibling_helper)
(ts_traverse_match_predicate)
(ts_search_dfs)
(ts_search_forward)
(treesit-search-subtree)
(treesit-search-forward)
(ts_build_sparse_tree)
(Ftreesit_induce_sparse_tree): Add functions.
* test/src/treesit-tests.el (treesit-node-supplemental): Add comments.
Diffstat (limited to 'src')
| -rw-r--r-- | src/treesit.c | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/src/treesit.c b/src/treesit.c index 51261c34a26..f3efcbe5964 100644 --- a/src/treesit.c +++ b/src/treesit.c | |||
| @@ -1805,6 +1805,358 @@ query. */) | |||
| 1805 | return Fnreverse (result); | 1805 | return Fnreverse (result); |
| 1806 | } | 1806 | } |
| 1807 | 1807 | ||
| 1808 | /*** Navigation */ | ||
| 1809 | |||
| 1810 | /* Return the next/previous named/unnamed sibling of NODE. FORWARD | ||
| 1811 | controls the direction and NAMED controls the nameness. | ||
| 1812 | */ | ||
| 1813 | static TSNode | ||
| 1814 | ts_traverse_sibling_helper (TSNode node, bool forward, bool named) | ||
| 1815 | { | ||
| 1816 | if (forward) | ||
| 1817 | { | ||
| 1818 | if (named) | ||
| 1819 | return ts_node_next_named_sibling (node); | ||
| 1820 | else | ||
| 1821 | return ts_node_next_sibling (node); | ||
| 1822 | } | ||
| 1823 | else | ||
| 1824 | { | ||
| 1825 | if (named) | ||
| 1826 | return ts_node_prev_named_sibling (node); | ||
| 1827 | else | ||
| 1828 | return ts_node_prev_sibling (node); | ||
| 1829 | } | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | /* Return true if NODE matches PRED. PRED can be a string or a | ||
| 1833 | function. This function doesn't check for PRED's type. */ | ||
| 1834 | static bool | ||
| 1835 | ts_traverse_match_predicate | ||
| 1836 | (TSNode node, Lisp_Object pred, Lisp_Object parser) | ||
| 1837 | { | ||
| 1838 | if (STRINGP (pred)) | ||
| 1839 | { | ||
| 1840 | const char *type = ts_node_type (node); | ||
| 1841 | return (fast_c_string_match_ignore_case | ||
| 1842 | (pred, type, strlen (type)) >= 0); | ||
| 1843 | } | ||
| 1844 | else | ||
| 1845 | { | ||
| 1846 | Lisp_Object lisp_node = make_ts_node (parser, node); | ||
| 1847 | return !NILP (CALLN (Ffuncall, pred, lisp_node)); | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | } | ||
| 1851 | |||
| 1852 | /* Traverse the parse tree starting from ROOT (but ROOT is not | ||
| 1853 | matches against PRED). PRED can be a function (takes a node and | ||
| 1854 | returns nil/non-nil),or a string (treated as regexp matching the | ||
| 1855 | node's type, ignores case, must be all single byte characters). If | ||
| 1856 | the node satisfies PRED , terminate, set ROOT to that node, and | ||
| 1857 | return true. If no node satisfies PRED, return FALSE. PARSER is | ||
| 1858 | the parser of ROOT. | ||
| 1859 | |||
| 1860 | LIMIT is the number of levels we descend in the tree. If NO_LIMIT | ||
| 1861 | is true, LIMIT is ignored. FORWARD controls the direction in which | ||
| 1862 | we traverse the tree, true means forward, false backward. If NAMED | ||
| 1863 | is true, only traverse named nodes, if false, all nodes. If | ||
| 1864 | SKIP_ROOT is true, don't match ROOT. */ | ||
| 1865 | static bool | ||
| 1866 | ts_search_dfs | ||
| 1867 | (TSNode *root, Lisp_Object pred, Lisp_Object parser, | ||
| 1868 | bool named, bool forward, ptrdiff_t limit, bool no_limit, | ||
| 1869 | bool skip_root) | ||
| 1870 | { | ||
| 1871 | /* TSTreeCursor doesn't allow us to move backward, so we can't use | ||
| 1872 | it. We could use limit == -1 to indicate no_limit == true, but | ||
| 1873 | separating them is safer. */ | ||
| 1874 | TSNode node = *root; | ||
| 1875 | |||
| 1876 | if (!skip_root && ts_traverse_match_predicate (node, pred, parser)) | ||
| 1877 | { | ||
| 1878 | *root = node; | ||
| 1879 | return true; | ||
| 1880 | } | ||
| 1881 | |||
| 1882 | if (!no_limit && limit <= 0) | ||
| 1883 | return false; | ||
| 1884 | else | ||
| 1885 | { | ||
| 1886 | int count = named ? | ||
| 1887 | ts_node_named_child_count( node) | ||
| 1888 | : ts_node_child_count (node); | ||
| 1889 | for (int offset=0; offset < count; offset++) | ||
| 1890 | { | ||
| 1891 | uint32_t idx = forward ? offset | ||
| 1892 | : count - offset - 1; | ||
| 1893 | TSNode child = ts_node_child (node, idx); | ||
| 1894 | |||
| 1895 | if (!ts_node_is_null (child) | ||
| 1896 | && ts_search_dfs (&child, pred, parser, named, | ||
| 1897 | forward, limit - 1, no_limit, false)) | ||
| 1898 | { | ||
| 1899 | *root = child; | ||
| 1900 | return true; | ||
| 1901 | } | ||
| 1902 | } | ||
| 1903 | return false; | ||
| 1904 | } | ||
| 1905 | } | ||
| 1906 | |||
| 1907 | /* Go thought the whole tree linearly depth first, starting from | ||
| 1908 | START. PRED, PARSER, NAMED, FORWARD are the same as in | ||
| 1909 | ts_search_subtre. If UP_ONLY is true, never go to children, only | ||
| 1910 | sibling and parents. If SKIP_START is true, don'tt match | ||
| 1911 | START. */ | ||
| 1912 | static bool | ||
| 1913 | ts_search_forward | ||
| 1914 | (TSNode *start, Lisp_Object pred, Lisp_Object parser, | ||
| 1915 | bool named, bool forward, bool up_only, bool skip_start) | ||
| 1916 | { | ||
| 1917 | TSNode node = *start; | ||
| 1918 | |||
| 1919 | if (!up_only && ts_search_dfs | ||
| 1920 | (start, pred, parser, named, forward, 0, true, skip_start)) | ||
| 1921 | return true; | ||
| 1922 | |||
| 1923 | TSNode next = ts_traverse_sibling_helper(node, forward, named); | ||
| 1924 | while (ts_node_is_null (next)) | ||
| 1925 | { | ||
| 1926 | node = ts_node_parent (node); | ||
| 1927 | if (ts_node_is_null (node)) | ||
| 1928 | return false; | ||
| 1929 | |||
| 1930 | if (ts_traverse_match_predicate (node, pred, parser)) | ||
| 1931 | { | ||
| 1932 | *start = node; | ||
| 1933 | return false; | ||
| 1934 | } | ||
| 1935 | next = ts_traverse_sibling_helper(node, forward, named); | ||
| 1936 | } | ||
| 1937 | if (ts_search_forward | ||
| 1938 | (&next, pred, parser, named, forward, up_only, false)) | ||
| 1939 | { | ||
| 1940 | *start = next; | ||
| 1941 | return true; | ||
| 1942 | } | ||
| 1943 | else | ||
| 1944 | return false; | ||
| 1945 | } | ||
| 1946 | |||
| 1947 | DEFUN ("treesit-search-subtree", | ||
| 1948 | Ftreesit_search_subtree, | ||
| 1949 | Streesit_search_subtree, 2, 5, 0, | ||
| 1950 | doc: /* Traverse the parse tree depth-first. | ||
| 1951 | |||
| 1952 | Traverse the subtree of NODE, and match PREDICATE with each node along | ||
| 1953 | the way. PREDICATE is a regexp string that matches against each | ||
| 1954 | node's type, or a function that takes a node and returns nil/non-nil. | ||
| 1955 | |||
| 1956 | By default, only traverse named nodes, if ALL is non-nil, traverse all | ||
| 1957 | nodes. If BACKWARD is non-nil, traverse backwards. If LIMIT is | ||
| 1958 | non-nil, we only traverse that number of levels down in the tree. | ||
| 1959 | |||
| 1960 | Return the first matched node, or nil if none matches. */) | ||
| 1961 | (Lisp_Object node, Lisp_Object predicate, Lisp_Object all, | ||
| 1962 | Lisp_Object backward, Lisp_Object limit) | ||
| 1963 | { | ||
| 1964 | CHECK_TS_NODE (node); | ||
| 1965 | CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), | ||
| 1966 | list3 (Qor, Qstringp, Qfunctionp), predicate); | ||
| 1967 | CHECK_SYMBOL (all); | ||
| 1968 | CHECK_SYMBOL (backward); | ||
| 1969 | |||
| 1970 | ptrdiff_t the_limit = 0; | ||
| 1971 | bool no_limit = false; | ||
| 1972 | if (NILP (limit)) | ||
| 1973 | no_limit = true; | ||
| 1974 | else | ||
| 1975 | { | ||
| 1976 | CHECK_FIXNUM (limit); | ||
| 1977 | the_limit = XFIXNUM (limit); | ||
| 1978 | } | ||
| 1979 | |||
| 1980 | TSNode ts_node = XTS_NODE (node)->node; | ||
| 1981 | Lisp_Object parser = XTS_NODE (node)->parser; | ||
| 1982 | if (ts_search_dfs | ||
| 1983 | (&ts_node, predicate, parser, NILP (all), | ||
| 1984 | NILP (backward), the_limit, no_limit, false)) | ||
| 1985 | { | ||
| 1986 | return make_ts_node (parser, ts_node); | ||
| 1987 | } | ||
| 1988 | else | ||
| 1989 | return Qnil; | ||
| 1990 | } | ||
| 1991 | |||
| 1992 | DEFUN ("treesit-search-forward", | ||
| 1993 | Ftreesit_search_forward, | ||
| 1994 | Streesit_search_forward, 2, 5, 0, | ||
| 1995 | doc: /* Search for node in the parse tree. | ||
| 1996 | |||
| 1997 | Start traversing the tree from node START, and match PREDICATE with | ||
| 1998 | each node along the way (except START). PREDICATE is a regexp string | ||
| 1999 | that matches against each node's type, or a function that takes a node | ||
| 2000 | and returns nil/non-nil. | ||
| 2001 | |||
| 2002 | By default, only search for named nodes, if ALL is non-nil, search for | ||
| 2003 | all nodes. If BACKWARD is non-nil, search backwards. | ||
| 2004 | |||
| 2005 | Return the first matched node, or nil if none matches. | ||
| 2006 | |||
| 2007 | For a tree like the below where START is marked 1, traverse as | ||
| 2008 | numbered: | ||
| 2009 | 16 | ||
| 2010 | | | ||
| 2011 | 3--------4-----------8 | ||
| 2012 | | | | | ||
| 2013 | o--o-+--1 5--+--6 9---+-----12 | ||
| 2014 | | | | | | | | ||
| 2015 | o o 2 7 +-+-+ +--+--+ | ||
| 2016 | | | | | | | ||
| 2017 | 10 11 13 14 15 | ||
| 2018 | |||
| 2019 | If UP is non-nil, only traverse to siblings and parents. In that | ||
| 2020 | case, only 1 3 4 8 16 would be traversed. */) | ||
| 2021 | (Lisp_Object start, Lisp_Object predicate, Lisp_Object all, | ||
| 2022 | Lisp_Object backward, Lisp_Object up) | ||
| 2023 | { | ||
| 2024 | CHECK_TS_NODE (start); | ||
| 2025 | CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), | ||
| 2026 | list3 (Qor, Qstringp, Qfunctionp), predicate); | ||
| 2027 | CHECK_SYMBOL (all); | ||
| 2028 | CHECK_SYMBOL (backward); | ||
| 2029 | CHECK_SYMBOL (up); | ||
| 2030 | |||
| 2031 | TSNode ts_start = XTS_NODE (start)->node; | ||
| 2032 | Lisp_Object parser = XTS_NODE (start)->parser; | ||
| 2033 | if (ts_search_forward | ||
| 2034 | (&ts_start, predicate, parser, NILP (all), | ||
| 2035 | NILP (backward), !NILP (up), true)) | ||
| 2036 | { | ||
| 2037 | return make_ts_node (parser, ts_start); | ||
| 2038 | } | ||
| 2039 | else | ||
| 2040 | return Qnil; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | /* Recursively traverse the tree under CURSOR, and append the result | ||
| 2044 | subtree to PARENT's cdr. See more in `ts_build_sparse_tree'. */ | ||
| 2045 | static void | ||
| 2046 | ts_build_sparse_tree | ||
| 2047 | (TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred, | ||
| 2048 | Lisp_Object process_fn, ptrdiff_t limit, | ||
| 2049 | bool no_limit, Lisp_Object parser) | ||
| 2050 | { | ||
| 2051 | |||
| 2052 | TSNode node = ts_tree_cursor_current_node (cursor); | ||
| 2053 | bool match = ts_traverse_match_predicate (node, pred, parser); | ||
| 2054 | if (match) | ||
| 2055 | { | ||
| 2056 | /* If this node matches pred, add a new node to the parent's | ||
| 2057 | children list. */ | ||
| 2058 | Lisp_Object lisp_node = make_ts_node (parser, node); | ||
| 2059 | if (!NILP (process_fn)) | ||
| 2060 | { | ||
| 2061 | lisp_node = CALLN (Ffuncall, process_fn, lisp_node); | ||
| 2062 | } | ||
| 2063 | Lisp_Object this = Fcons (lisp_node, Qnil); | ||
| 2064 | Fsetcdr (parent, Fcons (this, Fcdr (parent))); | ||
| 2065 | /* Now for children nodes, this is the new parent. */ | ||
| 2066 | parent = this; | ||
| 2067 | } | ||
| 2068 | /* Go through each child. */ | ||
| 2069 | if ((no_limit || limit > 0) | ||
| 2070 | && ts_tree_cursor_goto_first_child (cursor)) | ||
| 2071 | { | ||
| 2072 | do | ||
| 2073 | { | ||
| 2074 | /* Make sure not to use node after the recursive funcall. | ||
| 2075 | Then C compilers should be smart enough not to copy NODE | ||
| 2076 | to stack. */ | ||
| 2077 | ts_build_sparse_tree | ||
| 2078 | (cursor, parent, pred, process_fn, | ||
| 2079 | limit - 1, no_limit, parser); | ||
| 2080 | } | ||
| 2081 | while (ts_tree_cursor_goto_next_sibling (cursor)); | ||
| 2082 | /* Don't forget to come back to this node. */ | ||
| 2083 | ts_tree_cursor_goto_parent (cursor); | ||
| 2084 | } | ||
| 2085 | /* Before we go, reverse children in the sparse tree. */ | ||
| 2086 | if (match) | ||
| 2087 | { | ||
| 2088 | /* When match == true, "parent" is actually the node we added in | ||
| 2089 | this layer (parent = this). */ | ||
| 2090 | Fsetcdr (parent, Fnreverse (Fcdr (parent))); | ||
| 2091 | } | ||
| 2092 | } | ||
| 2093 | |||
| 2094 | DEFUN ("treesit-induce-sparse-tree", | ||
| 2095 | Ftreesit_induce_sparse_tree, | ||
| 2096 | Streesit_induce_sparse_tree, 2, 4, 0, | ||
| 2097 | doc: /* Create a sparse tree of ROOT's subtree. | ||
| 2098 | |||
| 2099 | Basically, take the subtree under ROOT, and comb it so only the nodes | ||
| 2100 | that match PREDICATE are left, like picking out grapes on the vine. | ||
| 2101 | PREDICATE is a regexp string that matches against each node's type. | ||
| 2102 | |||
| 2103 | For a subtree on the left that consist of both numbers and letters, if | ||
| 2104 | PREDICATE is "is letter", the returned tree is the one on the right. | ||
| 2105 | |||
| 2106 | a a a | ||
| 2107 | | | | | ||
| 2108 | +---+---+ +---+---+ +---+---+ | ||
| 2109 | | | | | | | | | | | ||
| 2110 | b 1 2 b | | b c d | ||
| 2111 | | | => | | => | | ||
| 2112 | c +--+ c + e | ||
| 2113 | | | | | | | ||
| 2114 | +--+ d 4 +--+ d | ||
| 2115 | | | | | ||
| 2116 | e 5 e | ||
| 2117 | |||
| 2118 | If PROCESS-FN is non-nil, instead of returning the matched nodes, pass | ||
| 2119 | each node to PROCESS-FN use the return value instead. If non-nil, | ||
| 2120 | LIMIT is the number of levels to go down from ROOT. | ||
| 2121 | |||
| 2122 | Each node in the returned tree looks like (NODE . (CHILD ...)). The | ||
| 2123 | root of this tree might be nil, if ROOT doesn't match PREDICATE. If | ||
| 2124 | no node matches PRED, return nil. | ||
| 2125 | |||
| 2126 | PREDICATE can also be a function that takes a node and returns | ||
| 2127 | nil/non-nil, but it is slower and more memory consuming than | ||
| 2128 | regexp. */) | ||
| 2129 | (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn, | ||
| 2130 | Lisp_Object limit) | ||
| 2131 | { | ||
| 2132 | CHECK_TS_NODE (root); | ||
| 2133 | CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), | ||
| 2134 | list3 (Qor, Qstringp, Qfunctionp), predicate); | ||
| 2135 | |||
| 2136 | if (!NILP (process_fn)) | ||
| 2137 | CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); | ||
| 2138 | ptrdiff_t the_limit = 0; | ||
| 2139 | bool no_limit = false; | ||
| 2140 | if (NILP (limit)) | ||
| 2141 | no_limit = true; | ||
| 2142 | else | ||
| 2143 | { | ||
| 2144 | CHECK_FIXNUM (limit); | ||
| 2145 | the_limit = XFIXNUM (limit); | ||
| 2146 | } | ||
| 2147 | |||
| 2148 | TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node); | ||
| 2149 | Lisp_Object parser = XTS_NODE (root)->parser; | ||
| 2150 | Lisp_Object parent = Fcons (Qnil, Qnil); | ||
| 2151 | ts_build_sparse_tree | ||
| 2152 | (&cursor, parent, predicate, process_fn, | ||
| 2153 | the_limit, no_limit, parser); | ||
| 2154 | if (NILP (Fcdr (parent))) | ||
| 2155 | return Qnil; | ||
| 2156 | else | ||
| 2157 | return parent; | ||
| 2158 | } | ||
| 2159 | |||
| 1808 | /*** Initialization */ | 2160 | /*** Initialization */ |
| 1809 | 2161 | ||
| 1810 | /* Initialize the tree-sitter routines. */ | 2162 | /* Initialize the tree-sitter routines. */ |
| @@ -1835,6 +2187,8 @@ syms_of_treesit (void) | |||
| 1835 | "user-emacs-directory"); | 2187 | "user-emacs-directory"); |
| 1836 | DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); | 2188 | DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); |
| 1837 | 2189 | ||
| 2190 | DEFSYM (Qor, "or"); | ||
| 2191 | |||
| 1838 | define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); | 2192 | define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); |
| 1839 | define_error (Qtreesit_query_error, "Query pattern is malformed", | 2193 | define_error (Qtreesit_query_error, "Query pattern is malformed", |
| 1840 | Qtreesit_error); | 2194 | Qtreesit_error); |
| @@ -1925,4 +2279,8 @@ dynamic libraries, in that order. */); | |||
| 1925 | defsubr (&Streesit_query_expand); | 2279 | defsubr (&Streesit_query_expand); |
| 1926 | defsubr (&Streesit_query_compile); | 2280 | defsubr (&Streesit_query_compile); |
| 1927 | defsubr (&Streesit_query_capture); | 2281 | defsubr (&Streesit_query_capture); |
| 2282 | |||
| 2283 | defsubr (&Streesit_search_subtree); | ||
| 2284 | defsubr (&Streesit_search_forward); | ||
| 2285 | defsubr (&Streesit_induce_sparse_tree); | ||
| 1928 | } | 2286 | } |