diff options
| author | Yuan Fu | 2022-10-05 14:11:33 -0700 |
|---|---|---|
| committer | Yuan Fu | 2022-10-05 14:11:33 -0700 |
| commit | cb183f6467401fb5ed2b7fc98ca75be9d943cbe3 (patch) | |
| tree | ef42ea6ae71e0829d900ffb46d8306fbba962a8e /admin/notes | |
| parent | 1ea503ed4b3a14b3dc0a597cfbfe57d73b871422 (diff) | |
| download | emacs-cb183f6467401fb5ed2b7fc98ca75be9d943cbe3.tar.gz emacs-cb183f6467401fb5ed2b7fc98ca75be9d943cbe3.zip | |
Add tree-sitter admin notes
starter-guide: Guide on writing major mode features.
build-module: Script for building official language definitions.
html-manual: HTML version of the manual for easy access.
* admin/notes/tree-sitter/build-module/README: New file.
* admin/notes/tree-sitter/build-module/batch.sh: New file.
* admin/notes/tree-sitter/build-module/build.sh: New file.
* admin/notes/tree-sitter/starter-guide: New file.
* admin/notes/tree-sitter/html-manual/Accessing-Node.html: New file.
* admin/notes/tree-sitter/html-manual/Language-Definitions.html: New file.
* admin/notes/tree-sitter/html-manual/Multiple-Languages.html: New file.
* admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html:
New file.
* admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html:
New file.
* admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html: New
file.
* admin/notes/tree-sitter/html-manual/Pattern-Matching.html: New file.
* admin/notes/tree-sitter/html-manual/Retrieving-Node.html: New file.
* admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html: New
file.
* admin/notes/tree-sitter/html-manual/Using-Parser.html: New file.
* admin/notes/tree-sitter/html-manual/build-manual.sh: New file.
* admin/notes/tree-sitter/html-manual/manual.css: New file.
Diffstat (limited to 'admin/notes')
16 files changed, 3444 insertions, 0 deletions
diff --git a/admin/notes/tree-sitter/build-module/README b/admin/notes/tree-sitter/build-module/README new file mode 100644 index 00000000000..ee6076c119c --- /dev/null +++ b/admin/notes/tree-sitter/build-module/README | |||
| @@ -0,0 +1,17 @@ | |||
| 1 | To build the language definition for a particular language, run | ||
| 2 | |||
| 3 | ./build.sh <language> | ||
| 4 | |||
| 5 | e.g., | ||
| 6 | |||
| 7 | ./build.sh html | ||
| 8 | |||
| 9 | The dynamic module will be in the dist directory. | ||
| 10 | |||
| 11 | To build all modules at once, run | ||
| 12 | |||
| 13 | ./batch.sh | ||
| 14 | |||
| 15 | This gives you C, JSON, Go, HTML, Javascript, CSS, Python, Typescript, | ||
| 16 | C#, C++, Rust. More can be added to batch.sh unless its directory | ||
| 17 | structure is not standard. \ No newline at end of file | ||
diff --git a/admin/notes/tree-sitter/build-module/batch.sh b/admin/notes/tree-sitter/build-module/batch.sh new file mode 100755 index 00000000000..deed18978a1 --- /dev/null +++ b/admin/notes/tree-sitter/build-module/batch.sh | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | |||
| 3 | languages=( | ||
| 4 | 'c' | ||
| 5 | 'cpp' | ||
| 6 | 'css' | ||
| 7 | 'c-sharp' | ||
| 8 | 'go' | ||
| 9 | 'html' | ||
| 10 | 'javascript' | ||
| 11 | 'json' | ||
| 12 | 'python' | ||
| 13 | 'rust' | ||
| 14 | 'typescript' | ||
| 15 | ) | ||
| 16 | |||
| 17 | for language in "${languages[@]}" | ||
| 18 | do | ||
| 19 | ./build.sh $language | ||
| 20 | done | ||
diff --git a/admin/notes/tree-sitter/build-module/build.sh b/admin/notes/tree-sitter/build-module/build.sh new file mode 100755 index 00000000000..16792d05cbb --- /dev/null +++ b/admin/notes/tree-sitter/build-module/build.sh | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | |||
| 3 | lang=$1 | ||
| 4 | |||
| 5 | if [ $(uname) == "Darwin" ] | ||
| 6 | then | ||
| 7 | soext="dylib" | ||
| 8 | else | ||
| 9 | soext="so" | ||
| 10 | fi | ||
| 11 | |||
| 12 | echo "Building ${lang}" | ||
| 13 | |||
| 14 | # Retrieve sources. | ||
| 15 | git clone "https://github.com/tree-sitter/tree-sitter-${lang}.git" \ | ||
| 16 | --depth 1 --quiet | ||
| 17 | if [ "${lang}" == "typescript" ] | ||
| 18 | then | ||
| 19 | lang="typescript/tsx" | ||
| 20 | fi | ||
| 21 | cp tree-sitter-lang.in "tree-sitter-${lang}/src" | ||
| 22 | cp emacs-module.h "tree-sitter-${lang}/src" | ||
| 23 | cp "tree-sitter-${lang}/grammar.js" "tree-sitter-${lang}/src" | ||
| 24 | cd "tree-sitter-${lang}/src" | ||
| 25 | |||
| 26 | if [ "${lang}" == "typescript/tsx" ] | ||
| 27 | then | ||
| 28 | lang="typescript" | ||
| 29 | fi | ||
| 30 | |||
| 31 | # Build. | ||
| 32 | cc -c -I. parser.c | ||
| 33 | # Compile scanner.c. | ||
| 34 | if test -f scanner.c | ||
| 35 | then | ||
| 36 | cc -fPIC -c -I. scanner.c | ||
| 37 | fi | ||
| 38 | # Compile scanner.cc. | ||
| 39 | if test -f scanner.cc | ||
| 40 | then | ||
| 41 | c++ -fPIC -I. -c scanner.cc | ||
| 42 | fi | ||
| 43 | # Link. | ||
| 44 | if test -f scanner.cc | ||
| 45 | then | ||
| 46 | c++ -fPIC -shared *.o -o "libtree-sitter-${lang}.${soext}" | ||
| 47 | else | ||
| 48 | cc -fPIC -shared *.o -o "libtree-sitter-${lang}.${soext}" | ||
| 49 | fi | ||
| 50 | |||
| 51 | # Copy out. | ||
| 52 | |||
| 53 | if [ "${lang}" == "typescript" ] | ||
| 54 | then | ||
| 55 | cp "libtree-sitter-${lang}.${soext}" .. | ||
| 56 | cd .. | ||
| 57 | fi | ||
| 58 | |||
| 59 | mkdir -p ../../dist | ||
| 60 | cp "libtree-sitter-${lang}.${soext}" ../../dist | ||
| 61 | cd ../../ | ||
| 62 | rm -rf "tree-sitter-${lang}" | ||
diff --git a/admin/notes/tree-sitter/html-manual/Accessing-Node.html b/admin/notes/tree-sitter/html-manual/Accessing-Node.html new file mode 100644 index 00000000000..00ac63b8339 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Accessing-Node.html | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Accessing Node (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Accessing Node (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Accessing Node (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Pattern-Matching.html" rel="next" title="Pattern Matching"> | ||
| 37 | <link href="Retrieving-Node.html" rel="prev" title="Retrieving Node"> | ||
| 38 | <style type="text/css"> | ||
| 39 | <!-- | ||
| 40 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 41 | a.summary-letter {text-decoration: none} | ||
| 42 | blockquote.indentedblock {margin-right: 0em} | ||
| 43 | div.display {margin-left: 3.2em} | ||
| 44 | div.example {margin-left: 3.2em} | ||
| 45 | kbd {font-style: oblique} | ||
| 46 | pre.display {font-family: inherit} | ||
| 47 | pre.format {font-family: inherit} | ||
| 48 | pre.menu-comment {font-family: serif} | ||
| 49 | pre.menu-preformatted {font-family: serif} | ||
| 50 | span.nolinebreak {white-space: nowrap} | ||
| 51 | span.roman {font-family: initial; font-weight: normal} | ||
| 52 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 53 | span:hover a.copiable-anchor {visibility: visible} | ||
| 54 | ul.no-bullet {list-style: none} | ||
| 55 | --> | ||
| 56 | </style> | ||
| 57 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 58 | |||
| 59 | |||
| 60 | </head> | ||
| 61 | |||
| 62 | <body lang="en"> | ||
| 63 | <div class="section" id="Accessing-Node"> | ||
| 64 | <div class="header"> | ||
| 65 | <p> | ||
| 66 | Next: <a href="Pattern-Matching.html" accesskey="n" rel="next">Pattern Matching Tree-sitter Nodes</a>, Previous: <a href="Retrieving-Node.html" accesskey="p" rel="prev">Retrieving Node</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 67 | </div> | ||
| 68 | <hr> | ||
| 69 | <span id="Accessing-Node-Information"></span><h3 class="section">37.4 Accessing Node Information</h3> | ||
| 70 | |||
| 71 | <p>Before going further, make sure you have read the basic conventions | ||
| 72 | about tree-sitter nodes in the previous node. | ||
| 73 | </p> | ||
| 74 | <span id="Basic-information"></span><h3 class="heading">Basic information</h3> | ||
| 75 | |||
| 76 | <p>Every node is associated with a parser, and that parser is associated | ||
| 77 | with a buffer. The following functions let you retrieve them. | ||
| 78 | </p> | ||
| 79 | <dl class="def"> | ||
| 80 | <dt id="index-treesit_002dnode_002dparser"><span class="category">Function: </span><span><strong>treesit-node-parser</strong> <em>node</em><a href='#index-treesit_002dnode_002dparser' class='copiable-anchor'> ¶</a></span></dt> | ||
| 81 | <dd><p>This function returns <var>node</var>’s associated parser. | ||
| 82 | </p></dd></dl> | ||
| 83 | |||
| 84 | <dl class="def"> | ||
| 85 | <dt id="index-treesit_002dnode_002dbuffer"><span class="category">Function: </span><span><strong>treesit-node-buffer</strong> <em>node</em><a href='#index-treesit_002dnode_002dbuffer' class='copiable-anchor'> ¶</a></span></dt> | ||
| 86 | <dd><p>This function returns <var>node</var>’s parser’s associated buffer. | ||
| 87 | </p></dd></dl> | ||
| 88 | |||
| 89 | <dl class="def"> | ||
| 90 | <dt id="index-treesit_002dnode_002dlanguage"><span class="category">Function: </span><span><strong>treesit-node-language</strong> <em>node</em><a href='#index-treesit_002dnode_002dlanguage' class='copiable-anchor'> ¶</a></span></dt> | ||
| 91 | <dd><p>This function returns <var>node</var>’s parser’s associated language. | ||
| 92 | </p></dd></dl> | ||
| 93 | |||
| 94 | <p>Each node represents a piece of text in the buffer. The functions below | ||
| 95 | find relevant information about that text. | ||
| 96 | </p> | ||
| 97 | <dl class="def"> | ||
| 98 | <dt id="index-treesit_002dnode_002dstart"><span class="category">Function: </span><span><strong>treesit-node-start</strong> <em>node</em><a href='#index-treesit_002dnode_002dstart' class='copiable-anchor'> ¶</a></span></dt> | ||
| 99 | <dd><p>Return the start position of <var>node</var>. | ||
| 100 | </p></dd></dl> | ||
| 101 | |||
| 102 | <dl class="def"> | ||
| 103 | <dt id="index-treesit_002dnode_002dend"><span class="category">Function: </span><span><strong>treesit-node-end</strong> <em>node</em><a href='#index-treesit_002dnode_002dend' class='copiable-anchor'> ¶</a></span></dt> | ||
| 104 | <dd><p>Return the end position of <var>node</var>. | ||
| 105 | </p></dd></dl> | ||
| 106 | |||
| 107 | <dl class="def"> | ||
| 108 | <dt id="index-treesit_002dnode_002dtext"><span class="category">Function: </span><span><strong>treesit-node-text</strong> <em>node &optional object</em><a href='#index-treesit_002dnode_002dtext' class='copiable-anchor'> ¶</a></span></dt> | ||
| 109 | <dd><p>Returns the buffer text that <var>node</var> represents. (If <var>node</var> is | ||
| 110 | retrieved from parsing a string, it will be text from that string.) | ||
| 111 | </p></dd></dl> | ||
| 112 | |||
| 113 | <p>Here are some basic checks on tree-sitter nodes. | ||
| 114 | </p> | ||
| 115 | <dl class="def"> | ||
| 116 | <dt id="index-treesit_002dnode_002dp"><span class="category">Function: </span><span><strong>treesit-node-p</strong> <em>object</em><a href='#index-treesit_002dnode_002dp' class='copiable-anchor'> ¶</a></span></dt> | ||
| 117 | <dd><p>Checks if <var>object</var> is a tree-sitter syntax node. | ||
| 118 | </p></dd></dl> | ||
| 119 | |||
| 120 | <dl class="def"> | ||
| 121 | <dt id="index-treesit_002dnode_002deq"><span class="category">Function: </span><span><strong>treesit-node-eq</strong> <em>node1 node2</em><a href='#index-treesit_002dnode_002deq' class='copiable-anchor'> ¶</a></span></dt> | ||
| 122 | <dd><p>Checks if <var>node1</var> and <var>node2</var> are the same node in a syntax | ||
| 123 | tree. | ||
| 124 | </p></dd></dl> | ||
| 125 | |||
| 126 | <span id="Property-information"></span><h3 class="heading">Property information</h3> | ||
| 127 | |||
| 128 | <p>In general, nodes in a concrete syntax tree fall into two categories: | ||
| 129 | <em>named nodes</em> and <em>anonymous nodes</em>. Whether a node is named | ||
| 130 | or anonymous is determined by the language definition | ||
| 131 | (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). | ||
| 132 | </p> | ||
| 133 | <span id="index-tree_002dsitter-missing-node"></span> | ||
| 134 | <p>Apart from being named/anonymous, a node can have other properties. A | ||
| 135 | node can be “missing”: missing nodes are inserted by the parser in | ||
| 136 | order to recover from certain kinds of syntax errors, i.e., something | ||
| 137 | should probably be there according to the grammar, but is not there. | ||
| 138 | </p> | ||
| 139 | <span id="index-tree_002dsitter-extra-node"></span> | ||
| 140 | <p>A node can be “extra”: extra nodes represent things like comments, | ||
| 141 | which can appear anywhere in the text. | ||
| 142 | </p> | ||
| 143 | <span id="index-tree_002dsitter-node-that-has-changes"></span> | ||
| 144 | <p>A node “has changes” if the buffer has changed since the node was | ||
| 145 | retrieved, i.e., the node is outdated. | ||
| 146 | </p> | ||
| 147 | <span id="index-tree_002dsitter-node-that-has-error"></span> | ||
| 148 | <p>A node “has error” if the text it spans contains a syntax error. It | ||
| 149 | can be that the node itself has an error, or that one of its | ||
| 150 | children, grandchildren, etc., has an error. | ||
| 151 | </p> | ||
| 152 | <dl class="def"> | ||
| 153 | <dt id="index-treesit_002dnode_002dcheck"><span class="category">Function: </span><span><strong>treesit-node-check</strong> <em>node property</em><a href='#index-treesit_002dnode_002dcheck' class='copiable-anchor'> ¶</a></span></dt> | ||
| 154 | <dd><p>This function checks if <var>node</var> has <var>property</var>. <var>property</var> | ||
| 155 | can be <code>'named</code>, <code>'missing</code>, <code>'extra</code>, | ||
| 156 | <code>'has-changes</code>, or <code>'has-error</code>. | ||
| 157 | </p></dd></dl> | ||
| 158 | |||
| 159 | |||
| 160 | <dl class="def"> | ||
| 161 | <dt id="index-treesit_002dnode_002dtype"><span class="category">Function: </span><span><strong>treesit-node-type</strong> <em>node</em><a href='#index-treesit_002dnode_002dtype' class='copiable-anchor'> ¶</a></span></dt> | ||
| 162 | <dd><p>Named nodes have “types” (see <a href="Language-Definitions.html#tree_002dsitter-node-type">node type</a>). | ||
| 163 | For example, a named node can be a <code>string_literal</code> node, where | ||
| 164 | <code>string_literal</code> is its type. | ||
| 165 | </p> | ||
| 166 | <p>This function returns <var>node</var>’s type as a string. | ||
| 167 | </p></dd></dl> | ||
| 168 | |||
| 169 | <span id="Information-as-a-child-or-parent"></span><h3 class="heading">Information as a child or parent</h3> | ||
| 170 | |||
| 171 | <dl class="def"> | ||
| 172 | <dt id="index-treesit_002dnode_002dindex"><span class="category">Function: </span><span><strong>treesit-node-index</strong> <em>node &optional named</em><a href='#index-treesit_002dnode_002dindex' class='copiable-anchor'> ¶</a></span></dt> | ||
| 173 | <dd><p>This function returns the index of <var>node</var> as a child node of its | ||
| 174 | parent. If <var>named</var> is non-nil, it only counts named nodes | ||
| 175 | (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). | ||
| 176 | </p></dd></dl> | ||
| 177 | |||
| 178 | <dl class="def"> | ||
| 179 | <dt id="index-treesit_002dnode_002dfield_002dname"><span class="category">Function: </span><span><strong>treesit-node-field-name</strong> <em>node</em><a href='#index-treesit_002dnode_002dfield_002dname' class='copiable-anchor'> ¶</a></span></dt> | ||
| 180 | <dd><p>A child of a parent node could have a field name (see <a href="Language-Definitions.html#tree_002dsitter-node-field-name">field name</a>). This function returns the field name | ||
| 181 | of <var>node</var> as a child of its parent. | ||
| 182 | </p></dd></dl> | ||
| 183 | |||
| 184 | <dl class="def"> | ||
| 185 | <dt id="index-treesit_002dnode_002dfield_002dname_002dfor_002dchild"><span class="category">Function: </span><span><strong>treesit-node-field-name-for-child</strong> <em>node n</em><a href='#index-treesit_002dnode_002dfield_002dname_002dfor_002dchild' class='copiable-anchor'> ¶</a></span></dt> | ||
| 186 | <dd><p>This function returns the field name of the <var>n</var>’th child of | ||
| 187 | <var>node</var>. | ||
| 188 | </p></dd></dl> | ||
| 189 | |||
| 190 | <dl class="def"> | ||
| 191 | <dt id="index-treesit_002dchild_002dcount"><span class="category">Function: </span><span><strong>treesit-child-count</strong> <em>node &optional named</em><a href='#index-treesit_002dchild_002dcount' class='copiable-anchor'> ¶</a></span></dt> | ||
| 192 | <dd><p>This function finds the number of children of <var>node</var>. If | ||
| 193 | <var>named</var> is non-nil, it only counts named children (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). | ||
| 194 | </p></dd></dl> | ||
| 195 | |||
| 196 | </div> | ||
| 197 | <hr> | ||
| 198 | <div class="header"> | ||
| 199 | <p> | ||
| 200 | Next: <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>, Previous: <a href="Retrieving-Node.html">Retrieving Node</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 201 | </div> | ||
| 202 | |||
| 203 | |||
| 204 | |||
| 205 | </body> | ||
| 206 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Language-Definitions.html b/admin/notes/tree-sitter/html-manual/Language-Definitions.html new file mode 100644 index 00000000000..ba3eeb9eeb9 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Language-Definitions.html | |||
| @@ -0,0 +1,326 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Language Definitions (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Language Definitions (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Language Definitions (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Using-Parser.html" rel="next" title="Using Parser"> | ||
| 37 | <style type="text/css"> | ||
| 38 | <!-- | ||
| 39 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 40 | a.summary-letter {text-decoration: none} | ||
| 41 | blockquote.indentedblock {margin-right: 0em} | ||
| 42 | div.display {margin-left: 3.2em} | ||
| 43 | div.example {margin-left: 3.2em} | ||
| 44 | kbd {font-style: oblique} | ||
| 45 | pre.display {font-family: inherit} | ||
| 46 | pre.format {font-family: inherit} | ||
| 47 | pre.menu-comment {font-family: serif} | ||
| 48 | pre.menu-preformatted {font-family: serif} | ||
| 49 | span.nolinebreak {white-space: nowrap} | ||
| 50 | span.roman {font-family: initial; font-weight: normal} | ||
| 51 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 52 | span:hover a.copiable-anchor {visibility: visible} | ||
| 53 | ul.no-bullet {list-style: none} | ||
| 54 | --> | ||
| 55 | </style> | ||
| 56 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 57 | |||
| 58 | |||
| 59 | </head> | ||
| 60 | |||
| 61 | <body lang="en"> | ||
| 62 | <div class="section" id="Language-Definitions"> | ||
| 63 | <div class="header"> | ||
| 64 | <p> | ||
| 65 | Next: <a href="Using-Parser.html" accesskey="n" rel="next">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 66 | </div> | ||
| 67 | <hr> | ||
| 68 | <span id="Tree_002dsitter-Language-Definitions"></span><h3 class="section">37.1 Tree-sitter Language Definitions</h3> | ||
| 69 | |||
| 70 | <span id="Loading-a-language-definition"></span><h3 class="heading">Loading a language definition</h3> | ||
| 71 | |||
| 72 | <p>Tree-sitter relies on language definitions to parse text in that | ||
| 73 | language. In Emacs, a language definition is represented by a symbol. | ||
| 74 | For example, the C language definition is represented as <code>c</code>, and | ||
| 75 | <code>c</code> can be passed to tree-sitter functions as the <var>language</var> | ||
| 76 | argument. | ||
| 77 | </p> | ||
| 78 | <span id="index-treesit_002dextra_002dload_002dpath"></span> | ||
| 79 | <span id="index-treesit_002dload_002dlanguage_002derror"></span> | ||
| 80 | <span id="index-treesit_002dload_002dsuffixes"></span> | ||
| 81 | <p>Tree-sitter language definitions are distributed as dynamic libraries. | ||
| 82 | In order to use a language definition in Emacs, you need to make sure | ||
| 83 | that the dynamic library is installed on the system. Emacs looks for | ||
| 84 | language definitions under load paths in | ||
| 85 | <code>treesit-extra-load-path</code>, <code>user-emacs-directory</code>/tree-sitter, | ||
| 86 | and system default locations for dynamic libraries, in that order. | ||
| 87 | Emacs tries each extension in <code>treesit-load-suffixes</code>. If Emacs | ||
| 88 | cannot find the library or has a problem loading it, Emacs signals | ||
| 89 | <code>treesit-load-language-error</code>. The signal data is a list of | ||
| 90 | specific error messages. | ||
| 91 | </p> | ||
| 92 | <dl class="def"> | ||
| 93 | <dt id="index-treesit_002dlanguage_002davailable_002dp"><span class="category">Function: </span><span><strong>treesit-language-available-p</strong> <em>language</em><a href='#index-treesit_002dlanguage_002davailable_002dp' class='copiable-anchor'> ¶</a></span></dt> | ||
| 94 | <dd><p>This function checks whether the dynamic library for <var>language</var> is | ||
| 95 | present on the system, and returns non-nil if it is. | ||
| 96 | </p></dd></dl> | ||
| 97 | |||
| 98 | <span id="index-treesit_002dload_002dname_002doverride_002dlist"></span> | ||
| 99 | <p>By convention, the dynamic library for <var>language</var> is | ||
| 100 | <code>libtree-sitter-<var>language</var>.<var>ext</var></code>, where <var>ext</var> is the | ||
| 101 | system-specific extension for dynamic libraries. Also by convention, | ||
| 102 | the function provided by that library is named | ||
| 103 | <code>tree_sitter_<var>language</var></code>. If a language definition doesn’t | ||
| 104 | follow this convention, you should add an entry | ||
| 105 | </p> | ||
| 106 | <div class="example"> | ||
| 107 | <pre class="example">(<var>language</var> <var>library-base-name</var> <var>function-name</var>) | ||
| 108 | </pre></div> | ||
| 109 | |||
| 110 | <p>to <code>treesit-load-name-override-list</code>, where | ||
| 111 | <var>library-base-name</var> is the base filename for the dynamic library | ||
| 112 | (conventionally <code>libtree-sitter-<var>language</var></code>), and | ||
| 113 | <var>function-name</var> is the function provided by the library | ||
| 114 | (conventionally <code>tree_sitter_<var>language</var></code>). For example, | ||
| 115 | </p> | ||
| 116 | <div class="example"> | ||
| 117 | <pre class="example">(cool-lang "libtree-sitter-coool" "tree_sitter_cooool") | ||
| 118 | </pre></div> | ||
| 119 | |||
| 120 | <p>for a language too cool to abide by conventions. | ||
| 121 | </p> | ||
| 122 | <dl class="def"> | ||
| 123 | <dt id="index-treesit_002dlanguage_002dversion"><span class="category">Function: </span><span><strong>treesit-language-version</strong> <em>&optional min-compatible</em><a href='#index-treesit_002dlanguage_002dversion' class='copiable-anchor'> ¶</a></span></dt> | ||
| 124 | <dd><p>The tree-sitter library has a <em>language version</em>; a language | ||
| 125 | definition’s version needs to match this version to be compatible. | ||
| 126 | </p> | ||
| 127 | <p>This function returns the tree-sitter library’s language version. If | ||
| 128 | <var>min-compatible</var> is non-nil, it returns the minimal compatible | ||
| 129 | version. | ||
| 130 | </p></dd></dl> | ||
| 131 | |||
| 132 | <span id="Concrete-syntax-tree"></span><h3 class="heading">Concrete syntax tree</h3> | ||
| 133 | |||
| 134 | <p>A syntax tree is what a parser generates. In a syntax tree, each node | ||
| 135 | represents a piece of text, and nodes are connected to each other by | ||
| 136 | parent-child relationships. For example, if the source text is | ||
| 137 | </p> | ||
| 138 | <div class="example"> | ||
| 139 | <pre class="example">1 + 2 | ||
| 140 | </pre></div> | ||
| 141 | |||
| 142 | <p>its syntax tree could be | ||
| 143 | </p> | ||
| 144 | <div class="example"> | ||
| 145 | <pre class="example"> +--------------+ | ||
| 146 | | root "1 + 2" | | ||
| 147 | +--------------+ | ||
| 148 | | | ||
| 149 | +--------------------------------+ | ||
| 150 | | expression "1 + 2" | | ||
| 151 | +--------------------------------+ | ||
| 152 | | | | | ||
| 153 | +------------+ +--------------+ +------------+ | ||
| 154 | | number "1" | | operator "+" | | number "2" | | ||
| 155 | +------------+ +--------------+ +------------+ | ||
| 156 | </pre></div> | ||
| 157 | |||
| 158 | <p>We can also represent it as an s-expression: | ||
| 159 | </p> | ||
| 160 | <div class="example"> | ||
| 161 | <pre class="example">(root (expression (number) (operator) (number))) | ||
| 162 | </pre></div> | ||
| 163 | |||
| 164 | <span id="Node-types"></span><h4 class="subheading">Node types</h4> | ||
| 165 | |||
| 166 | <span id="index-tree_002dsitter-node-type"></span> | ||
| 167 | <span id="tree_002dsitter-node-type"></span><span id="index-tree_002dsitter-named-node"></span> | ||
| 168 | <span id="tree_002dsitter-named-node"></span><span id="index-tree_002dsitter-anonymous-node"></span> | ||
| 169 | <p>Names like <code>root</code>, <code>expression</code>, <code>number</code>, | ||
| 170 | <code>operator</code> are nodes’ <em>type</em>. However, not all nodes in a | ||
| 171 | syntax tree have a type. Nodes that don’t are <em>anonymous nodes</em>, | ||
| 172 | and nodes with a type are <em>named nodes</em>. Anonymous nodes are | ||
| 173 | tokens with fixed spellings, including punctuation characters like | ||
| 174 | bracket ‘<samp>]</samp>’, and keywords like <code>return</code>. | ||
| 175 | </p> | ||
| 176 | <span id="Field-names"></span><h4 class="subheading">Field names</h4> | ||
| 177 | |||
| 178 | <span id="index-tree_002dsitter-node-field-name"></span> | ||
| 179 | <span id="tree_002dsitter-node-field-name"></span><p>To make the syntax tree easier to | ||
| 180 | analyze, many language definitions assign <em>field names</em> to child | ||
| 181 | nodes. For example, a <code>function_definition</code> node could have a | ||
| 182 | <code>declarator</code> and a <code>body</code>: | ||
| 183 | </p> | ||
| 184 | <div class="example"> | ||
| 185 | <pre class="example">(function_definition | ||
| 186 | declarator: (declaration) | ||
| 187 | body: (compound_statement)) | ||
| 188 | </pre></div> | ||
| 189 | |||
| 190 | <dl class="def"> | ||
| 191 | <dt id="index-treesit_002dinspect_002dmode"><span class="category">Command: </span><span><strong>treesit-inspect-mode</strong><a href='#index-treesit_002dinspect_002dmode' class='copiable-anchor'> ¶</a></span></dt> | ||
| 192 | <dd><p>This minor mode displays the node that <em>starts</em> at point in | ||
| 193 | the mode-line. The mode-line will display | ||
| 194 | </p> | ||
| 195 | <div class="example"> | ||
| 196 | <pre class="example"><var>parent</var> <var>field-name</var>: (<var>child</var> (<var>grand-child</var> (...))) | ||
| 197 | </pre></div> | ||
| 198 | |||
| 199 | <p><var>child</var>, <var>grand-child</var>, and <var>grand-grand-child</var>, etc., are | ||
| 200 | nodes that have their beginning at point. And <var>parent</var> is the | ||
| 201 | parent of <var>child</var>. | ||
| 202 | </p> | ||
| 203 | <p>If there is no node that starts at point, i.e., point is in the middle | ||
| 204 | of a node, then the mode-line only displays the smallest node that | ||
| 205 | spans point, and its immediate parent. | ||
| 206 | </p> | ||
| 207 | <p>This minor mode doesn’t create parsers on its own. It simply uses the | ||
| 208 | first parser in <code>(treesit-parser-list)</code> (see <a href="Using-Parser.html">Using Tree-sitter Parser</a>). | ||
| 209 | </p></dd></dl> | ||
| 210 | |||
| 211 | <span id="Reading-the-grammar-definition"></span><h3 class="heading">Reading the grammar definition</h3> | ||
| 212 | |||
| 213 | <p>Authors of language definitions define the <em>grammar</em> of a | ||
| 214 | language, and this grammar determines how a parser constructs a | ||
| 215 | concrete syntax tree out of the text. In order to use the syntax | ||
| 216 | tree effectively, we need to read the <em>grammar file</em>. | ||
| 217 | </p> | ||
| 218 | <p>The grammar file is usually <code>grammar.js</code> in a language | ||
| 219 | definition’s project repository. The link to a language definition’s | ||
| 220 | home page can be found in tree-sitter’s homepage | ||
| 221 | (<a href="https://tree-sitter.github.io/tree-sitter">https://tree-sitter.github.io/tree-sitter</a>). | ||
| 222 | </p> | ||
| 223 | <p>The grammar is written in JavaScript syntax. For example, the rule | ||
| 224 | matching a <code>function_definition</code> node looks like | ||
| 225 | </p> | ||
| 226 | <div class="example"> | ||
| 227 | <pre class="example">function_definition: $ => seq( | ||
| 228 | $.declaration_specifiers, | ||
| 229 | field('declarator', $.declaration), | ||
| 230 | field('body', $.compound_statement) | ||
| 231 | ) | ||
| 232 | </pre></div> | ||
| 233 | |||
| 234 | <p>The rule is represented by a function that takes a single argument | ||
| 235 | <var>$</var>, representing the whole grammar. The function itself is | ||
| 236 | constructed by other functions: the <code>seq</code> function puts together a | ||
| 237 | sequence of children; the <code>field</code> function annotates a child with | ||
| 238 | a field name. If we write the above definition in BNF syntax, it | ||
| 239 | would look like | ||
| 240 | </p> | ||
| 241 | <div class="example"> | ||
| 242 | <pre class="example">function_definition := | ||
| 243 | <declaration_specifiers> <declaration> <compound_statement> | ||
| 244 | </pre></div> | ||
| 245 | |||
| 246 | <p>and the node returned by the parser would look like | ||
| 247 | </p> | ||
| 248 | <div class="example"> | ||
| 249 | <pre class="example">(function_definition | ||
| 250 | (declaration_specifiers) | ||
| 251 | declarator: (declaration) | ||
| 252 | body: (compound_statement)) | ||
| 253 | </pre></div> | ||
| 254 | |||
| 255 | <p>Below is a list of functions that one will see in a grammar | ||
| 256 | definition. Each function takes other rules as arguments and returns | ||
| 257 | a new rule. | ||
| 258 | </p> | ||
| 259 | <ul> | ||
| 260 | <li> <code>seq(rule1, rule2, ...)</code> matches each rule one after another. | ||
| 261 | |||
| 262 | </li><li> <code>choice(rule1, rule2, ...)</code> matches one of the rules in its | ||
| 263 | arguments. | ||
| 264 | |||
| 265 | </li><li> <code>repeat(rule)</code> matches <var>rule</var> for <em>zero or more</em> times. | ||
| 266 | This is like the ‘<samp>*</samp>’ operator in regular expressions. | ||
| 267 | |||
| 268 | </li><li> <code>repeat1(rule)</code> matches <var>rule</var> for <em>one or more</em> times. | ||
| 269 | This is like the ‘<samp>+</samp>’ operator in regular expressions. | ||
| 270 | |||
| 271 | </li><li> <code>optional(rule)</code> matches <var>rule</var> for <em>zero or one</em> time. | ||
| 272 | This is like the ‘<samp>?</samp>’ operator in regular expressions. | ||
| 273 | |||
| 274 | </li><li> <code>field(name, rule)</code> assigns field name <var>name</var> to the child | ||
| 275 | node matched by <var>rule</var>. | ||
| 276 | |||
| 277 | </li><li> <code>alias(rule, alias)</code> makes nodes matched by <var>rule</var> appear as | ||
| 278 | <var>alias</var> in the syntax tree generated by the parser. For example, | ||
| 279 | |||
| 280 | <div class="example"> | ||
| 281 | <pre class="example">alias(preprocessor_call_exp, call_expression) | ||
| 282 | </pre></div> | ||
| 283 | |||
| 284 | <p>makes any node matched by <code>preprocessor_call_exp</code> appear as | ||
| 285 | <code>call_expression</code>. | ||
| 286 | </p></li></ul> | ||
| 287 | |||
| 288 | <p>Below are grammar functions less interesting for a reader of a | ||
| 289 | language definition. | ||
| 290 | </p> | ||
| 291 | <ul> | ||
| 292 | <li> <code>token(rule)</code> marks <var>rule</var> to produce a single leaf node. | ||
| 293 | That is, instead of generating a parent node with individual child | ||
| 294 | nodes under it, everything is combined into a single leaf node. | ||
| 295 | |||
| 296 | </li><li> Normally, grammar rules ignore preceding whitespace; | ||
| 297 | <code>token.immediate(rule)</code> changes <var>rule</var> to match only when | ||
| 298 | there is no preceding whitespace. | ||
| 299 | |||
| 300 | </li><li> <code>prec(n, rule)</code> gives <var>rule</var> a level <var>n</var> precedence. | ||
| 301 | |||
| 302 | </li><li> <code>prec.left([n,] rule)</code> marks <var>rule</var> as left-associative, | ||
| 303 | optionally with level <var>n</var>. | ||
| 304 | |||
| 305 | </li><li> <code>prec.right([n,] rule)</code> marks <var>rule</var> as right-associative, | ||
| 306 | optionally with level <var>n</var>. | ||
| 307 | |||
| 308 | </li><li> <code>prec.dynamic(n, rule)</code> is like <code>prec</code>, but the precedence | ||
| 309 | is applied at runtime instead. | ||
| 310 | </li></ul> | ||
| 311 | |||
| 312 | <p>The tree-sitter project talks about writing a grammar in more detail: | ||
| 313 | <a href="https://tree-sitter.github.io/tree-sitter/creating-parsers">https://tree-sitter.github.io/tree-sitter/creating-parsers</a>. | ||
| 314 | Read especially “The Grammar DSL” section. | ||
| 315 | </p> | ||
| 316 | </div> | ||
| 317 | <hr> | ||
| 318 | <div class="header"> | ||
| 319 | <p> | ||
| 320 | Next: <a href="Using-Parser.html">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 321 | </div> | ||
| 322 | |||
| 323 | |||
| 324 | |||
| 325 | </body> | ||
| 326 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html new file mode 100644 index 00000000000..1ee2df7f442 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html | |||
| @@ -0,0 +1,255 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Multiple Languages (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Multiple Languages (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Multiple Languages (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Tree_002dsitter-C-API.html" rel="next" title="Tree-sitter C API"> | ||
| 37 | <link href="Pattern-Matching.html" rel="prev" title="Pattern Matching"> | ||
| 38 | <style type="text/css"> | ||
| 39 | <!-- | ||
| 40 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 41 | a.summary-letter {text-decoration: none} | ||
| 42 | blockquote.indentedblock {margin-right: 0em} | ||
| 43 | div.display {margin-left: 3.2em} | ||
| 44 | div.example {margin-left: 3.2em} | ||
| 45 | kbd {font-style: oblique} | ||
| 46 | pre.display {font-family: inherit} | ||
| 47 | pre.format {font-family: inherit} | ||
| 48 | pre.menu-comment {font-family: serif} | ||
| 49 | pre.menu-preformatted {font-family: serif} | ||
| 50 | span.nolinebreak {white-space: nowrap} | ||
| 51 | span.roman {font-family: initial; font-weight: normal} | ||
| 52 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 53 | span:hover a.copiable-anchor {visibility: visible} | ||
| 54 | ul.no-bullet {list-style: none} | ||
| 55 | --> | ||
| 56 | </style> | ||
| 57 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 58 | |||
| 59 | |||
| 60 | </head> | ||
| 61 | |||
| 62 | <body lang="en"> | ||
| 63 | <div class="section" id="Multiple-Languages"> | ||
| 64 | <div class="header"> | ||
| 65 | <p> | ||
| 66 | Next: <a href="Tree_002dsitter-C-API.html" accesskey="n" rel="next">Tree-sitter C API Correspondence</a>, Previous: <a href="Pattern-Matching.html" accesskey="p" rel="prev">Pattern Matching Tree-sitter Nodes</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 67 | </div> | ||
| 68 | <hr> | ||
| 69 | <span id="Parsing-Text-in-Multiple-Languages"></span><h3 class="section">37.6 Parsing Text in Multiple Languages</h3> | ||
| 70 | |||
| 71 | <p>Sometimes, the source of a programming language could contain sources | ||
| 72 | of other languages; HTML + CSS + JavaScript is one example. In that | ||
| 73 | case, we need to assign individual parsers to text segments written in | ||
| 74 | different languages. Traditionally this is achieved by using | ||
| 75 | narrowing. While tree-sitter works with narrowing (see <a href="Using-Parser.html#tree_002dsitter-narrowing">narrowing</a>), the recommended way is to set ranges in which | ||
| 76 | a parser will operate. | ||
| 77 | </p> | ||
| 78 | <dl class="def"> | ||
| 79 | <dt id="index-treesit_002dparser_002dset_002dincluded_002dranges"><span class="category">Function: </span><span><strong>treesit-parser-set-included-ranges</strong> <em>parser ranges</em><a href='#index-treesit_002dparser_002dset_002dincluded_002dranges' class='copiable-anchor'> ¶</a></span></dt> | ||
| 80 | <dd><p>This function sets the range of <var>parser</var> to <var>ranges</var>. Then | ||
| 81 | <var>parser</var> will only read the text covered in each range. The | ||
| 82 | argument <var>ranges</var> is a list of cons cells <code>(<var>beg</var> | ||
| 83 | . <var>end</var>)</code>. | ||
| 84 | </p> | ||
| 85 | <p>Each range in <var>ranges</var> must come in order and not overlap. That | ||
| 86 | is, in pseudo code: | ||
| 87 | </p> | ||
| 88 | <div class="example"> | ||
| 89 | <pre class="example">(cl-loop for idx from 1 to (1- (length ranges)) | ||
| 90 | for prev = (nth (1- idx) ranges) | ||
| 91 | for next = (nth idx ranges) | ||
| 92 | should (<= (car prev) (cdr prev) | ||
| 93 | (car next) (cdr next))) | ||
| 94 | </pre></div> | ||
| 95 | |||
| 96 | <span id="index-treesit_002drange_002dinvalid"></span> | ||
| 97 | <p>If <var>ranges</var> violates this constraint, or something else went | ||
| 98 | wrong, this function signals a <code>treesit-range-invalid</code> error. The | ||
| 99 | signal data contains a specific error message and the ranges we are | ||
| 100 | trying to set. | ||
| 101 | </p> | ||
| 102 | <p>This function can also be used for disabling ranges. If <var>ranges</var> | ||
| 103 | is nil, the parser is set to parse the whole buffer. | ||
| 104 | </p> | ||
| 105 | <p>Example: | ||
| 106 | </p> | ||
| 107 | <div class="example"> | ||
| 108 | <pre class="example">(treesit-parser-set-included-ranges | ||
| 109 | parser '((1 . 9) (16 . 24) (24 . 25))) | ||
| 110 | </pre></div> | ||
| 111 | </dd></dl> | ||
| 112 | |||
| 113 | <dl class="def"> | ||
| 114 | <dt id="index-treesit_002dparser_002dincluded_002dranges"><span class="category">Function: </span><span><strong>treesit-parser-included-ranges</strong> <em>parser</em><a href='#index-treesit_002dparser_002dincluded_002dranges' class='copiable-anchor'> ¶</a></span></dt> | ||
| 115 | <dd><p>This function returns the ranges set for <var>parser</var>. The return | ||
| 116 | value is the same as the <var>ranges</var> argument of | ||
| 117 | <code>treesit-parser-set-included-ranges</code>: a list of cons | ||
| 118 | <code>(<var>beg</var> . <var>end</var>)</code>. And if <var>parser</var> doesn’t have any | ||
| 119 | ranges, the return value is nil. | ||
| 120 | </p> | ||
| 121 | <div class="example"> | ||
| 122 | <pre class="example">(treesit-parser-included-ranges parser) | ||
| 123 | ⇒ ((1 . 9) (16 . 24) (24 . 25)) | ||
| 124 | </pre></div> | ||
| 125 | </dd></dl> | ||
| 126 | |||
| 127 | <dl class="def"> | ||
| 128 | <dt id="index-treesit_002dset_002dranges"><span class="category">Function: </span><span><strong>treesit-set-ranges</strong> <em>parser-or-lang ranges</em><a href='#index-treesit_002dset_002dranges' class='copiable-anchor'> ¶</a></span></dt> | ||
| 129 | <dd><p>Like <code>treesit-parser-set-included-ranges</code>, this function sets | ||
| 130 | the ranges of <var>parser-or-lang</var> to <var>ranges</var>. Conveniently, | ||
| 131 | <var>parser-or-lang</var> could be either a parser or a language. If it is | ||
| 132 | a language, this function looks for the first parser in | ||
| 133 | <code>(treesit-parser-list)</code> for that language in the current buffer, | ||
| 134 | and sets the ranges for it. | ||
| 135 | </p></dd></dl> | ||
| 136 | |||
| 137 | <dl class="def"> | ||
| 138 | <dt id="index-treesit_002dget_002dranges"><span class="category">Function: </span><span><strong>treesit-get-ranges</strong> <em>parser-or-lang</em><a href='#index-treesit_002dget_002dranges' class='copiable-anchor'> ¶</a></span></dt> | ||
| 139 | <dd><p>This function returns the ranges of <var>parser-or-lang</var>, like | ||
| 140 | <code>treesit-parser-included-ranges</code>. And like | ||
| 141 | <code>treesit-set-ranges</code>, <var>parser-or-lang</var> can be a parser or | ||
| 142 | a language symbol. | ||
| 143 | </p></dd></dl> | ||
| 144 | |||
| 145 | <dl class="def"> | ||
| 146 | <dt id="index-treesit_002dquery_002drange"><span class="category">Function: </span><span><strong>treesit-query-range</strong> <em>source query &optional beg end</em><a href='#index-treesit_002dquery_002drange' class='copiable-anchor'> ¶</a></span></dt> | ||
| 147 | <dd><p>This function matches <var>source</var> with <var>query</var> and returns the | ||
| 148 | ranges of captured nodes. The return value has the same shape as that of | ||
| 149 | other functions: a list of <code>(<var>beg</var> . <var>end</var>)</code>. | ||
| 150 | </p> | ||
| 151 | <p>For convenience, <var>source</var> can be a language symbol, a parser, or a | ||
| 152 | node. If a language symbol, this function matches in the root node of | ||
| 153 | the first parser using that language; if a parser, this function | ||
| 154 | matches in the root node of that parser; if a node, this function | ||
| 155 | matches in that node. | ||
| 156 | </p> | ||
| 157 | <p>Parameter <var>query</var> is the query used to capture nodes | ||
| 158 | (see <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>). The capture names don’t matter. Parameters | ||
| 159 | <var>beg</var> and <var>end</var>, if both non-nil, limit the range in which | ||
| 160 | this function queries. | ||
| 161 | </p> | ||
| 162 | <p>Like other query functions, this function signals a | ||
| 163 | <code>treesit-query-error</code> error if <var>query</var> is malformed. | ||
| 164 | </p></dd></dl> | ||
| 165 | |||
| 166 | <dl class="def"> | ||
| 167 | <dt id="index-treesit_002dlanguage_002dat"><span class="category">Function: </span><span><strong>treesit-language-at</strong> <em>point</em><a href='#index-treesit_002dlanguage_002dat' class='copiable-anchor'> ¶</a></span></dt> | ||
| 168 | <dd><p>This function tries to figure out which language is responsible for | ||
| 169 | the text at <var>point</var>. It goes over each parser in | ||
| 170 | <code>(treesit-parser-list)</code> and sees if that parser’s range covers | ||
| 171 | <var>point</var>. | ||
| 172 | </p></dd></dl> | ||
| 173 | |||
| 174 | <dl class="def"> | ||
| 175 | <dt id="index-treesit_002drange_002dfunctions"><span class="category">Variable: </span><span><strong>treesit-range-functions</strong><a href='#index-treesit_002drange_002dfunctions' class='copiable-anchor'> ¶</a></span></dt> | ||
| 176 | <dd><p>A list of range functions. Font-locking and indenting code uses | ||
| 177 | functions in this list to set correct ranges for a language parser | ||
| 178 | before using it. | ||
| 179 | </p> | ||
| 180 | <p>The signature of each function should be | ||
| 181 | </p> | ||
| 182 | <div class="example"> | ||
| 183 | <pre class="example">(<var>start</var> <var>end</var> &rest <var>_</var>) | ||
| 184 | </pre></div> | ||
| 185 | |||
| 186 | <p>where <var>start</var> and <var>end</var> mark the region that is about to be | ||
| 187 | used. A range function only needs to (but is not limited to) update | ||
| 188 | ranges in that region. | ||
| 189 | </p> | ||
| 190 | <p>Each function in the list is called in-order. | ||
| 191 | </p></dd></dl> | ||
| 192 | |||
| 193 | <dl class="def"> | ||
| 194 | <dt id="index-treesit_002dupdate_002dranges"><span class="category">Function: </span><span><strong>treesit-update-ranges</strong> <em>&optional start end</em><a href='#index-treesit_002dupdate_002dranges' class='copiable-anchor'> ¶</a></span></dt> | ||
| 195 | <dd><p>This function is used by font-lock and indent to update ranges before | ||
| 196 | using any parser. Each range function in | ||
| 197 | <var>treesit-range-functions</var> is called in-order. Arguments | ||
| 198 | <var>start</var> and <var>end</var> are passed to each range function. | ||
| 199 | </p></dd></dl> | ||
| 200 | |||
| 201 | <span id="An-example"></span><h3 class="heading">An example</h3> | ||
| 202 | |||
| 203 | <p>Normally, in a set of languages that can be mixed together, there is a | ||
| 204 | major language and several embedded languages. We first parse the | ||
| 205 | whole document with the major language’s parser, set ranges for the | ||
| 206 | embedded languages, then parse the embedded languages. | ||
| 207 | </p> | ||
| 208 | <p>Suppose we want to parse a very simple document that mixes HTML, CSS | ||
| 209 | and JavaScript: | ||
| 210 | </p> | ||
| 211 | <div class="example"> | ||
| 212 | <pre class="example"><html> | ||
| 213 | <script>1 + 2</script> | ||
| 214 | <style>body { color: "blue"; }</style> | ||
| 215 | </html> | ||
| 216 | </pre></div> | ||
| 217 | |||
| 218 | <p>We first parse with HTML, then set ranges for CSS and JavaScript: | ||
| 219 | </p> | ||
| 220 | <div class="example"> | ||
| 221 | <pre class="example">;; Create parsers. | ||
| 222 | (setq html (treesit-get-parser-create 'html)) | ||
| 223 | (setq css (treesit-get-parser-create 'css)) | ||
| 224 | (setq js (treesit-get-parser-create 'javascript)) | ||
| 225 | |||
| 226 | ;; Set CSS ranges. | ||
| 227 | (setq css-range | ||
| 228 | (treesit-query-range | ||
| 229 | 'html | ||
| 230 | "(style_element (raw_text) @capture)")) | ||
| 231 | (treesit-parser-set-included-ranges css css-range) | ||
| 232 | |||
| 233 | ;; Set JavaScript ranges. | ||
| 234 | (setq js-range | ||
| 235 | (treesit-query-range | ||
| 236 | 'html | ||
| 237 | "(script_element (raw_text) @capture)")) | ||
| 238 | (treesit-parser-set-included-ranges js js-range) | ||
| 239 | </pre></div> | ||
| 240 | |||
| 241 | <p>We use a query pattern <code>(style_element (raw_text) @capture)</code> to | ||
| 242 | find CSS nodes in the HTML parse tree. For how to write query | ||
| 243 | patterns, see <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>. | ||
| 244 | </p> | ||
| 245 | </div> | ||
| 246 | <hr> | ||
| 247 | <div class="header"> | ||
| 248 | <p> | ||
| 249 | Next: <a href="Tree_002dsitter-C-API.html">Tree-sitter C API Correspondence</a>, Previous: <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 250 | </div> | ||
| 251 | |||
| 252 | |||
| 253 | |||
| 254 | </body> | ||
| 255 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html new file mode 100644 index 00000000000..ec89b7749c8 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html | |||
| @@ -0,0 +1,160 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Parser-based Font Lock (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Parser-based Font Lock (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Parser-based Font Lock (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Font-Lock-Mode.html" rel="up" title="Font Lock Mode"> | ||
| 36 | <link href="Multiline-Font-Lock.html" rel="prev" title="Multiline Font Lock"> | ||
| 37 | <style type="text/css"> | ||
| 38 | <!-- | ||
| 39 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 40 | a.summary-letter {text-decoration: none} | ||
| 41 | blockquote.indentedblock {margin-right: 0em} | ||
| 42 | div.display {margin-left: 3.2em} | ||
| 43 | div.example {margin-left: 3.2em} | ||
| 44 | kbd {font-style: oblique} | ||
| 45 | pre.display {font-family: inherit} | ||
| 46 | pre.format {font-family: inherit} | ||
| 47 | pre.menu-comment {font-family: serif} | ||
| 48 | pre.menu-preformatted {font-family: serif} | ||
| 49 | span.nolinebreak {white-space: nowrap} | ||
| 50 | span.roman {font-family: initial; font-weight: normal} | ||
| 51 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 52 | span:hover a.copiable-anchor {visibility: visible} | ||
| 53 | ul.no-bullet {list-style: none} | ||
| 54 | --> | ||
| 55 | </style> | ||
| 56 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 57 | |||
| 58 | |||
| 59 | </head> | ||
| 60 | |||
| 61 | <body lang="en"> | ||
| 62 | <div class="subsection" id="Parser_002dbased-Font-Lock"> | ||
| 63 | <div class="header"> | ||
| 64 | <p> | ||
| 65 | Previous: <a href="Multiline-Font-Lock.html" accesskey="p" rel="prev">Multiline Font Lock Constructs</a>, Up: <a href="Font-Lock-Mode.html" accesskey="u" rel="up">Font Lock Mode</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 66 | </div> | ||
| 67 | <hr> | ||
| 68 | <span id="Parser_002dbased-Font-Lock-1"></span><h4 class="subsection">24.6.10 Parser-based Font Lock</h4> | ||
| 69 | |||
| 70 | |||
| 71 | <p>Besides simple syntactic font lock and regexp-based font lock, Emacs | ||
| 72 | also provides complete syntactic font lock with the help of a parser, | ||
| 73 | currently provided by the tree-sitter library (see <a href="Parsing-Program-Source.html">Parsing Program Source</a>). | ||
| 74 | </p> | ||
| 75 | <dl class="def"> | ||
| 76 | <dt id="index-treesit_002dfont_002dlock_002denable"><span class="category">Function: </span><span><strong>treesit-font-lock-enable</strong><a href='#index-treesit_002dfont_002dlock_002denable' class='copiable-anchor'> ¶</a></span></dt> | ||
| 77 | <dd><p>This function enables parser-based font lock in the current buffer. | ||
| 78 | </p></dd></dl> | ||
| 79 | |||
| 80 | <p>Parser-based font lock and other font lock mechanisms are not mutually | ||
| 81 | exclusive. By default, if enabled, parser-based font lock runs first, | ||
| 82 | then the simple syntactic font lock (if enabled), then regexp-based | ||
| 83 | font lock. | ||
| 84 | </p> | ||
| 85 | <p>Although parser-based font lock doesn’t share the same customization | ||
| 86 | variables with regexp-based font lock, parser-based font lock uses | ||
| 87 | similar customization schemes. The tree-sitter counterpart of | ||
| 88 | <var>font-lock-keywords</var> is <var>treesit-font-lock-settings</var>. | ||
| 89 | </p> | ||
| 90 | <dl class="def"> | ||
| 91 | <dt id="index-treesit_002dfont_002dlock_002drules"><span class="category">Function: </span><span><strong>treesit-font-lock-rules</strong> <em>:keyword value query...</em><a href='#index-treesit_002dfont_002dlock_002drules' class='copiable-anchor'> ¶</a></span></dt> | ||
| 92 | <dd><p>This function is used to set <var>treesit-font-lock-settings</var>. It | ||
| 93 | takes care of compiling queries and other post-processing and outputs | ||
| 94 | a value that <var>treesit-font-lock-settings</var> accepts. An example: | ||
| 95 | </p> | ||
| 96 | <div class="example"> | ||
| 97 | <pre class="example">(treesit-font-lock-rules | ||
| 98 | :language 'javascript | ||
| 99 | :override t | ||
| 100 | '((true) @font-lock-constant-face | ||
| 101 | (false) @font-lock-constant-face) | ||
| 102 | :language 'html | ||
| 103 | "(script_element) @font-lock-builtin-face") | ||
| 104 | </pre></div> | ||
| 105 | |||
| 106 | <p>This function takes a list of text or s-exp queries. Before each | ||
| 107 | query, there are <var>:keyword</var> and <var>value</var> pairs that configure | ||
| 108 | that query. The <code>:language</code> keyword sets the query’s language and | ||
| 109 | every query must specify the language. Other keywords are optional: | ||
| 110 | </p> | ||
| 111 | <table> | ||
| 112 | <thead><tr><th width="15%">Keyword</th><th width="15%">Value</th><th width="60%">Description</th></tr></thead> | ||
| 113 | <tr><td width="15%"><code>:override</code></td><td width="15%">nil</td><td width="60%">If the region already has a face, discard the new face</td></tr> | ||
| 114 | <tr><td width="15%"></td><td width="15%">t</td><td width="60%">Always apply the new face</td></tr> | ||
| 115 | <tr><td width="15%"></td><td width="15%"><code>append</code></td><td width="60%">Append the new face to existing ones</td></tr> | ||
| 116 | <tr><td width="15%"></td><td width="15%"><code>prepend</code></td><td width="60%">Prepend the new face to existing ones</td></tr> | ||
| 117 | <tr><td width="15%"></td><td width="15%"><code>keep</code></td><td width="60%">Fill-in regions without an existing face</td></tr> | ||
| 118 | </table> | ||
| 119 | |||
| 120 | <p>Capture names in <var>query</var> should be face names like | ||
| 121 | <code>font-lock-keyword-face</code>. The captured node will be fontified | ||
| 122 | with that face. Capture names can also be function names, in which | ||
| 123 | case the function is called with (<var>start</var> <var>end</var> <var>node</var>), | ||
| 124 | where <var>start</var> and <var>end</var> are the start and end position of the | ||
| 125 | node in buffer, and <var>node</var> is the node itself. If a capture name | ||
| 126 | is both a face and a function, the face takes priority. If a capture | ||
| 127 | name is neither a face name nor a function name, it is ignored. | ||
| 128 | </p></dd></dl> | ||
| 129 | |||
| 130 | <dl class="def"> | ||
| 131 | <dt id="index-treesit_002dfont_002dlock_002dsettings"><span class="category">Variable: </span><span><strong>treesit-font-lock-settings</strong><a href='#index-treesit_002dfont_002dlock_002dsettings' class='copiable-anchor'> ¶</a></span></dt> | ||
| 132 | <dd><p>A list of <var>setting</var>s for tree-sitter font lock. The exact format | ||
| 133 | of this variable is considered internal. One should always use | ||
| 134 | <code>treesit-font-lock-rules</code> to set this variable. | ||
| 135 | </p> | ||
| 136 | <p>Each <var>setting</var> is of form | ||
| 137 | </p> | ||
| 138 | <div class="example"> | ||
| 139 | <pre class="example">(<var>language</var> <var>query</var>) | ||
| 140 | </pre></div> | ||
| 141 | |||
| 142 | <p>Each <var>setting</var> controls one parser (often of different language). | ||
| 143 | And <var>language</var> is the language symbol (see <a href="Language-Definitions.html">Tree-sitter Language Definitions</a>); <var>query</var> is the query (see <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>). | ||
| 144 | </p></dd></dl> | ||
| 145 | |||
| 146 | <p>Multi-language major modes should provide range functions in | ||
| 147 | <code>treesit-range-functions</code>, and Emacs will set the ranges | ||
| 148 | accordingly before fontifying a region (see <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>). | ||
| 149 | </p> | ||
| 150 | </div> | ||
| 151 | <hr> | ||
| 152 | <div class="header"> | ||
| 153 | <p> | ||
| 154 | Previous: <a href="Multiline-Font-Lock.html">Multiline Font Lock Constructs</a>, Up: <a href="Font-Lock-Mode.html">Font Lock Mode</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 155 | </div> | ||
| 156 | |||
| 157 | |||
| 158 | |||
| 159 | </body> | ||
| 160 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html new file mode 100644 index 00000000000..691c8fba8c7 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html | |||
| @@ -0,0 +1,244 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Parser-based Indentation (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Parser-based Indentation (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Parser-based Indentation (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Auto_002dIndentation.html" rel="up" title="Auto-Indentation"> | ||
| 36 | <link href="SMIE.html" rel="prev" title="SMIE"> | ||
| 37 | <style type="text/css"> | ||
| 38 | <!-- | ||
| 39 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 40 | a.summary-letter {text-decoration: none} | ||
| 41 | blockquote.indentedblock {margin-right: 0em} | ||
| 42 | div.display {margin-left: 3.2em} | ||
| 43 | div.example {margin-left: 3.2em} | ||
| 44 | kbd {font-style: oblique} | ||
| 45 | pre.display {font-family: inherit} | ||
| 46 | pre.format {font-family: inherit} | ||
| 47 | pre.menu-comment {font-family: serif} | ||
| 48 | pre.menu-preformatted {font-family: serif} | ||
| 49 | span.nolinebreak {white-space: nowrap} | ||
| 50 | span.roman {font-family: initial; font-weight: normal} | ||
| 51 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 52 | span:hover a.copiable-anchor {visibility: visible} | ||
| 53 | ul.no-bullet {list-style: none} | ||
| 54 | --> | ||
| 55 | </style> | ||
| 56 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 57 | |||
| 58 | |||
| 59 | </head> | ||
| 60 | |||
| 61 | <body lang="en"> | ||
| 62 | <div class="subsection" id="Parser_002dbased-Indentation"> | ||
| 63 | <div class="header"> | ||
| 64 | <p> | ||
| 65 | Previous: <a href="SMIE.html" accesskey="p" rel="prev">Simple Minded Indentation Engine</a>, Up: <a href="Auto_002dIndentation.html" accesskey="u" rel="up">Automatic Indentation of code</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 66 | </div> | ||
| 67 | <hr> | ||
| 68 | <span id="Parser_002dbased-Indentation-1"></span><h4 class="subsection">24.7.2 Parser-based Indentation</h4> | ||
| 69 | |||
| 70 | |||
| 71 | <p>When built with the tree-sitter library (see <a href="Parsing-Program-Source.html">Parsing Program Source</a>), Emacs can parse program source and produce a syntax tree. | ||
| 72 | And this syntax tree can be used for indentation. For maximum | ||
| 73 | flexibility, we could write a custom indent function that queries the | ||
| 74 | syntax tree and indents accordingly for each language, but that would | ||
| 75 | be a lot of work. It is more convenient to use the simple indentation | ||
| 76 | engine described below: we only need to write some indentation rules | ||
| 77 | and the engine takes care of the rest. | ||
| 78 | </p> | ||
| 79 | <p>To enable the indentation engine, set the value of | ||
| 80 | <code>indent-line-function</code> to <code>treesit-indent</code>. | ||
| 81 | </p> | ||
| 82 | <dl class="def"> | ||
| 83 | <dt id="index-treesit_002dindent_002dfunction"><span class="category">Variable: </span><span><strong>treesit-indent-function</strong><a href='#index-treesit_002dindent_002dfunction' class='copiable-anchor'> ¶</a></span></dt> | ||
| 84 | <dd><p>This variable stores the actual function called by | ||
| 85 | <code>treesit-indent</code>. By default, its value is | ||
| 86 | <code>treesit-simple-indent</code>. In the future we might add other | ||
| 87 | more complex indentation engines. | ||
| 88 | </p></dd></dl> | ||
| 89 | |||
| 90 | <span id="Writing-indentation-rules"></span><h3 class="heading">Writing indentation rules</h3> | ||
| 91 | |||
| 92 | <dl class="def"> | ||
| 93 | <dt id="index-treesit_002dsimple_002dindent_002drules"><span class="category">Variable: </span><span><strong>treesit-simple-indent-rules</strong><a href='#index-treesit_002dsimple_002dindent_002drules' class='copiable-anchor'> ¶</a></span></dt> | ||
| 94 | <dd><p>This local variable stores indentation rules for every language. It is | ||
| 95 | a list of | ||
| 96 | </p> | ||
| 97 | <div class="example"> | ||
| 98 | <pre class="example">(<var>language</var> . <var>rules</var>) | ||
| 99 | </pre></div> | ||
| 100 | |||
| 101 | <p>where <var>language</var> is a language symbol, and <var>rules</var> is a list | ||
| 102 | of | ||
| 103 | </p> | ||
| 104 | <div class="example"> | ||
| 105 | <pre class="example">(<var>matcher</var> <var>anchor</var> <var>offset</var>) | ||
| 106 | </pre></div> | ||
| 107 | |||
| 108 | <p>First Emacs passes the node at point to <var>matcher</var>; if it returns | ||
| 109 | non-nil, this rule applies. Then Emacs passes the node to | ||
| 110 | <var>anchor</var>, which returns a point. Emacs takes the column number of | ||
| 111 | that point, adds <var>offset</var> to it, and the result is the indent for | ||
| 112 | the current line. | ||
| 113 | </p> | ||
| 114 | <p>The <var>matcher</var> and <var>anchor</var> are functions, and Emacs provides | ||
| 115 | convenient presets for them. You can skip over to | ||
| 116 | <code>treesit-simple-indent-presets</code> below, those presets should be | ||
| 117 | more than enough. | ||
| 118 | </p> | ||
| 119 | <p>A <var>matcher</var> or an <var>anchor</var> is a function that takes three | ||
| 120 | arguments (<var>node</var> <var>parent</var> <var>bol</var>). Argument <var>bol</var> is | ||
| 121 | the position where we are indenting: the position of the first | ||
| 122 | non-whitespace character from the beginning of line; <var>node</var> is the | ||
| 123 | largest (highest-in-tree) node that starts at that point; <var>parent</var> | ||
| 124 | is the parent of <var>node</var>. A <var>matcher</var> returns nil/non-nil, and | ||
| 125 | <var>anchor</var> returns a point. | ||
| 126 | </p></dd></dl> | ||
| 127 | |||
| 128 | <dl class="def"> | ||
| 129 | <dt id="index-treesit_002dsimple_002dindent_002dpresets"><span class="category">Variable: </span><span><strong>treesit-simple-indent-presets</strong><a href='#index-treesit_002dsimple_002dindent_002dpresets' class='copiable-anchor'> ¶</a></span></dt> | ||
| 130 | <dd><p>This is a list of presets for <var>matcher</var>s and <var>anchor</var>s in | ||
| 131 | <code>treesit-simple-indent-rules</code>. Each of them represents a function | ||
| 132 | that takes <var>node</var>, <var>parent</var> and <var>bol</var> as arguments. | ||
| 133 | </p> | ||
| 134 | <div class="example"> | ||
| 135 | <pre class="example">no-node | ||
| 136 | </pre></div> | ||
| 137 | |||
| 138 | <p>This matcher matches the case where <var>node</var> is nil, i.e., there is | ||
| 139 | no node that starts at <var>bol</var>. This is the case when <var>bol</var> is | ||
| 140 | at an empty line or inside a multi-line string, etc. | ||
| 141 | </p> | ||
| 142 | <div class="example"> | ||
| 143 | <pre class="example">(parent-is <var>type</var>) | ||
| 144 | </pre></div> | ||
| 145 | |||
| 146 | <p>This matcher matches if <var>parent</var>’s type is <var>type</var>. | ||
| 147 | </p> | ||
| 148 | <div class="example"> | ||
| 149 | <pre class="example">(node-is <var>type</var>) | ||
| 150 | </pre></div> | ||
| 151 | |||
| 152 | <p>This matcher matches if <var>node</var>’s type is <var>type</var>. | ||
| 153 | </p> | ||
| 154 | <div class="example"> | ||
| 155 | <pre class="example">(query <var>query</var>) | ||
| 156 | </pre></div> | ||
| 157 | |||
| 158 | <p>This matcher matches if querying <var>parent</var> with <var>query</var> | ||
| 159 | captures <var>node</var>. The capture name does not matter. | ||
| 160 | </p> | ||
| 161 | <div class="example"> | ||
| 162 | <pre class="example">(match <var>node-type</var> <var>parent-type</var> | ||
| 163 | <var>node-field</var> <var>node-index-min</var> <var>node-index-max</var>) | ||
| 164 | </pre></div> | ||
| 165 | |||
| 166 | <p>This matcher checks if <var>node</var>’s type is <var>node-type</var>, | ||
| 167 | <var>parent</var>’s type is <var>parent-type</var>, <var>node</var>’s field name in | ||
| 168 | <var>parent</var> is <var>node-field</var>, and <var>node</var>’s index among its | ||
| 169 | siblings is between <var>node-index-min</var> and <var>node-index-max</var>. If | ||
| 170 | the value of a constraint is nil, this matcher doesn’t check for that | ||
| 171 | constraint. For example, to match the first child where parent is | ||
| 172 | <code>argument_list</code>, use | ||
| 173 | </p> | ||
| 174 | <div class="example"> | ||
| 175 | <pre class="example">(match nil "argument_list" nil 0 0) | ||
| 176 | </pre></div> | ||
| 177 | |||
| 178 | <div class="example"> | ||
| 179 | <pre class="example">first-sibling | ||
| 180 | </pre></div> | ||
| 181 | |||
| 182 | <p>This anchor returns the start of the first child of <var>parent</var>. | ||
| 183 | </p> | ||
| 184 | <div class="example"> | ||
| 185 | <pre class="example">parent | ||
| 186 | </pre></div> | ||
| 187 | |||
| 188 | <p>This anchor returns the start of <var>parent</var>. | ||
| 189 | </p> | ||
| 190 | <div class="example"> | ||
| 191 | <pre class="example">parent-bol | ||
| 192 | </pre></div> | ||
| 193 | |||
| 194 | <p>This anchor returns the beginning of non-space characters on the line | ||
| 195 | where <var>parent</var> is on. | ||
| 196 | </p> | ||
| 197 | <div class="example"> | ||
| 198 | <pre class="example">prev-sibling | ||
| 199 | </pre></div> | ||
| 200 | |||
| 201 | <p>This anchor returns the start of the previous sibling of <var>node</var>. | ||
| 202 | </p> | ||
| 203 | <div class="example"> | ||
| 204 | <pre class="example">no-indent | ||
| 205 | </pre></div> | ||
| 206 | |||
| 207 | <p>This anchor returns the start of <var>node</var>, i.e., no indent. | ||
| 208 | </p> | ||
| 209 | <div class="example"> | ||
| 210 | <pre class="example">prev-line | ||
| 211 | </pre></div> | ||
| 212 | |||
| 213 | <p>This anchor returns the first non-whitespace character on the previous | ||
| 214 | line. | ||
| 215 | </p></dd></dl> | ||
| 216 | |||
| 217 | <span id="Indentation-utilities"></span><h3 class="heading">Indentation utilities</h3> | ||
| 218 | |||
| 219 | <p>Here are some utility functions that can help writing indentation | ||
| 220 | rules. | ||
| 221 | </p> | ||
| 222 | <dl class="def"> | ||
| 223 | <dt id="index-treesit_002dcheck_002dindent"><span class="category">Function: </span><span><strong>treesit-check-indent</strong> <em>mode</em><a href='#index-treesit_002dcheck_002dindent' class='copiable-anchor'> ¶</a></span></dt> | ||
| 224 | <dd><p>This function checks current buffer’s indentation against major mode | ||
| 225 | <var>mode</var>. It indents the current buffer in <var>mode</var> and compares | ||
| 226 | the indentation with the current indentation. Then it pops up a diff | ||
| 227 | buffer showing the difference. Correct indentation (target) is in | ||
| 228 | green, current indentation is in red. | ||
| 229 | </p></dd></dl> | ||
| 230 | |||
| 231 | <p>It is also helpful to use <code>treesit-inspect-mode</code> when writing | ||
| 232 | indentation rules. | ||
| 233 | </p> | ||
| 234 | </div> | ||
| 235 | <hr> | ||
| 236 | <div class="header"> | ||
| 237 | <p> | ||
| 238 | Previous: <a href="SMIE.html">Simple Minded Indentation Engine</a>, Up: <a href="Auto_002dIndentation.html">Automatic Indentation of code</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 239 | </div> | ||
| 240 | |||
| 241 | |||
| 242 | |||
| 243 | </body> | ||
| 244 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html new file mode 100644 index 00000000000..7b6e51468a6 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Parsing Program Source (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Parsing Program Source (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Parsing Program Source (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="index.html" rel="up" title="Top"> | ||
| 36 | <link href="Abbrevs.html" rel="next" title="Abbrevs"> | ||
| 37 | <link href="Syntax-Tables.html" rel="prev" title="Syntax Tables"> | ||
| 38 | <style type="text/css"> | ||
| 39 | <!-- | ||
| 40 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 41 | a.summary-letter {text-decoration: none} | ||
| 42 | blockquote.indentedblock {margin-right: 0em} | ||
| 43 | div.display {margin-left: 3.2em} | ||
| 44 | div.example {margin-left: 3.2em} | ||
| 45 | kbd {font-style: oblique} | ||
| 46 | pre.display {font-family: inherit} | ||
| 47 | pre.format {font-family: inherit} | ||
| 48 | pre.menu-comment {font-family: serif} | ||
| 49 | pre.menu-preformatted {font-family: serif} | ||
| 50 | span.nolinebreak {white-space: nowrap} | ||
| 51 | span.roman {font-family: initial; font-weight: normal} | ||
| 52 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 53 | span:hover a.copiable-anchor {visibility: visible} | ||
| 54 | ul.no-bullet {list-style: none} | ||
| 55 | --> | ||
| 56 | </style> | ||
| 57 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 58 | |||
| 59 | |||
| 60 | </head> | ||
| 61 | |||
| 62 | <body lang="en"> | ||
| 63 | <div class="chapter" id="Parsing-Program-Source"> | ||
| 64 | <div class="header"> | ||
| 65 | <p> | ||
| 66 | Next: <a href="Abbrevs.html" accesskey="n" rel="next">Abbrevs and Abbrev Expansion</a>, Previous: <a href="Syntax-Tables.html" accesskey="p" rel="prev">Syntax Tables</a>, Up: <a href="index.html" accesskey="u" rel="up">Emacs Lisp</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 67 | </div> | ||
| 68 | <hr> | ||
| 69 | <span id="Parsing-Program-Source-1"></span><h2 class="chapter">37 Parsing Program Source</h2> | ||
| 70 | |||
| 71 | <p>Emacs provides various ways to parse program source text and produce a | ||
| 72 | <em>syntax tree</em>. In a syntax tree, text is no longer a | ||
| 73 | one-dimensional stream but a structured tree of nodes, where each node | ||
| 74 | represents a piece of text. Thus a syntax tree can enable | ||
| 75 | interesting features like precise fontification, indentation, | ||
| 76 | navigation, structured editing, etc. | ||
| 77 | </p> | ||
| 78 | <p>Emacs has a simple facility for parsing balanced expressions | ||
| 79 | (see <a href="Parsing-Expressions.html">Parsing Expressions</a>). There is also the SMIE library for generic | ||
| 80 | navigation and indentation (see <a href="SMIE.html">Simple Minded Indentation Engine</a>). | ||
| 81 | </p> | ||
| 82 | <p>Emacs also provides integration with the tree-sitter library | ||
| 83 | (<a href="https://tree-sitter.github.io/tree-sitter">https://tree-sitter.github.io/tree-sitter</a>) if compiled with | ||
| 84 | it. The tree-sitter library implements an incremental parser and has | ||
| 85 | support for a wide range of programming languages. | ||
| 86 | </p> | ||
| 87 | <dl class="def"> | ||
| 88 | <dt id="index-treesit_002davailable_002dp"><span class="category">Function: </span><span><strong>treesit-available-p</strong><a href='#index-treesit_002davailable_002dp' class='copiable-anchor'> ¶</a></span></dt> | ||
| 89 | <dd><p>This function returns non-nil if tree-sitter features are available | ||
| 90 | for this Emacs instance. | ||
| 91 | </p></dd></dl> | ||
| 92 | |||
| 93 | <p>For tree-sitter integration with existing Emacs features, | ||
| 94 | see <a href="Parser_002dbased-Font-Lock.html">Parser-based Font Lock</a>, <a href="Parser_002dbased-Indentation.html">Parser-based Indentation</a>, and | ||
| 95 | <a href="List-Motion.html">Moving over Balanced Expressions</a>. | ||
| 96 | </p> | ||
| 97 | <p>To access the syntax tree of the text in a buffer, we need to first | ||
| 98 | load a language definition and create a parser with it. Next, we can | ||
| 99 | query the parser for specific nodes in the syntax tree. Then, we can | ||
| 100 | access various information about the node, and we can pattern-match a | ||
| 101 | node with a powerful syntax. Finally, we explain how to work with | ||
| 102 | source files that mix multiple languages. The following sections | ||
| 103 | explain how to do each of the tasks in detail. | ||
| 104 | </p> | ||
| 105 | |||
| 106 | <ul class="section-toc"> | ||
| 107 | <li><a href="Language-Definitions.html" accesskey="1">Tree-sitter Language Definitions</a></li> | ||
| 108 | <li><a href="Using-Parser.html" accesskey="2">Using Tree-sitter Parser</a></li> | ||
| 109 | <li><a href="Retrieving-Node.html" accesskey="3">Retrieving Node</a></li> | ||
| 110 | <li><a href="Accessing-Node.html" accesskey="4">Accessing Node Information</a></li> | ||
| 111 | <li><a href="Pattern-Matching.html" accesskey="5">Pattern Matching Tree-sitter Nodes</a></li> | ||
| 112 | <li><a href="Multiple-Languages.html" accesskey="6">Parsing Text in Multiple Languages</a></li> | ||
| 113 | <li><a href="Tree_002dsitter-C-API.html" accesskey="7">Tree-sitter C API Correspondence</a></li> | ||
| 114 | </ul> | ||
| 115 | </div> | ||
| 116 | <hr> | ||
| 117 | <div class="header"> | ||
| 118 | <p> | ||
| 119 | Next: <a href="Abbrevs.html">Abbrevs and Abbrev Expansion</a>, Previous: <a href="Syntax-Tables.html">Syntax Tables</a>, Up: <a href="index.html">Emacs Lisp</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 120 | </div> | ||
| 121 | |||
| 122 | |||
| 123 | |||
| 124 | </body> | ||
| 125 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Pattern-Matching.html b/admin/notes/tree-sitter/html-manual/Pattern-Matching.html new file mode 100644 index 00000000000..e14efe71629 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Pattern-Matching.html | |||
| @@ -0,0 +1,430 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Pattern Matching (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Pattern Matching (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Pattern Matching (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Multiple-Languages.html" rel="next" title="Multiple Languages"> | ||
| 37 | <link href="Accessing-Node.html" rel="prev" title="Accessing Node"> | ||
| 38 | <style type="text/css"> | ||
| 39 | <!-- | ||
| 40 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 41 | a.summary-letter {text-decoration: none} | ||
| 42 | blockquote.indentedblock {margin-right: 0em} | ||
| 43 | div.display {margin-left: 3.2em} | ||
| 44 | div.example {margin-left: 3.2em} | ||
| 45 | kbd {font-style: oblique} | ||
| 46 | pre.display {font-family: inherit} | ||
| 47 | pre.format {font-family: inherit} | ||
| 48 | pre.menu-comment {font-family: serif} | ||
| 49 | pre.menu-preformatted {font-family: serif} | ||
| 50 | span.nolinebreak {white-space: nowrap} | ||
| 51 | span.roman {font-family: initial; font-weight: normal} | ||
| 52 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 53 | span:hover a.copiable-anchor {visibility: visible} | ||
| 54 | ul.no-bullet {list-style: none} | ||
| 55 | --> | ||
| 56 | </style> | ||
| 57 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 58 | |||
| 59 | |||
| 60 | </head> | ||
| 61 | |||
| 62 | <body lang="en"> | ||
| 63 | <div class="section" id="Pattern-Matching"> | ||
| 64 | <div class="header"> | ||
| 65 | <p> | ||
| 66 | Next: <a href="Multiple-Languages.html" accesskey="n" rel="next">Parsing Text in Multiple Languages</a>, Previous: <a href="Accessing-Node.html" accesskey="p" rel="prev">Accessing Node Information</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 67 | </div> | ||
| 68 | <hr> | ||
| 69 | <span id="Pattern-Matching-Tree_002dsitter-Nodes"></span><h3 class="section">37.5 Pattern Matching Tree-sitter Nodes</h3> | ||
| 70 | |||
| 71 | <p>Tree-sitter lets us pattern match with a small declarative language. | ||
| 72 | Pattern matching consists of two steps: first tree-sitter matches a | ||
| 73 | <em>pattern</em> against nodes in the syntax tree, then it <em>captures</em> | ||
| 74 | specific nodes in that pattern and returns the captured nodes. | ||
| 75 | </p> | ||
| 76 | <p>We describe first how to write the most basic query pattern and how to | ||
| 77 | capture nodes in a pattern, then the pattern-match function, finally | ||
| 78 | more advanced pattern syntax. | ||
| 79 | </p> | ||
| 80 | <span id="Basic-query-syntax"></span><h3 class="heading">Basic query syntax</h3> | ||
| 81 | |||
| 82 | <span id="index-Tree_002dsitter-query-syntax"></span> | ||
| 83 | <span id="index-Tree_002dsitter-query-pattern"></span> | ||
| 84 | <p>A <em>query</em> consists of multiple <em>patterns</em>. Each pattern is an | ||
| 85 | s-expression that matches a certain node in the syntax tree. A | ||
| 86 | pattern has the following shape: | ||
| 87 | </p> | ||
| 88 | <div class="example"> | ||
| 89 | <pre class="example">(<var>type</var> <var>child</var>...) | ||
| 90 | </pre></div> | ||
| 91 | |||
| 92 | <p>For example, a pattern that matches a <code>binary_expression</code> node that | ||
| 93 | contains <code>number_literal</code> child nodes would look like | ||
| 94 | </p> | ||
| 95 | <div class="example"> | ||
| 96 | <pre class="example">(binary_expression (number_literal)) | ||
| 97 | </pre></div> | ||
| 98 | |||
| 99 | <p>To <em>capture</em> a node in the query pattern above, append | ||
| 100 | <code>@capture-name</code> after the node pattern you want to capture. For | ||
| 101 | example, | ||
| 102 | </p> | ||
| 103 | <div class="example"> | ||
| 104 | <pre class="example">(binary_expression (number_literal) @number-in-exp) | ||
| 105 | </pre></div> | ||
| 106 | |||
| 107 | <p>captures <code>number_literal</code> nodes that are inside a | ||
| 108 | <code>binary_expression</code> node with capture name <code>number-in-exp</code>. | ||
| 109 | </p> | ||
| 110 | <p>We can capture the <code>binary_expression</code> node too, with capture | ||
| 111 | name <code>biexp</code>: | ||
| 112 | </p> | ||
| 113 | <div class="example"> | ||
| 114 | <pre class="example">(binary_expression | ||
| 115 | (number_literal) @number-in-exp) @biexp | ||
| 116 | </pre></div> | ||
| 117 | |||
| 118 | <span id="Query-function"></span><h3 class="heading">Query function</h3> | ||
| 119 | |||
| 120 | <p>Now we can introduce the query functions. | ||
| 121 | </p> | ||
| 122 | <dl class="def"> | ||
| 123 | <dt id="index-treesit_002dquery_002dcapture"><span class="category">Function: </span><span><strong>treesit-query-capture</strong> <em>node query &optional beg end node-only</em><a href='#index-treesit_002dquery_002dcapture' class='copiable-anchor'> ¶</a></span></dt> | ||
| 124 | <dd><p>This function matches patterns in <var>query</var> in <var>node</var>. | ||
| 125 | Parameter <var>query</var> can be either a string, an s-expression, or a | ||
| 126 | compiled query object. For now, we focus on the string syntax; | ||
| 127 | s-expression syntax and compiled query are described at the end of the | ||
| 128 | section. | ||
| 129 | </p> | ||
| 130 | <p>Parameter <var>node</var> can also be a parser or a language symbol. A | ||
| 131 | parser means using its root node, a language symbol means find or | ||
| 132 | create a parser for that language in the current buffer, and use the | ||
| 133 | root node. | ||
| 134 | </p> | ||
| 135 | <p>The function returns all captured nodes in a list of | ||
| 136 | <code>(<var>capture_name</var> . <var>node</var>)</code>. If <var>node-only</var> is | ||
| 137 | non-nil, a list of nodes is returned instead. If <var>beg</var> and | ||
| 138 | <var>end</var> are both non-nil, this function only pattern matches nodes | ||
| 139 | in that range. | ||
| 140 | </p> | ||
| 141 | <span id="index-treesit_002dquery_002derror"></span> | ||
| 142 | <p>This function raises a <var>treesit-query-error</var> if <var>query</var> is | ||
| 143 | malformed. The signal data contains a description of the specific | ||
| 144 | error. You can use <code>treesit-query-validate</code> to debug the query. | ||
| 145 | </p></dd></dl> | ||
| 146 | |||
| 147 | <p>For example, suppose <var>node</var>’s content is <code>1 + 2</code>, and | ||
| 148 | <var>query</var> is | ||
| 149 | </p> | ||
| 150 | <div class="example"> | ||
| 151 | <pre class="example">(setq query | ||
| 152 | "(binary_expression | ||
| 153 | (number_literal) @number-in-exp) @biexp") | ||
| 154 | </pre></div> | ||
| 155 | |||
| 156 | <p>Querying that query would return | ||
| 157 | </p> | ||
| 158 | <div class="example"> | ||
| 159 | <pre class="example">(treesit-query-capture node query) | ||
| 160 | ⇒ ((biexp . <var><node for "1 + 2"></var>) | ||
| 161 | (number-in-exp . <var><node for "1"></var>) | ||
| 162 | (number-in-exp . <var><node for "2"></var>)) | ||
| 163 | </pre></div> | ||
| 164 | |||
| 165 | <p>As we mentioned earlier, a <var>query</var> could contain multiple | ||
| 166 | patterns. For example, it could have two top-level patterns: | ||
| 167 | </p> | ||
| 168 | <div class="example"> | ||
| 169 | <pre class="example">(setq query | ||
| 170 | "(binary_expression) @biexp | ||
| 171 | (number_literal) @number @biexp") | ||
| 172 | </pre></div> | ||
| 173 | |||
| 174 | <dl class="def"> | ||
| 175 | <dt id="index-treesit_002dquery_002dstring"><span class="category">Function: </span><span><strong>treesit-query-string</strong> <em>string query language</em><a href='#index-treesit_002dquery_002dstring' class='copiable-anchor'> ¶</a></span></dt> | ||
| 176 | <dd><p>This function parses <var>string</var> with <var>language</var>, pattern matches | ||
| 177 | its root node with <var>query</var>, and returns the result. | ||
| 178 | </p></dd></dl> | ||
| 179 | |||
| 180 | <span id="More-query-syntax"></span><h3 class="heading">More query syntax</h3> | ||
| 181 | |||
| 182 | <p>Besides node type and capture, tree-sitter’s query syntax can express | ||
| 183 | anonymous node, field name, wildcard, quantification, grouping, | ||
| 184 | alternation, anchor, and predicate. | ||
| 185 | </p> | ||
| 186 | <span id="Anonymous-node"></span><h4 class="subheading">Anonymous node</h4> | ||
| 187 | |||
| 188 | <p>An anonymous node is written verbatim, surrounded by quotes. A | ||
| 189 | pattern matching (and capturing) keyword <code>return</code> would be | ||
| 190 | </p> | ||
| 191 | <div class="example"> | ||
| 192 | <pre class="example">"return" @keyword | ||
| 193 | </pre></div> | ||
| 194 | |||
| 195 | <span id="Wild-card"></span><h4 class="subheading">Wild card</h4> | ||
| 196 | |||
| 197 | <p>In a query pattern, ‘<samp>(_)</samp>’ matches any named node, and ‘<samp>_</samp>’ | ||
| 198 | matches any named or anonymous node. For example, to capture any | ||
| 199 | named child of a <code>binary_expression</code> node, the pattern would be | ||
| 200 | </p> | ||
| 201 | <div class="example"> | ||
| 202 | <pre class="example">(binary_expression (_) @in_biexp) | ||
| 203 | </pre></div> | ||
| 204 | |||
| 205 | <span id="Field-name"></span><h4 class="subheading">Field name</h4> | ||
| 206 | |||
| 207 | <p>We can capture child nodes that have specific field names: | ||
| 208 | </p> | ||
| 209 | <div class="example"> | ||
| 210 | <pre class="example">(function_definition | ||
| 211 | declarator: (_) @func-declarator | ||
| 212 | body: (_) @func-body) | ||
| 213 | </pre></div> | ||
| 214 | |||
| 215 | <p>We can also capture a node that doesn’t have a certain field, say, a | ||
| 216 | <code>function_definition</code> without a <code>body</code> field. | ||
| 217 | </p> | ||
| 218 | <div class="example"> | ||
| 219 | <pre class="example">(function_definition !body) @func-no-body | ||
| 220 | </pre></div> | ||
| 221 | |||
| 222 | <span id="Quantify-node"></span><h4 class="subheading">Quantify node</h4> | ||
| 223 | |||
| 224 | <p>Tree-sitter recognizes quantification operators ‘<samp>*</samp>’, ‘<samp>+</samp>’ and | ||
| 225 | ‘<samp>?</samp>’. Their meanings are the same as in regular expressions: | ||
| 226 | ‘<samp>*</samp>’ matches the preceding pattern zero or more times, ‘<samp>+</samp>’ | ||
| 227 | matches one or more times, and ‘<samp>?</samp>’ matches zero or one time. | ||
| 228 | </p> | ||
| 229 | <p>For example, this pattern matches <code>type_declaration</code> nodes | ||
| 230 | that have <em>zero or more</em> <code>long</code> keywords. | ||
| 231 | </p> | ||
| 232 | <div class="example"> | ||
| 233 | <pre class="example">(type_declaration "long"*) @long-type | ||
| 234 | </pre></div> | ||
| 235 | |||
| 236 | <p>And this pattern matches a type declaration that has zero or one | ||
| 237 | <code>long</code> keyword: | ||
| 238 | </p> | ||
| 239 | <div class="example"> | ||
| 240 | <pre class="example">(type_declaration "long"?) @long-type | ||
| 241 | </pre></div> | ||
| 242 | |||
| 243 | <span id="Grouping"></span><h4 class="subheading">Grouping</h4> | ||
| 244 | |||
| 245 | <p>Similar to groups in regular expressions, we can bundle patterns into a | ||
| 246 | group and apply quantification operators to it. For example, to | ||
| 247 | express a comma separated list of identifiers, one could write | ||
| 248 | </p> | ||
| 249 | <div class="example"> | ||
| 250 | <pre class="example">(identifier) ("," (identifier))* | ||
| 251 | </pre></div> | ||
| 252 | |||
| 253 | <span id="Alternation"></span><h4 class="subheading">Alternation</h4> | ||
| 254 | |||
| 255 | <p>Again, similar to regular expressions, we can express “match any one | ||
| 256 | from this group of patterns” in the query pattern. The syntax is a | ||
| 257 | list of patterns enclosed in square brackets. For example, to capture | ||
| 258 | some keywords in C, the query pattern would be | ||
| 259 | </p> | ||
| 260 | <div class="example"> | ||
| 261 | <pre class="example">[ | ||
| 262 | "return" | ||
| 263 | "break" | ||
| 264 | "if" | ||
| 265 | "else" | ||
| 266 | ] @keyword | ||
| 267 | </pre></div> | ||
| 268 | |||
| 269 | <span id="Anchor"></span><h4 class="subheading">Anchor</h4> | ||
| 270 | |||
| 271 | <p>The anchor operator ‘<samp>.</samp>’ can be used to enforce juxtaposition, | ||
| 272 | i.e., to enforce two things to be directly next to each other. The | ||
| 273 | two “things” can be two nodes, or a child and the end of its parent. | ||
| 274 | For example, to capture the first child, the last child, or two | ||
| 275 | adjacent children: | ||
| 276 | </p> | ||
| 277 | <div class="example"> | ||
| 278 | <pre class="example">;; Anchor the child with the end of its parent. | ||
| 279 | (compound_expression (_) @last-child .) | ||
| 280 | |||
| 281 | ;; Anchor the child with the beginning of its parent. | ||
| 282 | (compound_expression . (_) @first-child) | ||
| 283 | |||
| 284 | ;; Anchor two adjacent children. | ||
| 285 | (compound_expression | ||
| 286 | (_) @prev-child | ||
| 287 | . | ||
| 288 | (_) @next-child) | ||
| 289 | </pre></div> | ||
| 290 | |||
| 291 | <p>Note that the enforcement of juxtaposition ignores any anonymous | ||
| 292 | nodes. | ||
| 293 | </p> | ||
| 294 | <span id="Predicate"></span><h4 class="subheading">Predicate</h4> | ||
| 295 | |||
| 296 | <p>We can add predicate constraints to a pattern. For example, if we use | ||
| 297 | the following query pattern | ||
| 298 | </p> | ||
| 299 | <div class="example"> | ||
| 300 | <pre class="example">( | ||
| 301 | (array . (_) @first (_) @last .) | ||
| 302 | (#equal @first @last) | ||
| 303 | ) | ||
| 304 | </pre></div> | ||
| 305 | |||
| 306 | <p>Then tree-sitter only matches arrays where the first element is equal to | ||
| 307 | the last element. To attach a predicate to a pattern, we need to | ||
| 308 | group them together. A predicate always starts with a ‘<samp>#</samp>’. | ||
| 309 | Currently there are two predicates, <code>#equal</code> and <code>#match</code>. | ||
| 310 | </p> | ||
| 311 | <dl class="def"> | ||
| 312 | <dt id="index-equal-1"><span class="category">Predicate: </span><span><strong>equal</strong> <em>arg1 arg2</em><a href='#index-equal-1' class='copiable-anchor'> ¶</a></span></dt> | ||
| 313 | <dd><p>Matches if <var>arg1</var> is equal to <var>arg2</var>. Arguments can be either a | ||
| 314 | string or a capture name. Capture names represent the text that the | ||
| 315 | captured node spans in the buffer. | ||
| 316 | </p></dd></dl> | ||
| 317 | |||
| 318 | <dl class="def"> | ||
| 319 | <dt id="index-match"><span class="category">Predicate: </span><span><strong>match</strong> <em>regexp capture-name</em><a href='#index-match' class='copiable-anchor'> ¶</a></span></dt> | ||
| 320 | <dd><p>Matches if the text that <var>capture-name</var>’s node spans in the buffer | ||
| 321 | matches regular expression <var>regexp</var>. Matching is case-sensitive. | ||
| 322 | </p></dd></dl> | ||
| 323 | |||
| 324 | <p>Note that a predicate can only refer to capture names that appear in the | ||
| 325 | same pattern. Indeed, it makes little sense to refer to capture names | ||
| 326 | in other patterns anyway. | ||
| 327 | </p> | ||
| 328 | <span id="S_002dexpression-patterns"></span><h3 class="heading">S-expression patterns</h3> | ||
| 329 | |||
| 330 | <p>Besides strings, Emacs provides an s-expression based syntax for query | ||
| 331 | patterns. It largely resembles the string-based syntax. For example, | ||
| 332 | the following pattern | ||
| 333 | </p> | ||
| 334 | <div class="example"> | ||
| 335 | <pre class="example">(treesit-query-capture | ||
| 336 | node "(addition_expression | ||
| 337 | left: (_) @left | ||
| 338 | \"+\" @plus-sign | ||
| 339 | right: (_) @right) @addition | ||
| 340 | |||
| 341 | [\"return\" \"break\"] @keyword") | ||
| 342 | </pre></div> | ||
| 343 | |||
| 344 | <p>is equivalent to | ||
| 345 | </p> | ||
| 346 | <div class="example"> | ||
| 347 | <pre class="example">(treesit-query-capture | ||
| 348 | node '((addition_expression | ||
| 349 | left: (_) @left | ||
| 350 | "+" @plus-sign | ||
| 351 | right: (_) @right) @addition | ||
| 352 | |||
| 353 | ["return" "break"] @keyword)) | ||
| 354 | </pre></div> | ||
| 355 | |||
| 356 | <p>Most pattern syntax can be written directly as strange but | ||
| 357 | nevertheless valid s-expressions. Only a few of them need | ||
| 358 | modification: | ||
| 359 | </p> | ||
| 360 | <ul> | ||
| 361 | <li> Anchor ‘<samp>.</samp>’ is written as <code>:anchor</code>. | ||
| 362 | </li><li> ‘<samp>?</samp>’ is written as ‘<samp>:?</samp>’. | ||
| 363 | </li><li> ‘<samp>*</samp>’ is written as ‘<samp>:*</samp>’. | ||
| 364 | </li><li> ‘<samp>+</samp>’ is written as ‘<samp>:+</samp>’. | ||
| 365 | </li><li> <code>#equal</code> is written as <code>:equal</code>. In general, predicates | ||
| 366 | change their ‘<samp>#</samp>’ to ‘<samp>:</samp>’. | ||
| 367 | </li></ul> | ||
| 368 | |||
| 369 | <p>For example, | ||
| 370 | </p> | ||
| 371 | <div class="example"> | ||
| 372 | <pre class="example">"( | ||
| 373 | (compound_expression . (_) @first (_)* @rest) | ||
| 374 | (#match \"love\" @first) | ||
| 375 | )" | ||
| 376 | </pre></div> | ||
| 377 | |||
| 378 | <p>is written in s-expression as | ||
| 379 | </p> | ||
| 380 | <div class="example"> | ||
| 381 | <pre class="example">'(( | ||
| 382 | (compound_expression :anchor (_) @first (_) :* @rest) | ||
| 383 | (:match "love" @first) | ||
| 384 | )) | ||
| 385 | </pre></div> | ||
| 386 | |||
| 387 | <span id="Compiling-queries"></span><h3 class="heading">Compiling queries</h3> | ||
| 388 | |||
| 389 | <p>If a query will be used repeatedly, especially in tight loops, it is | ||
| 390 | important to compile that query, because a compiled query is much | ||
| 391 | faster than an uncompiled one. A compiled query can be used anywhere | ||
| 392 | a query is accepted. | ||
| 393 | </p> | ||
| 394 | <dl class="def"> | ||
| 395 | <dt id="index-treesit_002dquery_002dcompile"><span class="category">Function: </span><span><strong>treesit-query-compile</strong> <em>language query</em><a href='#index-treesit_002dquery_002dcompile' class='copiable-anchor'> ¶</a></span></dt> | ||
| 396 | <dd><p>This function compiles <var>query</var> for <var>language</var> into a compiled | ||
| 397 | query object and returns it. | ||
| 398 | </p> | ||
| 399 | <p>This function raises a <var>treesit-query-error</var> if <var>query</var> is | ||
| 400 | malformed. The signal data contains a description of the specific | ||
| 401 | error. You can use <code>treesit-query-validate</code> to debug the query. | ||
| 402 | </p></dd></dl> | ||
| 403 | |||
| 404 | <dl class="def"> | ||
| 405 | <dt id="index-treesit_002dquery_002dexpand"><span class="category">Function: </span><span><strong>treesit-query-expand</strong> <em>query</em><a href='#index-treesit_002dquery_002dexpand' class='copiable-anchor'> ¶</a></span></dt> | ||
| 406 | <dd><p>This function expands the s-expression <var>query</var> into a string | ||
| 407 | query. | ||
| 408 | </p></dd></dl> | ||
| 409 | |||
| 410 | <dl class="def"> | ||
| 411 | <dt id="index-treesit_002dpattern_002dexpand"><span class="category">Function: </span><span><strong>treesit-pattern-expand</strong> <em>pattern</em><a href='#index-treesit_002dpattern_002dexpand' class='copiable-anchor'> ¶</a></span></dt> | ||
| 412 | <dd><p>This function expands the s-expression <var>pattern</var> into a string | ||
| 413 | pattern. | ||
| 414 | </p></dd></dl> | ||
| 415 | |||
| 416 | <p>Finally, the tree-sitter project’s documentation about | ||
| 417 | pattern-matching can be found at | ||
| 418 | <a href="https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries">https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries</a>. | ||
| 419 | </p> | ||
| 420 | </div> | ||
| 421 | <hr> | ||
| 422 | <div class="header"> | ||
| 423 | <p> | ||
| 424 | Next: <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>, Previous: <a href="Accessing-Node.html">Accessing Node Information</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 425 | </div> | ||
| 426 | |||
| 427 | |||
| 428 | |||
| 429 | </body> | ||
| 430 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Retrieving-Node.html b/admin/notes/tree-sitter/html-manual/Retrieving-Node.html new file mode 100644 index 00000000000..1bea0dde76b --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Retrieving-Node.html | |||
| @@ -0,0 +1,362 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Retrieving Node (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Retrieving Node (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Retrieving Node (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Accessing-Node.html" rel="next" title="Accessing Node"> | ||
| 37 | <link href="Using-Parser.html" rel="prev" title="Using Parser"> | ||
| 38 | <style type="text/css"> | ||
| 39 | <!-- | ||
| 40 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 41 | a.summary-letter {text-decoration: none} | ||
| 42 | blockquote.indentedblock {margin-right: 0em} | ||
| 43 | div.display {margin-left: 3.2em} | ||
| 44 | div.example {margin-left: 3.2em} | ||
| 45 | kbd {font-style: oblique} | ||
| 46 | pre.display {font-family: inherit} | ||
| 47 | pre.format {font-family: inherit} | ||
| 48 | pre.menu-comment {font-family: serif} | ||
| 49 | pre.menu-preformatted {font-family: serif} | ||
| 50 | span.nolinebreak {white-space: nowrap} | ||
| 51 | span.roman {font-family: initial; font-weight: normal} | ||
| 52 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 53 | span:hover a.copiable-anchor {visibility: visible} | ||
| 54 | ul.no-bullet {list-style: none} | ||
| 55 | --> | ||
| 56 | </style> | ||
| 57 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 58 | |||
| 59 | |||
| 60 | </head> | ||
| 61 | |||
| 62 | <body lang="en"> | ||
| 63 | <div class="section" id="Retrieving-Node"> | ||
| 64 | <div class="header"> | ||
| 65 | <p> | ||
| 66 | Next: <a href="Accessing-Node.html" accesskey="n" rel="next">Accessing Node Information</a>, Previous: <a href="Using-Parser.html" accesskey="p" rel="prev">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 67 | </div> | ||
| 68 | <hr> | ||
| 69 | <span id="Retrieving-Node-1"></span><h3 class="section">37.3 Retrieving Node</h3> | ||
| 70 | |||
| 71 | <span id="index-tree_002dsitter-find-node"></span> | ||
| 72 | <span id="index-tree_002dsitter-get-node"></span> | ||
| 73 | <p>Before we continue, let’s go over some conventions of tree-sitter | ||
| 74 | functions. | ||
| 75 | </p> | ||
| 76 | <p>We talk about a node being “smaller” or “larger”, and “lower” or | ||
| 77 | “higher”. A smaller and lower node is lower in the syntax tree and | ||
| 78 | therefore spans a smaller piece of text; a larger and higher node is | ||
| 79 | higher up in the syntax tree, containing many smaller nodes as its | ||
| 80 | children, and therefore spans a larger piece of text. | ||
| 81 | </p> | ||
| 82 | <p>When a function cannot find a node, it returns nil. And for the | ||
| 83 | convenience of function chaining, all the functions that take a node | ||
| 84 | as argument and return a node accept the node to be nil; in that | ||
| 85 | case, the function just returns nil. | ||
| 86 | </p> | ||
| 87 | <span id="index-treesit_002dnode_002doutdated"></span> | ||
| 88 | <p>Nodes are not automatically updated when the associated buffer is | ||
| 89 | modified. And there is no way to update a node once it is retrieved. | ||
| 90 | Using an outdated node throws <code>treesit-node-outdated</code> error. | ||
| 91 | </p> | ||
| 92 | <span id="Retrieving-node-from-syntax-tree"></span><h3 class="heading">Retrieving node from syntax tree</h3> | ||
| 93 | |||
| 94 | <dl class="def"> | ||
| 95 | <dt id="index-treesit_002dnode_002dat"><span class="category">Function: </span><span><strong>treesit-node-at</strong> <em>point &optional parser-or-lang named</em><a href='#index-treesit_002dnode_002dat' class='copiable-anchor'> ¶</a></span></dt> | ||
| 96 | <dd><p>This function returns the <em>smallest</em> node that starts at or after | ||
| 97 | the <var>point</var>. In other words, the start of the node is greater | ||
| 98 | than or equal to <var>point</var>. | ||
| 99 | </p> | ||
| 100 | <p>When <var>parser-or-lang</var> is nil, this function uses the first parser | ||
| 101 | in <code>(treesit-parser-list)</code> in the current buffer. If | ||
| 102 | <var>parser-or-lang</var> is a parser object, it uses that parser; if | ||
| 103 | <var>parser-or-lang</var> is a language, it finds the first parser using | ||
| 104 | that language in <code>(treesit-parser-list)</code> and uses that. | ||
| 105 | </p> | ||
| 106 | <p>If <var>named</var> is non-nil, this function looks for a named node | ||
| 107 | only (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). | ||
| 108 | </p> | ||
| 109 | <p>Example: | ||
| 110 | </p><div class="example"> | ||
| 111 | <pre class="example">;; Find the node at point in a C parser's syntax tree. | ||
| 112 | (treesit-node-at (point) 'c) | ||
| 113 | </pre></div> | ||
| 114 | </dd></dl> | ||
| 115 | |||
| 116 | <dl class="def"> | ||
| 117 | <dt id="index-treesit_002dnode_002don"><span class="category">Function: </span><span><strong>treesit-node-on</strong> <em>beg end &optional parser-or-lang named</em><a href='#index-treesit_002dnode_002don' class='copiable-anchor'> ¶</a></span></dt> | ||
| 118 | <dd><p>This function returns the <em>smallest</em> node that covers the span | ||
| 119 | from <var>beg</var> to <var>end</var>. In other words, the start of the node is | ||
| 120 | less than or equal to <var>beg</var>, and the end of the node is greater | ||
| 121 | than or equal to <var>end</var>. | ||
| 122 | </p> | ||
| 123 | <p><em>Beware</em> that calling this function on an empty line that is not | ||
| 124 | inside any top-level construct (function definition, etc) most | ||
| 125 | probably will give you the root node, because the root node is the | ||
| 126 | smallest node that covers that empty line. Most of the time, you want | ||
| 127 | to use <code>treesit-node-at</code>. | ||
| 128 | </p> | ||
| 129 | <p>When <var>parser-or-lang</var> is nil, this function uses the first parser | ||
| 130 | in <code>(treesit-parser-list)</code> in the current buffer. If | ||
| 131 | <var>parser-or-lang</var> is a parser object, it uses that parser; if | ||
| 132 | <var>parser-or-lang</var> is a language, it finds the first parser using | ||
| 133 | that language in <code>(treesit-parser-list)</code> and uses that. | ||
| 134 | </p> | ||
| 135 | <p>If <var>named</var> is non-nil, this function looks for a named node only | ||
| 136 | (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). | ||
| 137 | </p></dd></dl> | ||
| 138 | |||
| 139 | <dl class="def"> | ||
| 140 | <dt id="index-treesit_002dparser_002droot_002dnode"><span class="category">Function: </span><span><strong>treesit-parser-root-node</strong> <em>parser</em><a href='#index-treesit_002dparser_002droot_002dnode' class='copiable-anchor'> ¶</a></span></dt> | ||
| 141 | <dd><p>This function returns the root node of the syntax tree generated by | ||
| 142 | <var>parser</var>. | ||
| 143 | </p></dd></dl> | ||
| 144 | |||
| 145 | <dl class="def"> | ||
| 146 | <dt id="index-treesit_002dbuffer_002droot_002dnode"><span class="category">Function: </span><span><strong>treesit-buffer-root-node</strong> <em>&optional language</em><a href='#index-treesit_002dbuffer_002droot_002dnode' class='copiable-anchor'> ¶</a></span></dt> | ||
| 147 | <dd><p>This function finds the first parser that uses <var>language</var> in | ||
| 148 | <code>(treesit-parser-list)</code> in the current buffer, and returns the | ||
| 149 | root node of that buffer. If it cannot find an appropriate parser, | ||
| 150 | nil is returned. | ||
| 151 | </p></dd></dl> | ||
| 152 | |||
| 153 | <p>Once we have a node, we can retrieve other nodes from it, or query for | ||
| 154 | information about this node. | ||
| 155 | </p> | ||
| 156 | <span id="Retrieving-node-from-other-nodes"></span><h3 class="heading">Retrieving node from other nodes</h3> | ||
| 157 | |||
| 158 | <span id="By-kinship"></span><h4 class="subheading">By kinship</h4> | ||
| 159 | |||
| 160 | <dl class="def"> | ||
| 161 | <dt id="index-treesit_002dnode_002dparent"><span class="category">Function: </span><span><strong>treesit-node-parent</strong> <em>node</em><a href='#index-treesit_002dnode_002dparent' class='copiable-anchor'> ¶</a></span></dt> | ||
| 162 | <dd><p>This function returns the immediate parent of <var>node</var>. | ||
| 163 | </p></dd></dl> | ||
| 164 | |||
| 165 | <dl class="def"> | ||
| 166 | <dt id="index-treesit_002dnode_002dchild"><span class="category">Function: </span><span><strong>treesit-node-child</strong> <em>node n &optional named</em><a href='#index-treesit_002dnode_002dchild' class='copiable-anchor'> ¶</a></span></dt> | ||
| 167 | <dd><p>This function returns the <var>n</var>’th child of <var>node</var>. If | ||
| 168 | <var>named</var> is non-nil, then it only counts named nodes | ||
| 169 | (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). For example, in a node | ||
| 170 | that represents a string: <code>"text"</code>, there are three children | ||
| 171 | nodes: the opening quote <code>"</code>, the string content <code>text</code>, and | ||
| 172 | the closing quote <code>"</code>. Among these nodes, the first child is | ||
| 173 | the opening quote <code>"</code>, the first named child is the string | ||
| 174 | content <code>text</code>. | ||
| 175 | </p></dd></dl> | ||
| 176 | |||
| 177 | <dl class="def"> | ||
| 178 | <dt id="index-treesit_002dnode_002dchildren"><span class="category">Function: </span><span><strong>treesit-node-children</strong> <em>node &optional named</em><a href='#index-treesit_002dnode_002dchildren' class='copiable-anchor'> ¶</a></span></dt> | ||
| 179 | <dd><p>This function returns all of <var>node</var>’s children in a list. If | ||
| 180 | <var>named</var> is non-nil, then it only retrieves named nodes. | ||
| 181 | </p></dd></dl> | ||
| 182 | |||
| 183 | <dl class="def"> | ||
| 184 | <dt id="index-treesit_002dnext_002dsibling"><span class="category">Function: </span><span><strong>treesit-next-sibling</strong> <em>node &optional named</em><a href='#index-treesit_002dnext_002dsibling' class='copiable-anchor'> ¶</a></span></dt> | ||
| 185 | <dd><p>This function finds the next sibling of <var>node</var>. If <var>named</var> is | ||
| 186 | non-nil, it finds the next named sibling. | ||
| 187 | </p></dd></dl> | ||
| 188 | |||
| 189 | <dl class="def"> | ||
| 190 | <dt id="index-treesit_002dprev_002dsibling"><span class="category">Function: </span><span><strong>treesit-prev-sibling</strong> <em>node &optional named</em><a href='#index-treesit_002dprev_002dsibling' class='copiable-anchor'> ¶</a></span></dt> | ||
| 191 | <dd><p>This function finds the previous sibling of <var>node</var>. If | ||
| 192 | <var>named</var> is non-nil, it finds the previous named sibling. | ||
| 193 | </p></dd></dl> | ||
| 194 | |||
| 195 | <span id="By-field-name"></span><h4 class="subheading">By field name</h4> | ||
| 196 | |||
| 197 | <p>To make the syntax tree easier to analyze, many language definitions | ||
| 198 | assign <em>field names</em> to child nodes (see <a href="Language-Definitions.html#tree_002dsitter-node-field-name">field name</a>). For example, a <code>function_definition</code> node | ||
| 199 | could have a <code>declarator</code> and a <code>body</code>. | ||
| 200 | </p> | ||
| 201 | <dl class="def"> | ||
| 202 | <dt id="index-treesit_002dchild_002dby_002dfield_002dname"><span class="category">Function: </span><span><strong>treesit-child-by-field-name</strong> <em>node field-name</em><a href='#index-treesit_002dchild_002dby_002dfield_002dname' class='copiable-anchor'> ¶</a></span></dt> | ||
| 203 | <dd><p>This function finds the child of <var>node</var> that has <var>field-name</var> | ||
| 204 | as its field name. | ||
| 205 | </p> | ||
| 206 | <div class="example"> | ||
| 207 | <pre class="example">;; Get the child that has "body" as its field name. | ||
| 208 | (treesit-child-by-field-name node "body") | ||
| 209 | </pre></div> | ||
| 210 | </dd></dl> | ||
| 211 | |||
| 212 | <span id="By-position"></span><h4 class="subheading">By position</h4> | ||
| 213 | |||
| 214 | <dl class="def"> | ||
| 215 | <dt id="index-treesit_002dfirst_002dchild_002dfor_002dpos"><span class="category">Function: </span><span><strong>treesit-first-child-for-pos</strong> <em>node pos &optional named</em><a href='#index-treesit_002dfirst_002dchild_002dfor_002dpos' class='copiable-anchor'> ¶</a></span></dt> | ||
| 216 | <dd><p>This function finds the first child of <var>node</var> that extends beyond | ||
| 217 | <var>pos</var>. “Extend beyond” means the end of the child node >= | ||
| 218 | <var>pos</var>. This function only looks for immediate children of | ||
| 219 | <var>node</var>, and doesn’t look in its grandchildren. If <var>named</var> is | ||
| 220 | non-nil, it only looks for named children (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). | ||
| 221 | </p></dd></dl> | ||
| 222 | |||
| 223 | <dl class="def"> | ||
| 224 | <dt id="index-treesit_002dnode_002ddescendant_002dfor_002drange"><span class="category">Function: </span><span><strong>treesit-node-descendant-for-range</strong> <em>node beg end &optional named</em><a href='#index-treesit_002dnode_002ddescendant_002dfor_002drange' class='copiable-anchor'> ¶</a></span></dt> | ||
| 225 | <dd><p>This function finds the <em>smallest</em> child/grandchild... of | ||
| 226 | <var>node</var> that spans the range from <var>beg</var> to <var>end</var>. It is | ||
| 227 | similar to <code>treesit-node-at</code>. If <var>named</var> is non-nil, it only | ||
| 228 | looks for named child. | ||
| 229 | </p></dd></dl> | ||
| 230 | |||
| 231 | <span id="Searching-for-node"></span><h3 class="heading">Searching for node</h3> | ||
| 232 | |||
| 233 | <dl class="def"> | ||
| 234 | <dt id="index-treesit_002dsearch_002dsubtree"><span class="category">Function: </span><span><strong>treesit-search-subtree</strong> <em>node predicate &optional all backward limit</em><a href='#index-treesit_002dsearch_002dsubtree' class='copiable-anchor'> ¶</a></span></dt> | ||
| 235 | <dd><p>This function traverses the subtree of <var>node</var> (including | ||
| 236 | <var>node</var>), and matches <var>predicate</var> with each node along the way. | ||
| 237 | And <var>predicate</var> is a regexp that matches (case-insensitively) | ||
| 238 | against each node’s type, or a function that takes a node and returns | ||
| 239 | nil/non-nil. If a node matches, that node is returned; if no node | ||
| 240 | ever matches, nil is returned. | ||
| 241 | </p> | ||
| 242 | <p>By default, this function only traverses named nodes; if <var>all</var> is | ||
| 243 | non-nil, it traverses all nodes. If <var>backward</var> is non-nil, it | ||
| 244 | traverses backwards. If <var>limit</var> is non-nil, it only traverses | ||
| 245 | that number of levels down in the tree. | ||
| 246 | </p></dd></dl> | ||
| 247 | |||
| 248 | <dl class="def"> | ||
| 249 | <dt id="index-treesit_002dsearch_002dforward"><span class="category">Function: </span><span><strong>treesit-search-forward</strong> <em>start predicate &optional all backward up</em><a href='#index-treesit_002dsearch_002dforward' class='copiable-anchor'> ¶</a></span></dt> | ||
| 250 | <dd><p>This function is somewhat similar to <code>treesit-search-subtree</code>. | ||
| 251 | It also traverses the parse tree and matches each node with | ||
| 252 | <var>predicate</var> (except for <var>start</var>), where <var>predicate</var> can be | ||
| 253 | a (case-insensitive) regexp or a function. For a tree like the below | ||
| 254 | where <var>start</var> is marked 1, this function traverses as numbered: | ||
| 255 | </p> | ||
| 256 | <div class="example"> | ||
| 257 | <pre class="example"> o | ||
| 258 | | | ||
| 259 | 3--------4-----------8 | ||
| 260 | | | | | ||
| 261 | o--o-+--1 5--+--6 9---+-----12 | ||
| 262 | | | | | | | | ||
| 263 | o o 2 7 +-+-+ +--+--+ | ||
| 264 | | | | | | | ||
| 265 | 10 11 13 14 15 | ||
| 266 | </pre></div> | ||
| 267 | |||
| 268 | <p>Same as in <code>treesit-search-subtree</code>, this function only searches | ||
| 269 | for named nodes by default. But if <var>all</var> is non-nil, it searches | ||
| 270 | for all nodes. If <var>backward</var> is non-nil, it searches backwards. | ||
| 271 | </p> | ||
| 272 | <p>If <var>up</var> is non-nil, this function will only traverse to siblings | ||
| 273 | and parents. In that case, only 1 3 4 8 would be traversed. | ||
| 274 | </p></dd></dl> | ||
| 275 | |||
| 276 | <dl class="def"> | ||
| 277 | <dt id="index-treesit_002dsearch_002dforward_002dgoto"><span class="category">Function: </span><span><strong>treesit-search-forward-goto</strong> <em>predicate side &optional all backward up</em><a href='#index-treesit_002dsearch_002dforward_002dgoto' class='copiable-anchor'> ¶</a></span></dt> | ||
| 278 | <dd><p>This function jumps to the start or end of the next node in buffer | ||
| 279 | that matches <var>predicate</var>. Parameters <var>predicate</var>, <var>all</var>, | ||
| 280 | <var>backward</var>, and <var>up</var> are the same as in | ||
| 281 | <code>treesit-search-forward</code>. And <var>side</var> controls which side of | ||
| 282 | the matched node we stop at; it can be <code>start</code> or <code>end</code>. | ||
| 283 | </p></dd></dl> | ||
| 284 | |||
| 285 | <dl class="def"> | ||
| 286 | <dt id="index-treesit_002dinduce_002dsparse_002dtree"><span class="category">Function: </span><span><strong>treesit-induce-sparse-tree</strong> <em>root predicate &optional process-fn limit</em><a href='#index-treesit_002dinduce_002dsparse_002dtree' class='copiable-anchor'> ¶</a></span></dt> | ||
| 287 | <dd><p>This function creates a sparse tree from <var>root</var>’s subtree. | ||
| 288 | </p> | ||
| 289 | <p>Basically, it takes the subtree under <var>root</var>, and combs it so only | ||
| 290 | the nodes that match <var>predicate</var> are left, like picking out grapes | ||
| 291 | on the vine. Like previous functions, <var>predicate</var> can be a regexp | ||
| 292 | string that matches against each node’s type case-insensitively, or a | ||
| 293 | function that takes a node and returns nil/non-nil. | ||
| 294 | </p> | ||
| 295 | <p>For example, for a subtree on the left that consists of both numbers | ||
| 296 | and letters, if <var>predicate</var> is “letter only”, the returned tree | ||
| 297 | is the one on the right. | ||
| 298 | </p> | ||
| 299 | <div class="example"> | ||
| 300 | <pre class="example"> a a a | ||
| 301 | | | | | ||
| 302 | +---+---+ +---+---+ +---+---+ | ||
| 303 | | | | | | | | | | | ||
| 304 | b 1 2 b | | b c d | ||
| 305 | | | => | | => | | ||
| 306 | c +--+ c + e | ||
| 307 | | | | | | | ||
| 308 | +--+ d 4 +--+ d | ||
| 309 | | | | | ||
| 310 | e 5 e | ||
| 311 | </pre></div> | ||
| 312 | |||
| 313 | <p>If <var>process-fn</var> is non-nil, instead of returning the matched | ||
| 314 | nodes, this function passes each node to <var>process-fn</var> and uses the | ||
| 315 | returned value instead. If non-nil, <var>limit</var> is the number of | ||
| 316 | levels to go down from <var>root</var>. | ||
| 317 | </p> | ||
| 318 | <p>Each node in the returned tree looks like <code>(<var>tree-sitter | ||
| 319 | node</var> . (<var>child</var> ...))</code>. The <var>tree-sitter node</var> of the root | ||
| 320 | of this tree will be nil if <var>root</var> doesn’t match <var>predicate</var>. If | ||
| 321 | no node matches <var>predicate</var>, return nil. | ||
| 322 | </p></dd></dl> | ||
| 323 | |||
| 324 | <span id="More-convenient-functions"></span><h3 class="heading">More convenient functions</h3> | ||
| 325 | |||
| 326 | <dl class="def"> | ||
| 327 | <dt id="index-treesit_002dfilter_002dchild"><span class="category">Function: </span><span><strong>treesit-filter-child</strong> <em>node pred &optional named</em><a href='#index-treesit_002dfilter_002dchild' class='copiable-anchor'> ¶</a></span></dt> | ||
| 328 | <dd><p>This function finds immediate children of <var>node</var> that satisfy | ||
| 329 | <var>pred</var>. | ||
| 330 | </p> | ||
| 331 | <p>Function <var>pred</var> takes the child node as the argument and should | ||
| 332 | return non-nil to indicate keeping the child. If <var>named</var> is | ||
| 333 | non-nil, this function only searches for named nodes. | ||
| 334 | </p></dd></dl> | ||
| 335 | |||
| 336 | <dl class="def"> | ||
| 337 | <dt id="index-treesit_002dparent_002duntil"><span class="category">Function: </span><span><strong>treesit-parent-until</strong> <em>node pred</em><a href='#index-treesit_002dparent_002duntil' class='copiable-anchor'> ¶</a></span></dt> | ||
| 338 | <dd><p>This function repeatedly finds the parent of <var>node</var>, and returns | ||
| 339 | the parent if it satisfies <var>pred</var> (which takes the parent as the | ||
| 340 | argument). If no parent satisfies <var>pred</var>, this function returns | ||
| 341 | nil. | ||
| 342 | </p></dd></dl> | ||
| 343 | |||
| 344 | <dl class="def"> | ||
| 345 | <dt id="index-treesit_002dparent_002dwhile"><span class="category">Function: </span><span><strong>treesit-parent-while</strong> <em>node pred</em><a href='#index-treesit_002dparent_002dwhile' class='copiable-anchor'> ¶</a></span></dt> | ||
| 346 | <dd><p>This function repeatedly finds the parent of <var>node</var>, and keeps | ||
| 347 | doing so as long as the parent satisfies <var>pred</var> (which takes the | ||
| 348 | parent as the single argument). I.e., this function returns the | ||
| 349 | farthest parent that still satisfies <var>pred</var>. | ||
| 350 | </p></dd></dl> | ||
| 351 | |||
| 352 | </div> | ||
| 353 | <hr> | ||
| 354 | <div class="header"> | ||
| 355 | <p> | ||
| 356 | Next: <a href="Accessing-Node.html">Accessing Node Information</a>, Previous: <a href="Using-Parser.html">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 357 | </div> | ||
| 358 | |||
| 359 | |||
| 360 | |||
| 361 | </body> | ||
| 362 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html new file mode 100644 index 00000000000..77cea6b3f95 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html | |||
| @@ -0,0 +1,212 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Tree-sitter C API (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Tree-sitter C API (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Tree-sitter C API (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Multiple-Languages.html" rel="prev" title="Multiple Languages"> | ||
| 37 | <style type="text/css"> | ||
| 38 | <!-- | ||
| 39 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 40 | a.summary-letter {text-decoration: none} | ||
| 41 | blockquote.indentedblock {margin-right: 0em} | ||
| 42 | div.display {margin-left: 3.2em} | ||
| 43 | div.example {margin-left: 3.2em} | ||
| 44 | kbd {font-style: oblique} | ||
| 45 | pre.display {font-family: inherit} | ||
| 46 | pre.format {font-family: inherit} | ||
| 47 | pre.menu-comment {font-family: serif} | ||
| 48 | pre.menu-preformatted {font-family: serif} | ||
| 49 | span.nolinebreak {white-space: nowrap} | ||
| 50 | span.roman {font-family: initial; font-weight: normal} | ||
| 51 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 52 | span:hover a.copiable-anchor {visibility: visible} | ||
| 53 | ul.no-bullet {list-style: none} | ||
| 54 | --> | ||
| 55 | </style> | ||
| 56 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 57 | |||
| 58 | |||
| 59 | </head> | ||
| 60 | |||
| 61 | <body lang="en"> | ||
| 62 | <div class="section" id="Tree_002dsitter-C-API"> | ||
| 63 | <div class="header"> | ||
| 64 | <p> | ||
| 65 | Previous: <a href="Multiple-Languages.html" accesskey="p" rel="prev">Parsing Text in Multiple Languages</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 66 | </div> | ||
| 67 | <hr> | ||
| 68 | <span id="Tree_002dsitter-C-API-Correspondence"></span><h3 class="section">37.7 Tree-sitter C API Correspondence</h3> | ||
| 69 | |||
| 70 | <p>Emacs’ tree-sitter integration doesn’t expose every feature | ||
| 71 | tree-sitter’s C API provides. Missing features include: | ||
| 72 | </p> | ||
| 73 | <ul> | ||
| 74 | <li> Creating a tree cursor and navigating the syntax tree with it. | ||
| 75 | </li><li> Setting timeout and cancellation flag for a parser. | ||
| 76 | </li><li> Setting the logger for a parser. | ||
| 77 | </li><li> Printing a DOT graph of the syntax tree to a file. | ||
| 78 | </li><li> Copying and modifying a syntax tree. (Emacs doesn’t expose a tree | ||
| 79 | object.) | ||
| 80 | </li><li> Using (row, column) coordinates as position. | ||
| 81 | </li><li> Updating a node with changes. (In Emacs, retrieve a new node instead | ||
| 82 | of updating the existing one.) | ||
| 83 | </li><li> Querying statistics of a language definition. | ||
| 84 | </li></ul> | ||
| 85 | |||
| 86 | <p>In addition, Emacs makes some changes to the C API to make the API more | ||
| 87 | convenient and idiomatic: | ||
| 88 | </p> | ||
| 89 | <ul> | ||
| 90 | <li> Instead of using byte positions, the ELisp API uses character | ||
| 91 | positions. | ||
| 92 | </li><li> Null nodes are converted to nil. | ||
| 93 | </li></ul> | ||
| 94 | |||
| 95 | <p>Below is the correspondence between all C API functions and their | ||
| 96 | ELisp counterparts. Sometimes one ELisp function corresponds to | ||
| 97 | multiple C functions, and many C functions don’t have an ELisp | ||
| 98 | counterpart. | ||
| 99 | </p> | ||
| 100 | <div class="example"> | ||
| 101 | <pre class="example">ts_parser_new treesit-parser-create | ||
| 102 | ts_parser_delete | ||
| 103 | ts_parser_set_language | ||
| 104 | ts_parser_language treesit-parser-language | ||
| 105 | ts_parser_set_included_ranges treesit-parser-set-included-ranges | ||
| 106 | ts_parser_included_ranges treesit-parser-included-ranges | ||
| 107 | ts_parser_parse | ||
| 108 | ts_parser_parse_string treesit-parse-string | ||
| 109 | ts_parser_parse_string_encoding | ||
| 110 | ts_parser_reset | ||
| 111 | ts_parser_set_timeout_micros | ||
| 112 | ts_parser_timeout_micros | ||
| 113 | ts_parser_set_cancellation_flag | ||
| 114 | ts_parser_cancellation_flag | ||
| 115 | ts_parser_set_logger | ||
| 116 | ts_parser_logger | ||
| 117 | ts_parser_print_dot_graphs | ||
| 118 | ts_tree_copy | ||
| 119 | ts_tree_delete | ||
| 120 | ts_tree_root_node | ||
| 121 | ts_tree_language | ||
| 122 | ts_tree_edit | ||
| 123 | ts_tree_get_changed_ranges | ||
| 124 | ts_tree_print_dot_graph | ||
| 125 | ts_node_type treesit-node-type | ||
| 126 | ts_node_symbol | ||
| 127 | ts_node_start_byte treesit-node-start | ||
| 128 | ts_node_start_point | ||
| 129 | ts_node_end_byte treesit-node-end | ||
| 130 | ts_node_end_point | ||
| 131 | ts_node_string treesit-node-string | ||
| 132 | ts_node_is_null | ||
| 133 | ts_node_is_named treesit-node-check | ||
| 134 | ts_node_is_missing treesit-node-check | ||
| 135 | ts_node_is_extra treesit-node-check | ||
| 136 | ts_node_has_changes treesit-node-check | ||
| 137 | ts_node_has_error treesit-node-check | ||
| 138 | ts_node_parent treesit-node-parent | ||
| 139 | ts_node_child treesit-node-child | ||
| 140 | ts_node_field_name_for_child treesit-node-field-name-for-child | ||
| 141 | ts_node_child_count treesit-node-child-count | ||
| 142 | ts_node_named_child treesit-node-child | ||
| 143 | ts_node_named_child_count treesit-node-child-count | ||
| 144 | ts_node_child_by_field_name             treesit-node-child-by-field-name | ||
| 145 | ts_node_child_by_field_id | ||
| 146 | ts_node_next_sibling                    treesit-node-next-sibling | ||
| 147 | ts_node_prev_sibling                    treesit-node-prev-sibling | ||
| 148 | ts_node_next_named_sibling              treesit-node-next-sibling | ||
| 149 | ts_node_prev_named_sibling              treesit-node-prev-sibling | ||
| 150 | ts_node_first_child_for_byte            treesit-node-first-child-for-pos | ||
| 151 | ts_node_first_named_child_for_byte      treesit-node-first-child-for-pos | ||
| 152 | ts_node_descendant_for_byte_range       treesit-node-descendant-for-range | ||
| 153 | ts_node_descendant_for_point_range | ||
| 154 | ts_node_named_descendant_for_byte_range treesit-node-descendant-for-range | ||
| 155 | ts_node_named_descendant_for_point_range | ||
| 156 | ts_node_edit | ||
| 157 | ts_node_eq treesit-node-eq | ||
| 158 | ts_tree_cursor_new | ||
| 159 | ts_tree_cursor_delete | ||
| 160 | ts_tree_cursor_reset | ||
| 161 | ts_tree_cursor_current_node | ||
| 162 | ts_tree_cursor_current_field_name | ||
| 163 | ts_tree_cursor_current_field_id | ||
| 164 | ts_tree_cursor_goto_parent | ||
| 165 | ts_tree_cursor_goto_next_sibling | ||
| 166 | ts_tree_cursor_goto_first_child | ||
| 167 | ts_tree_cursor_goto_first_child_for_byte | ||
| 168 | ts_tree_cursor_goto_first_child_for_point | ||
| 169 | ts_tree_cursor_copy | ||
| 170 | ts_query_new | ||
| 171 | ts_query_delete | ||
| 172 | ts_query_pattern_count | ||
| 173 | ts_query_capture_count | ||
| 174 | ts_query_string_count | ||
| 175 | ts_query_start_byte_for_pattern | ||
| 176 | ts_query_predicates_for_pattern | ||
| 177 | ts_query_step_is_definite | ||
| 178 | ts_query_capture_name_for_id | ||
| 179 | ts_query_string_value_for_id | ||
| 180 | ts_query_disable_capture | ||
| 181 | ts_query_disable_pattern | ||
| 182 | ts_query_cursor_new | ||
| 183 | ts_query_cursor_delete | ||
| 184 | ts_query_cursor_exec treesit-query-capture | ||
| 185 | ts_query_cursor_did_exceed_match_limit | ||
| 186 | ts_query_cursor_match_limit | ||
| 187 | ts_query_cursor_set_match_limit | ||
| 188 | ts_query_cursor_set_byte_range | ||
| 189 | ts_query_cursor_set_point_range | ||
| 190 | ts_query_cursor_next_match | ||
| 191 | ts_query_cursor_remove_match | ||
| 192 | ts_query_cursor_next_capture | ||
| 193 | ts_language_symbol_count | ||
| 194 | ts_language_symbol_name | ||
| 195 | ts_language_symbol_for_name | ||
| 196 | ts_language_field_count | ||
| 197 | ts_language_field_name_for_id | ||
| 198 | ts_language_field_id_for_name | ||
| 199 | ts_language_symbol_type | ||
| 200 | ts_language_version | ||
| 201 | </pre></div> | ||
| 202 | </div> | ||
| 203 | <hr> | ||
| 204 | <div class="header"> | ||
| 205 | <p> | ||
| 206 | Previous: <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 207 | </div> | ||
| 208 | |||
| 209 | |||
| 210 | |||
| 211 | </body> | ||
| 212 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/Using-Parser.html b/admin/notes/tree-sitter/html-manual/Using-Parser.html new file mode 100644 index 00000000000..438e3858f1b --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/Using-Parser.html | |||
| @@ -0,0 +1,186 @@ | |||
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | ||
| 2 | <html> | ||
| 3 | <!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ --> | ||
| 4 | <head> | ||
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
| 6 | <!-- This is the GNU Emacs Lisp Reference Manual | ||
| 7 | corresponding to Emacs version 29.0.50. | ||
| 8 | |||
| 9 | Copyright © 1990-1996, 1998-2022 Free Software Foundation, | ||
| 10 | Inc. | ||
| 11 | |||
| 12 | Permission is granted to copy, distribute and/or modify this document | ||
| 13 | under the terms of the GNU Free Documentation License, Version 1.3 or | ||
| 14 | any later version published by the Free Software Foundation; with the | ||
| 15 | Invariant Sections being "GNU General Public License," with the | ||
| 16 | Front-Cover Texts being "A GNU Manual," and with the Back-Cover | ||
| 17 | Texts as in (a) below. A copy of the license is included in the | ||
| 18 | section entitled "GNU Free Documentation License." | ||
| 19 | |||
| 20 | (a) The FSF's Back-Cover Text is: "You have the freedom to copy and | ||
| 21 | modify this GNU manual. Buying copies from the FSF supports it in | ||
| 22 | developing GNU and promoting software freedom." --> | ||
| 23 | <title>Using Parser (GNU Emacs Lisp Reference Manual)</title> | ||
| 24 | |||
| 25 | <meta name="description" content="Using Parser (GNU Emacs Lisp Reference Manual)"> | ||
| 26 | <meta name="keywords" content="Using Parser (GNU Emacs Lisp Reference Manual)"> | ||
| 27 | <meta name="resource-type" content="document"> | ||
| 28 | <meta name="distribution" content="global"> | ||
| 29 | <meta name="Generator" content="makeinfo"> | ||
| 30 | <meta name="viewport" content="width=device-width,initial-scale=1"> | ||
| 31 | |||
| 32 | <link href="index.html" rel="start" title="Top"> | ||
| 33 | <link href="Index.html" rel="index" title="Index"> | ||
| 34 | <link href="index.html#SEC_Contents" rel="contents" title="Table of Contents"> | ||
| 35 | <link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source"> | ||
| 36 | <link href="Retrieving-Node.html" rel="next" title="Retrieving Node"> | ||
| 37 | <link href="Language-Definitions.html" rel="prev" title="Language Definitions"> | ||
| 38 | <style type="text/css"> | ||
| 39 | <!-- | ||
| 40 | a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em} | ||
| 41 | a.summary-letter {text-decoration: none} | ||
| 42 | blockquote.indentedblock {margin-right: 0em} | ||
| 43 | div.display {margin-left: 3.2em} | ||
| 44 | div.example {margin-left: 3.2em} | ||
| 45 | kbd {font-style: oblique} | ||
| 46 | pre.display {font-family: inherit} | ||
| 47 | pre.format {font-family: inherit} | ||
| 48 | pre.menu-comment {font-family: serif} | ||
| 49 | pre.menu-preformatted {font-family: serif} | ||
| 50 | span.nolinebreak {white-space: nowrap} | ||
| 51 | span.roman {font-family: initial; font-weight: normal} | ||
| 52 | span.sansserif {font-family: sans-serif; font-weight: normal} | ||
| 53 | span:hover a.copiable-anchor {visibility: visible} | ||
| 54 | ul.no-bullet {list-style: none} | ||
| 55 | --> | ||
| 56 | </style> | ||
| 57 | <link rel="stylesheet" type="text/css" href="./manual.css"> | ||
| 58 | |||
| 59 | |||
| 60 | </head> | ||
| 61 | |||
| 62 | <body lang="en"> | ||
| 63 | <div class="section" id="Using-Parser"> | ||
| 64 | <div class="header"> | ||
| 65 | <p> | ||
| 66 | Next: <a href="Retrieving-Node.html" accesskey="n" rel="next">Retrieving Node</a>, Previous: <a href="Language-Definitions.html" accesskey="p" rel="prev">Tree-sitter Language Definitions</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 67 | </div> | ||
| 68 | <hr> | ||
| 69 | <span id="Using-Tree_002dsitter-Parser"></span><h3 class="section">37.2 Using Tree-sitter Parser</h3> | ||
| 70 | <span id="index-Tree_002dsitter-parser"></span> | ||
| 71 | |||
| 72 | <p>This section describes how to create and configure a tree-sitter | ||
| 73 | parser. In Emacs, each tree-sitter parser is associated with a | ||
| 74 | buffer. As we edit the buffer, the associated parser and the syntax | ||
| 75 | tree are automatically kept up-to-date. | ||
| 76 | </p> | ||
| 77 | <dl class="def"> | ||
| 78 | <dt id="index-treesit_002dmax_002dbuffer_002dsize"><span class="category">Variable: </span><span><strong>treesit-max-buffer-size</strong><a href='#index-treesit_002dmax_002dbuffer_002dsize' class='copiable-anchor'> ¶</a></span></dt> | ||
| 79 | <dd><p>This variable contains the maximum size of buffers in which | ||
| 80 | tree-sitter can be activated. Major modes should check this value | ||
| 81 | when deciding whether to enable tree-sitter features. | ||
| 82 | </p></dd></dl> | ||
| 83 | |||
| 84 | <dl class="def"> | ||
| 85 | <dt id="index-treesit_002dcan_002denable_002dp"><span class="category">Function: </span><span><strong>treesit-can-enable-p</strong><a href='#index-treesit_002dcan_002denable_002dp' class='copiable-anchor'> ¶</a></span></dt> | ||
| 86 | <dd><p>This function checks whether the current buffer is suitable for | ||
| 87 | activating tree-sitter features. It basically checks | ||
| 88 | <code>treesit-available-p</code> and <code>treesit-max-buffer-size</code>. | ||
| 89 | </p></dd></dl> | ||
| 90 | |||
| 91 | <span id="index-Creating-tree_002dsitter-parsers"></span> | ||
| 92 | <dl class="def"> | ||
| 93 | <dt id="index-treesit_002dparser_002dcreate"><span class="category">Function: </span><span><strong>treesit-parser-create</strong> <em>language &optional buffer no-reuse</em><a href='#index-treesit_002dparser_002dcreate' class='copiable-anchor'> ¶</a></span></dt> | ||
| 94 | <dd><p>To create a parser, we provide a <var>buffer</var> and the <var>language</var> | ||
| 95 | to use (see <a href="Language-Definitions.html">Tree-sitter Language Definitions</a>). If <var>buffer</var> is nil, the | ||
| 96 | current buffer is used. | ||
| 97 | </p> | ||
| 98 | <p>By default, this function reuses a parser if one already exists for | ||
| 99 | <var>language</var> in <var>buffer</var>; if <var>no-reuse</var> is non-nil, this | ||
| 100 | function always creates a new parser. | ||
| 101 | </p></dd></dl> | ||
| 102 | |||
| 103 | <p>Given a parser, we can query information about it: | ||
| 104 | </p> | ||
| 105 | <dl class="def"> | ||
| 106 | <dt id="index-treesit_002dparser_002dbuffer"><span class="category">Function: </span><span><strong>treesit-parser-buffer</strong> <em>parser</em><a href='#index-treesit_002dparser_002dbuffer' class='copiable-anchor'> ¶</a></span></dt> | ||
| 107 | <dd><p>Returns the buffer associated with <var>parser</var>. | ||
| 108 | </p></dd></dl> | ||
| 109 | |||
| 110 | <dl class="def"> | ||
| 111 | <dt id="index-treesit_002dparser_002dlanguage"><span class="category">Function: </span><span><strong>treesit-parser-language</strong> <em>parser</em><a href='#index-treesit_002dparser_002dlanguage' class='copiable-anchor'> ¶</a></span></dt> | ||
| 112 | <dd><p>Returns the language that <var>parser</var> uses. | ||
| 113 | </p></dd></dl> | ||
| 114 | |||
| 115 | <dl class="def"> | ||
| 116 | <dt id="index-treesit_002dparser_002dp"><span class="category">Function: </span><span><strong>treesit-parser-p</strong> <em>object</em><a href='#index-treesit_002dparser_002dp' class='copiable-anchor'> ¶</a></span></dt> | ||
| 117 | <dd><p>Checks if <var>object</var> is a tree-sitter parser. Return non-nil if it | ||
| 118 | is, return nil otherwise. | ||
| 119 | </p></dd></dl> | ||
| 120 | |||
| 121 | <p>There is no need to explicitly parse a buffer, because parsing is done | ||
| 122 | automatically and lazily. A parser only parses when we query for a | ||
| 123 | node in its syntax tree. Therefore, when a parser is first created, | ||
| 124 | it doesn’t parse the buffer; it waits until we query for a node for | ||
| 125 | the first time. Similarly, when some change is made in the buffer, a | ||
| 126 | parser doesn’t re-parse immediately. | ||
| 127 | </p> | ||
| 128 | <span id="index-treesit_002dbuffer_002dtoo_002dlarge"></span> | ||
| 129 | <p>When a parser does parse, it checks the size of the buffer. | ||
| 130 | Tree-sitter can only handle buffers no larger than about 4GB. If the | ||
| 131 | size exceeds that, Emacs signals <code>treesit-buffer-too-large</code> | ||
| 132 | with signal data being the buffer size. | ||
| 133 | </p> | ||
| 134 | <p>Once a parser is created, Emacs automatically adds it to the | ||
| 135 | internal parser list. Every time a change is made to the buffer, | ||
| 136 | Emacs updates parsers in this list so they can update their syntax | ||
| 137 | tree incrementally. | ||
| 138 | </p> | ||
| 139 | <dl class="def"> | ||
| 140 | <dt id="index-treesit_002dparser_002dlist"><span class="category">Function: </span><span><strong>treesit-parser-list</strong> <em>&optional buffer</em><a href='#index-treesit_002dparser_002dlist' class='copiable-anchor'> ¶</a></span></dt> | ||
| 141 | <dd><p>This function returns the parser list of <var>buffer</var>. And | ||
| 142 | <var>buffer</var> defaults to the current buffer. | ||
| 143 | </p></dd></dl> | ||
| 144 | |||
| 145 | <dl class="def"> | ||
| 146 | <dt id="index-treesit_002dparser_002ddelete"><span class="category">Function: </span><span><strong>treesit-parser-delete</strong> <em>parser</em><a href='#index-treesit_002dparser_002ddelete' class='copiable-anchor'> ¶</a></span></dt> | ||
| 147 | <dd><p>This function deletes <var>parser</var>. | ||
| 148 | </p></dd></dl> | ||
| 149 | |||
| 150 | <span id="index-tree_002dsitter-narrowing"></span> | ||
| 151 | <span id="tree_002dsitter-narrowing"></span><p>Normally, a parser “sees” the whole | ||
| 152 | buffer, but when the buffer is narrowed (see <a href="Narrowing.html">Narrowing</a>), the | ||
| 153 | parser will only see the visible region. As far as the parser can | ||
| 154 | tell, the hidden region is deleted. And when the buffer is later | ||
| 155 | widened, the parser thinks text is inserted in the beginning and in | ||
| 156 | the end. Although parsers respect narrowing, narrowing shouldn’t be | ||
| 157 | the means to handle a multi-language buffer; instead, set the ranges in | ||
| 158 | which a parser should operate. See <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>. | ||
| 159 | </p> | ||
| 160 | <p>Because a parser parses lazily, when we narrow the buffer, the parser | ||
| 161 | is not affected immediately; as long as we don’t query for a node | ||
| 162 | while the buffer is narrowed, the parser is oblivious of the | ||
| 163 | narrowing. | ||
| 164 | </p> | ||
| 165 | <span id="index-tree_002dsitter-parse-string"></span> | ||
| 166 | <dl class="def"> | ||
| 167 | <dt id="index-treesit_002dparse_002dstring"><span class="category">Function: </span><span><strong>treesit-parse-string</strong> <em>string language</em><a href='#index-treesit_002dparse_002dstring' class='copiable-anchor'> ¶</a></span></dt> | ||
| 168 | <dd><p>Besides creating a parser for a buffer, we can also just parse a | ||
| 169 | string. Unlike a buffer, parsing a string is a one-time deal, and | ||
| 170 | there is no way to update the result. | ||
| 171 | </p> | ||
| 172 | <p>This function parses <var>string</var> with <var>language</var>, and returns the | ||
| 173 | root node of the generated syntax tree. | ||
| 174 | </p></dd></dl> | ||
| 175 | |||
| 176 | </div> | ||
| 177 | <hr> | ||
| 178 | <div class="header"> | ||
| 179 | <p> | ||
| 180 | Next: <a href="Retrieving-Node.html">Retrieving Node</a>, Previous: <a href="Language-Definitions.html">Tree-sitter Language Definitions</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p> | ||
| 181 | </div> | ||
| 182 | |||
| 183 | |||
| 184 | |||
| 185 | </body> | ||
| 186 | </html> | ||
diff --git a/admin/notes/tree-sitter/html-manual/build-manual.sh b/admin/notes/tree-sitter/html-manual/build-manual.sh new file mode 100755 index 00000000000..adde3f2a2af --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/build-manual.sh | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | |||
| 3 | MANUAL_DIR="../../../doc/lispref" | ||
| 4 | THIS_DIR=$(pwd) | ||
| 5 | |||
| 6 | echo "Build manual" | ||
| 7 | cd "${MANUAL_DIR}" | ||
| 8 | make elisp.html HTML_OPTS="--html --css-ref=./manual.css" | ||
| 9 | |||
| 10 | cd "${THIS_DIR}" | ||
| 11 | |||
| 12 | echo "Copy manual" | ||
| 13 | cp -f "${MANUAL_DIR}/elisp.html/Parsing-Program-Source.html" . | ||
| 14 | cp -f "${MANUAL_DIR}/elisp.html/Language-Definitions.html" . | ||
| 15 | cp -f "${MANUAL_DIR}/elisp.html/Using-Parser.html" . | ||
| 16 | cp -f "${MANUAL_DIR}/elisp.html/Retrieving-Node.html" . | ||
| 17 | cp -f "${MANUAL_DIR}/elisp.html/Accessing-Node.html" . | ||
| 18 | cp -f "${MANUAL_DIR}/elisp.html/Pattern-Matching.html" . | ||
| 19 | cp -f "${MANUAL_DIR}/elisp.html/Multiple-Languages.html" . | ||
| 20 | cp -f "${MANUAL_DIR}/elisp.html/Tree_002dsitter-C-API.html" . | ||
| 21 | |||
| 22 | cp -f "${MANUAL_DIR}/elisp.html/Parser_002dbased-Font-Lock.html" . | ||
| 23 | cp -f "${MANUAL_DIR}/elisp.html/Parser_002dbased-Indentation.html" . | ||
diff --git a/admin/notes/tree-sitter/html-manual/manual.css b/admin/notes/tree-sitter/html-manual/manual.css new file mode 100644 index 00000000000..5a6790a3458 --- /dev/null +++ b/admin/notes/tree-sitter/html-manual/manual.css | |||
| @@ -0,0 +1,374 @@ | |||
| 1 | /* Style-sheet to use for Emacs manuals */ | ||
| 2 | |||
| 3 | /* Copyright (C) 2013-2014 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | Copying and distribution of this file, with or without modification, | ||
| 6 | are permitted in any medium without royalty provided the copyright | ||
| 7 | notice and this notice are preserved. This file is offered as-is, | ||
| 8 | without any warranty. | ||
| 9 | */ | ||
| 10 | |||
| 11 | /* style.css begins here */ | ||
| 12 | |||
| 13 | /* This stylesheet is used by manuals and a few older resources. */ | ||
| 14 | |||
| 15 | /* reset.css begins here */ | ||
| 16 | |||
| 17 | /* | ||
| 18 | Software License Agreement (BSD License) | ||
| 19 | |||
| 20 | Copyright (c) 2006, Yahoo! Inc. | ||
| 21 | All rights reserved. | ||
| 22 | |||
| 23 | Redistribution and use of this software in source and | ||
| 24 | binary forms, with or without modification, are permitted | ||
| 25 | provided that the following conditions are met: | ||
| 26 | |||
| 27 | * Redistributions of source code must retain the above | ||
| 28 | copyright notice, this list of conditions and the | ||
| 29 | following disclaimer. | ||
| 30 | |||
| 31 | * Redistributions in binary form must reproduce the above | ||
| 32 | copyright notice, this list of conditions and the | ||
| 33 | following disclaimer in the documentation and/or other | ||
| 34 | materials provided with the distribution. | ||
| 35 | |||
| 36 | * Neither the name of Yahoo! Inc. nor the names of its | ||
| 37 | contributors may be used to endorse or promote products | ||
| 38 | derived from this software without specific prior | ||
| 39 | written permission of Yahoo! Inc. | ||
| 40 | |||
| 41 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | ||
| 42 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | ||
| 43 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 44 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 45 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
| 46 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 47 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
| 48 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 49 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 50 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER | ||
| 51 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 52 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 53 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 54 | SUCH DAMAGE. | ||
| 55 | */ | ||
| 56 | |||
| 57 | html { | ||
| 58 | color: #000; | ||
| 59 | background: #FFF; | ||
| 60 | } | ||
| 61 | |||
| 62 | body, div, dl, dt, dd, ul, ol, li, h1, h2, h3, h4, | ||
| 63 | h5, h6, pre, code, form, fieldset, legend, input, | ||
| 64 | button, textarea, p, blockquote, th, td { | ||
| 65 | margin: 0; | ||
| 66 | padding: 0; | ||
| 67 | } | ||
| 68 | |||
| 69 | table { | ||
| 70 | border-collapse: collapse; | ||
| 71 | border-spacing: 0; | ||
| 72 | } | ||
| 73 | |||
| 74 | fieldset, img { | ||
| 75 | border: 0; | ||
| 76 | } | ||
| 77 | |||
| 78 | address, caption, cite, code, dfn, em, strong, | ||
| 79 | th, var, optgroup { | ||
| 80 | font-style: inherit; | ||
| 81 | font-weight: inherit; | ||
| 82 | } | ||
| 83 | |||
| 84 | del, ins { | ||
| 85 | text-decoration: none; | ||
| 86 | } | ||
| 87 | |||
| 88 | li { | ||
| 89 | list-style:none; | ||
| 90 | } | ||
| 91 | |||
| 92 | caption, th { | ||
| 93 | text-align: left; | ||
| 94 | } | ||
| 95 | |||
| 96 | h1, h2, h3, h4, h5, h6 { | ||
| 97 | font-size: 100%; | ||
| 98 | font-weight: normal; | ||
| 99 | } | ||
| 100 | |||
| 101 | q:before, q:after { | ||
| 102 | content:''; | ||
| 103 | } | ||
| 104 | |||
| 105 | abbr, acronym { | ||
| 106 | border: 0; | ||
| 107 | font-variant: normal; | ||
| 108 | } | ||
| 109 | |||
| 110 | sup { | ||
| 111 | vertical-align: baseline; | ||
| 112 | } | ||
| 113 | sub { | ||
| 114 | vertical-align: baseline; | ||
| 115 | } | ||
| 116 | |||
| 117 | legend { | ||
| 118 | color: #000; | ||
| 119 | } | ||
| 120 | |||
| 121 | input, button, textarea, select, optgroup, option { | ||
| 122 | font-family: inherit; | ||
| 123 | font-size: inherit; | ||
| 124 | font-style: inherit; | ||
| 125 | font-weight: inherit; | ||
| 126 | } | ||
| 127 | |||
| 128 | input, button, textarea, select { | ||
| 129 | *font-size: 100%; | ||
| 130 | } | ||
| 131 | |||
| 132 | |||
| 133 | /* reset.css ends here */ | ||
| 134 | |||
| 135 | /*** PAGE LAYOUT ***/ | ||
| 136 | |||
| 137 | html, body { | ||
| 138 | font-size: 1em; | ||
| 139 | text-align: left; | ||
| 140 | text-decoration: none; | ||
| 141 | } | ||
| 142 | html { background-color: #e7e7e7; } | ||
| 143 | |||
| 144 | body { | ||
| 145 | max-width: 74.92em; | ||
| 146 | margin: 0 auto; | ||
| 147 | padding: .5em 1em 1em 1em; | ||
| 148 | background-color: white; | ||
| 149 | border: .1em solid #c0c0c0; | ||
| 150 | } | ||
| 151 | |||
| 152 | |||
| 153 | /*** BASIC ELEMENTS ***/ | ||
| 154 | |||
| 155 | /* Size and positioning */ | ||
| 156 | |||
| 157 | p, pre, li, dt, dd, table, code, address { line-height: 1.3em; } | ||
| 158 | |||
| 159 | h1 { font-size: 2em; margin: 1em 0 } | ||
| 160 | h2 { font-size: 1.50em; margin: 1.0em 0 0.87em 0; } | ||
| 161 | h3 { font-size: 1.30em; margin: 1.0em 0 0.87em 0; } | ||
| 162 | h4 { font-size: 1.13em; margin: 1.0em 0 0.88em 0; } | ||
| 163 | h5 { font-size: 1.00em; margin: 1.0em 0 1.00em 0; } | ||
| 164 | |||
| 165 | p, pre { margin: 1em 0; } | ||
| 166 | pre { overflow: auto; padding-bottom: .3em; } | ||
| 167 | |||
| 168 | ul, ol, blockquote { margin-left: 1.5%; margin-right: 1.5%; } | ||
| 169 | hr { margin: 1em 0; } | ||
| 170 | /* Lists of underlined links are difficult to read. The top margin | ||
| 171 | gives a little more spacing between entries. */ | ||
| 172 | ul li { margin: .5em 1em; } | ||
| 173 | ol li { margin: 1em; } | ||
| 174 | ol ul li { margin: .5em 1em; } | ||
| 175 | ul li p, ul ul li { margin-top: .3em; margin-bottom: .3em; } | ||
| 176 | ul ul, ol ul { margin-top: 0; margin-bottom: 0; } | ||
| 177 | |||
| 178 | /* Separate description lists from preceding text */ | ||
| 179 | dl { margin: 1em 0 0 0; } | ||
| 180 | /* separate the "term" from subsequent "description" */ | ||
| 181 | dt { margin: .5em 0; } | ||
| 182 | /* separate the "description" from subsequent list item | ||
| 183 | when the final <dd> child is an anonymous box */ | ||
| 184 | dd { margin: .5em 3% 1em 3%; } | ||
| 185 | /* separate anonymous box (used to be the first element in <dd>) | ||
| 186 | from subsequent <p> */ | ||
| 187 | dd p { margin: .5em 0; } | ||
| 188 | |||
| 189 | table { | ||
| 190 | display: block; overflow: auto; | ||
| 191 | margin-top: 1.5em; margin-bottom: 1.5em; | ||
| 192 | } | ||
| 193 | th { padding: .3em .5em; text-align: center; } | ||
| 194 | td { padding: .2em .5em; } | ||
| 195 | |||
| 196 | address { margin-bottom: 1em; } | ||
| 197 | caption { margin-bottom: .5em; text-align: center; } | ||
| 198 | sup { vertical-align: super; } | ||
| 199 | sub { vertical-align: sub; } | ||
| 200 | |||
| 201 | /* Style */ | ||
| 202 | |||
| 203 | h1, h2, h3, h4, h5, h6, strong, dt, th { font-weight: bold; } | ||
| 204 | |||
| 205 | /* The default color (black) is too dark for large text in | ||
| 206 | bold font. */ | ||
| 207 | h1, h2, h3, h4 { color: #333; } | ||
| 208 | h5, h6, dt { color: #222; } | ||
| 209 | |||
| 210 | a[href] { color: #005090; } | ||
| 211 | a[href]:visited { color: #100070; } | ||
| 212 | a[href]:active, a[href]:hover { | ||
| 213 | color: #100070; | ||
| 214 | text-decoration: none; | ||
| 215 | } | ||
| 216 | |||
| 217 | h1 a[href]:visited, h2 a[href]:visited, h3 a[href]:visited, | ||
| 218 | h4 a[href]:visited { color: #005090; } | ||
| 219 | h1 a[href]:hover, h2 a[href]:hover, h3 a[href]:hover, | ||
| 220 | h4 a[href]:hover { color: #100070; } | ||
| 221 | |||
| 222 | ol { list-style: decimal outside;} | ||
| 223 | ul { list-style: square outside; } | ||
| 224 | ul ul, ol ul { list-style: circle; } | ||
| 225 | li { list-style: inherit; } | ||
| 226 | |||
| 227 | hr { background-color: #ede6d5; } | ||
| 228 | table { border: 0; } | ||
| 229 | |||
| 230 | abbr,acronym { | ||
| 231 | border-bottom:1px dotted #000; | ||
| 232 | text-decoration: none; | ||
| 233 | cursor:help; | ||
| 234 | } | ||
| 235 | del { text-decoration: line-through; } | ||
| 236 | em { font-style: italic; } | ||
| 237 | small { font-size: .9em; } | ||
| 238 | |||
| 239 | img { max-width: 100%} | ||
| 240 | |||
| 241 | |||
| 242 | /*** SIMPLE CLASSES ***/ | ||
| 243 | |||
| 244 | .center, .c { text-align: center; } | ||
| 245 | .nocenter{ text-align: left; } | ||
| 246 | |||
| 247 | .underline { text-decoration: underline; } | ||
| 248 | .nounderline { text-decoration: none; } | ||
| 249 | |||
| 250 | .no-bullet { list-style: none; } | ||
| 251 | .inline-list li { display: inline } | ||
| 252 | |||
| 253 | .netscape4, .no-display { display: none; } | ||
| 254 | |||
| 255 | |||
| 256 | /*** MANUAL PAGES ***/ | ||
| 257 | |||
| 258 | /* This makes the very long tables of contents in Gnulib and other | ||
| 259 | manuals easier to read. */ | ||
| 260 | .contents ul, .shortcontents ul { font-weight: bold; } | ||
| 261 | .contents ul ul, .shortcontents ul ul { font-weight: normal; } | ||
| 262 | .contents ul { list-style: none; } | ||
| 263 | |||
| 264 | /* For colored navigation bars (Emacs manual): make the bar extend | ||
| 265 | across the whole width of the page and give it a decent height. */ | ||
| 266 | .header, .node { margin: 0 -1em; padding: 0 1em; } | ||
| 267 | .header p, .node p { line-height: 2em; } | ||
| 268 | |||
| 269 | /* For navigation links */ | ||
| 270 | .node a, .header a { display: inline-block; line-height: 2em; } | ||
| 271 | .node a:hover, .header a:hover { background: #f2efe4; } | ||
| 272 | |||
| 273 | /* Inserts */ | ||
| 274 | table.cartouche td { padding: 1.5em; } | ||
| 275 | |||
| 276 | div.display, div.lisp, div.smalldisplay, | ||
| 277 | div.smallexample, div.smalllisp { margin-left: 3%; } | ||
| 278 | |||
| 279 | div.example { padding: .8em 1.2em .4em; } | ||
| 280 | pre.example { padding: .8em 1.2em; } | ||
| 281 | div.example, pre.example { | ||
| 282 | margin: 1em 0 1em 3% ; | ||
| 283 | -webkit-border-radius: .3em; | ||
| 284 | -moz-border-radius: .3em; | ||
| 285 | border-radius: .3em; | ||
| 286 | border: 1px solid #d4cbb6; | ||
| 287 | background-color: #f2efe4; | ||
| 288 | } | ||
| 289 | div.example > pre.example { | ||
| 290 | padding: 0 0 .4em; | ||
| 291 | margin: 0; | ||
| 292 | border: none; | ||
| 293 | } | ||
| 294 | |||
| 295 | pre.menu-comment { padding-top: 1.3em; margin: 0; } | ||
| 296 | |||
| 297 | |||
| 298 | /*** FOR WIDE SCREENS ***/ | ||
| 299 | |||
| 300 | @media (min-width: 40em) { | ||
| 301 | body { padding: .5em 3em 1em 3em; } | ||
| 302 | div.header, div.node { margin: 0 -3em; padding: 0 3em; } | ||
| 303 | } | ||
| 304 | |||
| 305 | /* style.css ends here */ | ||
| 306 | |||
| 307 | /* makeinfo converts @deffn and similar functions to something inside | ||
| 308 | <blockquote>. style.css uses italic for blockquote. This looks poor | ||
| 309 | in the Emacs manuals, which make extensive use of @defun (etc). | ||
| 310 | In particular, references to function arguments appear as <var> | ||
| 311 | inside <blockquote>. Since <var> is also italic, it makes it | ||
| 312 | impossible to distinguish variables. We could change <var> to | ||
| 313 | e.g. bold-italic, or normal, or a different color, but that does | ||
| 314 | not look as good IMO. So we just override blockquote to be non-italic. | ||
| 315 | */ | ||
| 316 | blockquote { font-style: normal; } | ||
| 317 | |||
| 318 | var { font-style: italic; } | ||
| 319 | |||
| 320 | div.header { | ||
| 321 | background-color: #DDDDFF; | ||
| 322 | padding-top: 0.2em; | ||
| 323 | } | ||
| 324 | |||
| 325 | |||
| 326 | /*** Customization ***/ | ||
| 327 | |||
| 328 | body { | ||
| 329 | font-family: Charter, serif; | ||
| 330 | font-size: 14pt; | ||
| 331 | line-height: 1.4; | ||
| 332 | background-color: #fefefc; | ||
| 333 | color: #202010; | ||
| 334 | } | ||
| 335 | |||
| 336 | pre.menu-comment { | ||
| 337 | font-family: Charter, serif; | ||
| 338 | font-size: 14pt; | ||
| 339 | } | ||
| 340 | |||
| 341 | body > *, body > div.display, body > div.lisp, body > div.smalldisplay, | ||
| 342 | body > div.example, body > div.smallexample, body > div.smalllisp { | ||
| 343 | width: 700px; | ||
| 344 | margin-left: auto; | ||
| 345 | margin-right: auto; | ||
| 346 | } | ||
| 347 | |||
| 348 | div.header { | ||
| 349 | width: 100%; | ||
| 350 | min-height: 3em; | ||
| 351 | font-size: 13pt; | ||
| 352 | } | ||
| 353 | |||
| 354 | /* Documentation block for functions and variables. Make them | ||
| 355 | narrower. */ | ||
| 356 | dd { | ||
| 357 |   margin: .5em 6% 1em 6%; | ||
| 358 | } | ||
| 359 | |||
| 360 | code, pre, kbd, samp, tt { | ||
| 361 | font-size: 12pt; | ||
| 362 | font-family: monospace; | ||
| 363 | } | ||
| 364 | |||
| 365 | /* Each node has an index table of all its sub-nodes. Make more space | ||
| 366 | for the first column, which holds the name of each sub-node. */ | ||
| 367 | table.menu tbody tr td:nth-child(1) { | ||
| 368 | white-space: nowrap; | ||
| 369 | } | ||
| 370 | |||
| 371 | div.header p { | ||
| 372 | text-align: center; | ||
| 373 | margin: 0.5em auto 0.5em auto; | ||
| 374 | } | ||
diff --git a/admin/notes/tree-sitter/starter-guide b/admin/notes/tree-sitter/starter-guide new file mode 100644 index 00000000000..6cf8cf8a236 --- /dev/null +++ b/admin/notes/tree-sitter/starter-guide | |||
| @@ -0,0 +1,442 @@ | |||
| 1 | STARTER GUIDE ON WRITING MAJOR MODE WITH TREE-SITTER -*- org -*- | ||
| 2 | |||
| 3 | This document guides you on adding tree-sitter support to a major | ||
| 4 | mode. | ||
| 5 | |||
| 6 | TOC: | ||
| 7 | |||
| 8 | - Building Emacs with tree-sitter | ||
| 9 | - Install language definitions | ||
| 10 | - Setup | ||
| 11 | - Font-lock | ||
| 12 | - Indent | ||
| 13 | - Imenu | ||
| 14 | - Navigation | ||
| 15 | - Which-func | ||
| 16 | - More features? | ||
| 17 | - Common tasks (code snippets) | ||
| 18 | - Manual | ||
| 19 | |||
| 20 | * Building Emacs with tree-sitter | ||
| 21 | |||
| 22 | You can install tree-sitter either with your package manager or from | ||
| 23 | source: | ||
| 24 | |||
| 25 | git clone https://github.com/tree-sitter/tree-sitter.git | ||
| 26 | cd tree-sitter | ||
| 27 | make | ||
| 28 | make install | ||
| 29 | |||
| 30 | Then pull the tree-sitter branch (or the master branch, if it has | ||
| 31 | merged) and rebuild Emacs. | ||
| 32 | |||
| 33 | * Install language definitions | ||
| 34 | |||
| 35 | Tree-sitter by itself doesn’t know how to parse any particular | ||
| 36 | language. We need to install language definitions (or “grammars”) for | ||
| 37 | a language to be able to parse it. There are a couple of ways to get | ||
| 38 | them. | ||
| 39 | |||
| 40 | You can use this script that I put together here: | ||
| 41 | |||
| 42 | https://github.com/casouri/tree-sitter-module | ||
| 43 | |||
| 44 | You can also find it under this directory in /build-module. | ||
| 45 | |||
| 46 | This script automatically pulls and builds language definitions for C, | ||
| 47 | C++, Rust, JSON, Go, HTML, JavaScript, CSS, Python, TypeScript, | ||
| 48 | and C#. Better yet, I pre-built these language definitions for | ||
| 49 | GNU/Linux and macOS; they can be downloaded here: | ||
| 50 | |||
| 51 | https://github.com/casouri/tree-sitter-module/releases/tag/v2.1 | ||
| 52 | |||
| 53 | To build them yourself, run | ||
| 54 | |||
| 55 | git clone https://github.com/casouri/tree-sitter-module.git | ||
| 56 | cd tree-sitter-module | ||
| 57 | ./batch.sh | ||
| 58 | |||
| 59 | and language definitions will be in the /dist directory. You can | ||
| 60 | either copy them to standard dynamic library locations of your system, | ||
| 61 | eg, /usr/local/lib, or leave them in /dist and later tell Emacs where | ||
| 62 | to find language definitions by setting ‘treesit-extra-load-path’. | ||
| 63 | |||
| 64 | Language definition sources can be found on GitHub under | ||
| 65 | tree-sitter/xxx, like tree-sitter/tree-sitter-python. The tree-sitter | ||
| 66 | organization has all the "official" language definitions: | ||
| 67 | |||
| 68 | https://github.com/tree-sitter | ||
| 69 | |||
| 70 | * Setting up for adding major mode features | ||
| 71 | |||
| 72 | Start Emacs, and load tree-sitter with | ||
| 73 | |||
| 74 | (require 'treesit) | ||
| 75 | |||
| 76 | Now check if Emacs is built with tree-sitter library | ||
| 77 | |||
| 78 | (treesit-available-p) | ||
| 79 | |||
| 80 | For your major mode, first create a tree-sitter switch: | ||
| 81 | |||
| 82 | #+begin_src elisp | ||
| 83 | (defcustom python-use-tree-sitter nil | ||
| 84 | "If non-nil, `python-mode' tries to use tree-sitter. | ||
| 85 | Currently `python-mode' can utilize tree-sitter for font-locking, | ||
| 86 | imenu, and movement functions." | ||
| 87 | :type 'boolean) | ||
| 88 | #+end_src | ||
| 89 | |||
| 90 | Then in other places, we decide on whether to enable tree-sitter by | ||
| 91 | |||
| 92 | #+begin_src elisp | ||
| 93 | (and python-use-tree-sitter | ||
| 94 | (treesit-can-enable-p)) | ||
| 95 | #+end_src | ||
| 96 | |||
| 97 | * Font-lock | ||
| 98 | |||
| 99 | Tree-sitter works like this: You provide a query made of patterns and | ||
| 100 | capture names, tree-sitter finds the nodes that match these patterns, | ||
| 101 | tags the corresponding capture names onto the nodes and returns them to | ||
| 102 | you. The query function returns a list of (capture-name . node). For | ||
| 103 | font-lock, we use face names as capture names, and each captured node | ||
| 104 | is fontified with the face named by its capture name. The capture name could also | ||
| 105 | be a function, in which case (START END NODE) is passed to the | ||
| 106 | function for font-lock. START and END are the start and end of the | ||
| 107 | captured NODE. | ||
| 108 | |||
| 109 | ** Query syntax | ||
| 110 | |||
| 111 | There are two types of nodes, named, like (identifier), | ||
| 112 | (function_definition), and anonymous, like "return", "def", "(", | ||
| 113 | "}". Parent-child relationship is expressed as | ||
| 114 | |||
| 115 | (parent (child) (child) (child (grand_child))) | ||
| 116 | |||
| 117 | Eg, an argument list (1, "3", 1) could be: | ||
| 118 | |||
| 119 | (argument_list "(" (number) (string) (number) ")") | ||
| 120 | |||
| 121 | Children can have field names in their parent: | ||
| 122 | |||
| 123 | (function_definition name: (identifier) type: (identifier)) | ||
| 124 | |||
| 125 | Match any of the list: | ||
| 126 | |||
| 127 | ["true" "false" "none"] | ||
| 128 | |||
| 129 | Capture names can come after any node in the pattern: | ||
| 130 | |||
| 131 | (parent (child) @child) @parent | ||
| 132 | |||
| 133 | The query above captures both parent and child. | ||
| 134 | |||
| 135 | ["return" "continue" "break"] @keyword | ||
| 136 | |||
| 137 | The query above captures all the keywords with capture name | ||
| 138 | "keyword". | ||
| 139 | |||
| 140 | These are the most common constructs; see the complete syntax in the manual | ||
| 141 | ("Parsing Program Source" section). | ||
| 142 | |||
| 143 | ** Query references | ||
| 144 | |||
| 145 | But how does one come up with the queries? Take Python as an | ||
| 146 | example: open any Python source file and evaluate | ||
| 147 | |||
| 148 | (treesit-parser-create 'python) | ||
| 149 | |||
| 150 | so there is a parser available, then enable ‘treesit-inspect-mode’. | ||
| 151 | Now you should see information about the node under point in the | ||
| 152 | mode-line. Move around and you should be able to get a good | ||
| 153 | picture. Besides this, you can consult the grammar of the language | ||
| 154 | definition. For example, Python’s grammar file is at | ||
| 155 | |||
| 156 | https://github.com/tree-sitter/tree-sitter-python/blob/master/grammar.js | ||
| 157 | |||
| 158 | Neovim also has a bunch of queries to reference: | ||
| 159 | |||
| 160 | https://github.com/nvim-treesitter/nvim-treesitter/tree/master/queries | ||
| 161 | |||
| 162 | The manual explains how to read grammar files at the bottom of the section | ||
| 163 | "Tree-sitter Language Definitions". | ||
| 164 | |||
| 165 | ** Debugging queries | ||
| 166 | |||
| 167 | If your query has problems, it usually cannot compile. In that case | ||
| 168 | use ‘treesit-query-validate’ to debug the query. It will pop a buffer | ||
| 169 | containing the query (in text format) and mark the offending part in | ||
| 170 | red. | ||
| 171 | |||
| 172 | ** Code | ||
| 173 | |||
| 174 | To enable tree-sitter font-lock, set ‘treesit-font-lock-settings’ | ||
| 175 | buffer-locally and call ‘treesit-font-lock-enable’. For example, see | ||
| 176 | ‘python--treesit-settings’ in python.el. Below I paste a snippet of | ||
| 177 | it. | ||
| 178 | |||
| 179 | Note that like the current font-lock, if the to-be-fontified region | ||
| 180 | already has a face (ie, an earlier match fontified part/all of the | ||
| 181 | region), the new face is discarded rather than applied. If you want | ||
| 182 | later matches to always override earlier matches, use the :override | ||
| 183 | keyword. | ||
| 184 | |||
| 185 | #+begin_src elisp | ||
| 186 | (defvar python--treesit-settings | ||
| 187 | (treesit-font-lock-rules | ||
| 188 | :language 'python | ||
| 189 | :override t | ||
| 190 | `(;; Queries for def and class. | ||
| 191 | (function_definition | ||
| 192 | name: (identifier) @font-lock-function-name-face) | ||
| 193 | |||
| 194 | (class_definition | ||
| 195 | name: (identifier) @font-lock-type-face) | ||
| 196 | |||
| 197 | ;; Comment and string. | ||
| 198 | (comment) @font-lock-comment-face | ||
| 199 | |||
| 200 | ...))) | ||
| 201 | #+end_src | ||
| 202 | |||
| 203 | Then in ‘python-mode’, enable tree-sitter font-lock: | ||
| 204 | |||
| 205 | #+begin_src elisp | ||
| 206 | (treesit-parser-create 'python) | ||
| 207 | ;; This turns off the syntax-based font-lock for comments and | ||
| 208 | ;; strings. So it doesn’t override tree-sitter’s fontification. | ||
| 209 | (setq-local font-lock-keywords-only t) | ||
| 210 | (setq-local treesit-font-lock-settings | ||
| 211 | python--treesit-settings) | ||
| 212 | (treesit-font-lock-enable) | ||
| 213 | #+end_src | ||
| 214 | |||
| 215 | Concretely, something like this: | ||
| 216 | |||
| 217 | #+begin_src elisp | ||
| 218 | (define-derived-mode python-mode prog-mode "Python" | ||
| 219 |   ... | ||
| 220 | | ||
| 221 |   (if (and python-use-tree-sitter | ||
| 222 |            (treesit-can-enable-p)) | ||
| 223 |       ;; Tree-sitter is requested and usable. | ||
| 224 |       (progn | ||
| 225 |         ;; Create the parser only in this branch, so the mode still | ||
| 226 |         ;; works when tree-sitter is disabled or unavailable. | ||
| 227 |         (treesit-parser-create 'python) | ||
| 228 |         (setq-local font-lock-keywords-only t) | ||
| 229 |         (setq-local treesit-font-lock-settings | ||
| 230 |                     python--treesit-settings) | ||
| 231 |         (treesit-font-lock-enable)) | ||
| 232 |     ;; No tree-sitter: fall back to the regular font-lock setup. | ||
| 233 |     (setq-local font-lock-defaults ...)) | ||
| 234 |   ...) | ||
| 235 | #+end_src | ||
| 236 | |||
| 237 | You’ll notice that tree-sitter’s font-lock doesn’t respect | ||
| 238 | ‘font-lock-maximum-decoration’. Major modes are free to set | ||
| 239 | ‘treesit-font-lock-settings’ based on the value of | ||
| 240 | ‘font-lock-maximum-decoration’, or provide more fine-grained control | ||
| 241 | through other mode-specific means. | ||
| 242 | |||
| 243 | * Indent | ||
| 244 | |||
| 245 | Indent works like this: We have a bunch of rules that look like this: | ||
| 246 | |||
| 247 | (MATCHER ANCHOR OFFSET) | ||
| 248 | |||
| 249 | At the beginning, point is at the BOL of a line, and we want to know which | ||
| 250 | column to indent this line to. Let NODE be the node at point; we pass | ||
| 251 | this node to the MATCHER of each rule, and one of them will match the node | ||
| 252 | ("this node is a closing bracket!"). Then we pass the node to the | ||
| 253 | ANCHOR, which returns a point, eg, the BOL of the previous line. We | ||
| 254 | find the column number of that point (eg, 4), add OFFSET to it (eg, | ||
| 255 | 0), and that is the column we want to indent the current line to (4 + | ||
| 256 | 0 = 4). | ||
| 257 | |||
| 258 | For MATCHER we have | ||
| 259 | |||
| 260 | (parent-is TYPE) | ||
| 261 | (node-is TYPE) | ||
| 262 | (query QUERY) => matches if querying PARENT with QUERY | ||
| 263 | captures NODE. | ||
| 264 | |||
| 265 | (match NODE-TYPE PARENT-TYPE NODE-FIELD | ||
| 266 | NODE-INDEX-MIN NODE-INDEX-MAX) | ||
| 267 | |||
| 268 | => checks everything. If an argument is nil, don’t match that. Eg, | ||
| 269 | (match nil TYPE) is the same as (parent-is TYPE) | ||
| 270 | |||
| 271 | For ANCHOR we have | ||
| 272 | |||
| 273 | first-sibling => start of the first sibling | ||
| 274 | parent => start of parent | ||
| 275 | parent-bol => BOL of the line parent is on. | ||
| 276 | prev-sibling | ||
| 277 | no-indent => don’t indent | ||
| 278 | prev-line => same indent as previous line | ||
| 279 | |||
| 280 | There is also a manual section for indent: "Parser-based Indentation". | ||
| 281 | |||
| 282 | When writing indent rules, you can use ‘treesit-check-indent’ to | ||
| 283 | check if your indentation is correct. To debug what went wrong, set | ||
| 284 | ‘treesit--indent-verbose’ to non-nil. Then when you indent, Emacs | ||
| 285 | tells you which rule is applied in the echo area. | ||
| 286 | |||
| 287 | #+begin_src elisp | ||
| 288 | (defvar typescript-mode-indent-rules | ||
| 289 | (let ((offset typescript-indent-offset)) | ||
| 290 | `((typescript | ||
| 291 | ;; This rule matches if node at point is "}", ANCHOR is the | ||
| 292 | ;; parent node’s BOL, and offset is 0. | ||
| 293 | ((node-is "}") parent-bol 0) | ||
| 294 | ((node-is ")") parent-bol 0) | ||
| 295 | ((node-is "]") parent-bol 0) | ||
| 296 | ((node-is ">") parent-bol 0) | ||
| 297 | ((node-is ".") parent-bol ,offset) | ||
| 298 | ((parent-is "ternary_expression") parent-bol ,offset) | ||
| 299 | ((parent-is "named_imports") parent-bol ,offset) | ||
| 300 | ((parent-is "statement_block") parent-bol ,offset) | ||
| 301 | ((parent-is "type_arguments") parent-bol ,offset) | ||
| 302 | ((parent-is "variable_declarator") parent-bol ,offset) | ||
| 303 | ((parent-is "arguments") parent-bol ,offset) | ||
| 304 | ((parent-is "array") parent-bol ,offset) | ||
| 305 | ((parent-is "formal_parameters") parent-bol ,offset) | ||
| 306 | ((parent-is "template_substitution") parent-bol ,offset) | ||
| 307 | ((parent-is "object_pattern") parent-bol ,offset) | ||
| 308 | ((parent-is "object") parent-bol ,offset) | ||
| 309 | ((parent-is "object_type") parent-bol ,offset) | ||
| 310 | ((parent-is "enum_body") parent-bol ,offset) | ||
| 311 | ((parent-is "arrow_function") parent-bol ,offset) | ||
| 312 | ((parent-is "parenthesized_expression") parent-bol ,offset) | ||
| 313 | ...)))) | ||
| 314 | #+end_src | ||
| 315 | |||
| 316 | Then you set ‘treesit-simple-indent-rules’ to your rules, and set | ||
| 317 | ‘indent-line-function’: | ||
| 318 | |||
| 319 | #+begin_src elisp | ||
| 320 | (setq-local treesit-simple-indent-rules typescript-mode-indent-rules) | ||
| 321 | (setq-local indent-line-function #'treesit-indent) | ||
| 322 | #+end_src | ||
| 323 | |||
| 324 | * Imenu | ||
| 325 | |||
| 326 | Not much to say except for utilizing ‘treesit-induce-sparse-tree’. | ||
| 327 | See ‘python--imenu-treesit-create-index-1’ in python.el for an | ||
| 328 | example. | ||
| 329 | |||
| 330 | Once you have the index builder, set ‘imenu-create-index-function’. | ||
| 331 | |||
| 332 | * Navigation | ||
| 333 | |||
| 334 | Mainly ‘beginning-of-defun-function’ and ‘end-of-defun-function’. | ||
| 335 | You can find the end of a defun with something like | ||
| 336 | |||
| 337 | (treesit-search-forward-goto "function_definition" 'end) | ||
| 338 | |||
| 339 | where "function_definition" matches the node type of a function | ||
| 340 | definition node, and 'end means we want to go to the end of that | ||
| 341 | node. | ||
| 342 | |||
| 343 | Something like this should suffice: | ||
| 344 | |||
| 345 | #+begin_src elisp | ||
| 346 | (defun xxx-beginning-of-defun (&optional arg) | ||
| 347 |   "Move backward to the ARGth beginning of defun; forward if ARG is negative." | ||
| 348 |   (setq arg (or arg 1)) ; ARG is optional, so treat nil as 1. | ||
| 349 |   (if (> arg 0) | ||
| 350 |       (while (and (> arg 0) ; Go backward. | ||
| 351 |                   (treesit-search-forward-goto | ||
| 352 |                    "function_definition" 'start nil t)) | ||
| 353 |         (setq arg (1- arg))) | ||
| 354 |     (while (and (< arg 0) ; Go forward. | ||
| 355 |                 (treesit-search-forward-goto | ||
| 356 |                  "function_definition" 'start)) | ||
| 357 |       (setq arg (1+ arg))))) | ||
| 358 | |||
| 359 | (setq-local beginning-of-defun-function #'xxx-beginning-of-defun) | ||
| 360 | #+end_src | ||
| 361 | |||
| 362 | And the same for end-of-defun. | ||
| 363 | |||
| 364 | * Which-func | ||
| 365 | |||
| 366 | You can find the current function by going up the tree and looking for | ||
| 367 | the function_definition node. See ‘python-info-treesit-current-defun’ | ||
| 368 | in python.el for an example. Since Python allows nested function | ||
| 369 | definitions, that function keeps going until it reaches the root node, | ||
| 370 | and records all the function names along the way. | ||
| 371 | |||
| 372 | #+begin_src elisp | ||
| 373 | (defun python-info-treesit-current-defun (&optional include-type) | ||
| 374 |   "Return the dotted name of the defun around point, using tree-sitter. | ||
| 375 | INCLUDE-TYPE has the same meaning as in `python-info-current-defun'." | ||
| 376 |   (let ((cursor (treesit-node-at (point))) | ||
| 377 |         (kind nil) | ||
| 378 |         (names nil)) | ||
| 379 |     ;; Climb from the node at point up to the root, collecting the | ||
| 380 |     ;; name of every enclosing function or class on the way. | ||
| 381 |     (while cursor | ||
| 382 |       (let ((found (pcase (treesit-node-type cursor) | ||
| 383 |                      ("function_definition" 'def) | ||
| 384 |                      ("class_definition" 'class)))) | ||
| 385 |         (when found | ||
| 386 |           (setq kind found) | ||
| 387 |           (push (treesit-node-text | ||
| 388 |                  (treesit-node-child-by-field-name cursor "name") | ||
| 389 |                  t) | ||
| 390 |                 names))) | ||
| 391 |       (setq cursor (treesit-node-parent cursor))) | ||
| 392 |     ;; Outermost name comes first because each name was pushed in front. | ||
| 393 |     (concat (if include-type (format "%s " kind) "") | ||
| 394 |             (string-join names ".")))) | ||
| 395 | #+end_src | ||
| 396 | |||
| 397 | * More features? | ||
| 398 | |||
| 399 | Obviously this list is just a starting point. If there are features in | ||
| 400 | the major mode that would benefit from a parse tree, adding tree-sitter | ||
| 401 | support for them would be great. But in the minimal case, just adding | ||
| 402 | font-lock is awesome. | ||
| 403 | |||
| 404 | * Common tasks | ||
| 405 | |||
| 406 | How to... | ||
| 407 | |||
| 408 | ** Get the buffer text corresponding to a node? | ||
| 409 | |||
| 410 | (treesit-node-text node) | ||
| 411 | |||
| 412 | Note that ‘treesit-node-string’ is different: it returns the node’s S-expression, not its text. | ||
| 413 | |||
| 414 | ** Scan the whole tree for stuff? | ||
| 415 | |||
| 416 | (treesit-search-subtree) | ||
| 417 | (treesit-search-forward) | ||
| 418 | (treesit-induce-sparse-tree) | ||
| 419 | |||
| 420 | ** Move to next node that...? | ||
| 421 | |||
| 422 | (treesit-search-forward-goto) | ||
| 423 | |||
| 424 | ** Get the root node? | ||
| 425 | |||
| 426 | (treesit-buffer-root-node) | ||
| 427 | |||
| 428 | ** Get the node at point? | ||
| 429 | |||
| 430 | (treesit-node-at (point)) | ||
| 431 | |||
| 432 | * Manual | ||
| 433 | |||
| 434 | I suggest you read the manual section for tree-sitter in Info. The | ||
| 435 | section is Parsing Program Source. Typing | ||
| 436 | |||
| 437 | C-h i d m elisp RET g Parsing Program Source RET | ||
| 438 | |||
| 439 | will bring you to that section. You can also read the HTML version | ||
| 440 | under /html-manual in this directory. I find the HTML version easier | ||
| 441 | to read. You don’t need to read through every sentence; just read the | ||
| 442 | text paragraphs and glance over function names. | ||