diff options
| author | Richard M. Stallman | 1994-03-28 05:41:05 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 1994-03-28 05:41:05 +0000 |
| commit | 7015aca4520d036305770c96f7ffbb8428b79ff0 (patch) | |
| tree | 4aff94bb41ab202b2d14476b755ae19be0f05c59 | |
| parent | 1e0a78a10c4e6289c1c8157b7466dc8fa6ba8ed4 (diff) | |
| download | emacs-7015aca4520d036305770c96f7ffbb8428b79ff0.tar.gz emacs-7015aca4520d036305770c96f7ffbb8428b79ff0.zip | |
Initial revision
| -rw-r--r-- | lispref/abbrevs.texi | 331 | ||||
| -rw-r--r-- | lispref/positions.texi | 896 | ||||
| -rw-r--r-- | lispref/searching.texi | 1254 | ||||
| -rw-r--r-- | lispref/syntax.texi | 707 | ||||
| -rw-r--r-- | lispref/tips.texi | 582 |
5 files changed, 3770 insertions, 0 deletions
diff --git a/lispref/abbrevs.texi b/lispref/abbrevs.texi new file mode 100644 index 00000000000..59e1995a4d5 --- /dev/null +++ b/lispref/abbrevs.texi | |||
| @@ -0,0 +1,331 @@ | |||
| 1 | @c -*-texinfo-*- | ||
| 2 | @c This is part of the GNU Emacs Lisp Reference Manual. | ||
| 3 | @c Copyright (C) 1990, 1991, 1992, 1993, 1994 Free Software Foundation, Inc. | ||
| 4 | @c See the file elisp.texi for copying conditions. | ||
| 5 | @setfilename ../info/abbrevs | ||
| 6 | @node Abbrevs, Processes, Syntax Tables, Top | ||
| 7 | @chapter Abbrevs And Abbrev Expansion | ||
| 8 | @cindex abbrev | ||
| 9 | @cindex abbrev table | ||
| 10 | |||
| 11 | An abbreviation or @dfn{abbrev} is a string of characters that may be | ||
| 12 | expanded to a longer string. The user can insert the abbrev string and | ||
| 13 | find it replaced automatically with the expansion of the abbrev. This | ||
| 14 | saves typing. | ||
| 15 | |||
| 16 | The set of abbrevs currently in effect is recorded in an @dfn{abbrev | ||
| 17 | table}. Each buffer has a local abbrev table, but normally all buffers | ||
| 18 | in the same major mode share one abbrev table. There is also a global | ||
| 19 | abbrev table. Normally both are used. | ||
| 20 | |||
| 21 | An abbrev table is represented as an obarray containing a symbol for | ||
| 22 | each abbreviation. The symbol's name is the abbreviation. Its value is | ||
| 23 | the expansion; its function definition is the hook function to do the | ||
| 24 | expansion (if any); its property list cell contains the use count, the | ||
| 25 | number of times the abbreviation has been expanded. Because these | ||
| 26 | symbols are not interned in the usual obarray, they will never appear as | ||
| 27 | the result of reading a Lisp expression; in fact, normally they are | ||
| 28 | never used except by the code that handles abbrevs. Therefore, it is | ||
| 29 | safe to use them in an extremely nonstandard way. @xref{Creating | ||
| 30 | Symbols}. | ||
| 31 | |||
| 32 | For the user-level commands for abbrevs, see @ref{Abbrevs,, Abbrev | ||
| 33 | Mode, emacs, The GNU Emacs Manual}. | ||
| 34 | |||
| 35 | @menu | ||
| 36 | * Abbrev Mode:: Setting up Emacs for abbreviation. | ||
| 37 | * Tables: Abbrev Tables. Creating and working with abbrev tables. | ||
| 38 | * Defining Abbrevs:: Specifying abbreviations and their expansions. | ||
| 39 | * Files: Abbrev Files. Saving abbrevs in files. | ||
| 40 | * Expansion: Abbrev Expansion. Controlling expansion; expansion subroutines. | ||
| 41 | * Standard Abbrev Tables:: Abbrev tables used by various major modes. | ||
| 42 | @end menu | ||
| 43 | |||
| 44 | @node Abbrev Mode, Abbrev Tables, Abbrevs, Abbrevs | ||
| 45 | @comment node-name, next, previous, up | ||
| 46 | @section Setting Up Abbrev Mode | ||
| 47 | |||
| 48 | Abbrev mode is a minor mode controlled by the value of the variable | ||
| 49 | @code{abbrev-mode}. | ||
| 50 | |||
| 51 | @defvar abbrev-mode | ||
| 52 | A non-@code{nil} value of this variable turns on the automatic expansion | ||
| 53 | of abbrevs when their abbreviations are inserted into a buffer. | ||
| 54 | If the value is @code{nil}, abbrevs may be defined, but they are not | ||
| 55 | expanded automatically. | ||
| 56 | |||
| 57 | This variable automatically becomes local when set in any fashion. | ||
| 58 | @end defvar | ||
| 59 | |||
| 60 | @defvar default-abbrev-mode | ||
| 61 | This is the value of @code{abbrev-mode} for buffers that do not override it. | |
| 62 | This is the same as @code{(default-value 'abbrev-mode)}. | ||
| 63 | @end defvar | ||
| 64 | |||
| 65 | @node Abbrev Tables, Defining Abbrevs, Abbrev Mode, Abbrevs | ||
| 66 | @section Abbrev Tables | ||
| 67 | |||
| 68 | This section describes how to create and manipulate abbrev tables. | ||
| 69 | |||
| 70 | @defun make-abbrev-table | ||
| 71 | This function creates and returns a new, empty abbrev table---an obarray | ||
| 72 | containing no symbols. It is a vector filled with zeros. | ||
| 73 | @end defun | ||
| 74 | |||
| 75 | @defun clear-abbrev-table table | ||
| 76 | This function undefines all the abbrevs in abbrev table @var{table}, | ||
| 77 | leaving it empty. The function returns @code{nil}. | ||
| 78 | @end defun | ||
| 79 | |||
| 80 | @defun define-abbrev-table tabname definitions | ||
| 81 | This function defines @var{tabname} (a symbol) as an abbrev table name, | ||
| 82 | i.e., as a variable whose value is an abbrev table. It defines abbrevs | ||
| 83 | in the table according to @var{definitions}, a list of elements of the | ||
| 84 | form @code{(@var{abbrevname} @var{expansion} @var{hook} | ||
| 85 | @var{usecount})}. The value is always @code{nil}. | ||
| 86 | @end defun | ||
| 87 | |||
| 88 | @defvar abbrev-table-name-list | ||
| 89 | This is a list of symbols whose values are abbrev tables. | ||
| 90 | @code{define-abbrev-table} adds the new abbrev table name to this list. | ||
| 91 | @end defvar | ||
| 92 | |||
| 93 | @defun insert-abbrev-table-description name &optional human | ||
| 94 | This function inserts before point a description of the abbrev table | ||
| 95 | named @var{name}. The argument @var{name} is a symbol whose value is an | ||
| 96 | abbrev table. The value is always @code{nil}. | ||
| 97 | |||
| 98 | If @var{human} is non-@code{nil}, the description is human-oriented. | ||
| 99 | Otherwise the description is a Lisp expression---a call to | ||
| 100 | @code{define-abbrev-table} which would define @var{name} exactly as it | ||
| 101 | is currently defined. | ||
| 102 | @end defun | ||
| 103 | |||
| 104 | @node Defining Abbrevs, Abbrev Files, Abbrev Tables, Abbrevs | ||
| 105 | @comment node-name, next, previous, up | ||
| 106 | @section Defining Abbrevs | ||
| 107 | |||
| 108 | These functions define an abbrev in a specified abbrev table. | ||
| 109 | @code{define-abbrev} is the low-level basic function, while | ||
| 110 | @code{add-abbrev} is used by commands that ask for information from the | ||
| 111 | user. | ||
| 112 | |||
| 113 | @defun add-abbrev table type arg | ||
| 114 | This function adds an abbreviation to abbrev table @var{table}. The | ||
| 115 | argument @var{type} is a string describing in English the kind of abbrev | ||
| 116 | this will be (typically, @code{"global"} or @code{"mode-specific"}); | ||
| 117 | this is used in prompting the user. The argument @var{arg} is the | ||
| 118 | number of words in the expansion. | ||
| 119 | |||
| 120 | The return value is the symbol which internally represents the new | ||
| 121 | abbrev, or @code{nil} if the user declines to confirm redefining an | ||
| 122 | existing abbrev. | ||
| 123 | @end defun | ||
| 124 | |||
| 125 | @defun define-abbrev table name expansion hook | ||
| 126 | This function defines an abbrev in @var{table} named @var{name}, to | ||
| 127 | expand to @var{expansion}, and call @var{hook}. The return value is an | ||
| 128 | uninterned symbol which represents the abbrev inside Emacs; its name is | ||
| 129 | @var{name}. | ||
| 130 | |||
| 131 | The argument @var{name} should be a string. The argument | ||
| 132 | @var{expansion} should be a string, or @code{nil}, to undefine the | ||
| 133 | abbrev. | ||
| 134 | |||
| 135 | The argument @var{hook} is a function or @code{nil}. If @var{hook} is | ||
| 136 | non-@code{nil}, then it is called with no arguments after the abbrev is | ||
| 137 | replaced with @var{expansion}; point is located at the end of | ||
| 138 | @var{expansion}. | ||
| 139 | |||
| 140 | The use count of the abbrev is initialized to zero. | ||
| 141 | @end defun | ||
| 142 | |||
| 143 | @defopt only-global-abbrevs | ||
| 144 | If this variable is non-@code{nil}, it means that the user plans to use | ||
| 145 | global abbrevs only. This tells the commands that define mode-specific | ||
| 146 | abbrevs to define global ones instead. This variable does not alter the | ||
| 147 | functioning of the functions in this section; it is examined by their | ||
| 148 | callers. | ||
| 149 | @end defopt | ||
| 150 | |||
| 151 | @node Abbrev Files, Abbrev Expansion, Defining Abbrevs, Abbrevs | ||
| 152 | @section Saving Abbrevs in Files | ||
| 153 | |||
| 154 | A file of saved abbrev definitions is actually a file of Lisp code. | ||
| 155 | The abbrevs are saved in the form of a Lisp program to define the same | ||
| 156 | abbrev tables with the same contents. Therefore, you can load the file | ||
| 157 | with @code{load} (@pxref{How Programs Do Loading}). However, the | ||
| 158 | function @code{quietly-read-abbrev-file} is provided as a more | ||
| 159 | convenient interface. | ||
| 160 | |||
| 161 | User-level facilities such as @code{save-some-buffers} can save | ||
| 162 | abbrevs in a file automatically, under the control of variables | ||
| 163 | described here. | ||
| 164 | |||
| 165 | @defopt abbrev-file-name | ||
| 166 | This is the default file name for reading and saving abbrevs. | ||
| 167 | @end defopt | ||
| 168 | |||
| 169 | @defun quietly-read-abbrev-file filename | ||
| 170 | This function reads abbrev definitions from a file named @var{filename}, | ||
| 171 | previously written with @code{write-abbrev-file}. If @var{filename} is | ||
| 172 | @code{nil}, the file specified in @code{abbrev-file-name} is used. | ||
| 173 | @code{save-abbrevs} is set to @code{t} so that changes will be saved. | ||
| 174 | |||
| 175 | This function does not display any messages. It returns @code{nil}. | ||
| 176 | @end defun | ||
| 177 | |||
| 178 | @defopt save-abbrevs | ||
| 179 | A non-@code{nil} value for @code{save-abbrevs} means that Emacs should | |
| 180 | save abbrevs when files are saved. @code{abbrev-file-name} specifies | ||
| 181 | the file to save the abbrevs in. | ||
| 182 | @end defopt | ||
| 183 | |||
| 184 | @defvar abbrevs-changed | ||
| 185 | This variable is set non-@code{nil} by defining or altering any | ||
| 186 | abbrevs. This serves as a flag for various Emacs commands to offer to | ||
| 187 | save your abbrevs. | ||
| 188 | @end defvar | ||
| 189 | |||
| 190 | @deffn Command write-abbrev-file filename | ||
| 191 | Save all abbrev definitions, in all abbrev tables, in the file | ||
| 192 | @var{filename}, in the form of a Lisp program which when loaded will | ||
| 193 | define the same abbrevs. This function returns @code{nil}. | ||
| 194 | @end deffn | ||
| 195 | |||
| 196 | @node Abbrev Expansion, Standard Abbrev Tables, Abbrev Files, Abbrevs | ||
| 197 | @comment node-name, next, previous, up | ||
| 198 | @section Looking Up and Expanding Abbreviations | ||
| 199 | |||
| 200 | Abbrevs are usually expanded by commands for interactive use, | ||
| 201 | including @code{self-insert-command}. This section describes the | ||
| 202 | subroutines used in writing such functions, as well as the variables | ||
| 203 | they use for communication. | ||
| 204 | |||
| 205 | @defun abbrev-symbol abbrev &optional table | ||
| 206 | This function returns the symbol representing the abbrev named | ||
| 207 | @var{abbrev}. The value returned is @code{nil} if that abbrev is not | ||
| 208 | defined. The optional second argument @var{table} is the abbrev table | ||
| 209 | to look it up in. If @var{table} is @code{nil}, this function tries | ||
| 210 | first the current buffer's local abbrev table, and second the global | ||
| 211 | abbrev table. | ||
| 212 | @end defun | ||
| 213 | |||
| 214 | @defopt abbrev-all-caps | ||
| 215 | When this is set non-@code{nil}, an abbrev entered entirely in upper | ||
| 216 | case is expanded using all upper case. Otherwise, an abbrev entered | ||
| 217 | entirely in upper case is expanded by capitalizing each word of the | ||
| 218 | expansion. | ||
| 219 | @end defopt | ||
| 220 | |||
| 221 | @defun abbrev-expansion abbrev &optional table | ||
| 222 | This function returns the string that @var{abbrev} would expand into (as | ||
| 223 | defined by the abbrev tables used for the current buffer). The optional | ||
| 224 | argument @var{table} specifies the abbrev table to use; if it is | ||
| 225 | specified, the abbrev is looked up in that table only. | ||
| 226 | @end defun | ||
| 227 | |||
| 228 | @defvar abbrev-start-location | ||
| 229 | This is the buffer position for @code{expand-abbrev} to use as the start | ||
| 230 | of the next abbrev to be expanded. (@code{nil} means use the word | ||
| 231 | before point instead.) @code{abbrev-start-location} is set to | ||
| 232 | @code{nil} each time @code{expand-abbrev} is called. This variable is | ||
| 233 | also set by @code{abbrev-prefix-mark}. | ||
| 234 | @end defvar | ||
| 235 | |||
| 236 | @defvar abbrev-start-location-buffer | ||
| 237 | The value of this variable is the buffer for which | ||
| 238 | @code{abbrev-start-location} has been set. Trying to expand an abbrev | ||
| 239 | in any other buffer clears @code{abbrev-start-location}. This variable | ||
| 240 | is set by @code{abbrev-prefix-mark}. | ||
| 241 | @end defvar | ||
| 242 | |||
| 243 | @defvar last-abbrev | ||
| 244 | This is the @code{abbrev-symbol} of the last abbrev expanded. This | ||
| 245 | information is left by @code{expand-abbrev} for the sake of the | ||
| 246 | @code{unexpand-abbrev} command. | ||
| 247 | @end defvar | ||
| 248 | |||
| 249 | @defvar last-abbrev-location | ||
| 250 | This is the location of the last abbrev expanded. This contains | ||
| 251 | information left by @code{expand-abbrev} for the sake of the | ||
| 252 | @code{unexpand-abbrev} command. | ||
| 253 | @end defvar | ||
| 254 | |||
| 255 | @defvar last-abbrev-text | ||
| 256 | This is the exact expansion text of the last abbrev expanded, after | |
| 257 | case conversion (if any). Its value is | |
| 258 | @code{nil} if the abbrev has already been unexpanded. This | ||
| 259 | contains information left by @code{expand-abbrev} for the sake of the | ||
| 260 | @code{unexpand-abbrev} command. | ||
| 261 | @end defvar | ||
| 262 | |||
| 263 | @c Emacs 19 feature | ||
| 264 | @defvar pre-abbrev-expand-hook | ||
| 265 | This is a normal hook whose functions are executed, in sequence, just | ||
| 266 | before any expansion of an abbrev. @xref{Hooks}. Since it is a normal | ||
| 267 | hook, the hook functions receive no arguments. However, they can find | ||
| 268 | the abbrev to be expanded by looking in the buffer before point. | ||
| 269 | @end defvar | ||
| 270 | |||
| 271 | The following sample code shows a simple use of | ||
| 272 | @code{pre-abbrev-expand-hook}. If the user terminates an abbrev with a | ||
| 273 | punctuation character, the hook function asks for confirmation. Thus, | ||
| 274 | this hook allows the user to decide whether to expand the abbrev, and | ||
| 275 | aborts expansion if it is not confirmed. | ||
| 276 | |||
| 277 | @smallexample | ||
| 278 | (add-hook 'pre-abbrev-expand-hook 'query-if-not-space) | ||
| 279 | |||
| 280 | ;; @r{This is the function invoked by @code{pre-abbrev-expand-hook}.} | ||
| 281 | |||
| 282 | ;; @r{If the user terminated the abbrev with a space, the function does} | ||
| 283 | ;; @r{nothing (that is, it returns so that the abbrev can expand). If the} | ||
| 284 | ;; @r{user entered some other character, this function asks whether} | ||
| 285 | ;; @r{expansion should continue.} | ||
| 286 | |||
| 287 | ;; @r{If the user answers the prompt with @kbd{y}, the function returns} | |
| 288 | ;; @r{@code{nil} (because of the @code{not} function), but that is} | ||
| 289 | ;; @r{acceptable; the return value has no effect on expansion.} | ||
| 290 | |||
| 291 | (defun query-if-not-space () | ||
| 292 | (if (/= ?\ (preceding-char)) | ||
| 293 | (if (not (y-or-n-p "Do you want to expand this abbrev? ")) | ||
| 294 | (error "Not expanding this abbrev")))) | ||
| 295 | @end smallexample | ||
| 296 | |||
| 297 | @node Standard Abbrev Tables, , Abbrev Expansion, Abbrevs | ||
| 298 | @comment node-name, next, previous, up | ||
| 299 | @section Standard Abbrev Tables | ||
| 300 | |||
| 301 | Here we list the variables that hold the abbrev tables for the | ||
| 302 | preloaded major modes of Emacs. | ||
| 303 | |||
| 304 | @defvar global-abbrev-table | ||
| 305 | This is the abbrev table for mode-independent abbrevs. The abbrevs | ||
| 306 | defined in it apply to all buffers. Each buffer may also have a local | ||
| 307 | abbrev table, whose abbrev definitions take precedence over those in the | ||
| 308 | global table. | ||
| 309 | @end defvar | ||
| 310 | |||
| 311 | @defvar local-abbrev-table | ||
| 312 | The value of this buffer-local variable is the (mode-specific) | ||
| 313 | abbreviation table of the current buffer. | ||
| 314 | @end defvar | ||
| 315 | |||
| 316 | @defvar fundamental-mode-abbrev-table | ||
| 317 | This is the local abbrev table used in Fundamental mode. It is the | ||
| 318 | local abbrev table in all buffers in Fundamental mode. | ||
| 319 | @end defvar | ||
| 320 | |||
| 321 | @defvar text-mode-abbrev-table | ||
| 322 | This is the local abbrev table used in Text mode. | ||
| 323 | @end defvar | ||
| 324 | |||
| 325 | @defvar c-mode-abbrev-table | ||
| 326 | This is the local abbrev table used in C mode. | ||
| 327 | @end defvar | ||
| 328 | |||
| 329 | @defvar lisp-mode-abbrev-table | ||
| 330 | This is the local abbrev table used in Lisp mode and Emacs Lisp mode. | ||
| 331 | @end defvar | ||
diff --git a/lispref/positions.texi b/lispref/positions.texi new file mode 100644 index 00000000000..28f52d719bd --- /dev/null +++ b/lispref/positions.texi | |||
| @@ -0,0 +1,896 @@ | |||
| 1 | @c -*-texinfo-*- | ||
| 2 | @c This is part of the GNU Emacs Lisp Reference Manual. | ||
| 3 | @c Copyright (C) 1990, 1991, 1992, 1993, 1994 Free Software Foundation, Inc. | ||
| 4 | @c See the file elisp.texi for copying conditions. | ||
| 5 | @setfilename ../info/positions | ||
| 6 | @node Positions, Markers, Frames, Top | ||
| 7 | @chapter Positions | ||
| 8 | @cindex position (in buffer) | ||
| 9 | |||
| 10 | A @dfn{position} is the index of a character in the text of a buffer. | |
| 11 | More precisely, a position identifies the place between two characters | ||
| 12 | (or before the first character, or after the last character), so we can | ||
| 13 | speak of the character before or after a given position. However, | |
| 14 | we often speak of the character ``at'' a position, meaning the character | ||
| 15 | after that position. | ||
| 16 | |||
| 17 | Positions are usually represented as integers starting from 1, but can | ||
| 18 | also be represented as @dfn{markers}---special objects which relocate | ||
| 19 | automatically when text is inserted or deleted so they stay with the | ||
| 20 | surrounding characters. @xref{Markers}. | ||
| 21 | |||
| 22 | @menu | ||
| 23 | * Point:: The special position where editing takes place. | ||
| 24 | * Motion:: Changing point. | ||
| 25 | * Excursions:: Temporary motion and buffer changes. | ||
| 26 | * Narrowing:: Restricting editing to a portion of the buffer. | ||
| 27 | @end menu | ||
| 28 | |||
| 29 | @node Point | ||
| 30 | @section Point | ||
| 31 | @cindex point | ||
| 32 | |||
| 33 | @dfn{Point} is a special buffer position used by many editing | ||
| 34 | commands, including the self-inserting typed characters and text | ||
| 35 | insertion functions. Other commands move point through the text | ||
| 36 | to allow editing and insertion at different places. | ||
| 37 | |||
| 38 | Like other positions, point designates a place between two characters | ||
| 39 | (or before the first character, or after the last character), rather | ||
| 40 | than a particular character. Many terminals display the cursor over the | ||
| 41 | character that immediately follows point; on such terminals, point is | ||
| 42 | actually before the character on which the cursor sits. | ||
| 43 | |||
| 44 | @cindex point with narrowing | ||
| 45 | The value of point is a number between 1 and the buffer size plus 1. | ||
| 46 | If narrowing is in effect (@pxref{Narrowing}), then point is constrained | ||
| 47 | to fall within the accessible portion of the buffer (possibly at one end | ||
| 48 | of it). | ||
| 49 | |||
| 50 | Each buffer has its own value of point, which is independent of the | ||
| 51 | value of point in other buffers. Each window also has a value of point, | ||
| 52 | which is independent of the value of point in other windows on the same | ||
| 53 | buffer. This is why point can have different values in various windows | ||
| 54 | that display the same buffer. When a buffer appears in only one window, | ||
| 55 | the buffer's point and the window's point normally have the same value, | ||
| 56 | so the distinction is rarely important. @xref{Window Point}, for more | ||
| 57 | details. | ||
| 58 | |||
| 59 | @defun point | ||
| 60 | @cindex current buffer position | ||
| 61 | This function returns the position of point in the current buffer, | ||
| 62 | as an integer. | ||
| 63 | |||
| 64 | @need 700 | ||
| 65 | @example | ||
| 66 | @group | ||
| 67 | (point) | ||
| 68 | @result{} 175 | ||
| 69 | @end group | ||
| 70 | @end example | ||
| 71 | @end defun | ||
| 72 | |||
| 73 | @defun point-min | ||
| 74 | This function returns the minimum accessible value of point in the | ||
| 75 | current buffer. This is 1, unless narrowing is in effect, in | ||
| 76 | which case it is the position of the start of the region that you | ||
| 77 | narrowed to. (@xref{Narrowing}.) | ||
| 78 | @end defun | ||
| 79 | |||
| 80 | @defun point-max | ||
| 81 | This function returns the maximum accessible value of point in the | ||
| 82 | current buffer. This is @code{(1+ (buffer-size))}, unless narrowing is | ||
| 83 | in effect, in which case it is the position of the end of the region | ||
| 84 | that you narrowed to. (@xref{Narrowing}.) | |
| 85 | @end defun | ||
| 86 | |||
| 87 | @defun buffer-end flag | ||
| 88 | This function returns @code{(point-min)} if @var{flag} is less than 1, | ||
| 89 | @code{(point-max)} otherwise. The argument @var{flag} must be a number. | ||
| 90 | @end defun | ||
| 91 | |||
| 92 | @defun buffer-size | ||
| 93 | This function returns the total number of characters in the current | ||
| 94 | buffer. In the absence of any narrowing (@pxref{Narrowing}), | ||
| 95 | @code{point-max} returns a value one larger than this. | ||
| 96 | |||
| 97 | @example | ||
| 98 | @group | ||
| 99 | (buffer-size) | ||
| 100 | @result{} 35 | ||
| 101 | @end group | ||
| 102 | @group | ||
| 103 | (point-max) | ||
| 104 | @result{} 36 | ||
| 105 | @end group | ||
| 106 | @end example | ||
| 107 | @end defun | ||
| 108 | |||
| 109 | @defvar buffer-saved-size | ||
| 110 | The value of this buffer-local variable is the former length of the | ||
| 111 | current buffer, as of the last time it was read in, saved or auto-saved. | ||
| 112 | @end defvar | ||
| 113 | |||
| 114 | @node Motion | ||
| 115 | @section Motion | ||
| 116 | |||
| 117 | Motion functions change the value of point, either relative to the | ||
| 118 | current value of point, relative to the beginning or end of the buffer, | ||
| 119 | or relative to the edges of the selected window. @xref{Point}. | ||
| 120 | |||
| 121 | @menu | ||
| 122 | * Character Motion:: Moving in terms of characters. | ||
| 123 | * Word Motion:: Moving in terms of words. | ||
| 124 | * Buffer End Motion:: Moving to the beginning or end of the buffer. | ||
| 125 | * Text Lines:: Moving in terms of lines of text. | ||
| 126 | * Screen Lines:: Moving in terms of lines as displayed. | ||
| 127 | * Vertical Motion:: Implementation of @code{next-line} and | ||
| 128 | @code{previous-line}. | ||
| 129 | * List Motion:: Moving by parsing lists and sexps. | ||
| 130 | * Skipping Characters:: Skipping characters belonging to a certain set. | ||
| 131 | @end menu | ||
| 132 | |||
| 133 | @node Character Motion | ||
| 134 | @subsection Motion by Characters | ||
| 135 | |||
| 136 | These functions move point based on a count of characters. | ||
| 137 | @code{goto-char} is the fundamental primitive; the other functions use | |
| 138 | that. | ||
| 139 | |||
| 140 | @deffn Command goto-char position | ||
| 141 | This function sets point in the current buffer to the value | ||
| 142 | @var{position}. If @var{position} is less than 1, it moves point to the | ||
| 143 | beginning of the buffer. If @var{position} is greater than the length | ||
| 144 | of the buffer, it moves point to the end. | ||
| 145 | |||
| 146 | If narrowing is in effect, @var{position} still counts from the | ||
| 147 | beginning of the buffer, but point cannot go outside the accessible | ||
| 148 | portion. If @var{position} is out of range, @code{goto-char} moves | ||
| 149 | point to the beginning or the end of the accessible portion. | ||
| 150 | |||
| 151 | When this function is called interactively, @var{position} is the | ||
| 152 | numeric prefix argument, if provided; otherwise it is read from the | ||
| 153 | minibuffer. | ||
| 154 | |||
| 155 | @code{goto-char} returns @var{position}. | ||
| 156 | @end deffn | ||
| 157 | |||
| 158 | @deffn Command forward-char &optional count | ||
| 159 | @c @kindex beginning-of-buffer | ||
| 160 | @c @kindex end-of-buffer | ||
| 161 | This function moves point @var{count} characters forward, towards the | ||
| 162 | end of the buffer (or backward, towards the beginning of the buffer, if | ||
| 163 | @var{count} is negative). If the function attempts to move point past | ||
| 164 | the beginning or end of the buffer (or the limits of the accessible | ||
| 165 | portion, when narrowing is in effect), an error is signaled with error | ||
| 166 | code @code{beginning-of-buffer} or @code{end-of-buffer}. | ||
| 167 | |||
| 168 | In an interactive call, @var{count} is the numeric prefix argument. | ||
| 169 | @end deffn | ||
| 170 | |||
| 171 | @deffn Command backward-char &optional count | ||
| 172 | This function moves point @var{count} characters backward, towards the | ||
| 173 | beginning of the buffer (or forward, towards the end of the buffer, if | ||
| 174 | @var{count} is negative). If the function attempts to move point past | ||
| 175 | the beginning or end of the buffer (or the limits of the accessible | ||
| 176 | portion, when narrowing is in effect), an error is signaled with error | ||
| 177 | code @code{beginning-of-buffer} or @code{end-of-buffer}. | ||
| 178 | |||
| 179 | In an interactive call, @var{count} is the numeric prefix argument. | ||
| 180 | @end deffn | ||
| 181 | |||
| 182 | @node Word Motion | ||
| 183 | @subsection Motion by Words | ||
| 184 | |||
| 185 | These functions for parsing words use the syntax table to decide | ||
| 186 | whether a given character is part of a word. @xref{Syntax Tables}. | ||
| 187 | |||
| 188 | @deffn Command forward-word count | ||
| 189 | This function moves point forward @var{count} words (or backward if | ||
| 190 | @var{count} is negative). Normally it returns @code{t}. If this motion | ||
| 191 | encounters the beginning or end of the buffer, or the limits of the | ||
| 192 | accessible portion when narrowing is in effect, point stops there | ||
| 193 | and the value is @code{nil}. | ||
| 194 | |||
| 195 | In an interactive call, @var{count} is set to the numeric prefix | ||
| 196 | argument. | ||
| 197 | @end deffn | ||
| 198 | |||
| 199 | @deffn Command backward-word count | ||
| 200 | This function is just like @code{forward-word}, except that it moves | |
| 201 | backward until encountering the front of a word, rather than forward. | ||
| 202 | |||
| 203 | In an interactive call, @var{count} is set to the numeric prefix | ||
| 204 | argument. | ||
| 205 | |||
| 206 | This function is rarely used in programs, as it is more efficient to | ||
| 207 | call @code{forward-word} with negative argument. | ||
| 208 | @end deffn | ||
| 209 | |||
| 210 | @defvar words-include-escapes | ||
| 211 | @c Emacs 19 feature | ||
| 212 | This variable affects the behavior of @code{forward-word} and everything | ||
| 213 | that uses it. If it is non-@code{nil}, then characters in the | ||
| 214 | ``escape'' and ``character quote'' syntax classes count as part of | ||
| 215 | words. Otherwise, they do not. | ||
| 216 | @end defvar | ||
| 217 | |||
| 218 | @node Buffer End Motion | ||
| 219 | @subsection Motion to an End of the Buffer | ||
| 220 | |||
| 221 | To move point to the beginning of the buffer, write: | ||
| 222 | |||
| 223 | @example | ||
| 224 | @group | ||
| 225 | (goto-char (point-min)) | ||
| 226 | @end group | ||
| 227 | @end example | ||
| 228 | |||
| 229 | @noindent | ||
| 230 | Likewise, to move to the end of the buffer, use: | ||
| 231 | |||
| 232 | @example | ||
| 233 | @group | ||
| 234 | (goto-char (point-max)) | ||
| 235 | @end group | ||
| 236 | @end example | ||
| 237 | |||
| 238 | Here are two commands which users use to do these things. They are | ||
| 239 | documented here to warn you not to use them in Lisp programs, because | ||
| 240 | they set the mark and display messages in the echo area. | ||
| 241 | |||
| 242 | @deffn Command beginning-of-buffer &optional n | ||
| 243 | This function moves point to the beginning of the buffer (or the limits | ||
| 244 | of the accessible portion, when narrowing is in effect), setting the | ||
| 245 | mark at the previous position. If @var{n} is non-@code{nil}, then it | ||
| 246 | puts point @var{n} tenths of the way from the beginning of the buffer. | ||
| 247 | |||
| 248 | In an interactive call, @var{n} is the numeric prefix argument, | ||
| 249 | if provided; otherwise @var{n} defaults to @code{nil}. | ||
| 250 | |||
| 251 | Don't use this function in Lisp programs! | ||
| 252 | @end deffn | ||
| 253 | |||
| 254 | @deffn Command end-of-buffer &optional n | ||
| 255 | This function moves point to the end of the buffer (or the limits of | ||
| 256 | the accessible portion, when narrowing is in effect), setting the mark | ||
| 257 | at the previous position. If @var{n} is non-@code{nil}, then it puts | ||
| 258 | point @var{n} tenths of the way from the end. | ||
| 259 | |||
| 260 | In an interactive call, @var{n} is the numeric prefix argument, | ||
| 261 | if provided; otherwise @var{n} defaults to @code{nil}. | ||
| 262 | |||
| 263 | Don't use this function in Lisp programs! | ||
| 264 | @end deffn | ||
| 265 | |||
| 266 | @node Text Lines | ||
| 267 | @subsection Motion by Text Lines | ||
| 268 | @cindex lines | ||
| 269 | |||
| 270 | Text lines are portions of the buffer delimited by newline characters, | ||
| 271 | which are regarded as part of the previous line. The first text line | ||
| 272 | begins at the beginning of the buffer, and the last text line ends at | ||
| 273 | the end of the buffer whether or not the last character is a newline. | ||
| 274 | The division of the buffer into text lines is not affected by the width | ||
| 275 | of the window, by line continuation in display, or by how tabs and | ||
| 276 | control characters are displayed. | ||
| 277 | |||
| 278 | @deffn Command goto-line line | ||
| 279 | This function moves point to the front of the @var{line}th line, | ||
| 280 | counting from line 1 at beginning of buffer. If @var{line} is less than | ||
| 281 | 1, it moves point to the beginning of the buffer. If @var{line} is | ||
| 282 | greater than the number of lines in the buffer, it moves point to the | ||
| 283 | @emph{end of the last line} of the buffer. | ||
| 284 | |||
| 285 | If narrowing is in effect, then @var{line} still counts from the | ||
| 286 | beginning of the buffer, but point cannot go outside the accessible | ||
| 287 | portion. So @code{goto-line} moves point to the beginning or end of the | ||
| 288 | accessible portion, if the line number specifies an inaccessible | ||
| 289 | position. | ||
| 290 | |||
| 291 | The return value of @code{goto-line} is the difference between | ||
| 292 | @var{line} and the line number of the line to which point actually was | ||
| 293 | able to move (in the full buffer, disregarding any narrowing). Thus, the | ||
| 294 | value is positive if the scan encounters the real end of the buffer. | ||
| 295 | |||
| 296 | In an interactive call, @var{line} is the numeric prefix argument if | ||
| 297 | one has been provided. Otherwise @var{line} is read in the minibuffer. | ||
| 298 | @end deffn | ||
| 299 | |||
| 300 | @deffn Command beginning-of-line &optional count | ||
| 301 | This function moves point to the beginning of the current line. With an | ||
| 302 | argument @var{count} not @code{nil} or 1, it moves forward | ||
| 303 | @var{count}@minus{}1 lines and then to the beginning of the line. | ||
| 304 | |||
| 305 | If this function reaches the end of the buffer (or of the accessible | ||
| 306 | portion, if narrowing is in effect), it positions point at the end of | ||
| 307 | the buffer. No error is signaled. | ||
| 308 | @end deffn | ||
| 309 | |||
| 310 | @deffn Command end-of-line &optional count | ||
| 311 | This function moves point to the end of the current line. With an | ||
| 312 | argument @var{count} not @code{nil} or 1, it moves forward | ||
| 313 | @var{count}@minus{}1 lines and then to the end of the line. | ||
| 314 | |||
| 315 | If this function reaches the end of the buffer (or of the accessible | ||
| 316 | portion, if narrowing is in effect), it positions point at the end of | ||
| 317 | the buffer. No error is signaled. | ||
| 318 | @end deffn | ||
| 319 | |||
| 320 | @deffn Command forward-line &optional count | ||
| 321 | @cindex beginning of line | ||
| 322 | This function moves point forward @var{count} lines, to the beginning of | ||
| 323 | the line. If @var{count} is negative, it moves point | ||
| 324 | @minus{}@var{count} lines backward, to the beginning of the line. | ||
| 325 | |||
| 326 | If @code{forward-line} encounters the beginning or end of the buffer (or | ||
| 327 | of the accessible portion) before finding that many lines, it sets point | ||
| 328 | there. No error is signaled. | ||
| 329 | |||
| 330 | @code{forward-line} returns the difference between @var{count} and the | ||
| 331 | number of lines actually moved. If you attempt to move down five lines | ||
| 332 | from the beginning of a buffer that has only three lines, point stops at | ||
| 333 | the end of the last line, and the value will be 2. | ||
| 334 | |||
| 335 | In an interactive call, @var{count} is the numeric prefix argument. | ||
| 336 | @end deffn | ||
| 337 | |||
| 338 | @defun count-lines start end | ||
| 339 | @cindex lines in region | ||
| 340 | This function returns the number of lines between the positions | ||
| 341 | @var{start} and @var{end} in the current buffer. If @var{start} and | ||
| 342 | @var{end} are equal, then it returns 0. Otherwise it returns at least | ||
| 343 | 1, even if @var{start} and @var{end} are on the same line. This is | ||
| 344 | because the text between them, considered in isolation, must contain at | ||
| 345 | least one line unless it is empty. | ||
| 346 | |||
| 347 | Here is an example of using @code{count-lines}: | ||
| 348 | |||
| 349 | @example | ||
| 350 | @group | ||
| 351 | (defun current-line () | ||
| 352 | "Return the vertical position of point@dots{}" | ||
| 353 | (+ (count-lines (window-start) (point)) | ||
| 354 | (if (= (current-column) 0) 1 0) | ||
| 355 | -1)) | ||
| 356 | @end group | ||
| 357 | @end example | ||
| 358 | @end defun | ||
| 359 | |||
| 360 | @ignore | ||
| 361 | @c ================ | ||
| 362 | The @code{previous-line} and @code{next-line} commands are functions | ||
| 363 | that should not be used in programs. They are for users and are | ||
| 364 | mentioned here only for completeness. | ||
| 365 | |||
| 366 | @deffn Command previous-line count | ||
| 367 | @cindex goal column | ||
| 368 | This function moves point up @var{count} lines (down if @var{count} | ||
| 369 | is negative). In moving, it attempts to keep point in the ``goal column'' | ||
| 370 | (normally the same column that it was at the beginning of the move). | ||
| 371 | |||
| 372 | If there is no character in the target line exactly under the current | ||
| 373 | column, point is positioned after the character in that line which | ||
| 374 | spans this column, or at the end of the line if it is not long enough. | ||
| 375 | |||
| 376 | If it attempts to move beyond the top or bottom of the buffer (or clipped | ||
| 377 | region), then point is positioned in the goal column in the top or | ||
| 378 | bottom line. No error is signaled. | ||
| 379 | |||
| 380 | In an interactive call, @var{count} will be the numeric | ||
| 381 | prefix argument. | ||
| 382 | |||
| 383 | The command @code{set-goal-column} can be used to create a semipermanent | ||
| 384 | goal column to which this command always moves. Then it does not try to | ||
| 385 | move vertically. | ||
| 386 | |||
| 387 | If you are thinking of using this in a Lisp program, consider using | ||
| 388 | @code{forward-line} with a negative argument instead. It is usually easier | ||
| 389 | to use and more reliable (no dependence on goal column, etc.). | ||
| 390 | @end deffn | ||
| 391 | |||
| 392 | @deffn Command next-line count | ||
| 393 | This function moves point down @var{count} lines (up if @var{count} | ||
| 394 | is negative). In moving, it attempts to keep point in the ``goal column'' | ||
| 395 | (normally the same column that it was at the beginning of the move). | ||
| 396 | |||
| 397 | If there is no character in the target line exactly under the current | ||
| 398 | column, point is positioned after the character in that line which | ||
| 399 | spans this column, or at the end of the line if it is not long enough. | ||
| 400 | |||
| 401 | If it attempts to move beyond the top or bottom of the buffer (or clipped | ||
| 402 | region), then point is positioned in the goal column in the top or | ||
| 403 | bottom line. No error is signaled. | ||
| 404 | |||
| 405 | In the case where the @var{count} is 1, and point is on the last | ||
| 406 | line of the buffer (or clipped region), a new empty line is inserted at the | ||
| 407 | end of the buffer (or clipped region) and point moved there. | ||
| 408 | |||
| 409 | In an interactive call, @var{count} will be the numeric | ||
| 410 | prefix argument. | ||
| 411 | |||
| 412 | The command @code{set-goal-column} can be used to create a semipermanent | ||
| 413 | goal column to which this command always moves. Then it does not try to | ||
| 414 | move vertically. | ||
| 415 | |||
| 416 | If you are thinking of using this in a Lisp program, consider using | ||
| 417 | @code{forward-line} instead. It is usually easier | ||
| 418 | to use and more reliable (no dependence on goal column, etc.). | ||
| 419 | @end deffn | ||
| 420 | |||
| 421 | @c ================ | ||
| 422 | @end ignore | ||
| 423 | |||
| 424 | Also see the functions @code{bolp} and @code{eolp} in @ref{Near Point}. | ||
| 425 | These functions do not move point, but test whether it is already at the | ||
| 426 | beginning or end of a line. | ||
| 427 | |||
| 428 | @node Screen Lines | ||
| 429 | @subsection Motion by Screen Lines | ||
| 430 | |||
| 431 | The line functions in the previous section count text lines, delimited | ||
| 432 | only by newline characters. By contrast, these functions count screen | ||
| 433 | lines, which are defined by the way the text appears on the screen. A | ||
| 434 | text line is a single screen line if it is short enough to fit the width | ||
| 435 | of the selected window, but otherwise it may occupy several screen | ||
| 436 | lines. | ||
| 437 | |||
| 438 | In some cases, text lines are truncated on the screen rather than | ||
| 439 | continued onto additional screen lines. In these cases, | ||
| 440 | @code{vertical-motion} moves point much like @code{forward-line}. | ||
| 441 | @xref{Truncation}. | ||
| 442 | |||
| 443 | Because the width of a given string depends on the flags which control | ||
| 444 | the appearance of certain characters, @code{vertical-motion} behaves | ||
| 445 | differently, for a given piece of text, depending on the buffer it is | ||
| 446 | in, and even on the selected window (because the width, the truncation | ||
| 447 | flag, and display table may vary between windows). @xref{Usual | ||
| 448 | Display}. | ||
| 449 | |||
| 450 | @defun vertical-motion count | ||
| 451 | This function moves point to the start of the screen line @var{count} | ||
| 452 | screen lines down from the screen line containing point. If @var{count} | ||
| 453 | is negative, it moves up instead. | ||
| 454 | |||
| 455 | This function returns the number of lines moved. The value may be less | ||
| 456 | in absolute value than @var{count} if the beginning or end of the buffer | ||
| 457 | was reached. | ||
| 458 | @end defun | ||
| 459 | |||
| 460 | @deffn Command move-to-window-line count | ||
| 461 | This function moves point with respect to the text currently displayed | ||
| 462 | in the selected window. It moves point to the beginning of the screen | ||
| 463 | line @var{count} screen lines from the top of the window. If | ||
| 464 | @var{count} is negative, that specifies a position | ||
| 465 | @w{@minus{}@var{count}} lines from the bottom---or else the last line of | ||
| 466 | the buffer, if the buffer ends above the specified screen position. | ||
| 467 | |||
| 468 | If @var{count} is @code{nil}, then point moves to the beginning of the | ||
| 469 | line in the middle of the window. If the absolute value of @var{count} | ||
| 470 | is greater than the size of the window, then point moves to the place | ||
| 471 | which would appear on that screen line if the window were tall enough. | ||
| 472 | This will probably cause the next redisplay to scroll to bring that | ||
| 473 | location onto the screen. | ||
| 474 | |||
| 475 | In an interactive call, @var{count} is the numeric prefix argument. | ||
| 476 | |||
| 477 | The value returned is the window line number, with the top line in the | ||
| 478 | window numbered 0. | ||
| 479 | @end deffn | ||
| 480 | |||
| 481 | @defun compute-motion from frompos to topos width offsets | ||
| 482 | This function scans through the current buffer, calculating screen | ||
| 483 | position. It scans the current buffer forward from position @var{from}, | ||
| 484 | assuming that is at screen coordinates @var{frompos}, to position | ||
| 485 | @var{to} or coordinates @var{topos}, whichever comes first. It returns | ||
| 486 | the ending buffer position and screen coordinates. | ||
| 487 | |||
| 488 | The coordinate arguments @var{frompos} and @var{topos} are cons cells of | ||
| 489 | the form @code{(@var{hpos} . @var{vpos})}. | ||
| 490 | |||
| 491 | The argument @var{width} is the number of columns available to display | ||
| 492 | text; this affects handling of continuation lines. Use the value | ||
| 493 | returned by @code{window-width} for the window of your choice. | ||
| 494 | |||
| 495 | The argument @var{offsets} is either @code{nil} or a cons cell of the | ||
| 496 | form @code{(@var{hscroll} . @var{tab-offset})}. Here @var{hscroll} is | ||
| 497 | the number of columns not being displayed at the left margin; in most | ||
| 498 | calls, this comes from @code{window-hscroll}. Meanwhile, | ||
| 499 | @var{tab-offset} is the number of columns of an initial tab character | ||
| 500 | (at @var{from}) that aren't included in the display, perhaps because the | ||
| 501 | line was continued within that character. | ||
| 502 | |||
| 503 | The return value is a list of five elements: | ||
| 504 | |||
| 505 | @example | ||
| 506 | (@var{pos} @var{vpos} @var{hpos} @var{prevhpos} @var{contin}) | ||
| 507 | @end example | ||
| 508 | |||
| 509 | @noindent | ||
| 510 | Here @var{pos} is the buffer position where the scan stopped, @var{vpos} | ||
| 511 | is the vertical position, and @var{hpos} is the horizontal position. | ||
| 512 | |||
| 513 | The result @var{prevhpos} is the horizontal position one character back | ||
| 514 | from @var{pos}. The result @var{contin} is @code{t} if a line was | ||
| 515 | continued after (or within) the previous character. | ||
| 516 | |||
| 517 | For example, to find the buffer position of column @var{col} of line | ||
| 518 | @var{line} of a certain window, pass the window's display start location | ||
| 519 | as @var{from} and the window's upper-left coordinates as @var{frompos}. | ||
| 520 | Pass the buffer's @code{(point-max)} as @var{to}, to limit the scan to | ||
| 521 | the end of the visible section of the buffer, and pass @var{line} and | ||
| 522 | @var{col} as @var{topos}. Here's a function that does this: | ||
| 523 | |||
| 524 | @example | ||
| 525 | (defun coordinates-of-position (col line) | ||
| 526 | (car (compute-motion (window-start) | ||
| 527 | '(0 . 0) | ||
| 528 | (point-max) | ||
| 529 | (cons col line) | ||
| 530 | (window-width) | ||
| 531 | (cons (window-hscroll) 0)))) | ||
| 532 | @end example | ||
| 533 | @end defun | ||
| 534 | |||
| 535 | @node Vertical Motion | ||
| 536 | @comment node-name, next, previous, up | ||
| 537 | @subsection The User-Level Vertical Motion Commands | ||
| 538 | @cindex goal column | ||
| 539 | @cindex vertical text line motion | ||
| 540 | @findex next-line | ||
| 541 | @findex previous-line | ||
| 542 | |||
| 543 | A goal column is useful if you want to edit text such as a table in | ||
| 544 | which you want to move point to a certain column on each line. The goal | ||
| 545 | column affects the vertical text line motion commands, @code{next-line} | ||
| 546 | and @code{previous-line}. @xref{Basic,, Basic Editing Commands, emacs, | ||
| 547 | The GNU Emacs Manual}. | ||
| 548 | |||
| 549 | @defopt goal-column | ||
| 550 | This variable holds an explicitly specified goal column for vertical | ||
| 551 | line motion commands. If it is an integer, it specifies a column, and | ||
| 552 | these commands try to move to that column on each line. If it is | ||
| 553 | @code{nil}, then the commands set their own goal columns. Any other | ||
| 554 | value is invalid. | ||
| 555 | @end defopt | ||
| 556 | |||
| 557 | @defvar temporary-goal-column | ||
| 558 | This variable holds the temporary goal column during a sequence of | ||
| 559 | consecutive vertical line motion commands. It is overridden by | ||
| 560 | @code{goal-column} if that is non-@code{nil}. It is set each time a | ||
| 561 | vertical motion command is invoked, unless the previous command was also | ||
| 562 | a vertical motion command. | ||
| 563 | @end defvar | ||
| 564 | |||
| 565 | @defopt track-eol | ||
| 566 | This variable controls how the vertical line motion commands operate | ||
| 567 | when starting at the end of a line. If @code{track-eol} is | ||
| 568 | non-@code{nil}, then vertical motion starting at the end of a line will | ||
| 569 | keep to the ends of lines. This means moving to the end of each line | ||
| 570 | moved onto. The value of @code{track-eol} has no effect if point is not | ||
| 571 | at the end of a line when the first vertical motion command is given. | ||
| 572 | |||
| 573 | @code{track-eol} has its effect by telling line motion commands to set | ||
| 574 | @code{temporary-goal-column} to 9999 instead of to the current column. | ||
| 575 | @end defopt | ||
| 576 | |||
| 577 | @node List Motion | ||
| 578 | @comment node-name, next, previous, up | ||
| 579 | @subsection Moving over Balanced Expressions | ||
| 580 | @cindex sexp motion | ||
| 581 | @cindex Lisp expression motion | ||
| 582 | @cindex list motion | ||
| 583 | |||
| 584 | Here are several functions concerned with balanced-parenthesis | ||
| 585 | expressions (also called @dfn{sexps} in connection with moving across | ||
| 586 | them in Emacs). The syntax table controls how these functions interpret | ||
| 587 | various characters; see @ref{Syntax Tables}. @xref{Parsing | ||
| 588 | Expressions}, for lower-level primitives for scanning sexps or parts of | ||
| 589 | sexps. For user-level commands, see @ref{Lists and Sexps,,, emacs, GNU | ||
| 590 | Emacs Manual}. | ||
| 591 | |||
| 592 | @deffn Command forward-list arg | ||
| 593 | Move forward across @var{arg} balanced groups of parentheses. | ||
| 594 | (Other syntactic entities such as words or paired string quotes | ||
| 595 | are ignored.) | ||
| 596 | @end deffn | ||
| 597 | |||
| 598 | @deffn Command backward-list arg | ||
| 599 | Move backward across @var{arg} balanced groups of parentheses. | ||
| 600 | (Other syntactic entities such as words or paired string quotes | ||
| 601 | are ignored.) | ||
| 602 | @end deffn | ||
| 603 | |||
| 604 | @deffn Command up-list arg | ||
| 605 | Move forward out of @var{arg} levels of parentheses. | ||
| 606 | A negative argument means move backward but still to a less deep spot. | ||
| 607 | @end deffn | ||
| 608 | |||
| 609 | @deffn Command down-list arg | ||
| 610 | Move forward down @var{arg} levels of parentheses. A negative argument | ||
| 611 | means move backward but still go down @var{arg} levels. | ||
| 612 | @end deffn | ||
| 613 | |||
| 614 | @deffn Command forward-sexp arg | ||
| 615 | Move forward across @var{arg} balanced expressions. | ||
| 616 | Balanced expressions include both those delimited by parentheses | ||
| 617 | and other kinds, such as words and string constants. For example, | ||
| 618 | |||
| 619 | @example | ||
| 620 | @group | ||
| 621 | ---------- Buffer: foo ---------- | ||
| 622 | (concat@point{} "foo " (car x) y z) | ||
| 623 | ---------- Buffer: foo ---------- | ||
| 624 | @end group | ||
| 625 | |||
| 626 | @group | ||
| 627 | (forward-sexp 3) | ||
| 628 | @result{} nil | ||
| 629 | |||
| 630 | ---------- Buffer: foo ---------- | ||
| 631 | (concat "foo " (car x) y@point{} z) | ||
| 632 | ---------- Buffer: foo ---------- | ||
| 633 | @end group | ||
| 634 | @end example | ||
| 635 | @end deffn | ||
| 636 | |||
| 637 | @deffn Command backward-sexp arg | ||
| 638 | Move backward across @var{arg} balanced expressions. | ||
| 639 | @end deffn | ||
| 640 | |||
| 641 | @node Skipping Characters | ||
| 642 | @comment node-name, next, previous, up | ||
| 643 | @subsection Skipping Characters | ||
| 644 | @cindex skipping characters | ||
| 645 | |||
| 646 | The following two functions move point over a specified set of | ||
| 647 | characters. For example, they are often used to skip whitespace. For | ||
| 648 | related functions, see @ref{Motion and Syntax}. | ||
| 649 | |||
| 650 | @defun skip-chars-forward character-set &optional limit | ||
| 651 | This function moves point in the current buffer forward, skipping over a | ||
| 652 | given set of characters. It examines the character following point, | ||
| 653 | then advances point if the character matches @var{character-set}. This | ||
| 654 | continues until it reaches a character that does not match. The | ||
| 655 | function returns @code{nil}. | ||
| 656 | |||
| 657 | The argument @var{character-set} is like the inside of a | ||
| 658 | @samp{[@dots{}]} in a regular expression except that @samp{]} is never | ||
| 659 | special and @samp{\} quotes @samp{^}, @samp{-} or @samp{\}. Thus, | ||
| 660 | @code{"a-zA-Z"} skips over all letters, stopping before the first | ||
| 661 | nonletter, and @code{"^a-zA-Z"} skips nonletters stopping before the | ||
| 662 | first letter. @xref{Regular Expressions}. | ||
| 663 | |||
| 664 | If @var{limit} is supplied (it must be a number or a marker), it | ||
| 665 | specifies the maximum position in the buffer that point can be skipped | ||
| 666 | to. Point will stop at or before @var{limit}. | ||
| 667 | |||
| 668 | In the following example, point is initially located directly before the | ||
| 669 | @samp{T}. After the form is evaluated, point is located at the end of | ||
| 670 | that line (between the @samp{t} of @samp{hat} and the newline). The | ||
| 671 | function skips all letters and spaces, but not newlines. | ||
| 672 | |||
| 673 | @example | ||
| 674 | @group | ||
| 675 | ---------- Buffer: foo ---------- | ||
| 676 | I read "@point{}The cat in the hat | ||
| 677 | comes back" twice. | ||
| 678 | ---------- Buffer: foo ---------- | ||
| 679 | @end group | ||
| 680 | |||
| 681 | @group | ||
| 682 | (skip-chars-forward "a-zA-Z ") | ||
| 683 | @result{} nil | ||
| 684 | |||
| 685 | ---------- Buffer: foo ---------- | ||
| 686 | I read "The cat in the hat@point{} | ||
| 687 | comes back" twice. | ||
| 688 | ---------- Buffer: foo ---------- | ||
| 689 | @end group | ||
| 690 | @end example | ||
| 691 | @end defun | ||
| 692 | |||
| 693 | @defun skip-chars-backward character-set &optional limit | ||
| 694 | This function moves point backward, skipping characters that match | ||
| 695 | @var{character-set}, until @var{limit}. It is just like | ||
| 696 | @code{skip-chars-forward} except for the direction of motion. | ||
| 697 | @end defun | ||
| 698 | |||
| 699 | @node Excursions | ||
| 700 | @section Excursions | ||
| 701 | @cindex excursion | ||
| 702 | |||
| 703 | It is often useful to move point ``temporarily'' within a localized | ||
| 704 | portion of the program, or to switch buffers temporarily. This is | ||
| 705 | called an @dfn{excursion}, and it is done with the @code{save-excursion} | ||
| 706 | special form. This construct saves the current buffer and its values of | ||
| 707 | point and the mark so they can be restored after the completion of the | ||
| 708 | excursion. | ||
| 709 | |||
| 710 | The forms for saving and restoring the configuration of windows are | ||
| 711 | described elsewhere (see @ref{Window Configurations}, and @pxref{Frame | ||
| 712 | Configurations}). | ||
| 713 | |||
| 714 | @defspec save-excursion forms@dots{} | ||
| 715 | @cindex mark excursion | ||
| 716 | @cindex point excursion | ||
| 717 | @cindex current buffer excursion | ||
| 718 | The @code{save-excursion} special form saves the identity of the current | ||
| 719 | buffer and the values of point and the mark in it, evaluates @var{forms}, | ||
| 720 | and finally restores the buffer and its saved values of point and the mark. | ||
| 721 | All three saved values are restored even in case of an abnormal exit | ||
| 722 | via throw or error (@pxref{Nonlocal Exits}). | ||
| 723 | |||
| 724 | The @code{save-excursion} special form is the standard way to switch | ||
| 725 | buffers or move point within one part of a program and avoid affecting | ||
| 726 | the rest of the program. It is used more than 500 times in the Lisp | ||
| 727 | sources of Emacs. | ||
| 728 | |||
| 729 | @code{save-excursion} does not save the values of point and the mark for | ||
| 730 | other buffers, so changes in other buffers remain in effect after | ||
| 731 | @code{save-excursion} exits. | ||
| 732 | |||
| 733 | @cindex window excursions | ||
| 734 | Likewise, @code{save-excursion} does not restore window-buffer | ||
| 735 | correspondences altered by functions such as @code{switch-to-buffer}. | ||
| 736 | One way to restore these correspondences, and the selected window, is to | ||
| 737 | use @code{save-window-excursion} inside @code{save-excursion} | ||
| 738 | (@pxref{Window Configurations}). | ||
| 739 | |||
| 740 | The value returned by @code{save-excursion} is the result of the last of | ||
| 741 | @var{forms}, or @code{nil} if no @var{forms} are given. | ||
| 742 | |||
| 743 | @example | ||
| 744 | @group | ||
| 745 | (save-excursion | ||
| 746 | @var{forms}) | ||
| 747 | @equiv{} | ||
| 748 | (let ((old-buf (current-buffer)) | ||
| 749 | (old-pnt (point-marker)) | ||
| 750 | (old-mark (copy-marker (mark-marker)))) | ||
| 751 | (unwind-protect | ||
| 752 | (progn @var{forms}) | ||
| 753 | (set-buffer old-buf) | ||
| 754 | (goto-char old-pnt) | ||
| 755 | (set-marker (mark-marker) old-mark))) | ||
| 756 | @end group | ||
| 757 | @end example | ||
| 758 | @end defspec | ||
| 759 | |||
| 760 | @node Narrowing | ||
| 761 | @section Narrowing | ||
| 762 | @cindex narrowing | ||
| 763 | @cindex restriction (in a buffer) | ||
| 764 | @cindex accessible portion (of a buffer) | ||
| 765 | |||
| 766 | @dfn{Narrowing} means limiting the text addressable by Emacs editing | ||
| 767 | commands to a limited range of characters in a buffer. The text that | ||
| 768 | remains addressable is called the @dfn{accessible portion} of the | ||
| 769 | buffer. | ||
| 770 | |||
| 771 | Narrowing is specified with two buffer positions which become the | ||
| 772 | beginning and end of the accessible portion. For most editing commands | ||
| 773 | and most Emacs primitives, these positions replace the values of the | ||
| 774 | beginning and end of the buffer. While narrowing is in effect, no text | ||
| 775 | outside the accessible portion is displayed, and point cannot move | ||
| 776 | outside the accessible portion. | ||
| 777 | |||
| 778 | Values such as positions or line numbers, that usually count from the | ||
| 779 | beginning of the buffer, do so despite narrowing, but the functions | ||
| 780 | which use them refuse to operate on text that is inaccessible. | ||
| 781 | |||
| 782 | The commands for saving buffers are unaffected by narrowing; they save | ||
| 783 | the entire buffer regardless of any narrowing. | ||
| 784 | |||
| 785 | @deffn Command narrow-to-region start end | ||
| 786 | This function sets the accessible portion of the current buffer to start | ||
| 787 | at @var{start} and end at @var{end}. Both arguments should be character | ||
| 788 | positions. | ||
| 789 | |||
| 790 | In an interactive call, @var{start} and @var{end} are set to the bounds | ||
| 791 | of the current region (point and the mark, with the smallest first). | ||
| 792 | @end deffn | ||
| 793 | |||
| 794 | @deffn Command narrow-to-page &optional move-count | ||
| 795 | This function sets the accessible portion of the current buffer to | ||
| 796 | include just the current page. An optional first argument | ||
| 797 | @var{move-count} non-@code{nil} means to move forward or backward by | ||
| 798 | @var{move-count} pages and then narrow. The variable | ||
| 799 | @code{page-delimiter} specifies where pages start and end | ||
| 800 | (@pxref{Standard Regexps}). | ||
| 801 | |||
| 802 | In an interactive call, @var{move-count} is set to the numeric prefix | ||
| 803 | argument. | ||
| 804 | @end deffn | ||
| 805 | |||
| 806 | @deffn Command widen | ||
| 807 | @cindex widening | ||
| 808 | This function cancels any narrowing in the current buffer, so that the | ||
| 809 | entire contents are accessible. This is called @dfn{widening}. | ||
| 810 | It is equivalent to the following expression: | ||
| 811 | |||
| 812 | @example | ||
| 813 | (narrow-to-region 1 (1+ (buffer-size))) | ||
| 814 | @end example | ||
| 815 | @end deffn | ||
| 816 | |||
| 817 | @defspec save-restriction body@dots{} | ||
| 818 | This special form saves the current bounds of the accessible portion, | ||
| 819 | evaluates the @var{body} forms, and finally restores the saved bounds, | ||
| 820 | thus restoring the same state of narrowing (or absence thereof) formerly | ||
| 821 | in effect. The state of narrowing is restored even in the event of an | ||
| 822 | abnormal exit via throw or error (@pxref{Nonlocal Exits}). Therefore, | ||
| 823 | this construct is a clean way to narrow a buffer temporarily. | ||
| 824 | |||
| 825 | The value returned by @code{save-restriction} is that returned by the | ||
| 826 | last form in @var{body}, or @code{nil} if no body forms were given. | ||
| 827 | |||
| 828 | @c Wordy to avoid overfull hbox. --rjc 16mar92 | ||
| 829 | @strong{Caution:} it is easy to make a mistake when using the | ||
| 830 | @code{save-restriction} construct. Read the entire description here | ||
| 831 | before you try it. | ||
| 832 | |||
| 833 | If @var{body} changes the current buffer, @code{save-restriction} still | ||
| 834 | restores the restrictions on the original buffer (the buffer whose | ||
| 835 | restrictions it saved from), but it does not restore the identity of the | ||
| 836 | current buffer. | ||
| 837 | |||
| 838 | @code{save-restriction} does @emph{not} restore point and the mark; use | ||
| 839 | @code{save-excursion} for that. If you use both @code{save-restriction} | ||
| 840 | and @code{save-excursion} together, @code{save-excursion} should come | ||
| 841 | first (on the outside). Otherwise, the old point value would be | ||
| 842 | restored with temporary narrowing still in effect. If the old point | ||
| 843 | value were outside the limits of the temporary narrowing, this would | ||
| 844 | fail to restore it accurately. | ||
| 845 | |||
| 846 | The @code{save-restriction} special form records the values of the | ||
| 847 | beginning and end of the accessible portion as distances from the | ||
| 848 | beginning and end of the buffer. In other words, it records the amount | ||
| 849 | of inaccessible text before and after the accessible portion. | ||
| 850 | |||
| 851 | This method yields correct results if @var{body} does further narrowing. | ||
| 852 | However, @code{save-restriction} can become confused if the body widens | ||
| 853 | and then makes changes outside the range of the saved narrowing. When | ||
| 854 | this is what you want to do, @code{save-restriction} is not the right | ||
| 855 | tool for the job. Here is what you must use instead: | ||
| 856 | |||
| 857 | @example | ||
| 858 | @group | ||
| 859 | (let ((beg (point-min-marker)) | ||
| 860 | (end (point-max-marker))) | ||
| 861 | (unwind-protect | ||
| 862 | (progn @var{body}) | ||
| 863 | (save-excursion | ||
| 864 | (set-buffer (marker-buffer beg)) | ||
| 865 | (narrow-to-region beg end)))) | ||
| 866 | @end group | ||
| 867 | @end example | ||
| 868 | |||
| 869 | Here is a simple example of correct use of @code{save-restriction}: | ||
| 870 | |||
| 871 | @example | ||
| 872 | @group | ||
| 873 | ---------- Buffer: foo ---------- | ||
| 874 | This is the contents of foo | ||
| 875 | This is the contents of foo | ||
| 876 | This is the contents of foo@point{} | ||
| 877 | ---------- Buffer: foo ---------- | ||
| 878 | @end group | ||
| 879 | |||
| 880 | @group | ||
| 881 | (save-excursion | ||
| 882 | (save-restriction | ||
| 883 | (goto-char 1) | ||
| 884 | (forward-line 2) | ||
| 885 | (narrow-to-region 1 (point)) | ||
| 886 | (goto-char (point-min)) | ||
| 887 | (replace-string "foo" "bar"))) | ||
| 888 | |||
| 889 | ---------- Buffer: foo ---------- | ||
| 890 | This is the contents of bar | ||
| 891 | This is the contents of bar | ||
| 892 | This is the contents of foo@point{} | ||
| 893 | ---------- Buffer: foo ---------- | ||
| 894 | @end group | ||
| 895 | @end example | ||
| 896 | @end defspec | ||
diff --git a/lispref/searching.texi b/lispref/searching.texi new file mode 100644 index 00000000000..28625c25bdb --- /dev/null +++ b/lispref/searching.texi | |||
| @@ -0,0 +1,1254 @@ | |||
| 1 | @c -*-texinfo-*- | ||
| 2 | @c This is part of the GNU Emacs Lisp Reference Manual. | ||
| 3 | @c Copyright (C) 1990, 1991, 1992, 1993, 1994 Free Software Foundation, Inc. | ||
| 4 | @c See the file elisp.texi for copying conditions. | ||
| 5 | @setfilename ../info/searching | ||
| 6 | @node Searching and Matching, Syntax Tables, Text, Top | ||
| 7 | @chapter Searching and Matching | ||
| 8 | @cindex searching | ||
| 9 | |||
| 10 | GNU Emacs provides two ways to search through a buffer for specified | ||
| 11 | text: exact string searches and regular expression searches. After a | ||
| 12 | regular expression search, you can examine the @dfn{match data} to | ||
| 13 | determine which text matched the whole regular expression or various | ||
| 14 | portions of it. | ||
| 15 | |||
| 16 | @menu | ||
| 17 | * String Search:: Search for an exact match. | ||
| 18 | * Regular Expressions:: Describing classes of strings. | ||
| 19 | * Regexp Search:: Searching for a match for a regexp. | ||
| 20 | * Search and Replace:: Internals of @code{query-replace}. | ||
| 21 | * Match Data:: Finding out which part of the text matched | ||
| 22 | various parts of a regexp, after regexp search. | ||
| 23 | * Searching and Case:: Case-independent or case-significant searching. | ||
| 24 | * Standard Regexps:: Useful regexps for finding sentences, pages,... | ||
| 25 | @end menu | ||
| 26 | |||
| 27 | The @samp{skip-chars@dots{}} functions also perform a kind of searching. | ||
| 28 | @xref{Skipping Characters}. | ||
| 29 | |||
| 30 | @node String Search | ||
| 31 | @section Searching for Strings | ||
| 32 | @cindex string search | ||
| 33 | |||
| 34 | These are the primitive functions for searching through the text in a | ||
| 35 | buffer. They are meant for use in programs, but you may call them | ||
| 36 | interactively. If you do so, they prompt for the search string; | ||
| 37 | @var{limit} and @var{noerror} are set to @code{nil}, and @var{repeat} | ||
| 38 | is set to 1. | ||
| 39 | |||
| 40 | @deffn Command search-forward string &optional limit noerror repeat | ||
| 41 | This function searches forward from point for an exact match for | ||
| 42 | @var{string}. If successful, it sets point to the end of the occurrence | ||
| 43 | found, and returns the new value of point. If no match is found, the | ||
| 44 | value and side effects depend on @var{noerror} (see below). | ||
| 45 | @c Emacs 19 feature | ||
| 46 | |||
| 47 | In the following example, point is initially at the beginning of the | ||
| 48 | line. Then @code{(search-forward "fox")} moves point after the last | ||
| 49 | letter of @samp{fox}: | ||
| 50 | |||
| 51 | @example | ||
| 52 | @group | ||
| 53 | ---------- Buffer: foo ---------- | ||
| 54 | @point{}The quick brown fox jumped over the lazy dog. | ||
| 55 | ---------- Buffer: foo ---------- | ||
| 56 | @end group | ||
| 57 | |||
| 58 | @group | ||
| 59 | (search-forward "fox") | ||
| 60 | @result{} 20 | ||
| 61 | |||
| 62 | ---------- Buffer: foo ---------- | ||
| 63 | The quick brown fox@point{} jumped over the lazy dog. | ||
| 64 | ---------- Buffer: foo ---------- | ||
| 65 | @end group | ||
| 66 | @end example | ||
| 67 | |||
| 68 | The argument @var{limit} specifies the upper bound to the search. (It | ||
| 69 | must be a position in the current buffer.) No match extending after | ||
| 70 | that position is accepted. If @var{limit} is omitted or @code{nil}, it | ||
| 71 | defaults to the end of the accessible portion of the buffer. | ||
| 72 | |||
| 73 | @kindex search-failed | ||
| 74 | What happens when the search fails depends on the value of | ||
| 75 | @var{noerror}. If @var{noerror} is @code{nil}, a @code{search-failed} | ||
| 76 | error is signaled. If @var{noerror} is @code{t}, @code{search-forward} | ||
| 77 | returns @code{nil} and does nothing. If @var{noerror} is neither | ||
| 78 | @code{nil} nor @code{t}, then @code{search-forward} moves point to the | ||
| 79 | upper bound and returns @code{nil}. (It would be more consistent now | ||
| 80 | to return the new position of point in that case, but some programs | ||
| 81 | may depend on a value of @code{nil}.) | ||
| 82 | |||
| 83 | If @var{repeat} is non-@code{nil}, then the search is repeated that | ||
| 84 | many times. Point is positioned at the end of the last match. | ||
| 85 | @end deffn | ||
| 86 | |||
| 87 | @deffn Command search-backward string &optional limit noerror repeat | ||
| 88 | This function searches backward from point for @var{string}. It is | ||
| 89 | just like @code{search-forward} except that it searches backwards and | ||
| 90 | leaves point at the beginning of the match. | ||
| 91 | @end deffn | ||
| 92 | |||
| 93 | @deffn Command word-search-forward string &optional limit noerror repeat | ||
| 94 | @cindex word search | ||
| 95 | This function searches forward from point for a ``word'' match for | ||
| 96 | @var{string}. If it finds a match, it sets point to the end of the | ||
| 97 | match found, and returns the new value of point. | ||
| 98 | @c Emacs 19 feature | ||
| 99 | |||
| 100 | Word matching regards @var{string} as a sequence of words, disregarding | ||
| 101 | punctuation that separates them. It searches the buffer for the same | ||
| 102 | sequence of words. Each word must be distinct in the buffer (searching | ||
| 103 | for the word @samp{ball} does not match the word @samp{balls}), but the | ||
| 104 | details of punctuation and spacing are ignored (searching for @samp{ball | ||
| 105 | boy} does match @samp{ball. Boy!}). | ||
| 106 | |||
| 107 | In this example, point is initially at the beginning of the buffer; the | ||
| 108 | search leaves it between the @samp{y} and the @samp{!}. | ||
| 109 | |||
| 110 | @example | ||
| 111 | @group | ||
| 112 | ---------- Buffer: foo ---------- | ||
| 113 | @point{}He said "Please! Find | ||
| 114 | the ball boy!" | ||
| 115 | ---------- Buffer: foo ---------- | ||
| 116 | @end group | ||
| 117 | |||
| 118 | @group | ||
| 119 | (word-search-forward "Please find the ball, boy.") | ||
| 120 | @result{} 35 | ||
| 121 | |||
| 122 | ---------- Buffer: foo ---------- | ||
| 123 | He said "Please! Find | ||
| 124 | the ball boy@point{}!" | ||
| 125 | ---------- Buffer: foo ---------- | ||
| 126 | @end group | ||
| 127 | @end example | ||
| 128 | |||
| 129 | If @var{limit} is non-@code{nil} (it must be a position in the current | ||
| 130 | buffer), then it is the upper bound to the search. The match found must | ||
| 131 | not extend after that position. | ||
| 132 | |||
| 133 | If @var{noerror} is @code{nil}, then @code{word-search-forward} signals | ||
| 134 | an error if the search fails. If @var{noerror} is @code{t}, then it | ||
| 135 | returns @code{nil} instead of signaling an error. If @var{noerror} is | ||
| 136 | neither @code{nil} nor @code{t}, it moves point to @var{limit} (or the | ||
| 137 | end of the buffer) and returns @code{nil}. | ||
| 138 | |||
| 139 | If @var{repeat} is non-@code{nil}, then the search is repeated that many | ||
| 140 | times. Point is positioned at the end of the last match. | ||
| 141 | @end deffn | ||
| 142 | |||
| 143 | @deffn Command word-search-backward string &optional limit noerror repeat | ||
| 144 | This function searches backward from point for a word match to | ||
| 145 | @var{string}. This function is just like @code{word-search-forward} | ||
| 146 | except that it searches backward and normally leaves point at the | ||
| 147 | beginning of the match. | ||
| 148 | @end deffn | ||
| 149 | |||
| 150 | @node Regular Expressions | ||
| 151 | @section Regular Expressions | ||
| 152 | @cindex regular expression | ||
| 153 | @cindex regexp | ||
| 154 | |||
| 155 | A @dfn{regular expression} (@dfn{regexp}, for short) is a pattern that | ||
| 156 | denotes a (possibly infinite) set of strings. Searching for matches for | ||
| 157 | a regexp is a very powerful operation. This section explains how to write | ||
| 158 | regexps; the following section says how to search for them. | ||
| 159 | |||
| 160 | @menu | ||
| 161 | * Syntax of Regexps:: Rules for writing regular expressions. | ||
| 162 | * Regexp Example:: Illustrates regular expression syntax. | ||
| 163 | @end menu | ||
| 164 | |||
| 165 | @node Syntax of Regexps | ||
| 166 | @subsection Syntax of Regular Expressions | ||
| 167 | |||
| 168 | Regular expressions have a syntax in which a few characters are special | ||
| 169 | constructs and the rest are @dfn{ordinary}. An ordinary character is a | ||
| 170 | simple regular expression which matches that character and nothing else. | ||
| 171 | The special characters are @samp{$}, @samp{^}, @samp{.}, @samp{*}, | ||
| 172 | @samp{+}, @samp{?}, @samp{[}, @samp{]} and @samp{\}; no new special | ||
| 173 | characters will be defined in the future. Any other character appearing | ||
| 174 | in a regular expression is ordinary, unless a @samp{\} precedes it. | ||
| 175 | |||
| 176 | For example, @samp{f} is not a special character, so it is ordinary, and | ||
| 177 | therefore @samp{f} is a regular expression that matches the string | ||
| 178 | @samp{f} and no other string. (It does @emph{not} match the string | ||
| 179 | @samp{ff}.) Likewise, @samp{o} is a regular expression that matches | ||
| 180 | only @samp{o}.@refill | ||
| 181 | |||
| 182 | Any two regular expressions @var{a} and @var{b} can be concatenated. The | ||
| 183 | result is a regular expression which matches a string if @var{a} matches | ||
| 184 | some amount of the beginning of that string and @var{b} matches the rest of | ||
| 185 | the string.@refill | ||
| 186 | |||
| 187 | As a simple example, we can concatenate the regular expressions @samp{f} | ||
| 188 | and @samp{o} to get the regular expression @samp{fo}, which matches only | ||
| 189 | the string @samp{fo}. Still trivial. To do something more powerful, you | ||
| 190 | need to use one of the special characters. Here is a list of them: | ||
| 191 | |||
| 192 | @need 1200 | ||
| 193 | @table @kbd | ||
| 194 | @item .@: @r{(Period)} | ||
| 195 | @cindex @samp{.} in regexp | ||
| 196 | is a special character that matches any single character except a newline. | ||
| 197 | Using concatenation, we can make regular expressions like @samp{a.b}, which | ||
| 198 | matches any three-character string that begins with @samp{a} and ends with | ||
| 199 | @samp{b}.@refill | ||
| 200 | |||
| 201 | @item * | ||
| 202 | @cindex @samp{*} in regexp | ||
| 203 | is not a construct by itself; it is a suffix operator that means to | ||
| 204 | repeat the preceding regular expression as many times as possible. In | ||
| 205 | @samp{fo*}, the @samp{*} applies to the @samp{o}, so @samp{fo*} matches | ||
| 206 | one @samp{f} followed by any number of @samp{o}s. The case of zero | ||
| 207 | @samp{o}s is allowed: @samp{fo*} does match @samp{f}.@refill | ||
| 208 | |||
| 209 | @samp{*} always applies to the @emph{smallest} possible preceding | ||
| 210 | expression. Thus, @samp{fo*} has a repeating @samp{o}, not a | ||
| 211 | repeating @samp{fo}.@refill | ||
| 212 | |||
| 213 | The matcher processes a @samp{*} construct by matching, immediately, | ||
| 214 | as many repetitions as can be found. Then it continues with the rest | ||
| 215 | of the pattern. If that fails, backtracking occurs, discarding some | ||
| 216 | of the matches of the @samp{*}-modified construct in case that makes | ||
| 217 | it possible to match the rest of the pattern. For example, in matching | ||
| 218 | @samp{ca*ar} against the string @samp{caaar}, the @samp{a*} first | ||
| 219 | tries to match all three @samp{a}s; but the rest of the pattern is | ||
| 220 | @samp{ar} and there is only @samp{r} left to match, so this try fails. | ||
| 221 | The next alternative is for @samp{a*} to match only two @samp{a}s. | ||
| 222 | With this choice, the rest of the regexp matches successfully.@refill | ||
| 223 | |||
| 224 | @item + | ||
| 225 | @cindex @samp{+} in regexp | ||
| 226 | is a suffix operator similar to @samp{*} except that the preceding | ||
| 227 | expression must match at least once. So, for example, @samp{ca+r} | ||
| 228 | matches the strings @samp{car} and @samp{caaaar} but not the string | ||
| 229 | @samp{cr}, whereas @samp{ca*r} matches all three strings. | ||
| 230 | |||
| 231 | @item ? | ||
| 232 | @cindex @samp{?} in regexp | ||
| 233 | is a suffix operator similar to @samp{*} except that the preceding | ||
| 234 | expression can match either once or not at all. For example, | ||
| 235 | @samp{ca?r} matches @samp{car} or @samp{cr}, but does not match anything | ||
| 236 | else. | ||
| 237 | |||
| 238 | @item [ @dots{} ] | ||
| 239 | @cindex character set (in regexp) | ||
| 240 | @cindex @samp{[} in regexp | ||
| 241 | @cindex @samp{]} in regexp | ||
| 242 | @samp{[} begins a @dfn{character set}, which is terminated by a | ||
| 243 | @samp{]}. In the simplest case, the characters between the two brackets | ||
| 244 | form the set. Thus, @samp{[ad]} matches either one @samp{a} or one | ||
| 245 | @samp{d}, and @samp{[ad]*} matches any string composed of just @samp{a}s | ||
| 246 | and @samp{d}s (including the empty string), from which it follows that | ||
| 247 | @samp{c[ad]*r} matches @samp{cr}, @samp{car}, @samp{cdr}, | ||
| 248 | @samp{caddaar}, etc.@refill | ||
| 249 | |||
| 250 | The usual regular expression special characters are not special inside a | ||
| 251 | character set. A completely different set of special characters exists | ||
| 252 | inside character sets: @samp{]}, @samp{-} and @samp{^}.@refill | ||
| 253 | |||
| 254 | @samp{-} is used for ranges of characters. To write a range, write two | ||
| 255 | characters with a @samp{-} between them. Thus, @samp{[a-z]} matches any | ||
| 256 | lower case letter. Ranges may be intermixed freely with individual | ||
| 257 | characters, as in @samp{[a-z$%.]}, which matches any lower case letter | ||
| 258 | or @samp{$}, @samp{%} or a period.@refill | ||
| 259 | |||
| 260 | To include a @samp{]} in a character set, make it the first character. | ||
| 261 | For example, @samp{[]a]} matches @samp{]} or @samp{a}. To include a | ||
| 262 | @samp{-}, write @samp{-} as the first character in the set, or put it | ||
| 263 | immediately after a range. (You can replace one individual character | ||
| 264 | @var{c} with the range @samp{@var{c}-@var{c}} to make a place to put the | ||
| 265 | @samp{-}.) There is no way to write a set containing just @samp{-} and | ||
| 266 | @samp{]}. | ||
| 267 | |||
| 268 | To include @samp{^} in a set, put it anywhere but at the beginning of | ||
| 269 | the set. | ||
| 270 | |||
| 271 | @item [^ @dots{} ] | ||
| 272 | @cindex @samp{^} in regexp | ||
| 273 | @samp{[^} begins a @dfn{complement character set}, which matches any | ||
| 274 | character except the ones specified. Thus, @samp{[^a-z0-9A-Z]} | ||
| 275 | matches all characters @emph{except} letters and digits.@refill | ||
| 276 | |||
| 277 | @samp{^} is not special in a character set unless it is the first | ||
| 278 | character. The character following the @samp{^} is treated as if it | ||
| 279 | were first (thus, @samp{-} and @samp{]} are not special there). | ||
| 280 | |||
| 281 | Note that a complement character set can match a newline, unless | ||
| 282 | newline is mentioned as one of the characters not to match. | ||
| 283 | |||
| 284 | @item ^ | ||
| 285 | @cindex @samp{^} in regexp | ||
| 286 | @cindex beginning of line in regexp | ||
| 287 | is a special character that matches the empty string, but only at | ||
| 288 | the beginning of a line in the text being matched. Otherwise it fails | ||
| 289 | to match anything. Thus, @samp{^foo} matches a @samp{foo} which occurs | ||
| 290 | at the beginning of a line. | ||
| 291 | |||
| 292 | When matching a string, @samp{^} matches at the beginning of the string | ||
| 293 | or after a newline character @samp{\n}. | ||
| 294 | |||
| 295 | @item $ | ||
| 296 | @cindex @samp{$} in regexp | ||
| 297 | is similar to @samp{^} but matches only at the end of a line. Thus, | ||
| 298 | @samp{x+$} matches a string of one @samp{x} or more at the end of a line. | ||
| 299 | |||
| 300 | When matching a string, @samp{$} matches at the end of the string | ||
| 301 | or before a newline character @samp{\n}. | ||
| 302 | |||
| 303 | @item \ | ||
| 304 | @cindex @samp{\} in regexp | ||
| 305 | has two functions: it quotes the special characters (including | ||
| 306 | @samp{\}), and it introduces additional special constructs. | ||
| 307 | |||
| 308 | Because @samp{\} quotes special characters, @samp{\$} is a regular | ||
| 309 | expression which matches only @samp{$}, and @samp{\[} is a regular | ||
| 310 | expression which matches only @samp{[}, and so on. | ||
| 311 | |||
| 312 | Note that @samp{\} also has special meaning in the read syntax of Lisp | ||
| 313 | strings (@pxref{String Type}), and must be quoted with @samp{\}. For | ||
| 314 | example, the regular expression that matches the @samp{\} character is | ||
| 315 | @samp{\\}. To write a Lisp string that contains the characters | ||
| 316 | @samp{\\}, Lisp syntax requires you to quote each @samp{\} with another | ||
| 317 | @samp{\}. Therefore, the read syntax for a regular expression matching | ||
| 318 | @samp{\} is @code{"\\\\"}.@refill | ||
| 319 | @end table | ||
| 320 | |||
| 321 | @strong{Please note:} for historical compatibility, special characters | ||
| 322 | are treated as ordinary ones if they are in contexts where their special | ||
| 323 | meanings make no sense. For example, @samp{*foo} treats @samp{*} as | ||
| 324 | ordinary since there is no preceding expression on which the @samp{*} | ||
| 325 | can act. It is poor practice to depend on this behavior; better to | ||
| 326 | quote the special character anyway, regardless of where it | ||
| 327 | appears.@refill | ||
| 328 | |||
| 329 | For the most part, @samp{\} followed by any character matches only | ||
| 330 | that character. However, there are several exceptions: characters | ||
| 331 | which, when preceded by @samp{\}, are special constructs. Such | ||
| 332 | characters are always ordinary when encountered on their own. Here | ||
| 333 | is a table of @samp{\} constructs: | ||
| 334 | |||
| 335 | @table @kbd | ||
| 336 | @item \| | ||
| 337 | @cindex @samp{|} in regexp | ||
| 338 | @cindex regexp alternative | ||
| 339 | specifies an alternative. | ||
| 340 | Two regular expressions @var{a} and @var{b} with @samp{\|} in | ||
| 341 | between form an expression that matches anything that either @var{a} or | ||
| 342 | @var{b} matches.@refill | ||
| 343 | |||
| 344 | Thus, @samp{foo\|bar} matches either @samp{foo} or @samp{bar} | ||
| 345 | but no other string.@refill | ||
| 346 | |||
| 347 | @samp{\|} applies to the largest possible surrounding expressions. Only a | ||
| 348 | surrounding @samp{\( @dots{} \)} grouping can limit the grouping power of | ||
| 349 | @samp{\|}.@refill | ||
| 350 | |||
| 351 | Full backtracking capability exists to handle multiple uses of @samp{\|}. | ||
| 352 | |||
| 353 | @item \( @dots{} \) | ||
| 354 | @cindex @samp{(} in regexp | ||
| 355 | @cindex @samp{)} in regexp | ||
| 356 | @cindex regexp grouping | ||
| 357 | is a grouping construct that serves three purposes: | ||
| 358 | |||
| 359 | @enumerate | ||
| 360 | @item | ||
| 361 | To enclose a set of @samp{\|} alternatives for other operations. | ||
| 362 | Thus, @samp{\(foo\|bar\)x} matches either @samp{foox} or @samp{barx}. | ||
| 363 | |||
| 364 | @item | ||
| 365 | To enclose an expression for a suffix operator such as @samp{*} to act | ||
| 366 | on. Thus, @samp{ba\(na\)*} matches @samp{bananana}, etc., with any | ||
| 367 | number (zero or more) of @samp{na} strings.@refill | ||
| 368 | |||
| 369 | @item | ||
| 370 | To record a matched substring for future reference. | ||
| 371 | @end enumerate | ||
| 372 | |||
| 373 | This last application is not a consequence of the idea of a | ||
| 374 | parenthetical grouping; it is a separate feature which happens to be | ||
| 375 | assigned as a second meaning to the same @samp{\( @dots{} \)} construct | ||
| 376 | because there is no conflict in practice between the two meanings. | ||
| 377 | Here is an explanation of this feature: | ||
| 378 | |||
| 379 | @item \@var{digit} | ||
| 380 | matches the same text which matched the @var{digit}th occurrence of a | ||
| 381 | @samp{\( @dots{} \)} construct. | ||
| 382 | |||
| 383 | In other words, after the end of a @samp{\( @dots{} \)} construct, the | ||
| 384 | matcher remembers the beginning and end of the text matched by that | ||
| 385 | construct. Then, later on in the regular expression, you can use | ||
| 386 | @samp{\} followed by @var{digit} to match that same text, whatever it | ||
| 387 | may have been. | ||
| 388 | |||
| 389 | The strings matching the first nine @samp{\( @dots{} \)} constructs | ||
| 390 | appearing in a regular expression are assigned numbers 1 through 9 in | ||
| 391 | the order that the open parentheses appear in the regular expression. | ||
| 392 | So you can use @samp{\1} through @samp{\9} to refer to the text matched | ||
| 393 | by the corresponding @samp{\( @dots{} \)} constructs. | ||
| 394 | |||
| 395 | For example, @samp{\(.*\)\1} matches any newline-free string that is | ||
| 396 | composed of two identical halves. The @samp{\(.*\)} matches the first | ||
| 397 | half, which may be anything, but the @samp{\1} that follows must match | ||
| 398 | the same exact text. | ||
| 399 | |||
| 400 | @item \w | ||
| 401 | @cindex @samp{\w} in regexp | ||
| 402 | matches any word-constituent character. The editor syntax table | ||
| 403 | determines which characters these are. @xref{Syntax Tables}. | ||
| 404 | |||
| 405 | @item \W | ||
| 406 | @cindex @samp{\W} in regexp | ||
| 407 | matches any character that is not a word-constituent. | ||
| 408 | |||
| 409 | @item \s@var{code} | ||
| 410 | @cindex @samp{\s} in regexp | ||
| 411 | matches any character whose syntax is @var{code}. Here @var{code} is a | ||
| 412 | character which represents a syntax code: thus, @samp{w} for word | ||
| 413 | constituent, @samp{-} for whitespace, @samp{(} for open parenthesis, | ||
| 414 | etc. @xref{Syntax Tables}, for a list of syntax codes and the | ||
| 415 | characters that stand for them. | ||
| 416 | |||
| 417 | @item \S@var{code} | ||
| 418 | @cindex @samp{\S} in regexp | ||
| 419 | matches any character whose syntax is not @var{code}. | ||
| 420 | @end table | ||
| 421 | |||
| 422 | These regular expression constructs match the empty string---that is, | ||
| 423 | they don't use up any characters---but whether they match depends on the | ||
| 424 | context. | ||
| 425 | |||
| 426 | @table @kbd | ||
| 427 | @item \` | ||
| 428 | @cindex @samp{\`} in regexp | ||
| 429 | matches the empty string, but only at the beginning | ||
| 430 | of the buffer or string being matched against. | ||
| 431 | |||
| 432 | @item \' | ||
| 433 | @cindex @samp{\'} in regexp | ||
| 434 | matches the empty string, but only at the end of | ||
| 435 | the buffer or string being matched against. | ||
| 436 | |||
| 437 | @item \= | ||
| 438 | @cindex @samp{\=} in regexp | ||
| 439 | matches the empty string, but only at point. | ||
| 440 | (This construct is not defined when matching against a string.) | ||
| 441 | |||
| 442 | @item \b | ||
| 443 | @cindex @samp{\b} in regexp | ||
| 444 | matches the empty string, but only at the beginning or | ||
| 445 | end of a word. Thus, @samp{\bfoo\b} matches any occurrence of | ||
| 446 | @samp{foo} as a separate word. @samp{\bballs?\b} matches | ||
| 447 | @samp{ball} or @samp{balls} as a separate word.@refill | ||
| 448 | |||
| 449 | @item \B | ||
| 450 | @cindex @samp{\B} in regexp | ||
| 451 | matches the empty string, but @emph{not} at the beginning or | ||
| 452 | end of a word. | ||
| 453 | |||
| 454 | @item \< | ||
| 455 | @cindex @samp{\<} in regexp | ||
| 456 | matches the empty string, but only at the beginning of a word. | ||
| 457 | |||
| 458 | @item \> | ||
| 459 | @cindex @samp{\>} in regexp | ||
| 460 | matches the empty string, but only at the end of a word. | ||
| 461 | @end table | ||
| 462 | |||
| 463 | @kindex invalid-regexp | ||
| 464 | Not every string is a valid regular expression. For example, a string | ||
| 465 | with unbalanced square brackets is invalid (with a few exceptions, such | ||
| 466 | as @samp{[]]}), and so is a string that ends with a single @samp{\}. If | ||
| 467 | an invalid regular expression is passed to any of the search functions, | ||
| 468 | an @code{invalid-regexp} error is signaled. | ||
| 469 | |||
| 470 | @defun regexp-quote string | ||
| 471 | This function returns a regular expression string that matches exactly | ||
| 472 | @var{string} and nothing else. This allows you to request an exact | ||
| 473 | string match when calling a function that wants a regular expression. | ||
| 474 | |||
| 475 | @example | ||
| 476 | @group | ||
| 477 | (regexp-quote "^The cat$") | ||
| 478 | @result{} "\\^The cat\\$" | ||
| 479 | @end group | ||
| 480 | @end example | ||
| 481 | |||
| 482 | One use of @code{regexp-quote} is to combine an exact string match with | ||
| 483 | context described as a regular expression. For example, this searches | ||
| 484 | for the string which is the value of @code{string}, surrounded by | ||
| 485 | whitespace: | ||
| 486 | |||
| 487 | @example | ||
| 488 | @group | ||
| 489 | (re-search-forward | ||
| 490 | (concat "\\s " (regexp-quote string) "\\s ")) | ||
| 491 | @end group | ||
| 492 | @end example | ||
| 493 | @end defun | ||
| 494 | |||
| 495 | @node Regexp Example | ||
| 496 | @comment node-name, next, previous, up | ||
| 497 | @subsection Complex Regexp Example | ||
| 498 | |||
| 499 | Here is a complicated regexp, used by Emacs to recognize the end of a | ||
| 500 | sentence together with any whitespace that follows. It is the value of | ||
| 501 | the variable @code{sentence-end}. | ||
| 502 | |||
| 503 | First, we show the regexp as a string in Lisp syntax to distinguish | ||
| 504 | spaces from tab characters. The string constant begins and ends with a | ||
| 505 | double-quote. @samp{\"} stands for a double-quote as part of the | ||
| 506 | string, @samp{\\} for a backslash as part of the string, @samp{\t} for a | ||
| 507 | tab and @samp{\n} for a newline. | ||
| 508 | |||
| 509 | @example | ||
| 510 | "[.?!][]\"')@}]*\\($\\| $\\|\t\\| \\)[ \t\n]*" | ||
| 511 | @end example | ||
| 512 | |||
| 513 | In contrast, if you evaluate the variable @code{sentence-end}, you | ||
| 514 | will see the following: | ||
| 515 | |||
| 516 | @example | ||
| 517 | @group | ||
| 518 | sentence-end | ||
| 519 | @result{} | ||
| 520 | "[.?!][]\"')@}]*\\($\\| $\\| \\| \\)[ | ||
| 521 | ]*" | ||
| 522 | @end group | ||
| 523 | @end example | ||
| 524 | |||
| 525 | @noindent | ||
| 526 | In this output, tab and newline appear as themselves. | ||
| 527 | |||
| 528 | This regular expression contains four parts in succession and can be | ||
| 529 | deciphered as follows: | ||
| 530 | |||
| 531 | @table @code | ||
| 532 | @item [.?!] | ||
| 533 | The first part of the pattern consists of three characters, a period, a | ||
| 534 | question mark and an exclamation mark, within square brackets. The | ||
| 535 | match must begin with one of these three characters. | ||
| 536 | |||
| 537 | @item []\"')@}]* | ||
| 538 | The second part of the pattern matches any closing braces and quotation | ||
| 539 | marks, zero or more of them, that may follow the period, question mark | ||
| 540 | or exclamation mark. The @code{\"} is Lisp syntax for a double-quote in | ||
| 541 | a string. The @samp{*} at the end indicates that the immediately | ||
| 542 | preceding regular expression (a character set, in this case) may be | ||
| 543 | repeated zero or more times. | ||
| 544 | |||
| 545 | @item \\($\\|@ $\\|\t\\|@ @ \\) | ||
| 546 | The third part of the pattern matches the whitespace that follows the | ||
| 547 | end of a sentence: the end of a line (optionally with a space), or a tab, or two spaces. The | ||
| 548 | double backslashes mark the parentheses and vertical bars as regular | ||
| 549 | expression syntax; the parentheses mark the group and the vertical bars | ||
| 550 | separate alternatives. The dollar sign is used to match the end of a | ||
| 551 | line. | ||
| 552 | |||
| 553 | @item [ \t\n]* | ||
| 554 | Finally, the last part of the pattern matches any additional whitespace | ||
| 555 | beyond the minimum needed to end a sentence. | ||
| 556 | @end table | ||
| 557 | |||
| 558 | @node Regexp Search | ||
| 559 | @section Regular Expression Searching | ||
| 560 | @cindex regular expression searching | ||
| 561 | @cindex regexp searching | ||
| 562 | @cindex searching for regexp | ||
| 563 | |||
| 564 | In GNU Emacs, you can search for the next match for a regexp either | ||
| 565 | incrementally or not. For incremental search commands, see @ref{Regexp | ||
| 566 | Search, , Regular Expression Search, emacs, The GNU Emacs Manual}. Here | ||
| 567 | we describe only the search functions useful in programs. The principal | ||
| 568 | one is @code{re-search-forward}. | ||
| 569 | |||
| 570 | @deffn Command re-search-forward regexp &optional limit noerror repeat | ||
| 571 | This function searches forward in the current buffer for a string of | ||
| 572 | text that is matched by the regular expression @var{regexp}. The | ||
| 573 | function skips over any amount of text that is not matched by | ||
| 574 | @var{regexp}, and leaves point at the end of the first match found. | ||
| 575 | It returns the new value of point. | ||
| 576 | |||
| 577 | If @var{limit} is non-@code{nil} (it must be a position in the current | ||
| 578 | buffer), then it is the upper bound to the search. No match extending | ||
| 579 | after that position is accepted. | ||
| 580 | |||
| 581 | What happens when the search fails depends on the value of | ||
| 582 | @var{noerror}. If @var{noerror} is @code{nil}, a @code{search-failed} | ||
| 583 | error is signaled. If @var{noerror} is @code{t}, | ||
| 584 | @code{re-search-forward} does nothing and returns @code{nil}. If | ||
| 585 | @var{noerror} is neither @code{nil} nor @code{t}, then | ||
| 586 | @code{re-search-forward} moves point to @var{limit} (or the end of the | ||
| 587 | buffer) and returns @code{nil}. | ||
| 588 | |||
| 589 | If @var{repeat} is supplied (it must be a positive number), then the | ||
| 590 | search is repeated that many times (each time starting at the end of the | ||
| 591 | previous time's match). If these successive searches succeed, the | ||
| 592 | function succeeds, moving point and returning its new value. Otherwise | ||
| 593 | the search fails. | ||
| 594 | |||
| 595 | In the following example, point is initially before the @samp{T}. | ||
| 596 | Evaluating the search call moves point to the end of that line (between | ||
| 597 | the @samp{t} of @samp{hat} and the newline). | ||
| 598 | |||
| 599 | @example | ||
| 600 | @group | ||
| 601 | ---------- Buffer: foo ---------- | ||
| 602 | I read "@point{}The cat in the hat | ||
| 603 | comes back" twice. | ||
| 604 | ---------- Buffer: foo ---------- | ||
| 605 | @end group | ||
| 606 | |||
| 607 | @group | ||
| 608 | (re-search-forward "[a-z]+" nil t 5) | ||
| 609 | @result{} 27 | ||
| 610 | |||
| 611 | ---------- Buffer: foo ---------- | ||
| 612 | I read "The cat in the hat@point{} | ||
| 613 | comes back" twice. | ||
| 614 | ---------- Buffer: foo ---------- | ||
| 615 | @end group | ||
| 616 | @end example | ||
| 617 | @end deffn | ||
| 618 | |||
| 619 | @deffn Command re-search-backward regexp &optional limit noerror repeat | ||
| 620 | This function searches backward in the current buffer for a string of | ||
| 621 | text that is matched by the regular expression @var{regexp}, leaving | ||
| 622 | point at the beginning of the first text found. | ||
| 623 | |||
| 624 | This function is analogous to @code{re-search-forward}, but they are | ||
| 625 | not simple mirror images. @code{re-search-forward} finds the match | ||
| 626 | whose beginning is as close as possible. If @code{re-search-backward} | ||
| 627 | were a perfect mirror image, it would find the match whose end is as | ||
| 628 | close as possible. However, in fact it finds the match whose beginning | ||
| 629 | is as close as possible. The reason is that matching a regular | ||
| 630 | expression at a given spot always works from beginning to end, and is | ||
| 631 | done at a specified beginning position. | ||
| 632 | |||
| 633 | A true mirror-image of @code{re-search-forward} would require a special | ||
| 634 | feature for matching regexps from end to beginning. It's not worth the | ||
| 635 | trouble of implementing that. | ||
| 636 | @end deffn | ||
| 637 | |||
| 638 | @defun string-match regexp string &optional start | ||
| 639 | This function returns the index of the start of the first match for | ||
| 640 | the regular expression @var{regexp} in @var{string}, or @code{nil} if | ||
| 641 | there is no match. If @var{start} is non-@code{nil}, the search starts | ||
| 642 | at that index in @var{string}. | ||
| 643 | |||
| 644 | For example, | ||
| 645 | |||
| 646 | @example | ||
| 647 | @group | ||
| 648 | (string-match | ||
| 649 | "quick" "The quick brown fox jumped quickly.") | ||
| 650 | @result{} 4 | ||
| 651 | @end group | ||
| 652 | @group | ||
| 653 | (string-match | ||
| 654 | "quick" "The quick brown fox jumped quickly." 8) | ||
| 655 | @result{} 27 | ||
| 656 | @end group | ||
| 657 | @end example | ||
| 658 | |||
| 659 | @noindent | ||
| 660 | The index of the first character of the | ||
| 661 | string is 0, the index of the second character is 1, and so on. | ||
| 662 | |||
| 663 | After this function returns, the index of the first character beyond | ||
| 664 | the match is available as @code{(match-end 0)}. @xref{Match Data}. | ||
| 665 | |||
| 666 | @example | ||
| 667 | @group | ||
| 668 | (string-match | ||
| 669 | "quick" "The quick brown fox jumped quickly." 8) | ||
| 670 | @result{} 27 | ||
| 671 | @end group | ||
| 672 | |||
| 673 | @group | ||
| 674 | (match-end 0) | ||
| 675 | @result{} 32 | ||
| 676 | @end group | ||
| 677 | @end example | ||
| 678 | @end defun | ||
| 679 | |||
| 680 | @defun looking-at regexp | ||
| 681 | This function determines whether the text in the current buffer directly | ||
| 682 | following point matches the regular expression @var{regexp}. ``Directly | ||
| 683 | following'' means precisely that: the search is ``anchored'' and it can | ||
| 684 | succeed only starting with the first character following point. The | ||
| 685 | result is @code{t} if so, @code{nil} otherwise. | ||
| 686 | |||
| 687 | This function does not move point, but it updates the match data, which | ||
| 688 | you can access using @code{match-beginning} and @code{match-end}. | ||
| 689 | @xref{Match Data}. | ||
| 690 | |||
| 691 | In this example, point is located directly before the @samp{T}. If it | ||
| 692 | were anywhere else, the result would be @code{nil}. | ||
| 693 | |||
| 694 | @example | ||
| 695 | @group | ||
| 696 | ---------- Buffer: foo ---------- | ||
| 697 | I read "@point{}The cat in the hat | ||
| 698 | comes back" twice. | ||
| 699 | ---------- Buffer: foo ---------- | ||
| 700 | |||
| 701 | (looking-at "The cat in the hat$") | ||
| 702 | @result{} t | ||
| 703 | @end group | ||
| 704 | @end example | ||
| 705 | @end defun | ||
| 706 | |||
| 707 | @ignore | ||
| 708 | @deffn Command delete-matching-lines regexp | ||
| 709 | This function is identical to @code{delete-non-matching-lines}, save | ||
| 710 | that it deletes what @code{delete-non-matching-lines} keeps. | ||
| 711 | |||
| 712 | In the example below, point is located on the first line of text. | ||
| 713 | |||
| 714 | @example | ||
| 715 | @group | ||
| 716 | ---------- Buffer: foo ---------- | ||
| 717 | We hold these truths | ||
| 718 | to be self-evident, | ||
| 719 | that all men are created | ||
| 720 | equal, and that they are | ||
| 721 | ---------- Buffer: foo ---------- | ||
| 722 | @end group | ||
| 723 | |||
| 724 | @group | ||
| 725 | (delete-matching-lines "the") | ||
| 726 | @result{} nil | ||
| 727 | |||
| 728 | ---------- Buffer: foo ---------- | ||
| 729 | to be self-evident, | ||
| 730 | that all men are created | ||
| 731 | ---------- Buffer: foo ---------- | ||
| 732 | @end group | ||
| 733 | @end example | ||
| 734 | @end deffn | ||
| 735 | |||
| 736 | @deffn Command flush-lines regexp | ||
| 737 | This function is the same as @code{delete-matching-lines}. | ||
| 738 | @end deffn | ||
| 739 | |||
| 740 | @defun delete-non-matching-lines regexp | ||
| 741 | This function deletes all lines following point which don't | ||
| 742 | contain a match for the regular expression @var{regexp}. | ||
| 743 | @end defun | ||
| 744 | |||
| 745 | @deffn Command keep-lines regexp | ||
| 746 | This function is the same as @code{delete-non-matching-lines}. | ||
| 747 | @end deffn | ||
| 748 | |||
| 749 | @deffn Command how-many regexp | ||
| 750 | This function counts the number of matches for @var{regexp} there are in | ||
| 751 | the current buffer following point. It prints this number in | ||
| 752 | the echo area, returning the string printed. | ||
| 753 | @end deffn | ||
| 754 | |||
| 755 | @deffn Command count-matches regexp | ||
| 756 | This function is a synonym of @code{how-many}. | ||
| 757 | @end deffn | ||
| 758 | |||
| 759 | @deffn Command list-matching-lines regexp nlines | ||
| 760 | This function is a synonym of @code{occur}. | ||
| 761 | Show all lines following point containing a match for @var{regexp}. | ||
| 762 | Display each line with @var{nlines} lines before and after, | ||
| 763 | or @code{-}@var{nlines} before if @var{nlines} is negative. | ||
| 764 | @var{nlines} defaults to @code{list-matching-lines-default-context-lines}. | ||
| 765 | Interactively it is the prefix arg. | ||
| 766 | |||
| 767 | The lines are shown in a buffer named @samp{*Occur*}. | ||
| 768 | It serves as a menu to find any of the occurrences in this buffer. | ||
| 769 | @kbd{C-h m} (@code{describe-mode}) in that buffer gives help. | ||
| 770 | @end deffn | ||
| 771 | |||
| 772 | @defopt list-matching-lines-default-context-lines | ||
| 773 | Default value is 0. | ||
| 774 | Default number of context lines to include around a @code{list-matching-lines} | ||
| 775 | match. A negative number means to include that many lines before the match. | ||
| 776 | A positive number means to include that many lines both before and after. | ||
| 777 | @end defopt | ||
| 778 | @end ignore | ||
| 779 | |||
| 780 | @node Search and Replace | ||
| 781 | @section Search and Replace | ||
| 782 | @cindex replacement | ||
| 783 | |||
| 784 | @defun perform-replace from-string replacements query-flag regexp-flag delimited-flag &optional repeat-count map | ||
| 785 | This function is the guts of @code{query-replace} and related commands. | ||
| 786 | It searches for occurrences of @var{from-string} and replaces some or | ||
| 787 | all of them. If @var{query-flag} is @code{nil}, it replaces all | ||
| 788 | occurrences; otherwise, it asks the user what to do about each one. | ||
| 789 | |||
| 790 | If @var{regexp-flag} is non-@code{nil}, then @var{from-string} is | ||
| 791 | considered a regular expression; otherwise, it must match literally. If | ||
| 792 | @var{delimited-flag} is non-@code{nil}, then only occurrences | ||
| 793 | surrounded by word boundaries are considered. | ||
| 794 | |||
| 795 | The argument @var{replacements} specifies what to replace occurrences | ||
| 796 | with. If it is a string, that string is used. It can also be a list of | ||
| 797 | strings, to be used in cyclic order. | ||
| 798 | |||
| 799 | If @var{repeat-count} is non-@code{nil}, it should be an integer, the | ||
| 800 | number of occurrences to consider. In this case, @code{perform-replace} | ||
| 801 | returns after considering that many occurrences. | ||
| 802 | |||
| 803 | Normally, the keymap @code{query-replace-map} defines the possible user | ||
| 804 | responses. The argument @var{map}, if non-@code{nil}, is a keymap to | ||
| 805 | use instead of @code{query-replace-map}. | ||
| 806 | @end defun | ||
| 807 | |||
| 808 | @defvar query-replace-map | ||
| 809 | This variable holds a special keymap that defines the valid user | ||
| 810 | responses for @code{query-replace} and related functions, as well as | ||
| 811 | @code{y-or-n-p} and @code{map-y-or-n-p}. It is unusual in two ways: | ||
| 812 | |||
| 813 | @itemize @bullet | ||
| 814 | @item | ||
| 815 | The ``key bindings'' are not commands, just symbols that are meaningful | ||
| 816 | to the functions that use this map. | ||
| 817 | |||
| 818 | @item | ||
| 819 | Prefix keys are not supported; each key binding must be for a single-event | ||
| 820 | key sequence. This is because the functions don't use @code{read-key-sequence} to | ||
| 821 | get the input; instead, they read a single event and look it up ``by hand.'' | ||
| 822 | @end itemize | ||
| 823 | @end defvar | ||
| 824 | |||
| 825 | Here are the meaningful ``bindings'' for @code{query-replace-map}. | ||
| 826 | Several of them are meaningful only for @code{query-replace} and | ||
| 827 | friends. | ||
| 828 | |||
| 829 | @table @code | ||
| 830 | @item act | ||
| 831 | Do take the action being considered---in other words, ``yes.'' | ||
| 832 | |||
| 833 | @item skip | ||
| 834 | Do not take action for this question---in other words, ``no.'' | ||
| 835 | |||
| 836 | @item exit | ||
| 837 | Answer this question ``no,'' and don't ask any more. | ||
| 838 | |||
| 839 | @item act-and-exit | ||
| 840 | Answer this question ``yes,'' and don't ask any more. | ||
| 841 | |||
| 842 | @item act-and-show | ||
| 843 | Answer this question ``yes,'' but show the results---don't advance yet | ||
| 844 | to the next question. | ||
| 845 | |||
| 846 | @item automatic | ||
| 847 | Answer this question and all subsequent questions in the series with | ||
| 848 | ``yes,'' without further user interaction. | ||
| 849 | |||
| 850 | @item backup | ||
| 851 | Move back to the previous place that a question was asked about. | ||
| 852 | |||
| 853 | @item edit | ||
| 854 | Enter a recursive edit to deal with this question---instead of any | ||
| 855 | other action that would normally be taken. | ||
| 856 | |||
| 857 | @item delete-and-edit | ||
| 858 | Delete the text being considered, then enter a recursive edit to replace | ||
| 859 | it. | ||
| 860 | |||
| 861 | @item recenter | ||
| 862 | Redisplay and center the window, then ask the same question again. | ||
| 863 | |||
| 864 | @item quit | ||
| 865 | Perform a quit right away. Only @code{y-or-n-p} and related functions | ||
| 866 | use this answer. | ||
| 867 | |||
| 868 | @item help | ||
| 869 | Display some help, then ask again. | ||
| 870 | @end table | ||
| 871 | |||
| 872 | @node Match Data | ||
| 873 | @section The Match Data | ||
| 874 | @cindex match data | ||
| 875 | |||
| 876 | Emacs keeps track of the positions of the start and end of segments of | ||
| 877 | text found during a regular expression search. This means, for example, | ||
| 878 | that you can search for a complex pattern, such as a date in an Rmail | ||
| 879 | message, and then extract parts of the match under control of the | ||
| 880 | pattern. | ||
| 881 | |||
| 882 | Because the match data normally describe the most recent search only, | ||
| 883 | you must be careful not to do another search inadvertently between the | ||
| 884 | search you wish to refer back to and the use of the match data. If you | ||
| 885 | can't avoid another intervening search, you must save and restore the | ||
| 886 | match data around it, to prevent it from being overwritten. | ||
| 887 | |||
| 888 | @menu | ||
| 889 | * Simple Match Data:: Accessing single items of match data, | ||
| 890 | such as where a particular subexpression started. | ||
| 891 | * Replacing Match:: Replacing a substring that was matched. | ||
| 892 | * Entire Match Data:: Accessing the entire match data at once, as a list. | ||
| 893 | * Saving Match Data:: Saving and restoring the match data. | ||
| 894 | @end menu | ||
| 895 | |||
| 896 | @node Simple Match Data | ||
| 897 | @subsection Simple Match Data Access | ||
| 898 | |||
| 899 | This section explains how to use the match data to find the starting | ||
| 900 | point or ending point of the text that was matched by a particular | ||
| 901 | search, or by a particular parenthetical subexpression of a regular | ||
| 902 | expression. | ||
| 903 | |||
| 904 | @defun match-beginning count | ||
| 905 | This function returns the position of the start of text matched by the | ||
| 906 | last regular expression searched for, or a subexpression of it. | ||
| 907 | |||
| 908 | The argument @var{count}, a number, specifies a subexpression whose | ||
| 909 | start position is the value. If @var{count} is zero, then the value is | ||
| 910 | the position of the start of the text matched by the whole regexp. If | ||
| 911 | @var{count} is greater than zero, then the value is the position of the | ||
| 912 | beginning of the text matched by the @var{count}th subexpression. | ||
| 913 | |||
| 914 | Subexpressions of a regular expression are those expressions grouped | ||
| 915 | inside of parentheses, @samp{\(@dots{}\)}. The @var{count}th | ||
| 916 | subexpression is found by counting occurrences of @samp{\(} from the | ||
| 917 | beginning of the whole regular expression. The first subexpression is | ||
| 918 | numbered 1, the second 2, and so on. | ||
| 919 | |||
| 920 | The value is @code{nil} for a parenthetical grouping inside of a | ||
| 921 | @samp{\|} alternative that wasn't used in the match. | ||
| 922 | @end defun | ||
| 923 | |||
| 924 | @defun match-end count | ||
| 925 | This function returns the position of the end of the text that matched | ||
| 926 | the last regular expression searched for, or a subexpression of it. | ||
| 927 | This function is otherwise similar to @code{match-beginning}. | ||
| 928 | @end defun | ||
| 929 | |||
| 930 | Here is an example of using the match data, with a comment showing the | ||
| 931 | positions within the text: | ||
| 932 | |||
| 933 | @example | ||
| 934 | @group | ||
| 935 | (string-match "\\(qu\\)\\(ick\\)" | ||
| 936 | "The quick fox jumped quickly.") | ||
| 937 | ;0123456789 | ||
| 938 | @result{} 4 | ||
| 939 | @end group | ||
| 940 | |||
| 941 | @group | ||
| 942 | (match-beginning 1) ; @r{The beginning of the match} | ||
| 943 | @result{} 4 ; @r{with @samp{qu} is at index 4.} | ||
| 944 | @end group | ||
| 945 | |||
| 946 | @group | ||
| 947 | (match-beginning 2) ; @r{The beginning of the match} | ||
| 948 | @result{} 6 ; @r{with @samp{ick} is at index 6.} | ||
| 949 | @end group | ||
| 950 | |||
| 951 | @group | ||
| 952 | (match-end 1) ; @r{The end of the match} | ||
| 953 | @result{} 6 ; @r{with @samp{qu} is at index 6.} | ||
| 954 | |||
| 955 | (match-end 2) ; @r{The end of the match} | ||
| 956 | @result{} 9 ; @r{with @samp{ick} is at index 9.} | ||
| 957 | @end group | ||
| 958 | @end example | ||
| 959 | |||
| 960 | Here is another example. Point is initially located at the beginning | ||
| 961 | of the line. Searching moves point to between the space and the word | ||
| 962 | @samp{in}. The beginning of the entire match is at the 9th character of | ||
| 963 | the buffer (@samp{T}), and the beginning of the match for the first | ||
| 964 | subexpression is at the 13th character (@samp{c}). | ||
| 965 | |||
| 966 | @example | ||
| 967 | @group | ||
| 968 | (list | ||
| 969 | (re-search-forward "The \\(cat \\)") | ||
| 970 | (match-beginning 0) | ||
| 971 | (match-beginning 1)) | ||
| 972 | @result{} (17 9 13) | ||
| 973 | @end group | ||
| 974 | |||
| 975 | @group | ||
| 976 | ---------- Buffer: foo ---------- | ||
| 977 | I read "The cat @point{}in the hat comes back" twice. | ||
| 978 | ^ ^ | ||
| 979 | 9 13 | ||
| 980 | ---------- Buffer: foo ---------- | ||
| 981 | @end group | ||
| 982 | @end example | ||
| 983 | |||
| 984 | @noindent | ||
| 985 | (In this case, the index returned is a buffer position; the first | ||
| 986 | character of the buffer counts as 1.) | ||
| 987 | |||
| 988 | @node Replacing Match | ||
| 989 | @subsection Replacing the Text That Matched | ||
| 990 | |||
| 991 | The function @code{replace-match} replaces the text matched by the | ||
| 992 | last search with a replacement string that you specify. | ||
| 993 | |||
| 994 | @cindex case in replacements | ||
| 995 | @defun replace-match replacement &optional fixedcase literal | ||
| 996 | This function replaces the buffer text matched by the last search, with | ||
| 997 | @var{replacement}. It applies only to buffers; you can't use | ||
| 998 | @code{replace-match} to replace a substring found with | ||
| 999 | @code{string-match}. | ||
| 1000 | |||
| 1001 | If @var{fixedcase} is non-@code{nil}, then the case of the replacement | ||
| 1002 | text is not changed; otherwise, the replacement text is converted to a | ||
| 1003 | different case depending upon the capitalization of the text to be | ||
| 1004 | replaced. If the original text is all upper case, the replacement text | ||
| 1005 | is converted to upper case, except when all of the words in the original | ||
| 1006 | text are only one character long. In that event, the replacement text | ||
| 1007 | is capitalized. If @emph{any} of the words in the original text is | ||
| 1008 | capitalized, then all of the words in the replacement text are | ||
| 1009 | capitalized. | ||
| 1010 | |||
| 1011 | If @var{literal} is non-@code{nil}, then @var{replacement} is inserted | ||
| 1012 | exactly as it is, the only alterations being case changes as needed. | ||
| 1013 | If it is @code{nil} (the default), then the character @samp{\} is treated | ||
| 1014 | specially. If a @samp{\} appears in @var{replacement}, then it must be | ||
| 1015 | part of one of the following sequences: | ||
| 1016 | |||
| 1017 | @table @asis | ||
| 1018 | @item @samp{\&} | ||
| 1019 | @cindex @samp{&} in replacement | ||
| 1020 | @samp{\&} stands for the entire text being replaced. | ||
| 1021 | |||
| 1022 | @item @samp{\@var{n}} | ||
| 1023 | @cindex @samp{\@var{n}} in replacement | ||
| 1024 | @samp{\@var{n}} stands for the text that matched the @var{n}th | ||
| 1025 | subexpression in the original regexp. Subexpressions are those | ||
| 1026 | expressions grouped inside of @samp{\(@dots{}\)}. @var{n} is a digit. | ||
| 1027 | |||
| 1028 | @item @samp{\\} | ||
| 1029 | @cindex @samp{\} in replacement | ||
| 1030 | @samp{\\} stands for a single @samp{\} in the replacement text. | ||
| 1031 | @end table | ||
| 1032 | |||
| 1033 | @code{replace-match} leaves point at the end of the replacement text, | ||
| 1034 | and returns @code{t}. | ||
| 1035 | @end defun | ||
| 1036 | |||
| 1037 | @node Entire Match Data | ||
| 1038 | @subsection Accessing the Entire Match Data | ||
| 1039 | |||
| 1040 | The functions @code{match-data} and @code{set-match-data} read or | ||
| 1041 | write the entire match data, all at once. | ||
| 1042 | |||
| 1043 | @defun match-data | ||
| 1044 | This function returns a newly constructed list containing all the | ||
| 1045 | information on what text the last search matched. Element zero is the | ||
| 1046 | position of the beginning of the match for the whole expression; element | ||
| 1047 | one is the position of the end of the match for the expression. The | ||
| 1048 | next two elements are the positions of the beginning and end of the | ||
| 1049 | match for the first subexpression, and so on. In general, element | ||
| 1050 | @ifinfo | ||
| 1051 | number 2@var{n} | ||
| 1052 | @end ifinfo | ||
| 1053 | @tex | ||
| 1054 | number {\mathsurround=0pt $2n$} | ||
| 1055 | @end tex | ||
| 1056 | corresponds to @code{(match-beginning @var{n})}; and | ||
| 1057 | element | ||
| 1058 | @ifinfo | ||
| 1059 | number 2@var{n} + 1 | ||
| 1060 | @end ifinfo | ||
| 1061 | @tex | ||
| 1062 | number {\mathsurround=0pt $2n+1$} | ||
| 1063 | @end tex | ||
| 1064 | corresponds to @code{(match-end @var{n})}. | ||
| 1065 | |||
| 1066 | All the elements are markers or @code{nil} if matching was done on a | ||
| 1067 | buffer, and all are integers or @code{nil} if matching was done on a | ||
| 1068 | string with @code{string-match}. (In Emacs 18 and earlier versions, | ||
| 1069 | markers were used even for matching on a string, except in the case | ||
| 1070 | of the integer 0.) | ||
| 1071 | |||
| 1072 | As always, there must be no possibility of intervening searches between | ||
| 1073 | the call to a search function and the call to @code{match-data} that is | ||
| 1074 | intended to access the match data for that search. | ||
| 1075 | |||
| 1076 | @example | ||
| 1077 | @group | ||
| 1078 | (match-data) | ||
| 1079 | @result{} (#<marker at 9 in foo> | ||
| 1080 | #<marker at 17 in foo> | ||
| 1081 | #<marker at 13 in foo> | ||
| 1082 | #<marker at 17 in foo>) | ||
| 1083 | @end group | ||
| 1084 | @end example | ||
| 1085 | @end defun | ||
| 1086 | |||
| 1087 | @defun set-match-data match-list | ||
| 1088 | This function sets the match data from the elements of @var{match-list}, | ||
| 1089 | which should be a list that was the value of a previous call to | ||
| 1090 | @code{match-data}. | ||
| 1091 | |||
| 1092 | If @var{match-list} refers to a buffer that doesn't exist, you don't get | ||
| 1093 | an error; that sets the match data in a meaningless but harmless way. | ||
| 1094 | |||
| 1095 | @findex store-match-data | ||
| 1096 | @code{store-match-data} is an alias for @code{set-match-data}. | ||
| 1097 | @end defun | ||
| 1098 | |||
| 1099 | @node Saving Match Data | ||
| 1100 | @subsection Saving and Restoring the Match Data | ||
| 1101 | |||
| 1102 | All asynchronous process functions (filters and sentinels) and | ||
| 1103 | functions that use @code{recursive-edit} should save and restore the | ||
| 1104 | match data if they do a search or if they let the user type arbitrary | ||
| 1105 | commands. Saving the match data is useful in other cases as | ||
| 1106 | well---whenever you want to access the match data resulting from an | ||
| 1107 | earlier search, notwithstanding another intervening search. | ||
| 1108 | |||
| 1109 | This example shows the problem that can arise if you fail to | ||
| 1110 | attend to this requirement: | ||
| 1111 | |||
| 1112 | @example | ||
| 1113 | @group | ||
| 1114 | (re-search-forward "The \\(cat \\)") | ||
| 1115 | @result{} 48 | ||
| 1116 | (foo) ; @r{Perhaps @code{foo} does} | ||
| 1117 | ; @r{more searching.} | ||
| 1118 | (match-end 0) | ||
| 1119 | @result{} 61 ; @r{Unexpected result---not 48!} | ||
| 1120 | @end group | ||
| 1121 | @end example | ||
| 1122 | |||
| 1123 | In Emacs versions 19 and later, you can save and restore the match | ||
| 1124 | data with @code{save-match-data}: | ||
| 1125 | |||
| 1126 | @defspec save-match-data body@dots{} | ||
| 1127 | This special form executes @var{body}, saving and restoring the match | ||
| 1128 | data around it. This is useful if you wish to do a search without | ||
| 1129 | altering the match data that resulted from an earlier search. | ||
| 1130 | @end defspec | ||
| 1131 | |||
| 1132 | You can use @code{set-match-data} together with @code{match-data} to | ||
| 1133 | imitate the effect of the special form @code{save-match-data}. This is | ||
| 1134 | useful for writing code that can run in Emacs 18. Here is how: | ||
| 1135 | |||
| 1136 | @example | ||
| 1137 | @group | ||
| 1138 | (let ((data (match-data))) | ||
| 1139 | (unwind-protect | ||
| 1140 | @dots{} ; @r{May change the original match data.} | ||
| 1141 | (set-match-data data))) | ||
| 1142 | @end group | ||
| 1143 | @end example | ||
| 1144 | |||
| 1145 | @ignore | ||
| 1146 | Here is a function which restores the match data provided the buffer | ||
| 1147 | associated with it still exists. | ||
| 1148 | |||
| 1149 | @smallexample | ||
| 1150 | @group | ||
| 1151 | (defun restore-match-data (data) | ||
| 1152 | @c It is incorrect to split the first line of a doc string. | ||
| 1153 | @c If there's a problem here, it should be solved in some other way. | ||
| 1154 | "Restore the match data DATA unless the buffer is missing." | ||
| 1155 | (catch 'foo | ||
| 1156 | (let ((d data)) | ||
| 1157 | @end group | ||
| 1158 | (while d | ||
| 1159 | (and (car d) | ||
| 1160 | (null (marker-buffer (car d))) | ||
| 1161 | @group | ||
| 1162 | ;; @file{match-data} @r{buffer is deleted.} | ||
| 1163 | (throw 'foo nil)) | ||
| 1164 | (setq d (cdr d))) | ||
| 1165 | (set-match-data data)))) | ||
| 1166 | @end group | ||
| 1167 | @end smallexample | ||
| 1168 | @end ignore | ||
| 1169 | |||
| 1170 | @node Searching and Case | ||
| 1171 | @section Searching and Case | ||
| 1172 | @cindex searching and case | ||
| 1173 | |||
| 1174 | By default, searches in Emacs ignore the case of the text they are | ||
| 1175 | searching through; if you specify searching for @samp{FOO}, then | ||
| 1176 | @samp{Foo} or @samp{foo} is also considered a match. Regexps, and in | ||
| 1177 | particular character sets, are included: thus, @samp{[aB]} would match | ||
| 1178 | @samp{a} or @samp{A} or @samp{b} or @samp{B}. | ||
| 1179 | |||
| 1180 | If you do not want this feature, set the variable | ||
| 1181 | @code{case-fold-search} to @code{nil}. Then all letters must match | ||
| 1182 | exactly, including case. This is a per-buffer-local variable; altering | ||
| 1183 | the variable affects only the current buffer. (@xref{Intro to | ||
| 1184 | Buffer-Local}.) Alternatively, you may change the value of | ||
| 1185 | @code{default-case-fold-search}, which is the default value of | ||
| 1186 | @code{case-fold-search} for buffers that do not override it. | ||
| 1187 | |||
| 1188 | Note that the user-level incremental search feature handles case | ||
| 1189 | distinctions differently. When given a lower case letter, it looks for | ||
| 1190 | a match of either case, but when given an upper case letter, it looks | ||
| 1191 | for an upper case letter only. But this has nothing to do with the | ||
| 1192 | searching functions used in Lisp code. | ||
| 1193 | |||
| 1194 | @defopt case-replace | ||
| 1195 | This variable determines whether @code{query-replace} should preserve | ||
| 1196 | case in replacements. If the variable is @code{nil}, then | ||
| 1197 | @code{replace-match} should not try to convert case. | ||
| 1198 | @end defopt | ||
| 1199 | |||
| 1200 | @defopt case-fold-search | ||
| 1201 | This buffer-local variable determines whether searches should ignore | ||
| 1202 | case. If the variable is @code{nil} they do not ignore case; otherwise | ||
| 1203 | they do ignore case. | ||
| 1204 | @end defopt | ||
| 1205 | |||
| 1206 | @defvar default-case-fold-search | ||
| 1207 | The value of this variable is the default value for | ||
| 1208 | @code{case-fold-search} in buffers that do not override it. This is the | ||
| 1209 | same as @code{(default-value 'case-fold-search)}. | ||
| 1210 | @end defvar | ||
| 1211 | |||
| 1212 | @node Standard Regexps | ||
| 1213 | @section Standard Regular Expressions Used in Editing | ||
| 1214 | @cindex regexps used standardly in editing | ||
| 1215 | @cindex standard regexps used in editing | ||
| 1216 | |||
| 1217 | This section describes some variables that hold regular expressions | ||
| 1218 | used for certain purposes in editing: | ||
| 1219 | |||
| 1220 | @defvar page-delimiter | ||
| 1221 | This is the regexp describing line-beginnings that separate pages. The | ||
| 1222 | default value is @code{"^\014"} (i.e., @code{"^^L"} or @code{"^\C-l"}). | ||
| 1223 | @end defvar | ||
| 1224 | |||
| 1225 | @defvar paragraph-separate | ||
| 1226 | This is the regular expression for recognizing the beginning of a line | ||
| 1227 | that separates paragraphs. (If you change this, you may have to | ||
| 1228 | change @code{paragraph-start} also.) The default value is @code{"^[ | ||
| 1229 | \t\f]*$"}, which is a line that consists entirely of spaces, tabs, and | ||
| 1230 | form feeds. | ||
| 1231 | @end defvar | ||
| 1232 | |||
| 1233 | @defvar paragraph-start | ||
| 1234 | This is the regular expression for recognizing the beginning of a line | ||
| 1235 | that starts @emph{or} separates paragraphs. The default value is | ||
| 1236 | @code{"^[ \t\n\f]"}, which matches a line starting with a space, tab, | ||
| 1237 | newline, or form feed. | ||
| 1238 | @end defvar | ||
| 1239 | |||
| 1240 | @defvar sentence-end | ||
| 1241 | This is the regular expression describing the end of a sentence. (All | ||
| 1242 | paragraph boundaries also end sentences, regardless.) The default value | ||
| 1243 | is: | ||
| 1244 | |||
| 1245 | @example | ||
| 1246 | "[.?!][]\"')@}]*\\($\\|\t\\| \\)[ \t\n]*" | ||
| 1247 | @end example | ||
| 1248 | |||
| 1249 | This means a period, question mark or exclamation mark, optionally followed by | ||
| 1250 | closing brackets, quotes, parentheses or braces, followed by tabs, spaces or new lines. | ||
| 1251 | |||
| 1252 | For a detailed explanation of this regular expression, see @ref{Regexp | ||
| 1253 | Example}. | ||
| 1254 | @end defvar | ||
diff --git a/lispref/syntax.texi b/lispref/syntax.texi new file mode 100644 index 00000000000..9766df698f2 --- /dev/null +++ b/lispref/syntax.texi | |||
| @@ -0,0 +1,707 @@ | |||
| 1 | @c -*-texinfo-*- | ||
| 2 | @c This is part of the GNU Emacs Lisp Reference Manual. | ||
| 3 | @c Copyright (C) 1990, 1991, 1992, 1993, 1994 Free Software Foundation, Inc. | ||
| 4 | @c See the file elisp.texi for copying conditions. | ||
| 5 | @setfilename ../info/syntax | ||
| 6 | @node Syntax Tables, Abbrevs, Searching and Matching, Top | ||
| 7 | @chapter Syntax Tables | ||
| 8 | @cindex parsing | ||
| 9 | @cindex syntax table | ||
| 10 | @cindex text parsing | ||
| 11 | |||
| 12 | A @dfn{syntax table} specifies the syntactic textual function of each | ||
| 13 | character. This information is used by the parsing commands, the | ||
| 14 | complex movement commands, and others to determine where words, symbols, | ||
| 15 | and other syntactic constructs begin and end. The current syntax table | ||
| 16 | controls the meaning of the word motion functions (@pxref{Word Motion}) | ||
| 17 | and the list motion functions (@pxref{List Motion}) as well as the | ||
| 18 | functions in this chapter. | ||
| 19 | |||
| 20 | @menu | ||
| 21 | * Basics: Syntax Basics. Basic concepts of syntax tables. | ||
| 22 | * Desc: Syntax Descriptors. How characters are classified. | ||
| 23 | * Syntax Table Functions:: How to create, examine and alter syntax tables. | ||
| 24 | * Motion and Syntax:: Moving over characters with certain syntaxes. | ||
| 25 | * Parsing Expressions:: Parsing balanced expressions | ||
| 26 | using the syntax table. | ||
| 27 | * Standard Syntax Tables:: Syntax tables used by various major modes. | ||
| 28 | * Syntax Table Internals:: How syntax table information is stored. | ||
| 29 | @end menu | ||
| 30 | |||
| 31 | @node Syntax Basics | ||
| 32 | @section Syntax Table Concepts | ||
| 33 | |||
| 34 | @ifinfo | ||
| 35 | A @dfn{syntax table} provides Emacs with the information that | ||
| 36 | determines the syntactic use of each character in a buffer. This | ||
| 37 | information is used by the parsing commands, the complex movement | ||
| 38 | commands, and others to determine where words, symbols, and other | ||
| 39 | syntactic constructs begin and end. The current syntax table controls | ||
| 40 | the meaning of the word motion functions (@pxref{Word Motion}) and the | ||
| 41 | list motion functions (@pxref{List Motion}) as well as the functions in | ||
| 42 | this chapter. | ||
| 43 | @end ifinfo | ||
| 44 | |||
| 45 | A syntax table is a vector of 256 elements; it contains one entry for | ||
| 46 | each of the 256 possible characters in an 8-bit byte. Each element is | ||
| 47 | an integer that encodes the syntax of the character in question. | ||
| 48 | |||
| 49 | Syntax tables are used only for moving across text, not for the Emacs | ||
| 50 | Lisp reader. Emacs Lisp uses built-in syntactic rules when reading Lisp | ||
| 51 | expressions, and these rules cannot be changed. | ||
| 52 | |||
| 53 | Each buffer has its own major mode, and each major mode has its own | ||
| 54 | idea of the syntactic class of various characters. For example, in Lisp | ||
| 55 | mode, the character @samp{;} begins a comment, but in C mode, it | ||
| 56 | terminates a statement. To support these variations, Emacs makes the | ||
| 57 | choice of syntax table local to each buffer. Typically, each major | ||
| 58 | mode has its own syntax table and installs that table in each buffer | ||
| 59 | which uses that mode. Changing this table alters the syntax in all | ||
| 60 | those buffers as well as in any buffers subsequently put in that mode. | ||
| 61 | Occasionally several similar modes share one syntax table. | ||
| 62 | @xref{Example Major Modes}, for an example of how to set up a syntax | ||
| 63 | table. | ||
| 64 | |||
| 65 | A syntax table can inherit the data for some characters from the | ||
| 66 | standard syntax table, while specifying other characters itself. The | ||
| 67 | ``inherit'' syntax class means ``inherit this character's syntax from | ||
| 68 | the standard syntax table.'' Most major modes' syntax tables inherit | ||
| 69 | the syntax of character codes 0 through 31 and 128 through 255. This is | ||
| 70 | useful with character sets such as ISO Latin-1 that have additional | ||
| 71 | alphabetic characters in the range 128 to 255. Just changing the | ||
| 72 | standard syntax for these characters affects all major modes. | ||
| 73 | |||
| 74 | @defun syntax-table-p object | ||
| 75 | This function returns @code{t} if @var{object} is a vector of 256 | ||
| 76 | elements. This means that the vector may be a syntax table. However, | ||
| 77 | according to this test, any vector of length 256 is considered to be a | ||
| 78 | syntax table, no matter what its contents. | ||
| 79 | @end defun | ||
| 80 | |||
| 81 | @node Syntax Descriptors | ||
| 82 | @section Syntax Descriptors | ||
| 83 | @cindex syntax classes | ||
| 84 | |||
| 85 | This section describes the syntax classes and flags that denote the | ||
| 86 | syntax of a character, and how they are represented as a @dfn{syntax | ||
| 87 | descriptor}, which is a Lisp string that you pass to | ||
| 88 | @code{modify-syntax-entry} to specify the desired syntax. | ||
| 89 | |||
| 90 | Emacs defines a number of @dfn{syntax classes}. Each syntax table | ||
| 91 | puts each character into one class. There is no necessary relationship | ||
| 92 | between the class of a character in one syntax table and its class in | ||
| 93 | any other table. | ||
| 94 | |||
| 95 | Each class is designated by a mnemonic character which serves as the | ||
| 96 | name of the class when you need to specify a class. Usually the | ||
| 97 | designator character is one which is frequently put in that class; | ||
| 98 | however, its meaning as a designator is unvarying and independent of | ||
| 99 | what syntax that character currently has. | ||
| 100 | |||
| 101 | @cindex syntax descriptor | ||
| 102 | A syntax descriptor is a Lisp string which specifies a syntax class, a | ||
| 103 | matching character (used only for the parenthesis classes) and flags. | ||
| 104 | The first character is the designator for a syntax class. The second | ||
| 105 | character is the character to match; if it is unused, put a space there. | ||
| 106 | Then come the characters for any desired flags. If no matching | ||
| 107 | character or flags are needed, one character is sufficient. | ||
| 108 | |||
| 109 | For example, the descriptor for the character @samp{*} in C mode is | ||
| 110 | @samp{@w{. 23}} (i.e., punctuation, matching character slot unused, | ||
| 111 | second character of a comment-starter, first character of a | ||
| 112 | comment-ender), and the entry for @samp{/} is @samp{@w{. 14}} (i.e., | ||
| 113 | punctuation, matching character slot unused, first character of a | ||
| 114 | comment-starter, second character of a comment-ender). | ||
| 115 | |||
| 116 | @menu | ||
| 117 | * Syntax Class Table:: Table of syntax classes. | ||
| 118 | * Syntax Flags:: Additional flags each character can have. | ||
| 119 | @end menu | ||
| 120 | |||
| 121 | @node Syntax Class Table | ||
| 122 | @subsection Table of Syntax Classes | ||
| 123 | |||
| 124 | Here is a table of syntax classes, the characters that stand for them, | ||
| 125 | their meanings, and examples of their use. | ||
| 126 | |||
| 127 | @deffn {Syntax class} @w{whitespace character} | ||
| 128 | @dfn{Whitespace characters} (designated with @w{@samp{@ }} or @samp{-}) | ||
| 129 | separate symbols and words from each other. Typically, whitespace | ||
| 130 | characters have no other syntactic significance, and multiple whitespace | ||
| 131 | characters are syntactically equivalent to a single one. Space, tab, | ||
| 132 | newline and formfeed are almost always classified as whitespace. | ||
| 133 | @end deffn | ||
| 134 | |||
| 135 | @deffn {Syntax class} @w{word constituent} | ||
| 136 | @dfn{Word constituents} (designated with @samp{w}) are parts of normal | ||
| 137 | English words and are typically used in variable and command names in | ||
| 138 | programs. All upper and lower case letters and the digits are typically | ||
| 139 | word constituents. | ||
| 140 | @end deffn | ||
| 141 | |||
| 142 | @deffn {Syntax class} @w{symbol constituent} | ||
| 143 | @dfn{Symbol constituents} (designated with @samp{_}) are the extra | ||
| 144 | characters that are used in variable and command names along with word | ||
| 145 | constituents. For example, the symbol constituents class is used in | ||
| 146 | Lisp mode to indicate that certain characters may be part of symbol | ||
| 147 | names even though they are not part of English words. These characters | ||
| 148 | are @samp{$&*+-_<>}. In standard C, the only non-word-constituent | ||
| 149 | character that is valid in symbols is underscore (@samp{_}). | ||
| 150 | @end deffn | ||
| 151 | |||
| 152 | @deffn {Syntax class} @w{punctuation character} | ||
| 153 | @dfn{Punctuation characters} (@samp{.}) are those characters that are | ||
| 154 | used as punctuation in English, or are used in some way in a programming | ||
| 155 | language to separate symbols from one another. Most programming | ||
| 156 | language modes, including Emacs Lisp mode, have no characters in this | ||
| 157 | class since the few characters that are not symbol or word constituents | ||
| 158 | all have other uses. | ||
| 159 | @end deffn | ||
| 160 | |||
| 161 | @deffn {Syntax class} @w{open parenthesis character} | ||
| 162 | @deffnx {Syntax class} @w{close parenthesis character} | ||
| 163 | @cindex parenthesis syntax | ||
| 164 | Open and close @dfn{parenthesis characters} are characters used in | ||
| 165 | dissimilar pairs to surround sentences or expressions. Such a grouping | ||
| 166 | is begun with an open parenthesis character and terminated with a close. | ||
| 167 | Each open parenthesis character matches a particular close parenthesis | ||
| 168 | character, and vice versa. Normally, Emacs indicates momentarily the | ||
| 169 | matching open parenthesis when you insert a close parenthesis. | ||
| 170 | @xref{Blinking}. | ||
| 171 | |||
| 172 | The class of open parentheses is designated with @samp{(}, and that of | ||
| 173 | close parentheses with @samp{)}. | ||
| 174 | |||
| 175 | In English text, and in C code, the parenthesis pairs are @samp{()}, | ||
| 176 | @samp{[]}, and @samp{@{@}}. In Emacs Lisp, the delimiters for lists and | ||
| 177 | vectors (@samp{()} and @samp{[]}) are classified as parenthesis | ||
| 178 | characters. | ||
| 179 | @end deffn | ||
| 180 | |||
| 181 | @deffn {Syntax class} @w{string quote} | ||
| 182 | @dfn{String quote characters} (designated with @samp{"}) are used in | ||
| 183 | many languages, including Lisp and C, to delimit string constants. The | ||
| 184 | same string quote character appears at the beginning and the end of a | ||
| 185 | string. Such quoted strings do not nest. | ||
| 186 | |||
| 187 | The parsing facilities of Emacs consider a string as a single token. | ||
| 188 | The usual syntactic meanings of the characters in the string are | ||
| 189 | suppressed. | ||
| 190 | |||
| 191 | The Lisp modes have two string quote characters: double-quote (@samp{"}) | ||
| 192 | and vertical bar (@samp{|}). @samp{|} is not used in Emacs Lisp, but it | ||
| 193 | is used in Common Lisp. C also has two string quote characters: | ||
| 194 | double-quote for strings, and single-quote (@samp{'}) for character | ||
| 195 | constants. | ||
| 196 | |||
| 197 | English text has no string quote characters because English is not a | ||
| 198 | programming language. Although quotation marks are used in English, | ||
| 199 | we do not want them to turn off the usual syntactic properties of | ||
| 200 | other characters in the quotation. | ||
| 201 | @end deffn | ||
| 202 | |||
| 203 | @deffn {Syntax class} @w{escape} | ||
| 204 | An @dfn{escape character} (designated with @samp{\}) starts an escape | ||
| 205 | sequence such as is used in C string and character constants. The | ||
| 206 | character @samp{\} belongs to this class in both C and Lisp. (In C, it | ||
| 207 | is used thus only inside strings, but it turns out to cause no trouble | ||
| 208 | to treat it this way throughout C code.) | ||
| 209 | |||
| 210 | Characters in this class count as part of words if | ||
| 211 | @code{words-include-escapes} is non-@code{nil}. @xref{Word Motion}. | ||
| 212 | @end deffn | ||
| 213 | |||
| 214 | @deffn {Syntax class} @w{character quote} | ||
| 215 | A @dfn{character quote character} (designated with @samp{/}) quotes the | ||
| 216 | following character so that it loses its normal syntactic meaning. This | ||
| 217 | differs from an escape character in that only the character immediately | ||
| 218 | following is ever affected. | ||
| 219 | |||
| 220 | Characters in this class count as part of words if | ||
| 221 | @code{words-include-escapes} is non-@code{nil}. @xref{Word Motion}. | ||
| 222 | |||
| 223 | This class is not currently used in any standard Emacs modes. | ||
| 224 | @end deffn | ||
| 225 | |||
| 226 | @deffn {Syntax class} @w{paired delimiter} | ||
| 227 | @dfn{Paired delimiter characters} (designated with @samp{$}) are like | ||
| 228 | string quote characters except that the syntactic properties of the | ||
| 229 | characters between the delimiters are not suppressed. Only @TeX{} mode | ||
| 230 | uses a paired identical delimiter presently---the @samp{$} that both | ||
| 231 | enters and leaves math mode. | ||
| 232 | @end deffn | ||
| 233 | |||
| 234 | @deffn {Syntax class} @w{expression prefix} | ||
| 235 | An @dfn{expression prefix operator} (designated with @samp{'}) is used | ||
| 236 | for syntactic operators that are part of an expression if they appear | ||
| 237 | next to one. These characters in Lisp include the apostrophe, @samp{'} | ||
| 238 | (used for quoting), the comma, @samp{,} (used in macros), and @samp{#} | ||
| 239 | (used in the read syntax for certain data types). | ||
| 240 | @end deffn | ||
| 241 | |||
| 242 | @deffn {Syntax class} @w{comment starter} | ||
| 243 | @deffnx {Syntax class} @w{comment ender} | ||
| 244 | @cindex comment syntax | ||
| 245 | The @dfn{comment starter} and @dfn{comment ender} characters are used in | ||
| 246 | various languages to delimit comments. These classes are designated | ||
| 247 | with @samp{<} and @samp{>}, respectively. | ||
| 248 | |||
| 249 | English text has no comment characters. In Lisp, the semicolon | ||
| 250 | (@samp{;}) starts a comment and a newline or formfeed ends one. | ||
| 251 | @end deffn | ||
| 252 | |||
| 253 | @deffn {Syntax class} @w{inherit} | ||
| 254 | This syntax class does not specify a syntax. It says to look in the | ||
| 255 | standard syntax table to find the syntax of this character. The | ||
| 256 | designator for this syntax code is @samp{@@}. | ||
| 257 | @end deffn | ||
| 258 | |||
| 259 | @node Syntax Flags | ||
| 260 | @subsection Syntax Flags | ||
| 261 | @cindex syntax flags | ||
| 262 | |||
| 263 | In addition to the classes, entries for characters in a syntax table | ||
| 264 | can include flags. There are six possible flags, represented by the | ||
| 265 | characters @samp{1}, @samp{2}, @samp{3}, @samp{4}, @samp{b} and | ||
| 266 | @samp{p}. | ||
| 267 | |||
| 268 | All the flags except @samp{p} are used to describe multi-character | ||
| 269 | comment delimiters. The digit flags indicate that a character can | ||
| 270 | @emph{also} be part of a comment sequence, in addition to the syntactic | ||
| 271 | properties associated with its character class. The flags are | ||
| 272 | independent of the class and each other for the sake of characters such | ||
| 273 | as @samp{*} in C mode, which is a punctuation character, @emph{and} the | ||
| 274 | second character of a start-of-comment sequence (@samp{/*}), @emph{and} | ||
| 275 | the first character of an end-of-comment sequence (@samp{*/}). | ||
| 276 | |||
| 277 | The flags for a character @var{c} are: | ||
| 278 | |||
| 279 | @itemize @bullet | ||
| 280 | @item | ||
| 281 | @samp{1} means @var{c} is the start of a two-character comment start | ||
| 282 | sequence. | ||
| 283 | |||
| 284 | @item | ||
| 285 | @samp{2} means @var{c} is the second character of such a sequence. | ||
| 286 | |||
| 287 | @item | ||
| 288 | @samp{3} means @var{c} is the start of a two-character comment end | ||
| 289 | sequence. | ||
| 290 | |||
| 291 | @item | ||
| 292 | @samp{4} means @var{c} is the second character of such a sequence. | ||
| 293 | |||
| 294 | @item | ||
| 295 | @c Emacs 19 feature | ||
| 296 | @samp{b} means that @var{c} as a comment delimiter belongs to the | ||
| 297 | alternative ``b'' comment style. | ||
| 298 | |||
| 299 | Emacs supports two comment styles simultaneously in any one syntax | ||
| 300 | table. This is for the sake of C++. Each style of comment syntax has | ||
| 301 | its own comment-start sequence and its own comment-end sequence. Each | ||
| 302 | comment must stick to one style or the other; thus, if it starts with | ||
| 303 | the comment-start sequence of style ``b'', it must also end with the | ||
| 304 | comment-end sequence of style ``b''. | ||
| 305 | |||
| 306 | The two comment-start sequences must begin with the same character; only | ||
| 307 | the second character may differ. Mark the second character of the | ||
| 308 | ``b''-style comment start sequence with the @samp{b} flag. | ||
| 309 | |||
| 310 | A comment-end sequence (one or two characters) applies to the ``b'' | ||
| 311 | style if its first character has the @samp{b} flag set; otherwise, it | ||
| 312 | applies to the ``a'' style. | ||
| 313 | |||
| 314 | The appropriate comment syntax settings for C++ are as follows: | ||
| 315 | |||
| 316 | @table @asis | ||
| 317 | @item @samp{/} | ||
| 318 | @samp{124b} | ||
| 319 | @item @samp{*} | ||
| 320 | @samp{23} | ||
| 321 | @item newline | ||
| 322 | @samp{>b} | ||
| 323 | @end table | ||
| 324 | |||
| 325 | Thus @samp{/*} is a comment-start sequence for ``a'' style, @samp{//} | ||
| 326 | is a comment-start sequence for ``b'' style, @samp{*/} is a | ||
| 327 | comment-end sequence for ``a'' style, and newline is a comment-end | ||
| 328 | sequence for ``b'' style. | ||
| 329 | |||
| 330 | @item | ||
| 331 | @c Emacs 19 feature | ||
| 332 | @samp{p} identifies an additional ``prefix character'' for Lisp syntax. | ||
| 333 | These characters are treated as whitespace when they appear between | ||
| 334 | expressions. When they appear within an expression, they are handled | ||
| 335 | according to their usual syntax codes. | ||
| 336 | |||
| 337 | The function @code{backward-prefix-chars} moves back over these | ||
| 338 | characters, as well as over characters whose primary syntax class is | ||
| 339 | prefix (@samp{'}). @xref{Motion and Syntax}. | ||
| 340 | @end itemize | ||
| 341 | |||
| 342 | @node Syntax Table Functions | ||
| 343 | @section Syntax Table Functions | ||
| 344 | |||
| 345 | In this section we describe functions for creating, accessing and | ||
| 346 | altering syntax tables. | ||
| 347 | |||
| 348 | @defun make-syntax-table | ||
| 349 | This function creates a new syntax table. Character codes 0 through | ||
| 350 | 31, and 128 through 255, are set up to inherit from the standard syntax | ||
| 351 | table. The other character codes are set up by copying what the | ||
| 352 | standard syntax table says about them. | ||
| 353 | |||
| 354 | Most major mode syntax tables are created in this way. | ||
| 355 | @end defun | ||
| 356 | |||
| 357 | @defun copy-syntax-table &optional table | ||
| 358 | This function constructs a copy of @var{table} and returns it. If | ||
| 359 | @var{table} is not supplied (or is @code{nil}), it returns a copy of the | ||
| 360 | current syntax table. Otherwise, an error is signaled if @var{table} is | ||
| 361 | not a syntax table. | ||
| 362 | @end defun | ||
| 363 | |||
| 364 | @deffn Command modify-syntax-entry char syntax-descriptor &optional table | ||
| 365 | This function sets the syntax entry for @var{char} according to | ||
| 366 | @var{syntax-descriptor}. The syntax is changed only for @var{table}, | ||
| 367 | which defaults to the current buffer's syntax table, and not in any | ||
| 368 | other syntax table. The argument @var{syntax-descriptor} specifies the | ||
| 369 | desired syntax; this is a string beginning with a class designator | ||
| 370 | character, and optionally containing a matching character and flags as | ||
| 371 | well. @xref{Syntax Descriptors}. | ||
| 372 | |||
| 373 | This function always returns @code{nil}. The old syntax information in | ||
| 374 | the table for this character is discarded. | ||
| 375 | |||
| 376 | An error is signaled if the first character of the syntax descriptor is not | ||
| 377 | one of the syntax class designator characters. An error is also | ||
| 378 | signaled if @var{char} is not a character. | ||
| 379 | |||
| 380 | @example | ||
| 381 | @group | ||
| 382 | @exdent @r{Examples:} | ||
| 383 | |||
| 384 | ;; @r{Put the space character in class whitespace.} | ||
| 385 | (modify-syntax-entry ?\ " ") | ||
| 386 | @result{} nil | ||
| 387 | @end group | ||
| 388 | |||
| 389 | @group | ||
| 390 | ;; @r{Make @samp{$} an open parenthesis character,} | ||
| 391 | ;; @r{with @samp{^} as its matching close.} | ||
| 392 | (modify-syntax-entry ?$ "(^") | ||
| 393 | @result{} nil | ||
| 394 | @end group | ||
| 395 | |||
| 396 | @group | ||
| 397 | ;; @r{Make @samp{^} a close parenthesis character,} | ||
| 398 | ;; @r{with @samp{$} as its matching open.} | ||
| 399 | (modify-syntax-entry ?^ ")$") | ||
| 400 | @result{} nil | ||
| 401 | @end group | ||
| 402 | |||
| 403 | @group | ||
| 404 | ;; @r{Make @samp{/} a punctuation character,} | ||
| 405 | ;; @r{the first character of a start-comment sequence,} | ||
| 406 | ;; @r{and the second character of an end-comment sequence.} | ||
| 407 | ;; @r{This is used in C mode.} | ||
| 408 | (modify-syntax-entry ?/ ". 14") | ||
| 409 | @result{} nil | ||
| 410 | @end group | ||
| 411 | @end example | ||
| 412 | @end deffn | ||
| 413 | |||
| 414 | @defun char-syntax character | ||
| 415 | This function returns the syntax class of @var{character}, represented | ||
| 416 | by its mnemonic designator character. This @emph{only} returns the | ||
| 417 | class, not any matching parenthesis or flags. | ||
| 418 | |||
| 419 | An error is signaled if @var{character} is not a character. | ||
| 420 | |||
| 421 | The following examples apply to C mode. The first example shows that | ||
| 422 | the syntax class of space is whitespace (represented by a space). The | ||
| 423 | second example shows that the syntax of @samp{/} is punctuation. This | ||
| 424 | does not show the fact that it is also part of comment-start and comment-end | ||
| 425 | sequences. The third example shows that open parenthesis is in the class | ||
| 426 | of open parentheses. This does not show the fact that it has a matching | ||
| 427 | character, @samp{)}. | ||
| 428 | |||
| 429 | @example | ||
| 430 | @group | ||
| 431 | (char-to-string (char-syntax ?\ )) | ||
| 432 | @result{} " " | ||
| 433 | @end group | ||
| 434 | |||
| 435 | @group | ||
| 436 | (char-to-string (char-syntax ?/)) | ||
| 437 | @result{} "." | ||
| 438 | @end group | ||
| 439 | |||
| 440 | @group | ||
| 441 | (char-to-string (char-syntax ?\()) | ||
| 442 | @result{} "(" | ||
| 443 | @end group | ||
| 444 | @end example | ||
| 445 | @end defun | ||
| 446 | |||
| 447 | @defun set-syntax-table table | ||
| 448 | This function makes @var{table} the syntax table for the current buffer. | ||
| 449 | It returns @var{table}. | ||
| 450 | @end defun | ||
| 451 | |||
| 452 | @defun syntax-table | ||
| 453 | This function returns the current syntax table, which is the table for | ||
| 454 | the current buffer. | ||
| 455 | @end defun | ||
| 456 | |||
| 457 | @node Motion and Syntax | ||
| 458 | @section Motion and Syntax | ||
| 459 | |||
| 460 | This section describes functions for moving across characters in | ||
| 461 | certain syntax classes. None of these functions exists in Emacs | ||
| 462 | version 18 or earlier. | ||
| 463 | |||
| 464 | @defun skip-syntax-forward syntaxes &optional limit | ||
| 465 | This function moves point forward across characters having syntax classes | ||
| 466 | mentioned in @var{syntaxes}. It stops when it encounters the end of | ||
| 467 | the buffer, or position @var{limit} (if specified), or a character it is | ||
| 468 | not supposed to skip. | ||
| 469 | @ignore @c may want to change this. | ||
| 470 | The return value is the distance traveled, which is a nonnegative | ||
| 471 | integer. | ||
| 472 | @end ignore | ||
| 473 | @end defun | ||
| 474 | |||
| 475 | @defun skip-syntax-backward syntaxes &optional limit | ||
| 476 | This function moves point backward across characters whose syntax | ||
| 477 | classes are mentioned in @var{syntaxes}. It stops when it encounters | ||
| 478 | the beginning of the buffer, or position @var{limit} (if specified), or a | ||
| 479 | character it is not supposed to skip. | ||
| 480 | @ignore @c may want to change this. | ||
| 481 | The return value indicates the distance traveled. It is an integer that | ||
| 482 | is zero or less. | ||
| 483 | @end ignore | ||
| 484 | @end defun | ||
| 485 | |||
| 486 | @defun backward-prefix-chars | ||
| 487 | This function moves point backward over any number of characters with | ||
| 488 | expression prefix syntax. This includes both characters in the | ||
| 489 | expression prefix syntax class, and characters with the @samp{p} flag. | ||
| 490 | @end defun | ||
| 491 | |||
| 492 | @node Parsing Expressions | ||
| 493 | @section Parsing Balanced Expressions | ||
| 494 | |||
| 495 | Here are several functions for parsing and scanning balanced | ||
| 496 | expressions, also known as @dfn{sexps}, in which parentheses match in | ||
| 497 | pairs. The syntax table controls the interpretation of characters, so | ||
| 498 | these functions can be used for Lisp expressions when in Lisp mode and | ||
| 499 | for C expressions when in C mode. @xref{List Motion}, for convenient | ||
| 500 | higher-level functions for moving over balanced expressions. | ||
| 501 | |||
| 502 | @defun parse-partial-sexp start limit &optional target-depth stop-before state stop-comment | ||
| 503 | This function parses a sexp in the current buffer starting at | ||
| 504 | @var{start}, not scanning past @var{limit}. It stops at @var{limit} or | ||
| 505 | when certain criteria described below are met, and sets point to the location | ||
| 506 | where parsing stops. It returns a value describing the status of the | ||
| 507 | parse at the point where it stops. | ||
| 508 | |||
| 509 | If @var{state} is @code{nil}, @var{start} is assumed to be at the top | ||
| 510 | level of parenthesis structure, such as the beginning of a function | ||
| 511 | definition. Alternatively, you might wish to resume parsing in the | ||
| 512 | middle of the structure. To do this, you must provide a @var{state} | ||
| 513 | argument that describes the initial status of parsing. | ||
| 514 | |||
| 515 | @cindex parenthesis depth | ||
| 516 | If the third argument @var{target-depth} is non-@code{nil}, parsing | ||
| 517 | stops if the depth in parentheses becomes equal to @var{target-depth}. | ||
| 518 | The depth starts at 0, or at whatever is given in @var{state}. | ||
| 519 | |||
| 520 | If the fourth argument @var{stop-before} is non-@code{nil}, parsing | ||
| 521 | stops when it comes to any character that starts a sexp. If | ||
| 522 | @var{stop-comment} is non-@code{nil}, parsing stops when it comes to the | ||
| 523 | start of a comment. | ||
| 524 | |||
| 525 | @cindex parse state | ||
| 526 | The fifth argument @var{state} is an eight-element list of the same | ||
| 527 | form as the value of this function, described below. The return value | ||
| 528 | of one call may be used to initialize the state of the parse on another | ||
| 529 | call to @code{parse-partial-sexp}. | ||
| 530 | |||
| 531 | The result is a list of eight elements describing the final state of | ||
| 532 | the parse: | ||
| 533 | |||
| 534 | @enumerate 0 | ||
| 535 | @item | ||
| 536 | The depth in parentheses, counting from 0. | ||
| 537 | |||
| 538 | @item | ||
| 539 | @cindex innermost containing parentheses | ||
| 540 | The character position of the start of the innermost containing | ||
| 541 | parenthetical grouping; @code{nil} if none. | ||
| 542 | |||
| 543 | @item | ||
| 544 | @cindex previous complete subexpression | ||
| 545 | The character position of the start of the last complete subexpression | ||
| 546 | terminated; @code{nil} if none. | ||
| 547 | |||
| 548 | @item | ||
| 549 | @cindex inside string | ||
| 550 | Non-@code{nil} if inside a string. More precisely, this is the | ||
| 551 | character that will terminate the string. | ||
| 552 | |||
| 553 | @item | ||
| 554 | @cindex inside comment | ||
| 555 | @code{t} if inside a comment. | ||
| 556 | |||
| 557 | @item | ||
| 558 | @cindex quote character | ||
| 559 | @code{t} if point is just after a quote character. | ||
| 560 | |||
| 561 | @item | ||
| 562 | The minimum parenthesis depth encountered during this scan. | ||
| 563 | |||
| 564 | @item | ||
| 565 | @code{t} if inside a comment of style ``b''. | ||
| 566 | @end enumerate | ||
| 567 | |||
| 568 | Elements 0, 3, 4, 5 and 7 are significant in the argument @var{state}. | ||
| 569 | |||
| 570 | @cindex indenting with parentheses | ||
| 571 | This function is most often used to compute indentation for languages | ||
| 572 | that have nested parentheses. | ||
| 573 | @end defun | ||
| 574 | |||
| 575 | @defun scan-lists from count depth | ||
| 576 | This function scans forward @var{count} balanced parenthetical groupings | ||
| 577 | from character number @var{from}. It returns the character position | ||
| 578 | where the scan stops. | ||
| 579 | |||
| 580 | If @var{depth} is nonzero, parenthesis depth counting begins from that | ||
| 581 | value. The only candidates for stopping are places where the depth in | ||
| 582 | parentheses becomes zero; @code{scan-lists} counts @var{count} such | ||
| 583 | places and then stops. Thus, a positive value for @var{depth} means go | ||
| 584 | out @var{depth} levels of parenthesis. | ||
| 585 | |||
| 586 | Scanning ignores comments if @code{parse-sexp-ignore-comments} is | ||
| 587 | non-@code{nil}. | ||
| 588 | |||
| 589 | If the scan reaches the beginning or end of the buffer (or its accessible | ||
| 590 | portion), and the depth is not zero, an error is signaled. If the depth | ||
| 591 | is zero but the count is not used up, @code{nil} is returned. | ||
| 592 | @end defun | ||
| 593 | |||
| 594 | @defun scan-sexps from count | ||
| 595 | This function scans forward @var{count} sexps from character position | ||
| 596 | @var{from}. It returns the character position where the scan stops. | ||
| 597 | |||
| 598 | Scanning ignores comments if @code{parse-sexp-ignore-comments} is | ||
| 599 | non-@code{nil}. | ||
| 600 | |||
| 601 | If the scan reaches the beginning or end of (the accessible part of) the | ||
| 602 | buffer in the middle of a parenthetical grouping, an error is signaled. | ||
| 603 | If it reaches the beginning or end between groupings but before count is | ||
| 604 | used up, @code{nil} is returned. | ||
| 605 | @end defun | ||
| 606 | |||
| 607 | @defvar parse-sexp-ignore-comments | ||
| 608 | @cindex skipping comments | ||
| 609 | If the value is non-@code{nil}, then comments are treated as | ||
| 610 | whitespace by the functions in this section and by @code{forward-sexp}. | ||
| 611 | |||
| 612 | In older Emacs versions, this feature worked only when the comment | ||
| 613 | terminator is something like @samp{*/}, and appears only to end a | ||
| 614 | comment. In languages where newlines terminate comments, it was | ||
| 615 | necessary to make this variable @code{nil}, since not every newline is the | ||
| 616 | end of a comment. This limitation no longer exists. | ||
| 617 | @end defvar | ||
| 618 | |||
| 619 | You can use @code{forward-comment} to move forward or backward over | ||
| 620 | one comment or several comments. | ||
| 621 | |||
| 622 | @defun forward-comment count | ||
| 623 | This function moves point forward across @var{count} comments (backward, | ||
| 624 | if @var{count} is negative). If it finds anything other than a comment | ||
| 625 | or whitespace, it stops, leaving point at the place where it stopped. | ||
| 626 | It also stops after satisfying @var{count}. | ||
| 627 | @end defun | ||
| 628 | |||
| 629 | To move forward over all comments and whitespace following point, use | ||
| 630 | @code{(forward-comment (buffer-size))}. @code{(buffer-size)} is a good | ||
| 631 | argument to use, because the number of comments in the buffer cannot | ||
| 632 | exceed that many. | ||
| 633 | |||
| 634 | @node Standard Syntax Tables | ||
| 635 | @section Some Standard Syntax Tables | ||
| 636 | |||
| 637 | Each of the major modes in Emacs has its own syntax table. Here are | ||
| 638 | several of them: | ||
| 639 | |||
| 640 | @defun standard-syntax-table | ||
| 641 | This function returns the standard syntax table, which is the syntax | ||
| 642 | table used in Fundamental mode. | ||
| 643 | @end defun | ||
| 644 | |||
| 645 | @defvar text-mode-syntax-table | ||
| 646 | The value of this variable is the syntax table used in Text mode. | ||
| 647 | @end defvar | ||
| 648 | |||
| 649 | @defvar c-mode-syntax-table | ||
| 650 | The value of this variable is the syntax table for C-mode buffers. | ||
| 651 | @end defvar | ||
| 652 | |||
| 653 | @defvar emacs-lisp-mode-syntax-table | ||
| 654 | The value of this variable is the syntax table used in Emacs Lisp mode | ||
| 655 | by editing commands. (It has no effect on the Lisp @code{read} | ||
| 656 | function.) | ||
| 657 | @end defvar | ||
| 658 | |||
| 659 | @node Syntax Table Internals | ||
| 660 | @section Syntax Table Internals | ||
| 661 | @cindex syntax table internals | ||
| 662 | |||
| 663 | Each element of a syntax table is an integer that encodes the syntax | ||
| 664 | of one character: the syntax class, possible matching character, and | ||
| 665 | flags. Lisp programs don't usually work with the elements directly; the | ||
| 666 | Lisp-level syntax table functions usually work with syntax descriptors | ||
| 667 | (@pxref{Syntax Descriptors}). | ||
| 668 | |||
| 669 | The low 8 bits of each element of a syntax table indicate the | ||
| 670 | syntax class. | ||
| 671 | |||
| 672 | @table @asis | ||
| 673 | @item @i{Integer} | ||
| 674 | @i{Class} | ||
| 675 | @item 0 | ||
| 676 | whitespace | ||
| 677 | @item 1 | ||
| 678 | punctuation | ||
| 679 | @item 2 | ||
| 680 | word | ||
| 681 | @item 3 | ||
| 682 | symbol | ||
| 683 | @item 4 | ||
| 684 | open parenthesis | ||
| 685 | @item 5 | ||
| 686 | close parenthesis | ||
| 687 | @item 6 | ||
| 688 | expression prefix | ||
| 689 | @item 7 | ||
| 690 | string quote | ||
| 691 | @item 8 | ||
| 692 | paired delimiter | ||
| 693 | @item 9 | ||
| 694 | escape | ||
| 695 | @item 10 | ||
| 696 | character quote | ||
| 697 | @item 11 | ||
| 698 | comment-start | ||
| 699 | @item 12 | ||
| 700 | comment-end | ||
| 701 | @item 13 | ||
| 702 | inherit | ||
| 703 | @end table | ||
| 704 | |||
| 705 | The next 8 bits are the matching opposite parenthesis (if the | ||
| 706 | character has parenthesis syntax); otherwise, they are not meaningful. | ||
| 707 | The next 6 bits are the flags. | ||
diff --git a/lispref/tips.texi b/lispref/tips.texi new file mode 100644 index 00000000000..e917e75f463 --- /dev/null +++ b/lispref/tips.texi | |||
| @@ -0,0 +1,582 @@ | |||
| 1 | @c -*-texinfo-*- | ||
| 2 | @c This is part of the GNU Emacs Lisp Reference Manual. | ||
| 3 | @c Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc. | ||
| 4 | @c See the file elisp.texi for copying conditions. | ||
| 5 | @setfilename ../info/tips | ||
| 6 | @node Tips, GNU Emacs Internals, Calendar, Top | ||
| 7 | @appendix Tips and Standards | ||
| 8 | @cindex tips | ||
| 9 | @cindex standards of coding style | ||
| 10 | @cindex coding standards | ||
| 11 | |||
| 12 | This chapter describes no additional features of Emacs Lisp. | ||
| 13 | Instead it gives advice on making effective use of the features described | ||
| 14 | in the previous chapters. | ||
| 15 | |||
| 16 | @menu | ||
| 17 | * Style Tips:: Writing clean and robust programs. | ||
| 18 | * Compilation Tips:: Making compiled code run fast. | ||
| 19 | * Documentation Tips:: Writing readable documentation strings. | ||
| 20 | * Comment Tips:: Conventions for writing comments. | ||
| 21 | * Library Headers:: Standard headers for library packages. | ||
| 22 | @end menu | ||
| 23 | |||
| 24 | @node Style Tips | ||
| 25 | @section Writing Clean Lisp Programs | ||
| 26 | |||
| 27 | Here are some tips for avoiding common errors in writing Lisp code | ||
| 28 | intended for widespread use: | ||
| 29 | |||
| 30 | @itemize @bullet | ||
| 31 | @item | ||
| 32 | Since all global variables share the same name space, and all functions | ||
| 33 | share another name space, you should choose a short word to distinguish | ||
| 34 | your program from other Lisp programs. Then take care to begin the | ||
| 35 | names of all global variables, constants, and functions with the chosen | ||
| 36 | prefix. This helps avoid name conflicts. | ||
| 37 | |||
| 38 | This recommendation applies even to names for traditional Lisp | ||
| 39 | primitives that are not primitives in Emacs Lisp---even to @code{cadr}. | ||
| 40 | Believe it or not, there is more than one plausible way to define | ||
| 41 | @code{cadr}. Play it safe; prepend your name prefix to produce a name | ||
| 42 | like @code{foo-cadr} or @code{mylib-cadr} instead. | ||
| 43 | |||
| 44 | If you write a function that you think ought to be added to Emacs under | ||
| 45 | a certain name, such as @code{twiddle-files}, don't call it by that name | ||
| 46 | in your program. Call it @code{mylib-twiddle-files} in your program, | ||
| 47 | and send mail to @samp{bug-gnu-emacs@@prep.ai.mit.edu} suggesting we add | ||
| 48 | it to Emacs. If and when we do, we can change the name easily enough. | ||
| 49 | |||
| 50 | If one prefix is insufficient, your package may use two or three | ||
| 51 | alternative common prefixes, so long as they make sense. | ||
| 52 | |||
| 53 | Separate the prefix from the rest of the symbol name with a hyphen, | ||
| 54 | @samp{-}. This will be consistent with Emacs itself and with most Emacs | ||
| 55 | Lisp programs. | ||
| 56 | |||
| 57 | @item | ||
| 58 | It is often useful to put a call to @code{provide} in each separate | ||
| 59 | library program, at least if there is more than one entry point to the | ||
| 60 | program. | ||
| 61 | |||
| 62 | @item | ||
| 63 | If one file @var{foo} uses a macro defined in another file @var{bar}, | ||
| 64 | @var{foo} should contain @code{(require '@var{bar})} before the first | ||
| 65 | use of the macro. (And @var{bar} should contain @code{(provide | ||
| 66 | '@var{bar})}, to make the @code{require} work.) This will cause | ||
| 67 | @var{bar} to be loaded when you byte-compile @var{foo}. Otherwise, you | ||
| 68 | risk compiling @var{foo} without the necessary macro loaded, and that | ||
| 69 | would produce compiled code that won't work right. @xref{Compiling | ||
| 70 | Macros}. | ||
| 71 | |||
| 72 | @item | ||
| 73 | If you define a major mode, make sure to run a hook variable using | ||
| 74 | @code{run-hooks}, just as the existing major modes do. @xref{Hooks}. | ||
| 75 | |||
| 76 | @item | ||
| 77 | Please do not define @kbd{C-c @var{letter}} as a key in your major | ||
| 78 | modes. These sequences are reserved for users; they are the | ||
| 79 | @strong{only} sequences reserved for users, so we cannot do without | ||
| 80 | them. | ||
| 81 | |||
| 82 | Instead, define sequences consisting of @kbd{C-c} followed by a | ||
| 83 | non-letter. These sequences are reserved for major modes. | ||
| 84 | |||
| 85 | Changing all the major modes in Emacs 18 so they would follow this | ||
| 86 | convention was a lot of work. Abandoning this convention would waste | ||
| 87 | that work and inconvenience the users. | ||
| 88 | |||
| 89 | @item | ||
| 90 | You should not bind @kbd{C-h} following any prefix character (including | ||
| 91 | @kbd{C-c}). If you don't bind @kbd{C-h}, it is automatically available | ||
| 92 | as a help character for listing the subcommands of the prefix character. | ||
| 93 | |||
| 94 | @item | ||
| 95 | You should not bind a key sequence ending in @key{ESC} except following | ||
| 96 | another @key{ESC}. (That is, it is ok to bind a sequence ending in | ||
| 97 | @kbd{@key{ESC} @key{ESC}}.) | ||
| 98 | |||
| 99 | The reason for this rule is that a non-prefix binding for @key{ESC} in | ||
| 100 | any context prevents recognition of escape sequences as function keys in | ||
| 101 | that context. | ||
| 102 | |||
| 103 | @item | ||
| 104 | It is a bad idea to define aliases for the Emacs primitives. | ||
| 105 | Use the standard names instead. | ||
| 106 | |||
| 107 | @item | ||
| 108 | Redefining an Emacs primitive is an even worse idea. | ||
| 109 | It may do the right thing for a particular program, but | ||
| 110 | there is no telling what other programs might break as a result. | ||
| 111 | |||
| 112 | @item | ||
| 113 | If a file does replace any of the functions or library programs of | ||
| 114 | standard Emacs, prominent comments at the beginning of the file should | ||
| 115 | say which functions are replaced, and how the behavior of the | ||
| 116 | replacements differs from that of the originals. | ||
| 117 | |||
| 118 | @item | ||
| 119 | If a file requires certain standard library programs to be loaded | ||
| 120 | beforehand, then the comments at the beginning of the file should say | ||
| 121 | so. | ||
| 122 | |||
| 123 | @item | ||
| 124 | Please keep the names of your Emacs Lisp source files to 13 characters | ||
| 125 | or less. This way, if the files are compiled, the compiled files' names | ||
| 126 | will be 14 characters or less, which is short enough to fit on all kinds | ||
| 127 | of Unix systems. | ||
| 128 | |||
| 129 | @item | ||
| 130 | Don't use @code{next-line} or @code{previous-line} in programs; nearly | ||
| 131 | always, @code{forward-line} is more convenient as well as more | ||
| 132 | predictable and robust. @xref{Text Lines}. | ||
| 133 | |||
| 134 | @item | ||
| 135 | Don't use functions that set the mark in your Lisp code (unless you are | ||
| 136 | writing a command to set the mark). The mark is a user-level feature, | ||
| 137 | so it is incorrect to change the mark except to supply a value for the | ||
| 138 | user's benefit. @xref{The Mark}. | ||
| 139 | |||
| 140 | In particular, don't use these functions: | ||
| 141 | |||
| 142 | @itemize @bullet | ||
| 143 | @item | ||
| 144 | @code{beginning-of-buffer}, @code{end-of-buffer} | ||
| 145 | @item | ||
| 146 | @code{replace-string}, @code{replace-regexp} | ||
| 147 | @end itemize | ||
| 148 | |||
| 149 | If you just want to move point, or replace a certain string, without any | ||
| 150 | of the other features intended for interactive users, you can replace | ||
| 151 | these functions with one or two lines of simple Lisp code. | ||
| 152 | |||
| 153 | @item | ||
| 154 | The recommended way to print a message in the echo area is with | ||
| 155 | the @code{message} function, not @code{princ}. @xref{The Echo Area}. | ||
| 156 | |||
| 157 | @item | ||
| 158 | When you encounter an error condition, call the function @code{error} | ||
| 159 | (or @code{signal}). The function @code{error} does not return. | ||
| 160 | @xref{Signaling Errors}. | ||
| 161 | |||
| 162 | Do not use @code{message}, @code{throw}, @code{sleep-for}, | ||
| 163 | or @code{beep} to report errors. | ||
| 164 | |||
| 165 | @item | ||
| 166 | Avoid using recursive edits. Instead, do what the Rmail @kbd{w} command | ||
| 167 | does: use a new local keymap that contains one command defined to | ||
| 168 | switch back to the old local keymap. Or do what the @code{edit-options} | ||
| 169 | command does: switch to another buffer and let the user switch back at | ||
| 170 | will. @xref{Recursive Editing}. | ||
| 171 | |||
| 172 | @item | ||
| 173 | In some other systems there is a convention of choosing variable names | ||
| 174 | that begin and end with @samp{*}. We don't use that convention in Emacs | ||
| 175 | Lisp, so please don't use it in your library. (In fact, in Emacs names | ||
| 176 | of this form are conventionally used for program-generated buffers.) The | ||
| 177 | users will find Emacs more coherent if all libraries use the same | ||
| 178 | conventions. | ||
| 179 | |||
| 180 | @item | ||
| 181 | Indent each function with @kbd{C-M-q} (@code{indent-sexp}) using the | ||
| 182 | default indentation parameters. | ||
| 183 | |||
| 184 | @item | ||
| 185 | Don't make a habit of putting close-parentheses on lines by themselves; | ||
| 186 | Lisp programmers find this disconcerting. Once in a while, when there | ||
| 187 | is a sequence of many consecutive close-parentheses, it may make sense | ||
| 188 | to split them in one or two significant places. | ||
| 189 | |||
| 190 | @item | ||
| 191 | Please put a copyright notice on the file if you give copies to anyone. | ||
| 192 | Use the same lines that appear at the top of the Lisp files in Emacs | ||
| 193 | itself. If you have not signed papers to assign the copyright to the | ||
| 194 | Foundation, then place your name in the copyright notice in place of the | ||
| 195 | Foundation's name. | ||
| 196 | @end itemize | ||
| 197 | |||
| 198 | @node Compilation Tips | ||
| 199 | @section Tips for Making Compiled Code Fast | ||
| 200 | @cindex execution speed | ||
| 201 | @cindex speedups | ||
| 202 | |||
| 203 | Here are ways of improving the execution speed of byte-compiled | ||
| 204 | Lisp programs. | ||
| 205 | |||
| 206 | @itemize @bullet | ||
| 207 | @item | ||
| 208 | @cindex profiling | ||
| 209 | @cindex timing programs | ||
| 210 | @cindex @file{profile.el} | ||
| 211 | Use the @file{profile} library to profile your program. See the file | ||
| 212 | @file{profile.el} for instructions. | ||
| 213 | |||
| 214 | @item | ||
| 215 | Use iteration rather than recursion whenever possible. | ||
| 216 | Function calls are slow in Emacs Lisp even when a compiled function | ||
| 217 | is calling another compiled function. | ||
| 218 | |||
| 219 | @item | ||
| 220 | Using the primitive list-searching functions @code{memq}, @code{assq} or | ||
| 221 | @code{assoc} is even faster than explicit iteration. It may be worth | ||
| 222 | rearranging a data structure so that one of these primitive search | ||
| 223 | functions can be used. | ||
| 224 | |||
| 225 | @item | ||
| 226 | Certain built-in functions are handled specially by the byte compiler, | ||
| 227 | avoiding the need for an ordinary function call. It is a good idea to | ||
| 228 | use these functions rather than alternatives. To see whether a function | ||
| 229 | is handled specially by the compiler, examine its @code{byte-compile} | ||
| 230 | property. If the property is non-@code{nil}, then the function is | ||
| 231 | handled specially. | ||
| 232 | |||
| 233 | For example, the following input will show you that @code{aref} is | ||
| 234 | compiled specially (@pxref{Array Functions}) while @code{elt} is not | ||
| 235 | (@pxref{Sequence Functions}): | ||
| 236 | |||
| 237 | @smallexample | ||
| 238 | @group | ||
| 239 | (get 'aref 'byte-compile) | ||
| 240 | @result{} byte-compile-two-args | ||
| 241 | @end group | ||
| 242 | |||
| 243 | @group | ||
| 244 | (get 'elt 'byte-compile) | ||
| 245 | @result{} nil | ||
| 246 | @end group | ||
| 247 | @end smallexample | ||
| 248 | |||
| 249 | @item | ||
| 250 | If calling a small function accounts for a substantial part of your | ||
| 251 | program's running time, make the function inline. This eliminates | ||
| 252 | the function call overhead. Since making a function inline reduces | ||
| 253 | the flexibility of changing the program, don't do it unless it gives | ||
| 254 | a noticeable speedup in something slow enough for users to care about | ||
| 255 | the speed. @xref{Inline Functions}. | ||
| 256 | @end itemize | ||
| 257 | |||
| 258 | @node Documentation Tips | ||
| 259 | @section Tips for Documentation Strings | ||
| 260 | |||
| 261 | Here are some tips for the writing of documentation strings. | ||
| 262 | |||
| 263 | @itemize @bullet | ||
| 264 | @item | ||
| 265 | Every command, function or variable intended for users to know about | ||
| 266 | should have a documentation string. | ||
| 267 | |||
| 268 | @item | ||
| 269 | An internal subroutine of a Lisp program need not have a documentation | ||
| 270 | string, and you can save space by using a comment instead. | ||
| 271 | |||
| 272 | @item | ||
| 273 | The first line of the documentation string should consist of one or two | ||
| 274 | complete sentences which stand on their own as a summary. In particular, | ||
| 275 | start the line with a capital letter and end with a period. | ||
| 276 | For instance, use ``Return the cons of A and B.'' in preference to | ||
| 277 | ``Returns the cons of A and B@.'' | ||
| 278 | |||
| 279 | The documentation string can have additional lines which expand on the | ||
| 280 | details of how to use the function or variable. The additional lines | ||
| 281 | should be made up of complete sentences also, but they may be filled if | ||
| 282 | that looks good. | ||
| 283 | |||
| 284 | @item | ||
| 285 | Write documentation strings in the active voice, not the passive, and in | ||
| 286 | the present tense, not the future. For instance, use ``Return a list | ||
| 287 | containing A and B.'' instead of ``A list containing A and B will be | ||
| 288 | returned.'' | ||
| 289 | |||
| 290 | @item | ||
| 291 | Avoid using the word ``cause'' (or its equivalents) unnecessarily. | ||
| 292 | Instead of ``Cause Emacs to display text in boldface,'' write just | ||
| 293 | ``Display text in boldface.'' | ||
| 294 | |||
| 295 | @item | ||
| 296 | Do not start or end a documentation string with whitespace. | ||
| 297 | |||
| 298 | @item | ||
| 299 | Format the documentation string so that it fits in an Emacs window on an | ||
| 300 | 80 column screen. It is a good idea for most lines to be no wider than | ||
| 301 | 60 characters. The first line can be wider if necessary to fit the | ||
| 302 | information that ought to be there. | ||
| 303 | |||
| 304 | However, rather than simply filling the entire documentation string, you | ||
| 305 | can make it much more readable by choosing line breaks with care. | ||
| 306 | Use blank lines between topics if the documentation string is long. | ||
| 307 | |||
| 308 | @item | ||
| 309 | @strong{Do not} indent subsequent lines of a documentation string so | ||
| 310 | that the text is lined up in the source code with the text of the first | ||
| 311 | line. This looks nice in the source code, but looks bizarre when users | ||
| 312 | view the documentation. Remember that the indentation before the | ||
| 313 | starting double-quote is not part of the string! | ||
| 314 | |||
| 315 | @item | ||
| 316 | A variable's documentation string should start with @samp{*} if the | ||
| 317 | variable is one that users would want to set interactively often. If | ||
| 318 | the value is a long list, or a function, or if the variable would only | ||
| 319 | be set in init files, then don't start the documentation string with | ||
| 320 | @samp{*}. @xref{Defining Variables}. | ||
| 321 | |||
| 322 | @item | ||
| 323 | The documentation string for a variable that is a yes-or-no flag should | ||
| 324 | start with words such as ``Non-nil means@dots{}'', to make it clear both | ||
| 325 | that the variable only has two meaningfully distinct values and which value | ||
| 326 | means ``yes''. | ||
| 327 | |||
| 328 | @item | ||
| 329 | When a function's documentation string mentions the value of an argument | ||
| 330 | of the function, use the argument name in capital letters as if it were | ||
| 331 | a name for that value. Thus, the documentation string of the function | ||
| 332 | @code{/} refers to its second argument as @samp{DIVISOR}. | ||
| 333 | |||
| 334 | Also use all caps for meta-syntactic variables, such as when you show | ||
| 335 | the decomposition of a list or vector into subunits, some of which may | ||
| 336 | vary. | ||
| 337 | |||
| 338 | @item | ||
| 339 | @iftex | ||
| 340 | When a documentation string refers to a Lisp symbol, write it as it | ||
| 341 | would be printed (which usually means in lower case), with single-quotes | ||
| 342 | around it. For example: @samp{`lambda'}. There are two exceptions: | ||
| 343 | write @code{t} and @code{nil} without single-quotes. | ||
| 344 | @end iftex | ||
| 345 | @ifinfo | ||
| 346 | When a documentation string refers to a Lisp symbol, write it as it | ||
| 347 | would be printed (which usually means in lower case), with single-quotes | ||
| 348 | around it. For example: @samp{lambda}. There are two exceptions: write | ||
| 349 | t and nil without single-quotes. (In this manual, we normally do use | ||
| 350 | single-quotes for those symbols.) | ||
| 351 | @end ifinfo | ||
| 352 | |||
| 353 | @item | ||
| 354 | Don't write key sequences directly in documentation strings. Instead, | ||
| 355 | use the @samp{\\[@dots{}]} construct to stand for them. For example, | ||
| 356 | instead of writing @samp{C-f}, write @samp{\\[forward-char]}. When the | ||
| 357 | documentation string is printed, Emacs will substitute whatever key is | ||
| 358 | currently bound to @code{forward-char}. This will usually be | ||
| 359 | @samp{C-f}, but if the user has moved key bindings, it will be the | ||
| 360 | correct key for that user. @xref{Keys in Documentation}. | ||
| 361 | |||
| 362 | @item | ||
| 363 | In documentation strings for a major mode, you will want to refer to the | ||
| 364 | key bindings of that mode's local map, rather than global ones. | ||
| 365 | Therefore, use the construct @samp{\\<@dots{}>} once in the | ||
| 366 | documentation string to specify which key map to use. Do this before | ||
| 367 | the first use of @samp{\\[@dots{}]}. The text inside the | ||
| 368 | @samp{\\<@dots{}>} should be the name of the variable containing the | ||
| 369 | local keymap for the major mode. | ||
| 370 | |||
| 371 | It is not practical to use @samp{\\[@dots{}]} very many times, because | ||
| 372 | display of the documentation string will become slow. So use this to | ||
| 373 | describe the most important commands in your major mode, and then use | ||
| 374 | @samp{\\@{@dots{}@}} to display the rest of the mode's keymap. | ||
| 375 | |||
| 376 | @item | ||
| 377 | Don't use the term ``Elisp'', since that is or was a trademark. | ||
| 378 | Use the term ``Emacs Lisp''. | ||
| 379 | @end itemize | ||
| 380 | |||
| 381 | @node Comment Tips | ||
| 382 | @section Tips on Writing Comments | ||
| 383 | |||
| 384 | We recommend these conventions for where to put comments and how to | ||
| 385 | indent them: | ||
| 386 | |||
| 387 | @table @samp | ||
| 388 | @item ; | ||
| 389 | Comments that start with a single semicolon, @samp{;}, should all be | ||
| 390 | aligned to the same column on the right of the source code. Such | ||
| 391 | comments usually explain how the code on the same line does its job. In | ||
| 392 | Lisp mode and related modes, the @kbd{M-;} (@code{indent-for-comment}) | ||
| 393 | command automatically inserts such a @samp{;} in the right place, or | ||
| 394 | aligns such a comment if it is already inserted. | ||
| 395 | |||
| 396 | (The following examples are taken from the Emacs sources.) | ||
| 397 | |||
| 398 | @smallexample | ||
| 399 | @group | ||
| 400 | (setq base-version-list ; there was a base | ||
| 401 | (assoc (substring fn 0 start-vn) ; version to which | ||
| 402 | file-version-assoc-list)) ; this looks like | ||
| 403 | ; a subversion | ||
| 404 | @end group | ||
| 405 | @end smallexample | ||
| 406 | |||
| 407 | @item ;; | ||
| 408 | Comments that start with two semicolons, @samp{;;}, should be aligned to | ||
| 409 | the same level of indentation as the code. Such comments are used to | ||
| 410 | describe the purpose of the following lines or the state of the program | ||
| 411 | at that point. For example: | ||
| 412 | |||
| 413 | @smallexample | ||
| 414 | @group | ||
| 415 | (prog1 (setq auto-fill-function | ||
| 416 | @dots{} | ||
| 417 | @dots{} | ||
| 418 | ;; update mode-line | ||
| 419 | (force-mode-line-update))) | ||
| 420 | @end group | ||
| 421 | @end smallexample | ||
| 422 | |||
| 423 | These comments are also written before a function definition to explain | ||
| 424 | what the function does and how to call it properly. | ||
| 425 | |||
| 426 | @item ;;; | ||
| 427 | Comments that start with three semicolons, @samp{;;;}, should start at | ||
| 428 | the left margin. Such comments are not used within function | ||
| 429 | definitions, but are used to make more general comments. For example: | ||
| 430 | |||
| 431 | @smallexample | ||
| 432 | @group | ||
| 433 | ;;; This Lisp code is run in Emacs | ||
| 434 | ;;; when it is to operate as a server | ||
| 435 | ;;; for other processes. | ||
| 436 | @end group | ||
| 437 | @end smallexample | ||
| 438 | |||
| 439 | @item ;;;; | ||
| 440 | Comments that start with four semicolons, @samp{;;;;}, should be aligned | ||
| 441 | to the left margin and are used for headings of major sections of a | ||
| 442 | program. For example: | ||
| 443 | |||
| 444 | @smallexample | ||
| 445 | ;;;; The kill ring | ||
| 446 | @end smallexample | ||
| 447 | @end table | ||
| 448 | |||
| 449 | @noindent | ||
| 450 | The indentation commands of the Lisp modes in Emacs, such as @kbd{M-;} | ||
| 451 | (@code{indent-for-comment}) and @key{TAB} (@code{lisp-indent-line}) | ||
| 452 | automatically indent comments according to these conventions, | ||
| 453 | depending on the number of semicolons. @xref{Comments,, | ||
| 454 | Manipulating Comments, emacs, The GNU Emacs Manual}. | ||
| 455 | |||
| 456 | If you wish to ``comment out'' a number of lines of code, use triple | ||
| 457 | semicolons at the beginnings of the lines. | ||
| 458 | |||
| 459 | Any character may be included in a comment, but it is advisable to | ||
| 460 | precede a character with syntactic significance in Lisp (such as | ||
| 461 | @samp{\} or unpaired @samp{(} or @samp{)}) with a @samp{\}, to prevent | ||
| 462 | it from confusing the Emacs commands for editing Lisp. | ||
| 463 | |||
| 464 | @node Library Headers | ||
| 465 | @section Conventional Headers for Emacs Libraries | ||
| 466 | @cindex header comments | ||
| 467 | @cindex library header comments | ||
| 468 | |||
| 469 | Emacs 19 has conventions for using special comments in Lisp libraries | ||
| 470 | to divide them into sections and give information such as who wrote | ||
| 471 | them. This section explains these conventions. First, an example: | ||
| 472 | |||
| 473 | @smallexample | ||
| 474 | @group | ||
| 475 | ;;; lisp-mnt.el --- minor mode for Emacs Lisp maintainers | ||
| 476 | |||
| 477 | ;; Copyright (C) 1992 Free Software Foundation, Inc. | ||
| 478 | @end group | ||
| 479 | |||
| 480 | ;; Author: Eric S. Raymond <esr@@snark.thyrsus.com> | ||
| 481 | ;; Maintainer: Eric S. Raymond <esr@@snark.thyrsus.com> | ||
| 482 | ;; Created: 14 Jul 1992 | ||
| 483 | ;; Version: 1.2 | ||
| 484 | @group | ||
| 485 | ;; Keywords: docs | ||
| 486 | |||
| 487 | ;; This file is part of GNU Emacs. | ||
| 488 | @var{copying conditions}@dots{} | ||
| 489 | @end group | ||
| 490 | @end smallexample | ||
| 491 | |||
| 492 | The very first line should have this format: | ||
| 493 | |||
| 494 | @example | ||
| 495 | ;;; @var{filename} --- @var{description} | ||
| 496 | @end example | ||
| 497 | |||
| 498 | @noindent | ||
| 499 | The description should be complete in one line. | ||
| 500 | |||
| 501 | After the copyright notice come several @dfn{header comment} lines, | ||
| 502 | each beginning with @samp{;; @var{header-name}:}. Here is a table of | ||
| 503 | the conventional possibilities for @var{header-name}: | ||
| 504 | |||
| 505 | @table @samp | ||
| 506 | @item Author | ||
| 507 | This line states the name and net address of at least the principal | ||
| 508 | author of the library. | ||
| 509 | |||
| 510 | If there are multiple authors, you can list them on continuation lines | ||
| 511 | led by @code{;;<TAB>}, like this: | ||
| 512 | |||
| 513 | @smallexample | ||
| 514 | @group | ||
| 515 | ;; Author: Ashwin Ram <Ram-Ashwin@@cs.yale.edu> | ||
| 516 | ;; Dave Sill <de5@@ornl.gov> | ||
| 517 | ;; Dave Brennan <brennan@@hal.com> | ||
| 518 | ;; Eric Raymond <esr@@snark.thyrsus.com> | ||
| 519 | @end group | ||
| 520 | @end smallexample | ||
| 521 | |||
| 522 | @item Maintainer | ||
| 523 | This line should contain a single name/address as in the Author line, or | ||
| 524 | an address only, or the string ``FSF''. If there is no maintainer line, | ||
| 525 | the person(s) in the Author field are presumed to be the maintainers. | ||
| 526 | The example above is mildly bogus because the maintainer line is | ||
| 527 | redundant. | ||
| 528 | |||
| 529 | The idea behind the @samp{Author} and @samp{Maintainer} lines is to make | ||
| 530 | possible a Lisp function to ``send mail to the maintainer'' without | ||
| 531 | having to mine the name out by hand. | ||
| 532 | |||
| 533 | Be sure to surround the network address with @samp{<@dots{}>} if | ||
| 534 | you include the person's full name as well as the network address. | ||
| 535 | |||
| 536 | @item Created | ||
| 537 | This optional line gives the original creation date of the | ||
| 538 | file. For historical interest only. | ||
| 539 | |||
| 540 | @item Version | ||
| 541 | If you wish to record version numbers for the individual Lisp program, put | ||
| 542 | them in this line. | ||
| 543 | |||
| 544 | @item Adapted-By | ||
| 545 | In this header line, place the name of the person who adapted the | ||
| 546 | library for installation (to make it fit the style conventions, for | ||
| 547 | example). | ||
| 548 | |||
| 549 | @item Keywords | ||
| 550 | This line lists keywords for the @code{finder-by-keyword} help command. | ||
| 551 | This field is important; it's how people will find your package when | ||
| 552 | they're looking for things by topic area. | ||
| 553 | @end table | ||
| 554 | |||
| 555 | Just about every Lisp library ought to have the @samp{Author} and | ||
| 556 | @samp{Keywords} header comment lines. Use the others if they are | ||
| 557 | appropriate. You can also put in header lines with other header | ||
| 558 | names---they have no standard meanings, so they can't do any harm. | ||
| 559 | |||
| 560 | We use additional stylized comments to subdivide the contents of the | ||
| 561 | library file. Here is a table of them: | ||
| 562 | |||
| 563 | @table @samp | ||
| 564 | @item ;;; Commentary: | ||
| 565 | This begins introductory comments that explain how the library works. | ||
| 566 | It should come right after the copying permissions. | ||
| 567 | |||
| 568 | @item ;;; Change log: | ||
| 569 | This begins change log information stored in the library file (if you | ||
| 570 | store the change history there). For most of the Lisp | ||
| 571 | files distributed with Emacs, the change history is kept in the file | ||
| 572 | @file{ChangeLog} and not in the source file at all; these files do | ||
| 573 | not have a @samp{;;; Change log:} line. | ||
| 574 | |||
| 575 | @item ;;; Code: | ||
| 576 | This begins the actual code of the program. | ||
| 577 | |||
| 578 | @item ;;; @var{filename} ends here | ||
| 579 | This is the @dfn{footer line}; it appears at the very end of the file. | ||
| 580 | Its purpose is to enable people to detect truncated versions of the file | ||
| 581 | from the lack of a footer line. | ||
| 582 | @end table | ||