diff options
| author | Francesco Potortì | 2002-06-13 10:44:15 +0000 |
|---|---|---|
| committer | Francesco Potortì | 2002-06-13 10:44:15 +0000 |
| commit | 24dbe96a108cfb899cf1f54ea4b42aceb159776f (patch) | |
| tree | 120be290c5ba546bd568627c6e7275bb999c6ce4 /lib-src | |
| parent | dfd8bf47b3498b83fb7ff03ba7bf9c3860604213 (diff) | |
| download | emacs-24dbe96a108cfb899cf1f54ea4b42aceb159776f.tar.gz emacs-24dbe96a108cfb899cf1f54ea4b42aceb159776f.zip | |
New multi-line regexp and new regexp syntax.
(arg_type): at_icregexp label removed (obsolete).
(pattern): New member multi_line for multi-line regexps.
(filebuf): A global buffer containing the whole file as a string
for multi-line regexp matching.
(need_filebuf): Global flag raised if multi-line regexps used.
(print_help): Document new regexp modifiers, remove references to
obsolete option --ignore-case-regexp.
(main): Do not set regexp syntax and translation table here.
(main): Treat -c option as a backward compatibility hack.
(main, find_entries): Init and free filebuf.
(find_entries): Call regex_tag_multiline after the regular parser.
(scan_separators): Check for unterminated regexp and return NULL.
(analyse_regex, add_regex): Remove the ignore_case argument, which
is now a modifier to the regexp. All callers changed.
(add_regex): Manage the regexp modifiers.
(regex_tag_multiline): New function. Reads from filebuf.
(readline_internal): If necessary, copy the whole file into filebuf.
(readline): Skip multi-line regexps, leave them to regex_tag_multiline.
(add_regex): Better check for null regexps.
(readline): Check for regex matching null string.
(find_entries): Reorganisation.
Diffstat (limited to 'lib-src')
| -rw-r--r-- | lib-src/etags.c | 406 |
1 files changed, 292 insertions, 114 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c index 6b4b379f138..be60b476972 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c | |||
| @@ -2,21 +2,21 @@ | |||
| 2 | Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002 | 2 | Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002 |
| 3 | Free Software Foundation, Inc. and Ken Arnold | 3 | Free Software Foundation, Inc. and Ken Arnold |
| 4 | 4 | ||
| 5 | This file is not considered part of GNU Emacs. | 5 | This file is not considered part of GNU Emacs. |
| 6 | 6 | ||
| 7 | This program is free software; you can redistribute it and/or modify | 7 | This program is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by | 8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation; either version 2 of the License, or | 9 | the Free Software Foundation; either version 2 of the License, or |
| 10 | (at your option) any later version. | 10 | (at your option) any later version. |
| 11 | 11 | ||
| 12 | This program is distributed in the hope that it will be useful, | 12 | This program is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. | 15 | GNU General Public License for more details. |
| 16 | 16 | ||
| 17 | You should have received a copy of the GNU General Public License | 17 | You should have received a copy of the GNU General Public License |
| 18 | along with this program; if not, write to the Free Software Foundation, | 18 | along with this program; if not, write to the Free Software Foundation, |
| 19 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | 19 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
| 20 | 20 | ||
| 21 | /* | 21 | /* |
| 22 | * Authors: | 22 | * Authors: |
| @@ -34,7 +34,7 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |||
| 34 | * Francesco Potortì <pot@gnu.org> has maintained it since 1993. | 34 | * Francesco Potortì <pot@gnu.org> has maintained it since 1993. |
| 35 | */ | 35 | */ |
| 36 | 36 | ||
| 37 | char pot_etags_version[] = "@(#) pot revision number is 16.10"; | 37 | char pot_etags_version[] = "@(#) pot revision number is 16.19"; |
| 38 | 38 | ||
| 39 | #define TRUE 1 | 39 | #define TRUE 1 |
| 40 | #define FALSE 0 | 40 | #define FALSE 0 |
| @@ -288,7 +288,6 @@ typedef struct | |||
| 288 | enum { | 288 | enum { |
| 289 | at_language, /* a language specification */ | 289 | at_language, /* a language specification */ |
| 290 | at_regexp, /* a regular expression */ | 290 | at_regexp, /* a regular expression */ |
| 291 | at_icregexp, /* same, but with case ignored */ | ||
| 292 | at_filename, /* a file name */ | 291 | at_filename, /* a file name */ |
| 293 | at_stdin /* read from stdin here */ | 292 | at_stdin /* read from stdin here */ |
| 294 | } arg_type; /* argument type */ | 293 | } arg_type; /* argument type */ |
| @@ -308,6 +307,7 @@ typedef struct pattern | |||
| 308 | char *name_pattern; | 307 | char *name_pattern; |
| 309 | bool error_signaled; | 308 | bool error_signaled; |
| 310 | bool ignore_case; | 309 | bool ignore_case; |
| 310 | bool multi_line; | ||
| 311 | } pattern; | 311 | } pattern; |
| 312 | #endif /* ETAGS_REGEXPS */ | 312 | #endif /* ETAGS_REGEXPS */ |
| 313 | 313 | ||
| @@ -355,9 +355,9 @@ static bool nocase_tail __P((char *)); | |||
| 355 | static char *get_tag __P((char *)); | 355 | static char *get_tag __P((char *)); |
| 356 | 356 | ||
| 357 | #ifdef ETAGS_REGEXPS | 357 | #ifdef ETAGS_REGEXPS |
| 358 | static void analyse_regex __P((char *, bool)); | 358 | static void analyse_regex __P((char *)); |
| 359 | static void add_regex __P((char *, bool, language *)); | ||
| 360 | static void free_patterns __P((void)); | 359 | static void free_patterns __P((void)); |
| 360 | static void regex_tag_multiline __P((void)); | ||
| 361 | #endif /* ETAGS_REGEXPS */ | 361 | #endif /* ETAGS_REGEXPS */ |
| 362 | static void error __P((const char *, const char *)); | 362 | static void error __P((const char *, const char *)); |
| 363 | static void suggest_asking_for_help __P((void)); | 363 | static void suggest_asking_for_help __P((void)); |
| @@ -417,6 +417,7 @@ static node *nodehead; /* the head of the binary tree of tags */ | |||
| 417 | static node *last_node; /* the last node created */ | 417 | static node *last_node; /* the last node created */ |
| 418 | 418 | ||
| 419 | static linebuffer lb; /* the current line */ | 419 | static linebuffer lb; /* the current line */ |
| 420 | static linebuffer filebuf; /* a buffer containing the whole file */ | ||
| 420 | 421 | ||
| 421 | /* boolean "functions" (see init) */ | 422 | /* boolean "functions" (see init) */ |
| 422 | static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; | 423 | static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; |
| @@ -457,13 +458,10 @@ static bool packages_only; /* --packages-only: in Ada, only tag packages*/ | |||
| 457 | static bool parsing_stdin; /* --parse-stdin used */ | 458 | static bool parsing_stdin; /* --parse-stdin used */ |
| 458 | 459 | ||
| 459 | #ifdef ETAGS_REGEXPS | 460 | #ifdef ETAGS_REGEXPS |
| 460 | /* List of all regexps. */ | 461 | static pattern *p_head; /* list of all regexps */ |
| 461 | static pattern *p_head; | 462 | static bool need_filebuf; /* some regexes are multi-line */ |
| 462 | 463 | #else | |
| 463 | /* How many characters in the character set. (From regex.c.) */ | 464 | # define need_filebuf FALSE |
| 464 | #define CHAR_SET_SIZE 256 | ||
| 465 | /* Translation table for case-insensitive matching. */ | ||
| 466 | static char lc_trans[CHAR_SET_SIZE]; | ||
| 467 | #endif /* ETAGS_REGEXPS */ | 465 | #endif /* ETAGS_REGEXPS */ |
| 468 | 466 | ||
| 469 | #ifdef LONG_OPTIONS | 467 | #ifdef LONG_OPTIONS |
| @@ -680,10 +678,10 @@ Compressed files are supported using gzip and bzip2."); | |||
| 680 | } | 678 | } |
| 681 | 679 | ||
| 682 | #ifndef EMACS_NAME | 680 | #ifndef EMACS_NAME |
| 683 | # define EMACS_NAME "GNU Emacs" | 681 | # define EMACS_NAME "standalone" |
| 684 | #endif | 682 | #endif |
| 685 | #ifndef VERSION | 683 | #ifndef VERSION |
| 686 | # define VERSION "21" | 684 | # define VERSION "version" |
| 687 | #endif | 685 | #endif |
| 688 | static void | 686 | static void |
| 689 | print_version () | 687 | print_version () |
| @@ -775,9 +773,11 @@ Relative ones are stored relative to the output file's directory.\n"); | |||
| 775 | REGEXP is anchored (as if preceded by ^).\n\ | 773 | REGEXP is anchored (as if preceded by ^).\n\ |
| 776 | The form /REGEXP/NAME/ creates a named tag.\n\ | 774 | The form /REGEXP/NAME/ creates a named tag.\n\ |
| 777 | For example Tcl named tags can be created with:\n\ | 775 | For example Tcl named tags can be created with:\n\ |
| 778 | --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\""); | 776 | --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"."); |
| 779 | puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\ | 777 | puts ("In the form /REGEXP/MODS or /REGEXP/NAME/MODS, MODS are\n\ |
| 780 | Like -r, --regex but ignore case when matching expressions."); | 778 | one-letter modifiers: `i' means to ignore case, `m' means\n\ |
| 779 | allow multi-line matches, `s' implies `m' and additionally\n\ | ||
| 780 | causes dot to match the newline character."); | ||
| 781 | puts ("-R, --no-regex\n\ | 781 | puts ("-R, --no-regex\n\ |
| 782 | Don't create tags from regexps for the following files."); | 782 | Don't create tags from regexps for the following files."); |
| 783 | #endif /* ETAGS_REGEXPS */ | 783 | #endif /* ETAGS_REGEXPS */ |
| @@ -996,14 +996,6 @@ main (argc, argv) | |||
| 996 | is small. */ | 996 | is small. */ |
| 997 | argbuffer = xnew (argc, argument); | 997 | argbuffer = xnew (argc, argument); |
| 998 | 998 | ||
| 999 | #ifdef ETAGS_REGEXPS | ||
| 1000 | /* Set syntax for regular expression routines. */ | ||
| 1001 | re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS); | ||
| 1002 | /* Translation table for case-insensitive search. */ | ||
| 1003 | for (i = 0; i < CHAR_SET_SIZE; i++) | ||
| 1004 | lc_trans[i] = lowcase (i); | ||
| 1005 | #endif /* ETAGS_REGEXPS */ | ||
| 1006 | |||
| 1007 | /* | 999 | /* |
| 1008 | * If etags, always find typedefs and structure tags. Why not? | 1000 | * If etags, always find typedefs and structure tags. Why not? |
| 1009 | * Also default to find macro constants, enum constants and | 1001 | * Also default to find macro constants, enum constants and |
| @@ -1079,6 +1071,10 @@ main (argc, argv) | |||
| 1079 | } | 1071 | } |
| 1080 | } | 1072 | } |
| 1081 | break; | 1073 | break; |
| 1074 | case 'c': | ||
| 1075 | /* Backward compatibility: support obsolete --ignore-case-regexp. */ | ||
| 1076 | optarg = concat (optarg, "i", ""); /* memory leak here */ | ||
| 1077 | /* FALLTHRU */ | ||
| 1082 | case 'r': | 1078 | case 'r': |
| 1083 | argbuffer[current_arg].arg_type = at_regexp; | 1079 | argbuffer[current_arg].arg_type = at_regexp; |
| 1084 | argbuffer[current_arg].what = optarg; | 1080 | argbuffer[current_arg].what = optarg; |
| @@ -1089,11 +1085,6 @@ main (argc, argv) | |||
| 1089 | argbuffer[current_arg].what = NULL; | 1085 | argbuffer[current_arg].what = NULL; |
| 1090 | ++current_arg; | 1086 | ++current_arg; |
| 1091 | break; | 1087 | break; |
| 1092 | case 'c': | ||
| 1093 | argbuffer[current_arg].arg_type = at_icregexp; | ||
| 1094 | argbuffer[current_arg].what = optarg; | ||
| 1095 | ++current_arg; | ||
| 1096 | break; | ||
| 1097 | case 'V': | 1088 | case 'V': |
| 1098 | print_version (); | 1089 | print_version (); |
| 1099 | break; | 1090 | break; |
| @@ -1152,6 +1143,7 @@ main (argc, argv) | |||
| 1152 | 1143 | ||
| 1153 | initbuffer (&lb); | 1144 | initbuffer (&lb); |
| 1154 | initbuffer (&filename_lb); | 1145 | initbuffer (&filename_lb); |
| 1146 | initbuffer (&filebuf); | ||
| 1155 | 1147 | ||
| 1156 | if (!CTAGS) | 1148 | if (!CTAGS) |
| 1157 | { | 1149 | { |
| @@ -1186,10 +1178,7 @@ main (argc, argv) | |||
| 1186 | break; | 1178 | break; |
| 1187 | #ifdef ETAGS_REGEXPS | 1179 | #ifdef ETAGS_REGEXPS |
| 1188 | case at_regexp: | 1180 | case at_regexp: |
| 1189 | analyse_regex (argbuffer[i].what, FALSE); | 1181 | analyse_regex (argbuffer[i].what); |
| 1190 | break; | ||
| 1191 | case at_icregexp: | ||
| 1192 | analyse_regex (argbuffer[i].what, TRUE); | ||
| 1193 | break; | 1182 | break; |
| 1194 | #endif | 1183 | #endif |
| 1195 | case at_filename: | 1184 | case at_filename: |
| @@ -1234,6 +1223,7 @@ main (argc, argv) | |||
| 1234 | #ifdef ETAGS_REGEXPS | 1223 | #ifdef ETAGS_REGEXPS |
| 1235 | free_patterns (); | 1224 | free_patterns (); |
| 1236 | #endif /* ETAGS_REGEXPS */ | 1225 | #endif /* ETAGS_REGEXPS */ |
| 1226 | free (filebuf.buffer); | ||
| 1237 | 1227 | ||
| 1238 | if (!CTAGS || cxref_style) | 1228 | if (!CTAGS || cxref_style) |
| 1239 | { | 1229 | { |
| @@ -1648,7 +1638,6 @@ find_entries (inf) | |||
| 1648 | FILE *inf; | 1638 | FILE *inf; |
| 1649 | { | 1639 | { |
| 1650 | char *cp; | 1640 | char *cp; |
| 1651 | node *old_last_node; | ||
| 1652 | language *lang = curfdp->lang; | 1641 | language *lang = curfdp->lang; |
| 1653 | Lang_function *parser = NULL; | 1642 | Lang_function *parser = NULL; |
| 1654 | 1643 | ||
| @@ -1703,7 +1692,7 @@ find_entries (inf) | |||
| 1703 | /* We rewind here, even if inf may be a pipe. We fail if the | 1692 | /* We rewind here, even if inf may be a pipe. We fail if the |
| 1704 | length of the first line is longer than the pipe block size, | 1693 | length of the first line is longer than the pipe block size, |
| 1705 | which is unlikely. */ | 1694 | which is unlikely. */ |
| 1706 | rewind (inf); | 1695 | rewind (inf); |
| 1707 | 1696 | ||
| 1708 | /* Else try to guess the language given the case insensitive file name. */ | 1697 | /* Else try to guess the language given the case insensitive file name. */ |
| 1709 | if (parser == NULL) | 1698 | if (parser == NULL) |
| @@ -1716,6 +1705,26 @@ find_entries (inf) | |||
| 1716 | } | 1705 | } |
| 1717 | } | 1706 | } |
| 1718 | 1707 | ||
| 1708 | /* Else try Fortran or C. */ | ||
| 1709 | if (parser == NULL) | ||
| 1710 | { | ||
| 1711 | node *old_last_node = last_node; | ||
| 1712 | |||
| 1713 | curfdp->lang = get_language_from_langname ("fortran"); | ||
| 1714 | find_entries (inf); | ||
| 1715 | |||
| 1716 | if (old_last_node == last_node) | ||
| 1717 | /* No Fortran entries found. Try C. */ | ||
| 1718 | { | ||
| 1719 | /* We do not tag if rewind fails. | ||
| 1720 | Only the file name will be recorded in the tags file. */ | ||
| 1721 | rewind (inf); | ||
| 1722 | curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c"); | ||
| 1723 | find_entries (inf); | ||
| 1724 | } | ||
| 1725 | return; | ||
| 1726 | } | ||
| 1727 | |||
| 1719 | if (!no_line_directive | 1728 | if (!no_line_directive |
| 1720 | && curfdp->lang != NULL && curfdp->lang->metasource) | 1729 | && curfdp->lang != NULL && curfdp->lang->metasource) |
| 1721 | /* It may be that this is a bingo.y file, and we already parsed a bingo.c | 1730 | /* It may be that this is a bingo.y file, and we already parsed a bingo.c |
| @@ -1748,32 +1757,21 @@ find_entries (inf) | |||
| 1748 | fdpp = &(*fdpp)->next; /* advance the list pointer */ | 1757 | fdpp = &(*fdpp)->next; /* advance the list pointer */ |
| 1749 | } | 1758 | } |
| 1750 | 1759 | ||
| 1751 | if (parser != NULL) | 1760 | assert (parser != NULL); |
| 1752 | { | ||
| 1753 | /* Generic initialisations before reading from file. */ | ||
| 1754 | lineno = 0; /* reset global line number */ | ||
| 1755 | charno = 0; /* reset global char number */ | ||
| 1756 | linecharno = 0; /* reset global char number of line start */ | ||
| 1757 | 1761 | ||
| 1758 | parser (inf); | 1762 | /* Generic initialisations before reading from file. */ |
| 1759 | return; | 1763 | filebuf.len = 0; /* reset the file buffer */ |
| 1760 | } | ||
| 1761 | 1764 | ||
| 1762 | /* Else try Fortran. */ | 1765 | /* Generic initialisations before parsing file with readline. */ |
| 1763 | old_last_node = last_node; | 1766 | lineno = 0; /* reset global line number */ |
| 1764 | curfdp->lang = get_language_from_langname ("fortran"); | 1767 | charno = 0; /* reset global char number */ |
| 1765 | find_entries (inf); | 1768 | linecharno = 0; /* reset global char number of line start */ |
| 1766 | 1769 | ||
| 1767 | if (old_last_node == last_node) | 1770 | parser (inf); |
| 1768 | /* No Fortran entries found. Try C. */ | 1771 | |
| 1769 | { | 1772 | #ifdef ETAGS_REGEXPS |
| 1770 | /* We do not tag if rewind fails. | 1773 | regex_tag_multiline (); |
| 1771 | Only the file name will be recorded in the tags file. */ | 1774 | #endif /* ETAGS_REGEXPS */ |
| 1772 | rewind (inf); | ||
| 1773 | curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c"); | ||
| 1774 | find_entries (inf); | ||
| 1775 | } | ||
| 1776 | return; | ||
| 1777 | } | 1775 | } |
| 1778 | 1776 | ||
| 1779 | 1777 | ||
| @@ -2014,6 +2012,11 @@ add_node (np, cur_node_p) | |||
| 2014 | * invalidate_nodes () | 2012 | * invalidate_nodes () |
| 2015 | * Scan the node tree and invalidate all nodes pointing to the | 2013 | * Scan the node tree and invalidate all nodes pointing to the |
| 2016 | * given file description (CTAGS case) or free them (ETAGS case). | 2014 | * given file description (CTAGS case) or free them (ETAGS case). |
| 2015 | * | ||
| 2016 | * This function most likely contains a bug, but I cannot tell where. | ||
| 2017 | * I have a case of a binary that crashes inside this function with a bus | ||
| 2018 | * error. Unfortunately, the binary does not contain debug information, and | ||
| 2019 | * compiling with debugging information makes the bug disappear. | ||
| 2017 | */ | 2020 | */ |
| 2018 | static void | 2021 | static void |
| 2019 | invalidate_nodes (badfdp, npp) | 2022 | invalidate_nodes (badfdp, npp) |
| @@ -2030,7 +2033,7 @@ invalidate_nodes (badfdp, npp) | |||
| 2030 | if (np->left != NULL) | 2033 | if (np->left != NULL) |
| 2031 | invalidate_nodes (badfdp, &np->left); | 2034 | invalidate_nodes (badfdp, &np->left); |
| 2032 | if (np->fdp == badfdp) | 2035 | if (np->fdp == badfdp) |
| 2033 | np-> valid = FALSE; | 2036 | np->valid = FALSE; |
| 2034 | if (np->right != NULL) | 2037 | if (np->right != NULL) |
| 2035 | invalidate_nodes (badfdp, &np->right); | 2038 | invalidate_nodes (badfdp, &np->right); |
| 2036 | } | 2039 | } |
| @@ -5263,17 +5266,18 @@ erlang_atom (s, pos) | |||
| 5263 | #ifdef ETAGS_REGEXPS | 5266 | #ifdef ETAGS_REGEXPS |
| 5264 | 5267 | ||
| 5265 | static char *scan_separators __P((char *)); | 5268 | static char *scan_separators __P((char *)); |
| 5266 | static void analyse_regex __P((char *, bool)); | 5269 | static void add_regex __P((char *, language *)); |
| 5267 | static void add_regex __P((char *, bool, language *)); | ||
| 5268 | static char *substitute __P((char *, char *, struct re_registers *)); | 5270 | static char *substitute __P((char *, char *, struct re_registers *)); |
| 5269 | 5271 | ||
| 5270 | /* Take a string like "/blah/" and turn it into "blah", making sure | 5272 | /* |
| 5271 | that the first and last characters are the same, and handling | 5273 | * Take a string like "/blah/" and turn it into "blah", verifying |
| 5272 | quoted separator characters. Actually, stops on the occurrence of | 5274 | * that the first and last characters are the same, and handling |
| 5273 | an unquoted separator. Also turns "\t" into a Tab character, and | 5275 | * quoted separator characters. Actually, stops on the occurrence of |
| 5274 | similarly for all character escape sequences supported by Gcc. | 5276 | * an unquoted separator. Also process \t, \n, etc. and turn into |
| 5275 | Returns pointer to terminating separator. Works in place. Null | 5277 | * appropriate characters. Works in place. Null terminates name string. |
| 5276 | terminates name string. */ | 5278 | * Returns pointer to terminating separator, or NULL for |
| 5279 | * unterminated regexps. | ||
| 5280 | */ | ||
| 5277 | static char * | 5281 | static char * |
| 5278 | scan_separators (name) | 5282 | scan_separators (name) |
| 5279 | char *name; | 5283 | char *name; |
| @@ -5288,15 +5292,15 @@ scan_separators (name) | |||
| 5288 | { | 5292 | { |
| 5289 | switch (*name) | 5293 | switch (*name) |
| 5290 | { | 5294 | { |
| 5291 | case 'a': *copyto++ = '\007'; break; | 5295 | case 'a': *copyto++ = '\007'; break; /* BEL (bell) */ |
| 5292 | case 'b': *copyto++ = '\b'; break; | 5296 | case 'b': *copyto++ = '\b'; break; /* BS (back space) */ |
| 5293 | case 'd': *copyto++ = 0177; break; | 5297 | case 'd': *copyto++ = 0177; break; /* DEL (delete) */ |
| 5294 | case 'e': *copyto++ = 033; break; | 5298 | case 'e': *copyto++ = 033; break; /* ESC (escape) */ |
| 5295 | case 'f': *copyto++ = '\f'; break; | 5299 | case 'f': *copyto++ = '\f'; break; /* FF (form feed) */ |
| 5296 | case 'n': *copyto++ = '\n'; break; | 5300 | case 'n': *copyto++ = '\n'; break; /* NL (new line) */ |
| 5297 | case 'r': *copyto++ = '\r'; break; | 5301 | case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */ |
| 5298 | case 't': *copyto++ = '\t'; break; | 5302 | case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */ |
| 5299 | case 'v': *copyto++ = '\v'; break; | 5303 | case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */ |
| 5300 | default: | 5304 | default: |
| 5301 | if (*name == sep) | 5305 | if (*name == sep) |
| 5302 | *copyto++ = sep; | 5306 | *copyto++ = sep; |
| @@ -5317,6 +5321,8 @@ scan_separators (name) | |||
| 5317 | else | 5321 | else |
| 5318 | *copyto++ = *name; | 5322 | *copyto++ = *name; |
| 5319 | } | 5323 | } |
| 5324 | if (*name != sep) | ||
| 5325 | name = NULL; /* signal unterminated regexp */ | ||
| 5320 | 5326 | ||
| 5321 | /* Terminate copied string. */ | 5327 | /* Terminate copied string. */ |
| 5322 | *copyto = '\0'; | 5328 | *copyto = '\0'; |
| @@ -5326,9 +5332,8 @@ scan_separators (name) | |||
| 5326 | /* Look at the argument of --regex or --no-regex and do the right | 5332 | /* Look at the argument of --regex or --no-regex and do the right |
| 5327 | thing. Same for each line of a regexp file. */ | 5333 | thing. Same for each line of a regexp file. */ |
| 5328 | static void | 5334 | static void |
| 5329 | analyse_regex (regex_arg, ignore_case) | 5335 | analyse_regex (regex_arg) |
| 5330 | char *regex_arg; | 5336 | char *regex_arg; |
| 5331 | bool ignore_case; | ||
| 5332 | { | 5337 | { |
| 5333 | if (regex_arg == NULL) | 5338 | if (regex_arg == NULL) |
| 5334 | { | 5339 | { |
| @@ -5362,7 +5367,7 @@ analyse_regex (regex_arg, ignore_case) | |||
| 5362 | } | 5367 | } |
| 5363 | initbuffer (&regexbuf); | 5368 | initbuffer (&regexbuf); |
| 5364 | while (readline_internal (&regexbuf, regexfp) > 0) | 5369 | while (readline_internal (&regexbuf, regexfp) > 0) |
| 5365 | analyse_regex (regexbuf.buffer, ignore_case); | 5370 | analyse_regex (regexbuf.buffer); |
| 5366 | free (regexbuf.buffer); | 5371 | free (regexbuf.buffer); |
| 5367 | fclose (regexfp); | 5372 | fclose (regexfp); |
| 5368 | } | 5373 | } |
| @@ -5381,17 +5386,17 @@ analyse_regex (regex_arg, ignore_case) | |||
| 5381 | error ("unterminated language name in regex: %s", regex_arg); | 5386 | error ("unterminated language name in regex: %s", regex_arg); |
| 5382 | return; | 5387 | return; |
| 5383 | } | 5388 | } |
| 5384 | *cp = '\0'; | 5389 | *cp++ = '\0'; |
| 5385 | lang = get_language_from_langname (lang_name); | 5390 | lang = get_language_from_langname (lang_name); |
| 5386 | if (lang == NULL) | 5391 | if (lang == NULL) |
| 5387 | return; | 5392 | return; |
| 5388 | add_regex (cp + 1, ignore_case, lang); | 5393 | add_regex (cp, lang); |
| 5389 | } | 5394 | } |
| 5390 | break; | 5395 | break; |
| 5391 | 5396 | ||
| 5392 | /* Regexp to be used for any language. */ | 5397 | /* Regexp to be used for any language. */ |
| 5393 | default: | 5398 | default: |
| 5394 | add_regex (regex_arg, ignore_case, NULL); | 5399 | add_regex (regex_arg, NULL); |
| 5395 | break; | 5400 | break; |
| 5396 | } | 5401 | } |
| 5397 | } | 5402 | } |
| @@ -5399,37 +5404,91 @@ analyse_regex (regex_arg, ignore_case) | |||
| 5399 | /* Turn a name, which is an ed-style (but Emacs syntax) regular | 5404 | /* Turn a name, which is an ed-style (but Emacs syntax) regular |
| 5400 | expression, into a real regular expression by compiling it. */ | 5405 | expression, into a real regular expression by compiling it. */ |
| 5401 | static void | 5406 | static void |
| 5402 | add_regex (regexp_pattern, ignore_case, lang) | 5407 | add_regex (regexp_pattern, lang) |
| 5403 | char *regexp_pattern; | 5408 | char *regexp_pattern; |
| 5404 | bool ignore_case; | ||
| 5405 | language *lang; | 5409 | language *lang; |
| 5406 | { | 5410 | { |
| 5407 | static struct re_pattern_buffer zeropattern; | 5411 | static struct re_pattern_buffer zeropattern; |
| 5408 | char *name; | 5412 | char sep, *pat, *name, *modifiers; |
| 5409 | const char *err; | 5413 | const char *err; |
| 5410 | struct re_pattern_buffer *patbuf; | 5414 | struct re_pattern_buffer *patbuf; |
| 5411 | pattern *pp; | 5415 | pattern *pp; |
| 5416 | bool ignore_case, multi_line, single_line; | ||
| 5412 | 5417 | ||
| 5413 | 5418 | ||
| 5414 | if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0]) | 5419 | if (strlen(regexp_pattern) < 3) |
| 5415 | { | 5420 | { |
| 5416 | error ("%s: unterminated regexp", regexp_pattern); | 5421 | error ("null regexp", (char *)NULL); |
| 5417 | return; | 5422 | return; |
| 5418 | } | 5423 | } |
| 5424 | sep = regexp_pattern[0]; | ||
| 5419 | name = scan_separators (regexp_pattern); | 5425 | name = scan_separators (regexp_pattern); |
| 5420 | if (regexp_pattern[0] == '\0') | 5426 | if (name == NULL) |
| 5421 | { | 5427 | { |
| 5422 | error ("null regexp", (char *)NULL); | 5428 | error ("%s: unterminated regexp", regexp_pattern); |
| 5429 | return; | ||
| 5430 | } | ||
| 5431 | if (name[1] == sep) | ||
| 5432 | { | ||
| 5433 | error ("null name for regexp \"%s\"", regexp_pattern); | ||
| 5423 | return; | 5434 | return; |
| 5424 | } | 5435 | } |
| 5425 | (void) scan_separators (name); | 5436 | modifiers = scan_separators (name); |
| 5437 | if (modifiers == NULL) /* no terminating separator --> no name */ | ||
| 5438 | { | ||
| 5439 | modifiers = name; | ||
| 5440 | name = ""; | ||
| 5441 | } | ||
| 5442 | else | ||
| 5443 | modifiers += 1; /* skip separator */ | ||
| 5444 | |||
| 5445 | /* Parse regex modifiers. */ | ||
| 5446 | ignore_case = FALSE; /* case is significant */ | ||
| 5447 | multi_line = FALSE; /* matches are done one line at a time */ | ||
| 5448 | single_line = FALSE; /* dot does not match newline */ | ||
| 5449 | for (; modifiers[0] != '\0'; modifiers++) | ||
| 5450 | switch (modifiers[0]) | ||
| 5451 | { | ||
| 5452 | case 'i': | ||
| 5453 | ignore_case = TRUE; | ||
| 5454 | break; | ||
| 5455 | case 's': | ||
| 5456 | single_line = TRUE; | ||
| 5457 | /* FALLTHRU */ | ||
| 5458 | case 'm': | ||
| 5459 | multi_line = TRUE; | ||
| 5460 | need_filebuf = TRUE; | ||
| 5461 | break; | ||
| 5462 | default: | ||
| 5463 | modifiers[1] = '\0'; | ||
| 5464 | error ("invalid regexp modifier `%s'", modifiers); | ||
| 5465 | return; | ||
| 5466 | } | ||
| 5426 | 5467 | ||
| 5427 | patbuf = xnew (1, struct re_pattern_buffer); | 5468 | patbuf = xnew (1, struct re_pattern_buffer); |
| 5428 | *patbuf = zeropattern; | 5469 | *patbuf = zeropattern; |
| 5429 | if (ignore_case) | 5470 | if (ignore_case) |
| 5430 | patbuf->translate = lc_trans; /* translation table to fold case */ | 5471 | { |
| 5472 | static char lc_trans[CHARS]; | ||
| 5473 | int i; | ||
| 5474 | for (i = 0; i < CHARS; i++) | ||
| 5475 | lc_trans[i] = lowcase (i); | ||
| 5476 | patbuf->translate = lc_trans; /* translation table to fold case */ | ||
| 5477 | } | ||
| 5478 | |||
| 5479 | if (multi_line) | ||
| 5480 | pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */ | ||
| 5481 | else | ||
| 5482 | pat = regexp_pattern; | ||
| 5431 | 5483 | ||
| 5432 | err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf); | 5484 | if (single_line) |
| 5485 | re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE); | ||
| 5486 | else | ||
| 5487 | re_set_syntax (RE_SYNTAX_EMACS); | ||
| 5488 | |||
| 5489 | err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf); | ||
| 5490 | if (multi_line) | ||
| 5491 | free (pat); | ||
| 5433 | if (err != NULL) | 5492 | if (err != NULL) |
| 5434 | { | 5493 | { |
| 5435 | error ("%s while compiling pattern", err); | 5494 | error ("%s while compiling pattern", err); |
| @@ -5445,6 +5504,7 @@ add_regex (regexp_pattern, ignore_case, lang) | |||
| 5445 | p_head->name_pattern = savestr (name); | 5504 | p_head->name_pattern = savestr (name); |
| 5446 | p_head->error_signaled = FALSE; | 5505 | p_head->error_signaled = FALSE; |
| 5447 | p_head->ignore_case = ignore_case; | 5506 | p_head->ignore_case = ignore_case; |
| 5507 | p_head->multi_line = multi_line; | ||
| 5448 | } | 5508 | } |
| 5449 | 5509 | ||
| 5450 | /* | 5510 | /* |
| @@ -5512,6 +5572,92 @@ free_patterns () | |||
| 5512 | } | 5572 | } |
| 5513 | return; | 5573 | return; |
| 5514 | } | 5574 | } |
| 5575 | |||
| 5576 | /* | ||
| 5577 | * Reads the whole file as a single string from `filebuf' and looks for | ||
| 5578 | * multi-line regular expressions, creating tags on matches. | ||
| 5579 | * readline already dealt with normal regexps. | ||
| 5580 | * | ||
| 5581 | * Idea by Ben Wing <ben@666.com> (2002). | ||
| 5582 | */ | ||
| 5583 | static void | ||
| 5584 | regex_tag_multiline () | ||
| 5585 | { | ||
| 5586 | char *buffer = filebuf.buffer; | ||
| 5587 | pattern *pp; | ||
| 5588 | |||
| 5589 | for (pp = p_head; pp != NULL; pp = pp->p_next) | ||
| 5590 | { | ||
| 5591 | int match = 0; | ||
| 5592 | |||
| 5593 | if (!pp->multi_line) | ||
| 5594 | continue; /* skip normal regexps */ | ||
| 5595 | |||
| 5596 | /* Generic initialisations before parsing file from memory. */ | ||
| 5597 | lineno = 1; /* reset global line number */ | ||
| 5598 | charno = 0; /* reset global char number */ | ||
| 5599 | linecharno = 0; /* reset global char number of line start */ | ||
| 5600 | |||
| 5601 | /* Only use generic regexps or those for the current language. */ | ||
| 5602 | if (pp->lang != NULL && pp->lang != curfdp->lang) | ||
| 5603 | continue; | ||
| 5604 | |||
| 5605 | while (match >= 0 && match < filebuf.len) | ||
| 5606 | { | ||
| 5607 | match = re_search (pp->pat, buffer, filebuf.len, charno, | ||
| 5608 | filebuf.len - match, &pp->regs); | ||
| 5609 | switch (match) | ||
| 5610 | { | ||
| 5611 | case -2: | ||
| 5612 | /* Some error. */ | ||
| 5613 | if (!pp->error_signaled) | ||
| 5614 | { | ||
| 5615 | error ("regexp stack overflow while matching \"%s\"", | ||
| 5616 | pp->regex); | ||
| 5617 | pp->error_signaled = TRUE; | ||
| 5618 | } | ||
| 5619 | break; | ||
| 5620 | case -1: | ||
| 5621 | /* No match. */ | ||
| 5622 | break; | ||
| 5623 | default: | ||
| 5624 | if (match == pp->regs.end[0]) | ||
| 5625 | { | ||
| 5626 | if (!pp->error_signaled) | ||
| 5627 | { | ||
| 5628 | error ("regexp matches the empty string: \"%s\"", | ||
| 5629 | pp->regex); | ||
| 5630 | pp->error_signaled = TRUE; | ||
| 5631 | } | ||
| 5632 | match = -3; /* exit from while loop */ | ||
| 5633 | break; | ||
| 5634 | } | ||
| 5635 | |||
| 5636 | /* Match occurred. Construct a tag. */ | ||
| 5637 | while (charno < pp->regs.end[0]) | ||
| 5638 | if (buffer[charno++] == '\n') | ||
| 5639 | lineno++, linecharno = charno; | ||
| 5640 | if (pp->name_pattern[0] != '\0') | ||
| 5641 | { | ||
| 5642 | /* Make a named tag. */ | ||
| 5643 | char *name = substitute (buffer, | ||
| 5644 | pp->name_pattern, &pp->regs); | ||
| 5645 | if (name != NULL) | ||
| 5646 | pfnote (name, TRUE, buffer + linecharno, | ||
| 5647 | charno - linecharno + 1, lineno, linecharno); | ||
| 5648 | } | ||
| 5649 | else | ||
| 5650 | { | ||
| 5651 | /* Make an unnamed tag. */ | ||
| 5652 | pfnote ((char *)NULL, TRUE, buffer + linecharno, | ||
| 5653 | charno - linecharno + 1, lineno, linecharno); | ||
| 5654 | } | ||
| 5655 | break; | ||
| 5656 | } | ||
| 5657 | } | ||
| 5658 | } | ||
| 5659 | } | ||
| 5660 | |||
| 5515 | #endif /* ETAGS_REGEXPS */ | 5661 | #endif /* ETAGS_REGEXPS */ |
| 5516 | 5662 | ||
| 5517 | 5663 | ||
| @@ -5564,10 +5710,13 @@ initbuffer (lbp) | |||
| 5564 | * newline or CR-NL, if any. Return the number of characters read from | 5710 | * newline or CR-NL, if any. Return the number of characters read from |
| 5565 | * `stream', which is the length of the line including the newline. | 5711 | * `stream', which is the length of the line including the newline. |
| 5566 | * | 5712 | * |
| 5567 | * On DOS or Windows we do not count the CR character, if any, before the | 5713 | * On DOS or Windows we do not count the CR character, if any before the |
| 5568 | * NL, in the returned length; this mirrors the behavior of emacs on those | 5714 | * NL, in the returned length; this mirrors the behavior of Emacs on those |
| 5569 | * platforms (for text files, it translates CR-NL to NL as it reads in the | 5715 | * platforms (for text files, it translates CR-NL to NL as it reads in the |
| 5570 | * file). | 5716 | * file). |
| 5717 | * | ||
| 5718 | * If multi-line regular expressions are requested, each line read is | ||
| 5719 | * appended to `filebuf'. | ||
| 5571 | */ | 5720 | */ |
| 5572 | static long | 5721 | static long |
| 5573 | readline_internal (lbp, stream) | 5722 | readline_internal (lbp, stream) |
| @@ -5626,12 +5775,28 @@ readline_internal (lbp, stream) | |||
| 5626 | } | 5775 | } |
| 5627 | lbp->len = p - buffer; | 5776 | lbp->len = p - buffer; |
| 5628 | 5777 | ||
| 5778 | if (need_filebuf /* we need filebuf for multi-line regexps */ | ||
| 5779 | && chars_deleted > 0) /* not at EOF */ | ||
| 5780 | { | ||
| 5781 | while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */ | ||
| 5782 | { | ||
| 5783 | /* Expand filebuf. */ | ||
| 5784 | filebuf.size *= 2; | ||
| 5785 | xrnew (filebuf.buffer, filebuf.size, char); | ||
| 5786 | } | ||
| 5787 | strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len); | ||
| 5788 | filebuf.len += lbp->len; | ||
| 5789 | filebuf.buffer[filebuf.len++] = '\n'; | ||
| 5790 | filebuf.buffer[filebuf.len] = '\0'; | ||
| 5791 | } | ||
| 5792 | |||
| 5629 | return lbp->len + chars_deleted; | 5793 | return lbp->len + chars_deleted; |
| 5630 | } | 5794 | } |
| 5631 | 5795 | ||
| 5632 | /* | 5796 | /* |
| 5633 | * Like readline_internal, above, but in addition try to match the | 5797 | * Like readline_internal, above, but in addition try to match the |
| 5634 | * input line against relevant regular expressions. | 5798 | * input line against relevant regular expressions and manage #line |
| 5799 | * directives. | ||
| 5635 | */ | 5800 | */ |
| 5636 | static void | 5801 | static void |
| 5637 | readline (lbp, stream) | 5802 | readline (lbp, stream) |
| @@ -5752,8 +5917,8 @@ readline (lbp, stream) | |||
| 5752 | { | 5917 | { |
| 5753 | if (result > 0) | 5918 | if (result > 0) |
| 5754 | { | 5919 | { |
| 5755 | /* Do a tail recursion on ourselves, thus discarding the contents | 5920 | /* Do a tail recursion on ourselves, thus discarding the contents |
| 5756 | of the line buffer. */ | 5921 | of the line buffer. */ |
| 5757 | readline (lbp, stream); | 5922 | readline (lbp, stream); |
| 5758 | return; | 5923 | return; |
| 5759 | } | 5924 | } |
| @@ -5772,8 +5937,11 @@ readline (lbp, stream) | |||
| 5772 | if (lbp->len > 0) | 5937 | if (lbp->len > 0) |
| 5773 | for (pp = p_head; pp != NULL; pp = pp->p_next) | 5938 | for (pp = p_head; pp != NULL; pp = pp->p_next) |
| 5774 | { | 5939 | { |
| 5775 | /* Only use generic regexps or those for the current language. */ | 5940 | /* Only use generic regexps or those for the current language. |
| 5776 | if (pp->lang != NULL && pp->lang != fdhead->lang) | 5941 | Also do not use multiline regexps, which is the job of |
| 5942 | regex_tag_multiline. */ | ||
| 5943 | if ((pp->lang != NULL && pp->lang != fdhead->lang) | ||
| 5944 | || pp->multi_line) | ||
| 5777 | continue; | 5945 | continue; |
| 5778 | 5946 | ||
| 5779 | match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs); | 5947 | match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs); |
| @@ -5783,13 +5951,23 @@ readline (lbp, stream) | |||
| 5783 | /* Some error. */ | 5951 | /* Some error. */ |
| 5784 | if (!pp->error_signaled) | 5952 | if (!pp->error_signaled) |
| 5785 | { | 5953 | { |
| 5786 | error ("error while matching \"%s\"", pp->regex); | 5954 | error ("regexp stack overflow while matching \"%s\"", |
| 5955 | pp->regex); | ||
| 5787 | pp->error_signaled = TRUE; | 5956 | pp->error_signaled = TRUE; |
| 5788 | } | 5957 | } |
| 5789 | break; | 5958 | break; |
| 5790 | case -1: | 5959 | case -1: |
| 5791 | /* No match. */ | 5960 | /* No match. */ |
| 5792 | break; | 5961 | break; |
| 5962 | case 0: | ||
| 5963 | /* Empty string matched. */ | ||
| 5964 | if (!pp->error_signaled) | ||
| 5965 | { | ||
| 5966 | error ("regexp matches the empty string: \"%s\"", | ||
| 5967 | pp->regex); | ||
| 5968 | pp->error_signaled = TRUE; | ||
| 5969 | } | ||
| 5970 | break; | ||
| 5793 | default: | 5971 | default: |
| 5794 | /* Match occurred. Construct a tag. */ | 5972 | /* Match occurred. Construct a tag. */ |
| 5795 | if (pp->name_pattern[0] != '\0') | 5973 | if (pp->name_pattern[0] != '\0') |
| @@ -6229,6 +6407,6 @@ xrealloc (ptr, size) | |||
| 6229 | * indent-tabs-mode: t | 6407 | * indent-tabs-mode: t |
| 6230 | * tab-width: 8 | 6408 | * tab-width: 8 |
| 6231 | * fill-column: 79 | 6409 | * fill-column: 79 |
| 6232 | * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node") | 6410 | * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "pattern") |
| 6233 | * End: | 6411 | * End: |
| 6234 | */ | 6412 | */ |