about summary refs log tree commit diff stats
path: root/lib-src
diff options
context:
space:
mode:
author Francesco Potortì 2002-06-13 10:44:15 +0000
committer Francesco Potortì 2002-06-13 10:44:15 +0000
commit 24dbe96a108cfb899cf1f54ea4b42aceb159776f (patch)
tree 120be290c5ba546bd568627c6e7275bb999c6ce4 /lib-src
parent dfd8bf47b3498b83fb7ff03ba7bf9c3860604213 (diff)
download emacs-24dbe96a108cfb899cf1f54ea4b42aceb159776f.tar.gz
emacs-24dbe96a108cfb899cf1f54ea4b42aceb159776f.zip
New multi-line regexp and new regexp syntax.
(arg_type): at_icregexp label removed (obsolete). (pattern): New member multi_line for multi-line regexps. (filebuf): A global buffer containing the whole file as a string for multi-line regexp matching. (need_filebuf): Global flag raised if multi-line regexps used. (print_help): Document new regexp modifiers, remove references to obsolete option --ignore-case-regexp. (main): Do not set regexp syntax and translation table here. (main): Treat -c option as a backward compatibility hack. (main, find_entries): Init and free filebuf. (find_entries): Call regex_tag_multiline after the regular parser. (scan_separators): Check for unterminated regexp and return NULL. (analyse_regex, add_regex): Remove the ignore_case argument, which is now a modifier to the regexp. All callers changed. (add_regex): Manage the regexp modifiers. (regex_tag_multiline): New function. Reads from filebuf. (readline_internal): If necessary, copy the whole file into filebuf. (readline): Skip multi-line regexps, leave them to regex_tag_multiline. (add_regex): Better check for null regexps. (readline): Check for regex matching null string. (find_entries): Reorganisation.
Diffstat (limited to 'lib-src')
-rw-r--r-- lib-src/etags.c 406
1 files changed, 292 insertions, 114 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c
index 6b4b379f138..be60b476972 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -2,21 +2,21 @@
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002 2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold 3 Free Software Foundation, Inc. and Ken Arnold
4 4
5This file is not considered part of GNU Emacs. 5 This file is not considered part of GNU Emacs.
6 6
7This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2 of the License, or 9 the Free Software Foundation; either version 2 of the License, or
10(at your option) any later version. 10 (at your option) any later version.
11 11
12This program is distributed in the hope that it will be useful, 12 This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details. 15 GNU General Public License for more details.
16 16
17You should have received a copy of the GNU General Public License 17 You should have received a copy of the GNU General Public License
18along with this program; if not, write to the Free Software Foundation, 18 along with this program; if not, write to the Free Software Foundation,
19Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 20
21/* 21/*
22 * Authors: 22 * Authors:
@@ -34,7 +34,7 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 * Francesco Potortì <pot@gnu.org> has maintained it since 1993. 34 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
35 */ 35 */
36 36
37char pot_etags_version[] = "@(#) pot revision number is 16.10"; 37char pot_etags_version[] = "@(#) pot revision number is 16.19";
38 38
39#define TRUE 1 39#define TRUE 1
40#define FALSE 0 40#define FALSE 0
@@ -288,7 +288,6 @@ typedef struct
288 enum { 288 enum {
289 at_language, /* a language specification */ 289 at_language, /* a language specification */
290 at_regexp, /* a regular expression */ 290 at_regexp, /* a regular expression */
291 at_icregexp, /* same, but with case ignored */
292 at_filename, /* a file name */ 291 at_filename, /* a file name */
293 at_stdin /* read from stdin here */ 292 at_stdin /* read from stdin here */
294 } arg_type; /* argument type */ 293 } arg_type; /* argument type */
@@ -308,6 +307,7 @@ typedef struct pattern
308 char *name_pattern; 307 char *name_pattern;
309 bool error_signaled; 308 bool error_signaled;
310 bool ignore_case; 309 bool ignore_case;
310 bool multi_line;
311} pattern; 311} pattern;
312#endif /* ETAGS_REGEXPS */ 312#endif /* ETAGS_REGEXPS */
313 313
@@ -355,9 +355,9 @@ static bool nocase_tail __P((char *));
355static char *get_tag __P((char *)); 355static char *get_tag __P((char *));
356 356
357#ifdef ETAGS_REGEXPS 357#ifdef ETAGS_REGEXPS
358static void analyse_regex __P((char *, bool)); 358static void analyse_regex __P((char *));
359static void add_regex __P((char *, bool, language *));
360static void free_patterns __P((void)); 359static void free_patterns __P((void));
360static void regex_tag_multiline __P((void));
361#endif /* ETAGS_REGEXPS */ 361#endif /* ETAGS_REGEXPS */
362static void error __P((const char *, const char *)); 362static void error __P((const char *, const char *));
363static void suggest_asking_for_help __P((void)); 363static void suggest_asking_for_help __P((void));
@@ -417,6 +417,7 @@ static node *nodehead; /* the head of the binary tree of tags */
417static node *last_node; /* the last node created */ 417static node *last_node; /* the last node created */
418 418
419static linebuffer lb; /* the current line */ 419static linebuffer lb; /* the current line */
420static linebuffer filebuf; /* a buffer containing the whole file */
420 421
421/* boolean "functions" (see init) */ 422/* boolean "functions" (see init) */
422static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; 423static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
@@ -457,13 +458,10 @@ static bool packages_only; /* --packages-only: in Ada, only tag packages*/
457static bool parsing_stdin; /* --parse-stdin used */ 458static bool parsing_stdin; /* --parse-stdin used */
458 459
459#ifdef ETAGS_REGEXPS 460#ifdef ETAGS_REGEXPS
460/* List of all regexps. */ 461static pattern *p_head; /* list of all regexps */
461static pattern *p_head; 462static bool need_filebuf; /* some regexes are multi-line */
462 463#else
463/* How many characters in the character set. (From regex.c.) */ 464# define need_filebuf FALSE
464#define CHAR_SET_SIZE 256
465/* Translation table for case-insensitive matching. */
466static char lc_trans[CHAR_SET_SIZE];
467#endif /* ETAGS_REGEXPS */ 465#endif /* ETAGS_REGEXPS */
468 466
469#ifdef LONG_OPTIONS 467#ifdef LONG_OPTIONS
@@ -680,10 +678,10 @@ Compressed files are supported using gzip and bzip2.");
680} 678}
681 679
682#ifndef EMACS_NAME 680#ifndef EMACS_NAME
683# define EMACS_NAME "GNU Emacs" 681# define EMACS_NAME "standalone"
684#endif 682#endif
685#ifndef VERSION 683#ifndef VERSION
686# define VERSION "21" 684# define VERSION "version"
687#endif 685#endif
688static void 686static void
689print_version () 687print_version ()
@@ -775,9 +773,11 @@ Relative ones are stored relative to the output file's directory.\n");
775 REGEXP is anchored (as if preceded by ^).\n\ 773 REGEXP is anchored (as if preceded by ^).\n\
776 The form /REGEXP/NAME/ creates a named tag.\n\ 774 The form /REGEXP/NAME/ creates a named tag.\n\
777 For example Tcl named tags can be created with:\n\ 775 For example Tcl named tags can be created with:\n\
778 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\""); 776 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".");
779 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\ 777 puts ("In the form /REGEXP/MODS or /REGEXP/NAME/MODS, MODS are\n\
780 Like -r, --regex but ignore case when matching expressions."); 778 one-letter modifiers: `i' means to ignore case, `m' means\n\
779 allow multi-line matches, `s' implies `m' and additionally\n\
780 causes dot to match the newline character.");
781 puts ("-R, --no-regex\n\ 781 puts ("-R, --no-regex\n\
782 Don't create tags from regexps for the following files."); 782 Don't create tags from regexps for the following files.");
783#endif /* ETAGS_REGEXPS */ 783#endif /* ETAGS_REGEXPS */
@@ -996,14 +996,6 @@ main (argc, argv)
996 is small. */ 996 is small. */
997 argbuffer = xnew (argc, argument); 997 argbuffer = xnew (argc, argument);
998 998
999#ifdef ETAGS_REGEXPS
1000 /* Set syntax for regular expression routines. */
1001 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
1002 /* Translation table for case-insensitive search. */
1003 for (i = 0; i < CHAR_SET_SIZE; i++)
1004 lc_trans[i] = lowcase (i);
1005#endif /* ETAGS_REGEXPS */
1006
1007 /* 999 /*
1008 * If etags, always find typedefs and structure tags. Why not? 1000 * If etags, always find typedefs and structure tags. Why not?
1009 * Also default to find macro constants, enum constants and 1001 * Also default to find macro constants, enum constants and
@@ -1079,6 +1071,10 @@ main (argc, argv)
1079 } 1071 }
1080 } 1072 }
1081 break; 1073 break;
1074 case 'c':
1075 /* Backward compatibility: support obsolete --ignore-case-regex. */
1076 optarg = concat (optarg, "i", ""); /* memory leak here */
1077 /* FALLTHRU */
1082 case 'r': 1078 case 'r':
1083 argbuffer[current_arg].arg_type = at_regexp; 1079 argbuffer[current_arg].arg_type = at_regexp;
1084 argbuffer[current_arg].what = optarg; 1080 argbuffer[current_arg].what = optarg;
@@ -1089,11 +1085,6 @@ main (argc, argv)
1089 argbuffer[current_arg].what = NULL; 1085 argbuffer[current_arg].what = NULL;
1090 ++current_arg; 1086 ++current_arg;
1091 break; 1087 break;
1092 case 'c':
1093 argbuffer[current_arg].arg_type = at_icregexp;
1094 argbuffer[current_arg].what = optarg;
1095 ++current_arg;
1096 break;
1097 case 'V': 1088 case 'V':
1098 print_version (); 1089 print_version ();
1099 break; 1090 break;
@@ -1152,6 +1143,7 @@ main (argc, argv)
1152 1143
1153 initbuffer (&lb); 1144 initbuffer (&lb);
1154 initbuffer (&filename_lb); 1145 initbuffer (&filename_lb);
1146 initbuffer (&filebuf);
1155 1147
1156 if (!CTAGS) 1148 if (!CTAGS)
1157 { 1149 {
@@ -1186,10 +1178,7 @@ main (argc, argv)
1186 break; 1178 break;
1187#ifdef ETAGS_REGEXPS 1179#ifdef ETAGS_REGEXPS
1188 case at_regexp: 1180 case at_regexp:
1189 analyse_regex (argbuffer[i].what, FALSE); 1181 analyse_regex (argbuffer[i].what);
1190 break;
1191 case at_icregexp:
1192 analyse_regex (argbuffer[i].what, TRUE);
1193 break; 1182 break;
1194#endif 1183#endif
1195 case at_filename: 1184 case at_filename:
@@ -1234,6 +1223,7 @@ main (argc, argv)
1234#ifdef ETAGS_REGEXPS 1223#ifdef ETAGS_REGEXPS
1235 free_patterns (); 1224 free_patterns ();
1236#endif /* ETAGS_REGEXPS */ 1225#endif /* ETAGS_REGEXPS */
1226 free (filebuf.buffer);
1237 1227
1238 if (!CTAGS || cxref_style) 1228 if (!CTAGS || cxref_style)
1239 { 1229 {
@@ -1648,7 +1638,6 @@ find_entries (inf)
1648 FILE *inf; 1638 FILE *inf;
1649{ 1639{
1650 char *cp; 1640 char *cp;
1651 node *old_last_node;
1652 language *lang = curfdp->lang; 1641 language *lang = curfdp->lang;
1653 Lang_function *parser = NULL; 1642 Lang_function *parser = NULL;
1654 1643
@@ -1703,7 +1692,7 @@ find_entries (inf)
1703 /* We rewind here, even if inf may be a pipe. We fail if the 1692 /* We rewind here, even if inf may be a pipe. We fail if the
1704 length of the first line is longer than the pipe block size, 1693 length of the first line is longer than the pipe block size,
1705 which is unlikely. */ 1694 which is unlikely. */
1706 rewind (inf); 1695 rewind (inf);
1707 1696
1708 /* Else try to guess the language given the case insensitive file name. */ 1697 /* Else try to guess the language given the case insensitive file name. */
1709 if (parser == NULL) 1698 if (parser == NULL)
@@ -1716,6 +1705,26 @@ find_entries (inf)
1716 } 1705 }
1717 } 1706 }
1718 1707
1708 /* Else try Fortran or C. */
1709 if (parser == NULL)
1710 {
1711 node *old_last_node = last_node;
1712
1713 curfdp->lang = get_language_from_langname ("fortran");
1714 find_entries (inf);
1715
1716 if (old_last_node == last_node)
1717 /* No Fortran entries found. Try C. */
1718 {
1719 /* We do not tag if rewind fails.
1720 Only the file name will be recorded in the tags file. */
1721 rewind (inf);
1722 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1723 find_entries (inf);
1724 }
1725 return;
1726 }
1727
1719 if (!no_line_directive 1728 if (!no_line_directive
1720 && curfdp->lang != NULL && curfdp->lang->metasource) 1729 && curfdp->lang != NULL && curfdp->lang->metasource)
1721 /* It may be that this is a bingo.y file, and we already parsed a bingo.c 1730 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
@@ -1748,32 +1757,21 @@ find_entries (inf)
1748 fdpp = &(*fdpp)->next; /* advance the list pointer */ 1757 fdpp = &(*fdpp)->next; /* advance the list pointer */
1749 } 1758 }
1750 1759
1751 if (parser != NULL) 1760 assert (parser != NULL);
1752 {
1753 /* Generic initialisations before reading from file. */
1754 lineno = 0; /* reset global line number */
1755 charno = 0; /* reset global char number */
1756 linecharno = 0; /* reset global char number of line start */
1757 1761
1758 parser (inf); 1762 /* Generic initialisations before reading from file. */
1759 return; 1763 filebuf.len = 0; /* reset the file buffer */
1760 }
1761 1764
1762 /* Else try Fortran. */ 1765 /* Generic initialisations before parsing file with readline. */
1763 old_last_node = last_node; 1766 lineno = 0; /* reset global line number */
1764 curfdp->lang = get_language_from_langname ("fortran"); 1767 charno = 0; /* reset global char number */
1765 find_entries (inf); 1768 linecharno = 0; /* reset global char number of line start */
1766 1769
1767 if (old_last_node == last_node) 1770 parser (inf);
1768 /* No Fortran entries found. Try C. */ 1771
1769 { 1772#ifdef ETAGS_REGEXPS
1770 /* We do not tag if rewind fails. 1773 regex_tag_multiline ();
1771 Only the file name will be recorded in the tags file. */ 1774#endif /* ETAGS_REGEXPS */
1772 rewind (inf);
1773 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1774 find_entries (inf);
1775 }
1776 return;
1777} 1775}
1778 1776
1779 1777
@@ -2014,6 +2012,11 @@ add_node (np, cur_node_p)
2014 * invalidate_nodes () 2012 * invalidate_nodes ()
2015 * Scan the node tree and invalidate all nodes pointing to the 2013 * Scan the node tree and invalidate all nodes pointing to the
2016 * given file description (CTAGS case) or free them (ETAGS case). 2014 * given file description (CTAGS case) or free them (ETAGS case).
2015 *
2016 * This function most likely contains a bug, but I cannot tell where.
2017 * I have a case of a binary that crashes inside this function with a bus
2018 * error. Unfortunately, the binary does not contain debug information, and
2019 * compiling with debugging information makes the bug disappear.
2017 */ 2020 */
2018static void 2021static void
2019invalidate_nodes (badfdp, npp) 2022invalidate_nodes (badfdp, npp)
@@ -2030,7 +2033,7 @@ invalidate_nodes (badfdp, npp)
2030 if (np->left != NULL) 2033 if (np->left != NULL)
2031 invalidate_nodes (badfdp, &np->left); 2034 invalidate_nodes (badfdp, &np->left);
2032 if (np->fdp == badfdp) 2035 if (np->fdp == badfdp)
2033 np-> valid = FALSE; 2036 np->valid = FALSE;
2034 if (np->right != NULL) 2037 if (np->right != NULL)
2035 invalidate_nodes (badfdp, &np->right); 2038 invalidate_nodes (badfdp, &np->right);
2036 } 2039 }
@@ -5263,17 +5266,18 @@ erlang_atom (s, pos)
5263#ifdef ETAGS_REGEXPS 5266#ifdef ETAGS_REGEXPS
5264 5267
5265static char *scan_separators __P((char *)); 5268static char *scan_separators __P((char *));
5266static void analyse_regex __P((char *, bool)); 5269static void add_regex __P((char *, language *));
5267static void add_regex __P((char *, bool, language *));
5268static char *substitute __P((char *, char *, struct re_registers *)); 5270static char *substitute __P((char *, char *, struct re_registers *));
5269 5271
5270/* Take a string like "/blah/" and turn it into "blah", making sure 5272/*
5271 that the first and last characters are the same, and handling 5273 * Take a string like "/blah/" and turn it into "blah", verifying
5272 quoted separator characters. Actually, stops on the occurrence of 5274 * that the first and last characters are the same, and handling
5273 an unquoted separator. Also turns "\t" into a Tab character, and 5275 * quoted separator characters. Actually, stops on the occurrence of
5274 similarly for all character escape sequences supported by Gcc. 5276 * an unquoted separator. Also process \t, \n, etc. and turn into
5275 Returns pointer to terminating separator. Works in place. Null 5277 * appropriate characters. Works in place. Null terminates name string.
5276 terminates name string. */ 5278 * Returns pointer to terminating separator, or NULL for
5279 * unterminated regexps.
5280 */
5277static char * 5281static char *
5278scan_separators (name) 5282scan_separators (name)
5279 char *name; 5283 char *name;
@@ -5288,15 +5292,15 @@ scan_separators (name)
5288 { 5292 {
5289 switch (*name) 5293 switch (*name)
5290 { 5294 {
5291 case 'a': *copyto++ = '\007'; break; 5295 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5292 case 'b': *copyto++ = '\b'; break; 5296 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5293 case 'd': *copyto++ = 0177; break; 5297 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5294 case 'e': *copyto++ = 033; break; 5298 case 'e': *copyto++ = 033; break; /* ESC (escape) */
5295 case 'f': *copyto++ = '\f'; break; 5299 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5296 case 'n': *copyto++ = '\n'; break; 5300 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5297 case 'r': *copyto++ = '\r'; break; 5301 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5298 case 't': *copyto++ = '\t'; break; 5302 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5299 case 'v': *copyto++ = '\v'; break; 5303 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5300 default: 5304 default:
5301 if (*name == sep) 5305 if (*name == sep)
5302 *copyto++ = sep; 5306 *copyto++ = sep;
@@ -5317,6 +5321,8 @@ scan_separators (name)
5317 else 5321 else
5318 *copyto++ = *name; 5322 *copyto++ = *name;
5319 } 5323 }
5324 if (*name != sep)
5325 name = NULL; /* signal unterminated regexp */
5320 5326
5321 /* Terminate copied string. */ 5327 /* Terminate copied string. */
5322 *copyto = '\0'; 5328 *copyto = '\0';
@@ -5326,9 +5332,8 @@ scan_separators (name)
5326/* Look at the argument of --regex or --no-regex and do the right 5332/* Look at the argument of --regex or --no-regex and do the right
5327 thing. Same for each line of a regexp file. */ 5333 thing. Same for each line of a regexp file. */
5328static void 5334static void
5329analyse_regex (regex_arg, ignore_case) 5335analyse_regex (regex_arg)
5330 char *regex_arg; 5336 char *regex_arg;
5331 bool ignore_case;
5332{ 5337{
5333 if (regex_arg == NULL) 5338 if (regex_arg == NULL)
5334 { 5339 {
@@ -5362,7 +5367,7 @@ analyse_regex (regex_arg, ignore_case)
5362 } 5367 }
5363 initbuffer (&regexbuf); 5368 initbuffer (&regexbuf);
5364 while (readline_internal (&regexbuf, regexfp) > 0) 5369 while (readline_internal (&regexbuf, regexfp) > 0)
5365 analyse_regex (regexbuf.buffer, ignore_case); 5370 analyse_regex (regexbuf.buffer);
5366 free (regexbuf.buffer); 5371 free (regexbuf.buffer);
5367 fclose (regexfp); 5372 fclose (regexfp);
5368 } 5373 }
@@ -5381,17 +5386,17 @@ analyse_regex (regex_arg, ignore_case)
5381 error ("unterminated language name in regex: %s", regex_arg); 5386 error ("unterminated language name in regex: %s", regex_arg);
5382 return; 5387 return;
5383 } 5388 }
5384 *cp = '\0'; 5389 *cp++ = '\0';
5385 lang = get_language_from_langname (lang_name); 5390 lang = get_language_from_langname (lang_name);
5386 if (lang == NULL) 5391 if (lang == NULL)
5387 return; 5392 return;
5388 add_regex (cp + 1, ignore_case, lang); 5393 add_regex (cp, lang);
5389 } 5394 }
5390 break; 5395 break;
5391 5396
5392 /* Regexp to be used for any language. */ 5397 /* Regexp to be used for any language. */
5393 default: 5398 default:
5394 add_regex (regex_arg, ignore_case, NULL); 5399 add_regex (regex_arg, NULL);
5395 break; 5400 break;
5396 } 5401 }
5397} 5402}
@@ -5399,37 +5404,91 @@ analyse_regex (regex_arg, ignore_case)
5399/* Turn a name, which is an ed-style (but Emacs syntax) regular 5404/* Turn a name, which is an ed-style (but Emacs syntax) regular
5400 expression, into a real regular expression by compiling it. */ 5405 expression, into a real regular expression by compiling it. */
5401static void 5406static void
5402add_regex (regexp_pattern, ignore_case, lang) 5407add_regex (regexp_pattern, lang)
5403 char *regexp_pattern; 5408 char *regexp_pattern;
5404 bool ignore_case;
5405 language *lang; 5409 language *lang;
5406{ 5410{
5407 static struct re_pattern_buffer zeropattern; 5411 static struct re_pattern_buffer zeropattern;
5408 char *name; 5412 char sep, *pat, *name, *modifiers;
5409 const char *err; 5413 const char *err;
5410 struct re_pattern_buffer *patbuf; 5414 struct re_pattern_buffer *patbuf;
5411 pattern *pp; 5415 pattern *pp;
5416 bool ignore_case, multi_line, single_line;
5412 5417
5413 5418
5414 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0]) 5419 if (strlen(regexp_pattern) < 3)
5415 { 5420 {
5416 error ("%s: unterminated regexp", regexp_pattern); 5421 error ("null regexp", (char *)NULL);
5417 return; 5422 return;
5418 } 5423 }
5424 sep = regexp_pattern[0];
5419 name = scan_separators (regexp_pattern); 5425 name = scan_separators (regexp_pattern);
5420 if (regexp_pattern[0] == '\0') 5426 if (name == NULL)
5421 { 5427 {
5422 error ("null regexp", (char *)NULL); 5428 error ("%s: unterminated regexp", regexp_pattern);
5429 return;
5430 }
5431 if (name[1] == sep)
5432 {
5433 error ("null name for regexp \"%s\"", regexp_pattern);
5423 return; 5434 return;
5424 } 5435 }
5425 (void) scan_separators (name); 5436 modifiers = scan_separators (name);
5437 if (modifiers == NULL) /* no terminating separator --> no name */
5438 {
5439 modifiers = name;
5440 name = "";
5441 }
5442 else
5443 modifiers += 1; /* skip separator */
5444
5445 /* Parse regex modifiers. */
5446 ignore_case = FALSE; /* case is significant */
5447 multi_line = FALSE; /* matches are done one line at a time */
5448 single_line = FALSE; /* dot does not match newline */
5449 for (; modifiers[0] != '\0'; modifiers++)
5450 switch (modifiers[0])
5451 {
5452 case 'i':
5453 ignore_case = TRUE;
5454 break;
5455 case 's':
5456 single_line = TRUE;
5457 /* FALLTHRU */
5458 case 'm':
5459 multi_line = TRUE;
5460 need_filebuf = TRUE;
5461 break;
5462 default:
5463 modifiers[1] = '\0';
5464 error ("invalid regexp modifier `%s'", modifiers);
5465 return;
5466 }
5426 5467
5427 patbuf = xnew (1, struct re_pattern_buffer); 5468 patbuf = xnew (1, struct re_pattern_buffer);
5428 *patbuf = zeropattern; 5469 *patbuf = zeropattern;
5429 if (ignore_case) 5470 if (ignore_case)
5430 patbuf->translate = lc_trans; /* translation table to fold case */ 5471 {
5472 static char lc_trans[CHARS];
5473 int i;
5474 for (i = 0; i < CHARS; i++)
5475 lc_trans[i] = lowcase (i);
5476 patbuf->translate = lc_trans; /* translation table to fold case */
5477 }
5478
5479 if (multi_line)
5480 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5481 else
5482 pat = regexp_pattern;
5431 5483
5432 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf); 5484 if (single_line)
5485 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5486 else
5487 re_set_syntax (RE_SYNTAX_EMACS);
5488
5489 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5490 if (multi_line)
5491 free (pat);
5433 if (err != NULL) 5492 if (err != NULL)
5434 { 5493 {
5435 error ("%s while compiling pattern", err); 5494 error ("%s while compiling pattern", err);
@@ -5445,6 +5504,7 @@ add_regex (regexp_pattern, ignore_case, lang)
5445 p_head->name_pattern = savestr (name); 5504 p_head->name_pattern = savestr (name);
5446 p_head->error_signaled = FALSE; 5505 p_head->error_signaled = FALSE;
5447 p_head->ignore_case = ignore_case; 5506 p_head->ignore_case = ignore_case;
5507 p_head->multi_line = multi_line;
5448} 5508}
5449 5509
5450/* 5510/*
@@ -5512,6 +5572,92 @@ free_patterns ()
5512 } 5572 }
5513 return; 5573 return;
5514} 5574}
5575
5576/*
5577 * Reads the whole file as a single string from `filebuf' and looks for
5578 * multi-line regular expressions, creating tags on matches.
5579 * readline already dealt with normal regexps.
5580 *
5581 * Idea by Ben Wing <ben@666.com> (2002).
5582 */
5583static void
5584regex_tag_multiline ()
5585{
5586 char *buffer = filebuf.buffer;
5587 pattern *pp;
5588
5589 for (pp = p_head; pp != NULL; pp = pp->p_next)
5590 {
5591 int match = 0;
5592
5593 if (!pp->multi_line)
5594 continue; /* skip normal regexps */
5595
5596 /* Generic initialisations before parsing file from memory. */
5597 lineno = 1; /* reset global line number */
5598 charno = 0; /* reset global char number */
5599 linecharno = 0; /* reset global char number of line start */
5600
5601 /* Only use generic regexps or those for the current language. */
5602 if (pp->lang != NULL && pp->lang != curfdp->lang)
5603 continue;
5604
5605 while (match >= 0 && match < filebuf.len)
5606 {
5607 match = re_search (pp->pat, buffer, filebuf.len, charno,
5608 filebuf.len - match, &pp->regs);
5609 switch (match)
5610 {
5611 case -2:
5612 /* Some error. */
5613 if (!pp->error_signaled)
5614 {
5615 error ("regexp stack overflow while matching \"%s\"",
5616 pp->regex);
5617 pp->error_signaled = TRUE;
5618 }
5619 break;
5620 case -1:
5621 /* No match. */
5622 break;
5623 default:
5624 if (match == pp->regs.end[0])
5625 {
5626 if (!pp->error_signaled)
5627 {
5628 error ("regexp matches the empty string: \"%s\"",
5629 pp->regex);
5630 pp->error_signaled = TRUE;
5631 }
5632 match = -3; /* exit from while loop */
5633 break;
5634 }
5635
5636 /* Match occurred. Construct a tag. */
5637 while (charno < pp->regs.end[0])
5638 if (buffer[charno++] == '\n')
5639 lineno++, linecharno = charno;
5640 if (pp->name_pattern[0] != '\0')
5641 {
5642 /* Make a named tag. */
5643 char *name = substitute (buffer,
5644 pp->name_pattern, &pp->regs);
5645 if (name != NULL)
5646 pfnote (name, TRUE, buffer + linecharno,
5647 charno - linecharno + 1, lineno, linecharno);
5648 }
5649 else
5650 {
5651 /* Make an unnamed tag. */
5652 pfnote ((char *)NULL, TRUE, buffer + linecharno,
5653 charno - linecharno + 1, lineno, linecharno);
5654 }
5655 break;
5656 }
5657 }
5658 }
5659}
5660
5515#endif /* ETAGS_REGEXPS */ 5661#endif /* ETAGS_REGEXPS */
5516 5662
5517 5663
@@ -5564,10 +5710,13 @@ initbuffer (lbp)
5564 * newline or CR-NL, if any. Return the number of characters read from 5710 * newline or CR-NL, if any. Return the number of characters read from
5565 * `stream', which is the length of the line including the newline. 5711 * `stream', which is the length of the line including the newline.
5566 * 5712 *
5567 * On DOS or Windows we do not count the CR character, if any, before the 5713 * On DOS or Windows we do not count the CR character, if any, before the
5568 * NL, in the returned length; this mirrors the behavior of emacs on those 5714 * NL, in the returned length; this mirrors the behavior of Emacs on those
5569 * platforms (for text files, it translates CR-NL to NL as it reads in the 5715 * platforms (for text files, it translates CR-NL to NL as it reads in the
5570 * file). 5716 * file).
5717 *
5718 * If multi-line regular expressions are requested, each line read is
5719 * appended to `filebuf'.
5571 */ 5720 */
5572static long 5721static long
5573readline_internal (lbp, stream) 5722readline_internal (lbp, stream)
@@ -5626,12 +5775,28 @@ readline_internal (lbp, stream)
5626 } 5775 }
5627 lbp->len = p - buffer; 5776 lbp->len = p - buffer;
5628 5777
5778 if (need_filebuf /* we need filebuf for multi-line regexps */
5779 && chars_deleted > 0) /* not at EOF */
5780 {
5781 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
5782 {
5783 /* Expand filebuf. */
5784 filebuf.size *= 2;
5785 xrnew (filebuf.buffer, filebuf.size, char);
5786 }
5787 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
5788 filebuf.len += lbp->len;
5789 filebuf.buffer[filebuf.len++] = '\n';
5790 filebuf.buffer[filebuf.len] = '\0';
5791 }
5792
5629 return lbp->len + chars_deleted; 5793 return lbp->len + chars_deleted;
5630} 5794}
5631 5795
5632/* 5796/*
5633 * Like readline_internal, above, but in addition try to match the 5797 * Like readline_internal, above, but in addition try to match the
5634 * input line against relevant regular expressions. 5798 * input line against relevant regular expressions and manage #line
5799 * directives.
5635 */ 5800 */
5636static void 5801static void
5637readline (lbp, stream) 5802readline (lbp, stream)
@@ -5752,8 +5917,8 @@ readline (lbp, stream)
5752 { 5917 {
5753 if (result > 0) 5918 if (result > 0)
5754 { 5919 {
5755 /* Do a tail recursion on ourselves, thus discarding the contents 5920 /* Do a tail recursion on ourselves, thus discarding the contents
5756 of the line buffer. */ 5921 of the line buffer. */
5757 readline (lbp, stream); 5922 readline (lbp, stream);
5758 return; 5923 return;
5759 } 5924 }
@@ -5772,8 +5937,11 @@ readline (lbp, stream)
5772 if (lbp->len > 0) 5937 if (lbp->len > 0)
5773 for (pp = p_head; pp != NULL; pp = pp->p_next) 5938 for (pp = p_head; pp != NULL; pp = pp->p_next)
5774 { 5939 {
5775 /* Only use generic regexps or those for the current language. */ 5940 /* Only use generic regexps or those for the current language.
5776 if (pp->lang != NULL && pp->lang != fdhead->lang) 5941 Also do not use multiline regexps, which is the job of
5942 regex_tag_multiline. */
5943 if ((pp->lang != NULL && pp->lang != fdhead->lang)
5944 || pp->multi_line)
5777 continue; 5945 continue;
5778 5946
5779 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs); 5947 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
@@ -5783,13 +5951,23 @@ readline (lbp, stream)
5783 /* Some error. */ 5951 /* Some error. */
5784 if (!pp->error_signaled) 5952 if (!pp->error_signaled)
5785 { 5953 {
5786 error ("error while matching \"%s\"", pp->regex); 5954 error ("regexp stack overflow while matching \"%s\"",
5955 pp->regex);
5787 pp->error_signaled = TRUE; 5956 pp->error_signaled = TRUE;
5788 } 5957 }
5789 break; 5958 break;
5790 case -1: 5959 case -1:
5791 /* No match. */ 5960 /* No match. */
5792 break; 5961 break;
5962 case 0:
5963 /* Empty string matched. */
5964 if (!pp->error_signaled)
5965 {
5966 error ("regexp matches the empty string: \"%s\"",
5967 pp->regex);
5968 pp->error_signaled = TRUE;
5969 }
5970 break;
5793 default: 5971 default:
5794 /* Match occurred. Construct a tag. */ 5972 /* Match occurred. Construct a tag. */
5795 if (pp->name_pattern[0] != '\0') 5973 if (pp->name_pattern[0] != '\0')
@@ -6229,6 +6407,6 @@ xrealloc (ptr, size)
6229 * indent-tabs-mode: t 6407 * indent-tabs-mode: t
6230 * tab-width: 8 6408 * tab-width: 8
6231 * fill-column: 79 6409 * fill-column: 79
6232 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node") 6410 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "pattern")
6233 * End: 6411 * End:
6234 */ 6412 */