aboutsummaryrefslogtreecommitdiffstats
path: root/lib-src
diff options
context:
space:
mode:
authorYuuki Harano2021-06-13 17:34:06 +0900
committerYuuki Harano2021-06-13 17:34:06 +0900
commit7d5e94bada09e642a8bfc4f66804f7948bad40bc (patch)
tree38629672102b31bb38a855f24d4dd009e212c10d /lib-src
parent7673b6b9eb0af3add73e1614a466f142092b00aa (diff)
parentdc471feee3bcac872cc52cdc73282955cd2d219d (diff)
downloademacs-7d5e94bada09e642a8bfc4f66804f7948bad40bc.tar.gz
emacs-7d5e94bada09e642a8bfc4f66804f7948bad40bc.zip
Merge branch 'master' of git.sv.gnu.org:/srv/git/emacs into feature/pgtk
Diffstat (limited to 'lib-src')
-rw-r--r--lib-src/etags.c505
1 files changed, 501 insertions, 4 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c
index d703183cef7..9f20e44caf4 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -142,7 +142,14 @@ University of California, as described above. */
142# define CTAGS false 142# define CTAGS false
143#endif 143#endif
144 144
145/* Copy to DEST from SRC (containing LEN bytes), and append a NUL byte. */ 145/* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate
146 Mercury from Objective C, which have same file extensions .m
147 See comments before function test_objc_is_mercury for details. */
148#ifndef MERCURY_HEURISTICS_RATIO
149# define MERCURY_HEURISTICS_RATIO 0.5
150#endif
151
152/* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte. */
146static void 153static void
147memcpyz (void *dest, void const *src, ptrdiff_t len) 154memcpyz (void *dest, void const *src, ptrdiff_t len)
148{ 155{
@@ -359,6 +366,7 @@ static void HTML_labels (FILE *);
359static void Lisp_functions (FILE *); 366static void Lisp_functions (FILE *);
360static void Lua_functions (FILE *); 367static void Lua_functions (FILE *);
361static void Makefile_targets (FILE *); 368static void Makefile_targets (FILE *);
369static void Mercury_functions (FILE *);
362static void Pascal_functions (FILE *); 370static void Pascal_functions (FILE *);
363static void Perl_functions (FILE *); 371static void Perl_functions (FILE *);
364static void PHP_functions (FILE *); 372static void PHP_functions (FILE *);
@@ -379,6 +387,7 @@ static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *);
379static bool nocase_tail (const char *); 387static bool nocase_tail (const char *);
380static void get_tag (char *, char **); 388static void get_tag (char *, char **);
381static void get_lispy_tag (char *); 389static void get_lispy_tag (char *);
390static void test_objc_is_mercury (char *, language **);
382 391
383static void analyze_regex (char *); 392static void analyze_regex (char *);
384static void free_regexps (void); 393static void free_regexps (void);
@@ -684,10 +693,22 @@ static const char Makefile_help [] =
684"In makefiles, targets are tags; additionally, variables are tags\n\ 693"In makefiles, targets are tags; additionally, variables are tags\n\
685unless you specify '--no-globals'."; 694unless you specify '--no-globals'.";
686 695
696/* Mercury and Objective C share the same .m file extensions. */
697static const char *Mercury_suffixes [] =
698 {"m",
699 NULL};
700static const char Mercury_help [] =
701 "In Mercury code, tags are all declarations beginning a line with ':-'\n\
702and optionally Prolog-like definitions (first rule for a predicate or \
703function).\n\
704To enable this behavior, run etags using --declarations.";
705static bool with_mercury_definitions = false;
706float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
707
687static const char *Objc_suffixes [] = 708static const char *Objc_suffixes [] =
688 { "lm", /* Objective lex file */ 709 { "lm", /* Objective lex file */
689 "m", /* Objective C file */ 710 "m", /* By default, Objective C file will be assumed. */
690 NULL }; 711 NULL};
691static const char Objc_help [] = 712static const char Objc_help [] =
692"In Objective C code, tags include Objective C definitions for classes,\n\ 713"In Objective C code, tags include Objective C definitions for classes,\n\
693class categories, methods and protocols. Tags for variables and\n\ 714class categories, methods and protocols. Tags for variables and\n\
@@ -831,7 +852,9 @@ static language lang_names [] =
831 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes }, 852 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
832 { "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters}, 853 { "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
833 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames}, 854 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
855 /* objc listed before mercury as it is a better default for .m extensions. */
834 { "objc", Objc_help, plain_C_entries, Objc_suffixes }, 856 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
857 { "mercury", Mercury_help, Mercury_functions, Mercury_suffixes },
835 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes }, 858 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
836 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters}, 859 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
837 { "php", PHP_help, PHP_functions, PHP_suffixes }, 860 { "php", PHP_help, PHP_functions, PHP_suffixes },
@@ -958,6 +981,9 @@ Relative ones are stored relative to the output file's directory.\n");
958 puts 981 puts
959 ("\tand create tags for extern variables unless --no-globals is used."); 982 ("\tand create tags for extern variables unless --no-globals is used.");
960 983
984 puts ("In Mercury, tag both declarations starting a line with ':-' and first\n\
985 predicates or functions in clauses.");
986
961 if (CTAGS) 987 if (CTAGS)
962 puts ("-d, --defines\n\ 988 puts ("-d, --defines\n\
963 Create tag entries for C #define constants and enum constants, too."); 989 Create tag entries for C #define constants and enum constants, too.");
@@ -1783,6 +1809,11 @@ find_entries (FILE *inf)
1783 if (parser == NULL) 1809 if (parser == NULL)
1784 { 1810 {
1785 lang = get_language_from_filename (curfdp->infname, true); 1811 lang = get_language_from_filename (curfdp->infname, true);
1812
1813 /* Disambiguate file names between Objc and Mercury. */
1814 if (lang != NULL && strcmp (lang->name, "objc") == 0)
1815 test_objc_is_mercury (curfdp->infname, &lang);
1816
1786 if (lang != NULL && lang->function != NULL) 1817 if (lang != NULL && lang->function != NULL)
1787 { 1818 {
1788 curfdp->lang = lang; 1819 curfdp->lang = lang;
@@ -6072,6 +6103,472 @@ prolog_atom (char *s, size_t pos)
6072 6103
6073 6104
6074/* 6105/*
6106 * Support for Mercury
6107 *
6108 * Assumes that the declarations start at column 0.
6109 * Original code by Sunichirou Sugou (1989) for Prolog.
6110 * Rewritten by Anders Lindgren (1996) for Prolog.
6111 * Adapted by Fabrice Nicol (2021) for Mercury.
6112 * Note: Prolog-support behavior is preserved if
6113 * --declarations is used, corresponding to
6114 * with_mercury_definitions=true.
6115 */
6116
6117static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
6118static void mercury_skip_comment (linebuffer *, FILE *);
6119static bool is_mercury_type = false;
6120static bool is_mercury_quantifier = false;
6121static bool is_mercury_declaration = false;
6122
6123/*
6124 * Objective-C and Mercury have identical file extension .m.
6125 * To disambiguate between Objective C and Mercury, parse file
6126 * with the following heuristics hook:
6127 * - if line starts with :-, choose Mercury unconditionally;
6128 * - if line starts with #, @, choose Objective-C;
6129 * - otherwise compute the following ratio:
6130 *
6131 * r = (number of lines with :-
6132 * or % in non-commented parts or . at trimmed EOL)
6133 * / (number of lines - number of lines starting by any amount
6134 * of whitespace, optionally followed by comment(s))
6135 *
6136 * Note: strings are neglected in counts.
6137 *
6138 * If r > mercury_heuristics_ratio, choose Mercury.
6139 * Experimental tests show that a possibly optimal default value for
6140 * this floor value is around 0.5. This is the default value for
6141 * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
6142 * The closer r is to 0.5, the closer the source code to pure Prolog.
6143 * Idiomatic Mercury is scored either with r = 1.0 or higher.
6144 * Objective-C is scored with r = 0.0. When this fails, the r-score
6145 * never rose above 0.1 in Objective-C tests.
6146 */
6147
6148static void
6149test_objc_is_mercury (char *this_file, language **lang)
6150{
6151 if (this_file == NULL) return;
6152 FILE* fp = fopen (this_file, "r");
6153 if (fp == NULL)
6154 pfatal (this_file);
6155
6156 bool blank_line = false; /* Line starting with any amount of white space
6157 followed by optional comment(s). */
6158 bool commented_line = false;
6159 bool found_dot = false;
6160 bool only_space_before = true;
6161 bool start_of_line = true;
6162 int c;
6163 intmax_t lines = 1;
6164 intmax_t mercury_dots = 0;
6165 intmax_t percentage_signs = 0;
6166 intmax_t rule_signs = 0;
6167 float ratio = 0;
6168
6169 while ((c = fgetc (fp)) != EOF)
6170 {
6171 switch (c)
6172 {
6173 case '\n':
6174 if (! blank_line) ++lines;
6175 blank_line = true;
6176 commented_line = false;
6177 start_of_line = true;
6178 if (found_dot) ++mercury_dots;
6179 found_dot = false;
6180 only_space_before = true;
6181 break;
6182 case '.':
6183 found_dot = ! commented_line;
6184 only_space_before = false;
6185 break;
6186 case '%': /* More frequent in Mercury. May be modulo in Obj.-C. */
6187 if (! commented_line)
6188 {
6189 ++percentage_signs;
6190 /* Cannot tell if it is a comment or modulo yet for sure.
6191 Yet works for heuristic purposes. */
6192 commented_line = true;
6193 }
6194 found_dot = false;
6195 start_of_line = false;
6196 only_space_before = false;
6197 break;
6198 case '/':
6199 {
6200 int d = fgetc (fp);
6201 found_dot = false;
6202 only_space_before = false;
6203 if (! commented_line)
6204 {
6205 if (d == '*')
6206 commented_line = true;
6207 else
6208 /* If d == '/', cannot tell if it is an Obj.-C comment:
6209 may be Mercury integ. division. */
6210 blank_line = false;
6211 }
6212 }
6213 FALLTHROUGH;
6214 case ' ':
6215 case '\t':
6216 start_of_line = false;
6217 break;
6218 case ':':
6219 c = fgetc (fp);
6220 if (start_of_line)
6221 {
6222 if (c == '-')
6223 {
6224 ratio = 1.0; /* Failsafe, not an operator in Obj.-C. */
6225 goto out;
6226 }
6227 start_of_line = false;
6228 }
6229 else
6230 {
6231 /* p :- q. Frequent in Mercury.
6232 Rare or in quoted exprs in Obj.-C. */
6233 if (c == '-' && ! commented_line)
6234 ++rule_signs;
6235 }
6236 blank_line = false;
6237 found_dot = false;
6238 only_space_before = false;
6239 break;
6240 case '@':
6241 case '#':
6242 if (start_of_line || only_space_before)
6243 {
6244 ratio = 0.0;
6245 goto out;
6246 }
6247 FALLTHROUGH;
6248 default:
6249 start_of_line = false;
6250 blank_line = false;
6251 found_dot = false;
6252 only_space_before = false;
6253 }
6254 }
6255
6256 /* Fallback heuristic test. Not failsafe but errless in pratice. */
6257 ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
6258
6259 out:
6260 if (fclose (fp) == EOF)
6261 pfatal (this_file);
6262
6263 if (ratio > mercury_heuristics_ratio)
6264 {
6265 /* Change the language from Objective-C to Mercury. */
6266 static language lang0 = { "mercury", Mercury_help, Mercury_functions,
6267 Mercury_suffixes };
6268 *lang = &lang0;
6269 }
6270}
6271
6272static void
6273Mercury_functions (FILE *inf)
6274{
6275 char *cp, *last = NULL;
6276 ptrdiff_t lastlen = 0, allocated = 0;
6277 if (declarations) with_mercury_definitions = true;
6278
6279 LOOP_ON_INPUT_LINES (inf, lb, cp)
6280 {
6281 if (cp[0] == '\0') /* Empty line. */
6282 continue;
6283 else if (c_isspace (cp[0]) || cp[0] == '%')
6284 /* A Prolog-type comment or anything other than a declaration. */
6285 continue;
6286 else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */
6287 mercury_skip_comment (&lb, inf);
6288 else
6289 {
6290 is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
6291
6292 if (is_mercury_declaration
6293 || with_mercury_definitions)
6294 {
6295 ptrdiff_t len = mercury_pr (cp, last, lastlen);
6296 if (0 < len)
6297 {
6298 /* Store the declaration to avoid generating duplicate
6299 tags later. */
6300 if (allocated <= len)
6301 {
6302 xrnew (last, len + 1, 1);
6303 allocated = len + 1;
6304 }
6305 memcpyz (last, cp, len);
6306 lastlen = len;
6307 }
6308 }
6309 }
6310 }
6311 free (last);
6312}
6313
6314static void
6315mercury_skip_comment (linebuffer *plb, FILE *inf)
6316{
6317 char *cp;
6318
6319 do
6320 {
6321 for (cp = plb->buffer; *cp != '\0'; ++cp)
6322 if (cp[0] == '*' && cp[1] == '/')
6323 return;
6324 readline (plb, inf);
6325 }
6326 while (perhaps_more_input (inf));
6327}
6328
6329/*
6330 * A declaration is added if it matches:
6331 * <beginning of line>:-<whitespace><Mercury Term><whitespace>(
6332 * If with_mercury_definitions == true, we also add:
6333 * <beginning of line><Mercury item><whitespace>(
6334 * or <beginning of line><Mercury item><whitespace>:-
6335 * As for Prolog support, different arities and types are not taken into
6336 * consideration.
6337 * Item is added to the tags database if it doesn't match the
6338 * name of the previous declaration.
6339 *
6340 * Consume a Mercury declaration.
6341 * Return the number of bytes consumed, or 0 if there was an error.
6342 *
6343 * A Mercury declaration must be one of:
6344 * :- type
6345 * :- solver type
6346 * :- pred
6347 * :- func
6348 * :- inst
6349 * :- mode
6350 * :- typeclass
6351 * :- instance
6352 * :- pragma
6353 * :- promise
6354 * :- initialise
6355 * :- finalise
6356 * :- mutable
6357 * :- module
6358 * :- interface
6359 * :- implementation
6360 * :- import_module
6361 * :- use_module
6362 * :- include_module
6363 * :- end_module
6364 * followed on the same line by an alphanumeric sequence, starting with a lower
6365 * case letter or by a single-quoted arbitrary string.
6366 * Single quotes can escape themselves. Backslash quotes everything.
6367 *
6368 * Return the size of the name of the declaration or 0 if no header was found.
6369 * As quantifiers may precede functions or predicates, we must list them too.
6370 */
6371
6372static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
6373 "func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
6374 "initialise", "finalise", "mutable", "module", "interface", "implementation",
6375 "import_module", "use_module", "include_module", "end_module", "some", "all"};
6376
6377static size_t
6378mercury_decl (char *s, size_t pos)
6379{
6380 if (s == NULL) return 0;
6381
6382 size_t origpos;
6383 origpos = pos;
6384
6385 while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos;
6386
6387 unsigned char decl_type_length = pos - origpos;
6388 char buf[decl_type_length + 1];
6389 memset (buf, 0, decl_type_length + 1);
6390
6391 /* Mercury declaration tags. Consume them, then check the declaration item
6392 following :- is legitimate, then go on as in the prolog case. */
6393
6394 memcpy (buf, &s[origpos], decl_type_length);
6395
6396 bool found_decl_tag = false;
6397
6398 if (is_mercury_quantifier)
6399 {
6400 if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */
6401 return 0;
6402 is_mercury_quantifier = false; /* Reset to base value. */
6403 found_decl_tag = true;
6404 }
6405 else
6406 {
6407 for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
6408 {
6409 if (strcmp (buf, Mercury_decl_tags[j]) == 0)
6410 {
6411 found_decl_tag = true;
6412 if (strcmp (buf, "type") == 0)
6413 is_mercury_type = true;
6414
6415 if (strcmp (buf, "some") == 0
6416 || strcmp (buf, "all") == 0)
6417 {
6418 is_mercury_quantifier = true;
6419 }
6420
6421 break; /* Found declaration tag of rank j. */
6422 }
6423 else
6424 /* 'solver type' has a blank in the middle,
6425 so this is the hard case. */
6426 if (strcmp (buf, "solver") == 0)
6427 {
6428 ++pos;
6429 while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_'))
6430 ++pos;
6431
6432 decl_type_length = pos - origpos;
6433 char buf2[decl_type_length + 1];
6434 memset (buf2, 0, decl_type_length + 1);
6435 memcpy (buf2, &s[origpos], decl_type_length);
6436
6437 if (strcmp (buf2, "solver type") == 0)
6438 {
6439 found_decl_tag = false;
6440 break; /* Found declaration tag of rank j. */
6441 }
6442 }
6443 }
6444 }
6445
6446 /* If with_mercury_definitions == false
6447 * this is a Mercury syntax error, ignoring... */
6448
6449 if (with_mercury_definitions)
6450 {
6451 if (found_decl_tag)
6452 pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
6453 else
6454 /* Prolog-like behavior
6455 * we have parsed the predicate once, yet inappropriately
6456 * so restarting again the parsing step. */
6457 pos = 0;
6458 }
6459 else
6460 {
6461 if (found_decl_tag)
6462 pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
6463 else
6464 return 0;
6465 }
6466
6467 /* From now on it is the same as for Prolog except for module dots. */
6468
6469 if (c_islower (s[pos]) || s[pos] == '_' )
6470 {
6471 /* The name is unquoted.
6472 Do not confuse module dots with end-of-declaration dots. */
6473
6474 while (c_isalnum (s[pos])
6475 || s[pos] == '_'
6476 || (s[pos] == '.' /* A module dot. */
6477 && s + pos + 1 != NULL
6478 && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')))
6479 ++pos;
6480
6481 return pos - origpos;
6482 }
6483 else if (s[pos] == '\'')
6484 {
6485 ++pos;
6486 for (;;)
6487 {
6488 if (s[pos] == '\'')
6489 {
6490 ++pos;
6491 if (s[pos] != '\'')
6492 break;
6493 ++pos; /* A double quote. */
6494 }
6495 else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */
6496 return 0;
6497 else if (s[pos] == '\\')
6498 {
6499 if (s[pos+1] == '\0')
6500 return 0;
6501 pos += 2;
6502 }
6503 else
6504 ++pos;
6505 }
6506 return pos - origpos;
6507 }
6508 else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */
6509 {
6510 for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {}
6511 if (s + pos == NULL) return 0;
6512 ++pos;
6513 pos = skip_spaces (s + pos) - s;
6514 return mercury_decl (s, pos) + pos - origpos;
6515 }
6516 else
6517 return 0;
6518}
6519
6520static ptrdiff_t
6521mercury_pr (char *s, char *last, ptrdiff_t lastlen)
6522{
6523 size_t len0 = 0;
6524 is_mercury_type = false;
6525 is_mercury_quantifier = false;
6526
6527 if (is_mercury_declaration)
6528 {
6529 /* Skip len0 blanks only for declarations. */
6530 len0 = skip_spaces (s + 2) - s;
6531 }
6532
6533 size_t len = mercury_decl (s, len0);
6534 if (len == 0) return 0;
6535 len += len0;
6536
6537 if (( (s[len] == '.' /* This is a statement dot, not a module dot. */
6538 || (s[len] == '(' && (len += 1))
6539 || (s[len] == ':' /* Stopping in case of a rule. */
6540 && s[len + 1] == '-'
6541 && (len += 2)))
6542 && (lastlen != len || memcmp (s, last, len) != 0)
6543 )
6544 /* Types are often declared on several lines so keeping just
6545 the first line. */
6546 || is_mercury_type)
6547 {
6548 char *name = skip_non_spaces (s + len0);
6549 size_t namelen;
6550 if (name >= s + len)
6551 {
6552 name = s;
6553 namelen = len;
6554 }
6555 else
6556 {
6557 name = skip_spaces (name);
6558 namelen = len - (name - s);
6559 }
6560 /* Remove trailing non-name characters. */
6561 while (namelen > 0 && notinname (name[namelen - 1]))
6562 namelen--;
6563 make_tag (name, namelen, true, s, len, lineno, linecharno);
6564 return len;
6565 }
6566
6567 return 0;
6568}
6569
6570
6571/*
6075 * Support for Erlang 6572 * Support for Erlang
6076 * 6573 *
6077 * Generates tags for functions, defines, and records. 6574 * Generates tags for functions, defines, and records.