aboutsummaryrefslogtreecommitdiffstats
path: root/src/syntax.c
diff options
context:
space:
mode:
author: Vibhav Pant, 2023-06-06 19:30:27 +0530
committer: Vibhav Pant, 2023-06-06 19:30:27 +0530
commit: 49ffcbf86a32a8a217538d4df3736fe069ccf35d (patch)
tree: a5f16157cc20fb19a844473a6fbd2b434f4c8260 /src/syntax.c
parent: af569fa3d90a717983b743eb97adbf869c6d1736 (diff)
parent: 7ca1d782f5910d0c3978c6798a45c6854ec668c7 (diff)
download: emacs-49ffcbf86a32a8a217538d4df3736fe069ccf35d.tar.gz
          emacs-49ffcbf86a32a8a217538d4df3736fe069ccf35d.zip
Merge branch 'master' into scratch/comp-static-data
Diffstat (limited to 'src/syntax.c')
-rw-r--r-- src/syntax.c 157
1 files changed, 73 insertions, 84 deletions
diff --git a/src/syntax.c b/src/syntax.c
index 79e16f652f3..0cac923bba7 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -178,14 +178,14 @@ static ptrdiff_t find_start_begv;
178static modiff_count find_start_modiff; 178static modiff_count find_start_modiff;
179 179
180 180
181static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool); 181static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object);
182static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object); 182static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
183static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool); 183static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
184static void scan_sexps_forward (struct lisp_parse_state *, 184static void scan_sexps_forward (struct lisp_parse_state *,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT, 185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
186 bool, int); 186 bool, int);
187static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *); 187static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
188static bool in_classes (int, Lisp_Object); 188static bool in_classes (int c, int num_classes, const unsigned char *classes);
189static void parse_sexp_propertize (ptrdiff_t charpos); 189static void parse_sexp_propertize (ptrdiff_t charpos);
190 190
191/* This setter is used only in this file, so it can be private. */ 191/* This setter is used only in this file, so it can be private. */
@@ -250,7 +250,6 @@ SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
250 gl_state.b_property = BEGV; 250 gl_state.b_property = BEGV;
251 gl_state.e_property = ZV + 1; 251 gl_state.e_property = ZV + 1;
252 gl_state.object = Qnil; 252 gl_state.object = Qnil;
253 gl_state.offset = 0;
254 if (parse_sexp_lookup_properties) 253 if (parse_sexp_lookup_properties)
255 { 254 {
256 if (count > 0) 255 if (count > 0)
@@ -266,46 +265,38 @@ SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
266/* Same as above, but in OBJECT. If OBJECT is nil, use current buffer. 265/* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case), 266 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether. 267 ignore properties altogether.
269 268 FROMBYTE is an regexp-byteoffset. */
270 This is meant for regex-emacs.c to use. For buffers, regex-emacs.c
271 passes arguments to the UPDATE_SYNTAX_TABLE functions which are
272 relative to BEGV. So if it is a buffer, we set the offset field to
273 BEGV. */
274 269
275void 270void
276SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object, 271RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
277 ptrdiff_t from, ptrdiff_t count) 272 ptrdiff_t frombyte)
278{ 273{
279 SETUP_BUFFER_SYNTAX_TABLE (); 274 SETUP_BUFFER_SYNTAX_TABLE ();
280 gl_state.object = object; 275 gl_state.object = object;
281 if (BUFFERP (gl_state.object)) 276 if (BUFFERP (gl_state.object))
282 { 277 {
283 struct buffer *buf = XBUFFER (gl_state.object); 278 struct buffer *buf = XBUFFER (gl_state.object);
284 gl_state.b_property = 1; 279 gl_state.b_property = BEG;
285 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; 280 gl_state.e_property = BUF_ZV (buf);
286 gl_state.offset = BUF_BEGV (buf) - 1;
287 } 281 }
288 else if (NILP (gl_state.object)) 282 else if (NILP (gl_state.object))
289 { 283 {
290 gl_state.b_property = 1; 284 gl_state.b_property = BEG;
291 gl_state.e_property = ZV - BEGV + 1; 285 gl_state.e_property = ZV; /* FIXME: Why not +1 like in SETUP_SYNTAX_TABLE? */
292 gl_state.offset = BEGV - 1;
293 } 286 }
294 else if (EQ (gl_state.object, Qt)) 287 else if (EQ (gl_state.object, Qt))
295 { 288 {
296 gl_state.b_property = 0; 289 gl_state.b_property = 0;
297 gl_state.e_property = PTRDIFF_MAX; 290 gl_state.e_property = PTRDIFF_MAX;
298 gl_state.offset = 0;
299 } 291 }
300 else 292 else
301 { 293 {
302 gl_state.b_property = 0; 294 gl_state.b_property = 0;
303 gl_state.e_property = 1 + SCHARS (gl_state.object); 295 gl_state.e_property = 1 + SCHARS (gl_state.object);
304 gl_state.offset = 0;
305 } 296 }
306 if (parse_sexp_lookup_properties) 297 if (parse_sexp_lookup_properties)
307 update_syntax_table (from + gl_state.offset - (count <= 0), 298 update_syntax_table (RE_SYNTAX_TABLE_BYTE_TO_CHAR (frombyte),
308 count, 1, gl_state.object); 299 1, 1, gl_state.object);
309} 300}
310 301
311/* Update gl_state to an appropriate interval which contains CHARPOS. The 302/* Update gl_state to an appropriate interval which contains CHARPOS. The
@@ -341,8 +332,8 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
341 if (!i) 332 if (!i)
342 return; 333 return;
343 i = gl_state.forward_i; 334 i = gl_state.forward_i;
344 gl_state.b_property = i->position - gl_state.offset; 335 gl_state.b_property = i->position;
345 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset; 336 gl_state.e_property = INTERVAL_LAST_POS (i);
346 } 337 }
347 else 338 else
348 { 339 {
@@ -362,7 +353,7 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
362 { 353 {
363 invalidate = false; 354 invalidate = false;
364 gl_state.forward_i = i; 355 gl_state.forward_i = i;
365 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset; 356 gl_state.e_property = INTERVAL_LAST_POS (i);
366 } 357 }
367 } 358 }
368 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */ 359 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
@@ -375,7 +366,7 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
375 { 366 {
376 invalidate = false; 367 invalidate = false;
377 gl_state.backward_i = i; 368 gl_state.backward_i = i;
378 gl_state.b_property = i->position - gl_state.offset; 369 gl_state.b_property = i->position;
379 } 370 }
380 } 371 }
381 } 372 }
@@ -391,12 +382,12 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
391 if (count > 0) 382 if (count > 0)
392 { 383 {
393 gl_state.backward_i = i; 384 gl_state.backward_i = i;
394 gl_state.b_property = i->position - gl_state.offset; 385 gl_state.b_property = i->position;
395 } 386 }
396 else 387 else
397 { 388 {
398 gl_state.forward_i = i; 389 gl_state.forward_i = i;
399 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset; 390 gl_state.e_property = INTERVAL_LAST_POS (i);
400 } 391 }
401 } 392 }
402 393
@@ -426,13 +417,13 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
426 { 417 {
427 if (count > 0) 418 if (count > 0)
428 { 419 {
429 gl_state.e_property = i->position - gl_state.offset; 420 gl_state.e_property = i->position;
430 gl_state.forward_i = i; 421 gl_state.forward_i = i;
431 } 422 }
432 else 423 else
433 { 424 {
434 gl_state.b_property 425 gl_state.b_property
435 = i->position + LENGTH (i) - gl_state.offset; 426 = i->position + LENGTH (i);
436 gl_state.backward_i = i; 427 gl_state.backward_i = i;
437 } 428 }
438 return; 429 return;
@@ -442,7 +433,7 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
442 if (count > 0) 433 if (count > 0)
443 { 434 {
444 gl_state.e_property 435 gl_state.e_property
445 = i->position + LENGTH (i) - gl_state.offset 436 = i->position + LENGTH (i)
446 /* e_property at EOB is not set to ZV but to ZV+1, so that 437 /* e_property at EOB is not set to ZV but to ZV+1, so that
447 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without 438 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
448 having to check eob between the two. */ 439 having to check eob between the two. */
@@ -451,7 +442,7 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
451 } 442 }
452 else 443 else
453 { 444 {
454 gl_state.b_property = i->position - gl_state.offset; 445 gl_state.b_property = i->position;
455 gl_state.backward_i = i; 446 gl_state.backward_i = i;
456 } 447 }
457 return; 448 return;
@@ -1616,7 +1607,7 @@ Char classes, e.g. `[:alpha:]', are supported.
1616Returns the distance traveled, either zero or positive. */) 1607Returns the distance traveled, either zero or positive. */)
1617 (Lisp_Object string, Lisp_Object lim) 1608 (Lisp_Object string, Lisp_Object lim)
1618{ 1609{
1619 return skip_chars (1, string, lim, 1); 1610 return skip_chars (1, string, lim);
1620} 1611}
1621 1612
1622DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0, 1613DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1625,7 +1616,7 @@ See `skip-chars-forward' for details.
1625Returns the distance traveled, either zero or negative. */) 1616Returns the distance traveled, either zero or negative. */)
1626 (Lisp_Object string, Lisp_Object lim) 1617 (Lisp_Object string, Lisp_Object lim)
1627{ 1618{
1628 return skip_chars (0, string, lim, 1); 1619 return skip_chars (0, string, lim);
1629} 1620}
1630 1621
1631DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0, 1622DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1652,8 +1643,7 @@ of this is the distance traveled. */)
1652} 1643}
1653 1644
1654static Lisp_Object 1645static Lisp_Object
1655skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, 1646skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim)
1656 bool handle_iso_classes)
1657{ 1647{
1658 int c; 1648 int c;
1659 char fastmap[0400]; 1649 char fastmap[0400];
@@ -1670,11 +1660,9 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1670 ptrdiff_t size_byte; 1660 ptrdiff_t size_byte;
1671 const unsigned char *str; 1661 const unsigned char *str;
1672 int len; 1662 int len;
1673 Lisp_Object iso_classes;
1674 USE_SAFE_ALLOCA; 1663 USE_SAFE_ALLOCA;
1675 1664
1676 CHECK_STRING (string); 1665 CHECK_STRING (string);
1677 iso_classes = Qnil;
1678 1666
1679 if (NILP (lim)) 1667 if (NILP (lim))
1680 XSETINT (lim, forwardp ? ZV : BEGV); 1668 XSETINT (lim, forwardp ? ZV : BEGV);
@@ -1709,6 +1697,8 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1709 If STRING contains non-ASCII characters, setup char_ranges for 1697 If STRING contains non-ASCII characters, setup char_ranges for
1710 them and use fastmap only for their leading codes. */ 1698 them and use fastmap only for their leading codes. */
1711 1699
1700 int nclasses = 0;
1701 unsigned char classes[RECC_NUM_CLASSES];
1712 if (! string_multibyte) 1702 if (! string_multibyte)
1713 { 1703 {
1714 bool string_has_eight_bit = 0; 1704 bool string_has_eight_bit = 0;
@@ -1716,18 +1706,16 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1716 /* At first setup fastmap. */ 1706 /* At first setup fastmap. */
1717 while (i_byte < size_byte) 1707 while (i_byte < size_byte)
1718 { 1708 {
1719 if (handle_iso_classes) 1709 const unsigned char *ch = str + i_byte;
1710 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1711 if (cc == 0)
1712 error ("Invalid ISO C character class");
1713 if (cc != -1)
1720 { 1714 {
1721 const unsigned char *ch = str + i_byte; 1715 if (!(nclasses && memchr (classes, cc, nclasses)))
1722 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte); 1716 classes[nclasses++] = cc;
1723 if (cc == 0) 1717 i_byte = ch - str;
1724 error ("Invalid ISO C character class"); 1718 continue;
1725 if (cc != -1)
1726 {
1727 iso_classes = Fcons (make_fixnum (cc), iso_classes);
1728 i_byte = ch - str;
1729 continue;
1730 }
1731 } 1719 }
1732 1720
1733 c = str[i_byte++]; 1721 c = str[i_byte++];
@@ -1812,18 +1800,16 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1812 { 1800 {
1813 int leading_code = str[i_byte]; 1801 int leading_code = str[i_byte];
1814 1802
1815 if (handle_iso_classes) 1803 const unsigned char *ch = str + i_byte;
1804 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1805 if (cc == 0)
1806 error ("Invalid ISO C character class");
1807 if (cc != -1)
1816 { 1808 {
1817 const unsigned char *ch = str + i_byte; 1809 if (!(nclasses && memchr (classes, cc, nclasses)))
1818 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte); 1810 classes[nclasses++] = cc;
1819 if (cc == 0) 1811 i_byte = ch - str;
1820 error ("Invalid ISO C character class"); 1812 continue;
1821 if (cc != -1)
1822 {
1823 iso_classes = Fcons (make_fixnum (cc), iso_classes);
1824 i_byte = ch - str;
1825 continue;
1826 }
1827 } 1813 }
1828 1814
1829 if (leading_code== '\\') 1815 if (leading_code== '\\')
@@ -1969,7 +1955,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1969 stop = endp; 1955 stop = endp;
1970 } 1956 }
1971 c = string_char_and_length (p, &nbytes); 1957 c = string_char_and_length (p, &nbytes);
1972 if (! NILP (iso_classes) && in_classes (c, iso_classes)) 1958 if (nclasses && in_classes (c, nclasses, classes))
1973 { 1959 {
1974 if (negate) 1960 if (negate)
1975 break; 1961 break;
@@ -2010,7 +1996,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
2010 stop = endp; 1996 stop = endp;
2011 } 1997 }
2012 1998
2013 if (!NILP (iso_classes) && in_classes (*p, iso_classes)) 1999 if (nclasses && in_classes (*p, nclasses, classes))
2014 { 2000 {
2015 if (negate) 2001 if (negate)
2016 break; 2002 break;
@@ -2044,7 +2030,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
2044 2030
2045 c = STRING_CHAR (p); 2031 c = STRING_CHAR (p);
2046 2032
2047 if (! NILP (iso_classes) && in_classes (c, iso_classes)) 2033 if (nclasses && in_classes (c, nclasses, classes))
2048 { 2034 {
2049 if (negate) 2035 if (negate)
2050 break; 2036 break;
@@ -2078,7 +2064,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
2078 stop = endp; 2064 stop = endp;
2079 } 2065 }
2080 2066
2081 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes)) 2067 if (nclasses && in_classes (p[-1], nclasses, classes))
2082 { 2068 {
2083 if (negate) 2069 if (negate)
2084 break; 2070 break;
@@ -2201,8 +2187,7 @@ skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2201 while (!parse_sexp_lookup_properties 2187 while (!parse_sexp_lookup_properties
2202 || pos < gl_state.e_property); 2188 || pos < gl_state.e_property);
2203 2189
2204 update_syntax_table_forward (pos + gl_state.offset, 2190 update_syntax_table_forward (pos, false, gl_state.object);
2205 false, gl_state.object);
2206 } 2191 }
2207 } 2192 }
2208 else 2193 else
@@ -2263,26 +2248,16 @@ skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2263 } 2248 }
2264} 2249}
2265 2250
2266/* Return true if character C belongs to one of the ISO classes 2251/* Return true if character C belongs to one of the ISO classes in the
2267 in the list ISO_CLASSES. Each class is represented by an 2252 array. */
2268 integer which is its type according to re_wctype. */
2269 2253
2270static bool 2254static bool
2271in_classes (int c, Lisp_Object iso_classes) 2255in_classes (int c, int nclasses, const unsigned char *classes)
2272{ 2256{
2273 bool fits_class = 0; 2257 for (int i = 0; i < nclasses; i++)
2274 2258 if (re_iswctype (c, classes[i]))
2275 while (CONSP (iso_classes)) 2259 return true;
2276 { 2260 return false;
2277 Lisp_Object elt;
2278 elt = XCAR (iso_classes);
2279 iso_classes = XCDR (iso_classes);
2280
2281 if (re_iswctype (c, XFIXNAT (elt)))
2282 fits_class = 1;
2283 }
2284
2285 return fits_class;
2286} 2261}
2287 2262
2288/* Jump over a comment, assuming we are at the beginning of one. 2263/* Jump over a comment, assuming we are at the beginning of one.
@@ -2348,13 +2323,16 @@ forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2348 return 0; 2323 return 0;
2349 } 2324 }
2350 c = FETCH_CHAR_AS_MULTIBYTE (from_byte); 2325 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2326 prev_syntax = syntax;
2351 syntax = SYNTAX_WITH_FLAGS (c); 2327 syntax = SYNTAX_WITH_FLAGS (c);
2352 code = syntax & 0xff; 2328 code = syntax & 0xff;
2353 if (code == Sendcomment 2329 if (code == Sendcomment
2354 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style 2330 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2355 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ? 2331 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2356 (nesting > 0 && --nesting == 0) : nesting < 0) 2332 (nesting > 0 && --nesting == 0) : nesting < 0)
2357 && !(comment_end_can_be_escaped && char_quoted (from, from_byte))) 2333 && !(comment_end_can_be_escaped
2334 && ((prev_syntax & 0xff) == Sescape
2335 || (prev_syntax & 0xff) == Scharquote)))
2358 /* We have encountered a comment end of the same style 2336 /* We have encountered a comment end of the same style
2359 as the comment sequence which began this comment 2337 as the comment sequence which began this comment
2360 section. */ 2338 section. */
@@ -2378,7 +2356,11 @@ forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2378 inc_both (&from, &from_byte); 2356 inc_both (&from, &from_byte);
2379 UPDATE_SYNTAX_TABLE_FORWARD (from); 2357 UPDATE_SYNTAX_TABLE_FORWARD (from);
2380 if (from == stop) continue; /* Failure */ 2358 if (from == stop) continue; /* Failure */
2381 } 2359 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2360 prev_syntax = syntax;
2361 syntax = Smax;
2362 code = syntax;
2363 }
2382 inc_both (&from, &from_byte); 2364 inc_both (&from, &from_byte);
2383 UPDATE_SYNTAX_TABLE_FORWARD (from); 2365 UPDATE_SYNTAX_TABLE_FORWARD (from);
2384 2366
@@ -3359,7 +3341,14 @@ do { prev_from = from; \
3359 are invalid now. Luckily, the `done' doesn't use them 3341 are invalid now. Luckily, the `done' doesn't use them
3360 and the INC_FROM sets them to a sane value without 3342 and the INC_FROM sets them to a sane value without
3361 looking at them. */ 3343 looking at them. */
3362 if (!found) goto done; 3344 if (!found)
3345 {
3346 if ((prev_from_syntax & 0xff) == Sescape
3347 || (prev_from_syntax & 0xff) == Scharquote)
3348 goto endquoted;
3349 else
3350 goto done;
3351 }
3363 INC_FROM; 3352 INC_FROM;
3364 state->incomment = 0; 3353 state->incomment = 0;
3365 state->comstyle = 0; /* reset the comment style */ 3354 state->comstyle = 0; /* reset the comment style */