aboutsummaryrefslogtreecommitdiffstats
path: root/src/syntax.c
diff options
context:
space:
mode:
authorKenichi Handa2003-09-08 12:53:41 +0000
committerKenichi Handa2003-09-08 12:53:41 +0000
commit8f924df7df019cce90537647de2627581043b5c4 (patch)
tree6c40bd05679425e710d6b2e5649eae3da5e40a52 /src/syntax.c
parent463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (diff)
parent9d4807432a01f9b3cc519fcfa3ea92a70ffa7f43 (diff)
downloademacs-8f924df7df019cce90537647de2627581043b5c4.tar.gz
emacs-8f924df7df019cce90537647de2627581043b5c4.zip
*** empty log message ***
Diffstat (limited to 'src/syntax.c')
-rw-r--r--src/syntax.c903
1 files changed, 553 insertions, 350 deletions
diff --git a/src/syntax.c b/src/syntax.c
index 706706a53a1..5b25371fcbc 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA. */
24#include "lisp.h" 24#include "lisp.h"
25#include "commands.h" 25#include "commands.h"
26#include "buffer.h" 26#include "buffer.h"
27#include "charset.h" 27#include "character.h"
28#include "keymap.h" 28#include "keymap.h"
29 29
30/* Make syntax table lookup grant data in gl_state. */ 30/* Make syntax table lookup grant data in gl_state. */
@@ -97,7 +97,8 @@ static int find_start_modiff;
97static int find_defun_start P_ ((int, int)); 97static int find_defun_start P_ ((int, int));
98static int back_comment P_ ((int, int, int, int, int, int *, int *)); 98static int back_comment P_ ((int, int, int, int, int, int *, int *));
99static int char_quoted P_ ((int, int)); 99static int char_quoted P_ ((int, int));
100static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object)); 100static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object));
101static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
101static Lisp_Object scan_lists P_ ((int, int, int, int)); 102static Lisp_Object scan_lists P_ ((int, int, int, int));
102static void scan_sexps_forward P_ ((struct lisp_parse_state *, 103static void scan_sexps_forward P_ ((struct lisp_parse_state *,
103 int, int, int, int, 104 int, int, int, int,
@@ -293,7 +294,7 @@ char_quoted (charpos, bytepos)
293 while (bytepos >= beg) 294 while (bytepos >= beg)
294 { 295 {
295 UPDATE_SYNTAX_TABLE_BACKWARD (charpos); 296 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
296 code = SYNTAX (FETCH_CHAR (bytepos)); 297 code = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (bytepos));
297 if (! (code == Scharquote || code == Sescape)) 298 if (! (code == Scharquote || code == Sescape))
298 break; 299 break;
299 300
@@ -382,10 +383,10 @@ find_defun_start (pos, pos_byte)
382 { 383 {
383 /* Open-paren at start of line means we may have found our 384 /* Open-paren at start of line means we may have found our
384 defun-start. */ 385 defun-start. */
385 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen) 386 if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
386 { 387 {
387 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */ 388 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
388 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen) 389 if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
389 break; 390 break;
390 /* Now fallback to the default value. */ 391 /* Now fallback to the default value. */
391 gl_state.current_syntax_table = current_buffer->syntax_table; 392 gl_state.current_syntax_table = current_buffer->syntax_table;
@@ -505,7 +506,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
505 UPDATE_SYNTAX_TABLE_BACKWARD (from); 506 UPDATE_SYNTAX_TABLE_BACKWARD (from);
506 507
507 prev_syntax = syntax; 508 prev_syntax = syntax;
508 c = FETCH_CHAR (from_byte); 509 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
509 syntax = SYNTAX_WITH_FLAGS (c); 510 syntax = SYNTAX_WITH_FLAGS (c);
510 code = SYNTAX (c); 511 code = SYNTAX (c);
511 512
@@ -534,7 +535,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
534 int next = from, next_byte = from_byte, next_c, next_syntax; 535 int next = from, next_byte = from_byte, next_c, next_syntax;
535 DEC_BOTH (next, next_byte); 536 DEC_BOTH (next, next_byte);
536 UPDATE_SYNTAX_TABLE_BACKWARD (next); 537 UPDATE_SYNTAX_TABLE_BACKWARD (next);
537 next_c = FETCH_CHAR (next_byte); 538 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
538 next_syntax = SYNTAX_WITH_FLAGS (next_c); 539 next_syntax = SYNTAX_WITH_FLAGS (next_c);
539 if (((com2start || comnested) 540 if (((com2start || comnested)
540 && SYNTAX_FLAGS_COMEND_SECOND (syntax) 541 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
@@ -838,29 +839,6 @@ char syntax_code_spec[16] =
838static Lisp_Object Vsyntax_code_object; 839static Lisp_Object Vsyntax_code_object;
839 840
840 841
841/* Look up the value for CHARACTER in syntax table TABLE's parent
842 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
843 for CHARACTER. It's actually used only when not compiled with GCC. */
844
845Lisp_Object
846syntax_parent_lookup (table, character)
847 Lisp_Object table;
848 int character;
849{
850 Lisp_Object value;
851
852 while (1)
853 {
854 table = XCHAR_TABLE (table)->parent;
855 if (NILP (table))
856 return Qnil;
857
858 value = XCHAR_TABLE (table)->contents[character];
859 if (!NILP (value))
860 return value;
861 }
862}
863
864DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0, 842DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
865 doc: /* Return the syntax code of CHARACTER, described by a character. 843 doc: /* Return the syntax code of CHARACTER, described by a character.
866For example, if CHARACTER is a word constituent, 844For example, if CHARACTER is a word constituent,
@@ -979,6 +957,8 @@ DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
979 doc: /* Set syntax for character CHAR according to string NEWENTRY. 957 doc: /* Set syntax for character CHAR according to string NEWENTRY.
980The syntax is changed only for table SYNTAX_TABLE, which defaults to 958The syntax is changed only for table SYNTAX_TABLE, which defaults to
981 the current buffer's syntax table. 959 the current buffer's syntax table.
960CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
961in the range MIN and MAX are changed.
982The first character of NEWENTRY should be one of the following: 962The first character of NEWENTRY should be one of the following:
983 Space or - whitespace syntax. w word constituent. 963 Space or - whitespace syntax. w word constituent.
984 _ symbol constituent. . punctuation. 964 _ symbol constituent. . punctuation.
@@ -1015,14 +995,24 @@ usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1015 (c, newentry, syntax_table) 995 (c, newentry, syntax_table)
1016 Lisp_Object c, newentry, syntax_table; 996 Lisp_Object c, newentry, syntax_table;
1017{ 997{
1018 CHECK_NUMBER (c); 998 if (CONSP (c))
999 {
1000 CHECK_CHARACTER_CAR (c);
1001 CHECK_CHARACTER_CDR (c);
1002 }
1003 else
1004 CHECK_CHARACTER (c);
1019 1005
1020 if (NILP (syntax_table)) 1006 if (NILP (syntax_table))
1021 syntax_table = current_buffer->syntax_table; 1007 syntax_table = current_buffer->syntax_table;
1022 else 1008 else
1023 check_syntax_table (syntax_table); 1009 check_syntax_table (syntax_table);
1024 1010
1025 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry)); 1011 newentry = Fstring_to_syntax (newentry);
1012 if (CONSP (c))
1013 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1014 else
1015 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1026 return Qnil; 1016 return Qnil;
1027} 1017}
1028 1018
@@ -1176,6 +1166,10 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1176 1166
1177int parse_sexp_ignore_comments; 1167int parse_sexp_ignore_comments;
1178 1168
1169/* Char-table of functions that find the next or previous word
1170 boundary. */
1171Lisp_Object Vfind_word_boundary_function_table;
1172
1179/* Return the position across COUNT words from FROM. 1173/* Return the position across COUNT words from FROM.
1180 If that many words cannot be found before the end of the buffer, return 0. 1174 If that many words cannot be found before the end of the buffer, return 0.
1181 COUNT negative means scan backward and stop at word beginning. */ 1175 COUNT negative means scan backward and stop at word beginning. */
@@ -1189,6 +1183,7 @@ scan_words (from, count)
1189 register int from_byte = CHAR_TO_BYTE (from); 1183 register int from_byte = CHAR_TO_BYTE (from);
1190 register enum syntaxcode code; 1184 register enum syntaxcode code;
1191 int ch0, ch1; 1185 int ch0, ch1;
1186 Lisp_Object func, script, pos;
1192 1187
1193 immediate_quit = 1; 1188 immediate_quit = 1;
1194 QUIT; 1189 QUIT;
@@ -1205,7 +1200,7 @@ scan_words (from, count)
1205 return 0; 1200 return 0;
1206 } 1201 }
1207 UPDATE_SYNTAX_TABLE_FORWARD (from); 1202 UPDATE_SYNTAX_TABLE_FORWARD (from);
1208 ch0 = FETCH_CHAR (from_byte); 1203 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1209 code = SYNTAX (ch0); 1204 code = SYNTAX (ch0);
1210 INC_BOTH (from, from_byte); 1205 INC_BOTH (from, from_byte);
1211 if (words_include_escapes 1206 if (words_include_escapes
@@ -1216,18 +1211,33 @@ scan_words (from, count)
1216 } 1211 }
1217 /* Now CH0 is a character which begins a word and FROM is the 1212 /* Now CH0 is a character which begins a word and FROM is the
1218 position of the next character. */ 1213 position of the next character. */
1219 while (1) 1214 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1215 if (! NILP (Ffboundp (func)))
1220 { 1216 {
1221 if (from == end) break; 1217 pos = call2 (func, make_number (from - 1), make_number (end));
1222 UPDATE_SYNTAX_TABLE_FORWARD (from); 1218 if (INTEGERP (pos) && XINT (pos) > from)
1223 ch1 = FETCH_CHAR (from_byte); 1219 {
1224 code = SYNTAX (ch1); 1220 from = XINT (pos);
1225 if (!(words_include_escapes 1221 from_byte = CHAR_TO_BYTE (from);
1226 && (code == Sescape || code == Scharquote))) 1222 }
1227 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1)) 1223 }
1228 break; 1224 else
1229 INC_BOTH (from, from_byte); 1225 {
1230 ch0 = ch1; 1226 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1227 while (1)
1228 {
1229 if (from == end) break;
1230 UPDATE_SYNTAX_TABLE_FORWARD (from);
1231 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1232 code = SYNTAX (ch1);
1233 if ((code != Sword
1234 && (! words_include_escapes
1235 || (code != Sescape && code != Scharquote)))
1236 || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
1237 break;
1238 INC_BOTH (from, from_byte);
1239 ch0 = ch1;
1240 }
1231 } 1241 }
1232 count--; 1242 count--;
1233 } 1243 }
@@ -1242,7 +1252,7 @@ scan_words (from, count)
1242 } 1252 }
1243 DEC_BOTH (from, from_byte); 1253 DEC_BOTH (from, from_byte);
1244 UPDATE_SYNTAX_TABLE_BACKWARD (from); 1254 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1245 ch1 = FETCH_CHAR (from_byte); 1255 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1246 code = SYNTAX (ch1); 1256 code = SYNTAX (ch1);
1247 if (words_include_escapes 1257 if (words_include_escapes
1248 && (code == Sescape || code == Scharquote)) 1258 && (code == Sescape || code == Scharquote))
@@ -1252,22 +1262,37 @@ scan_words (from, count)
1252 } 1262 }
1253 /* Now CH1 is a character which ends a word and FROM is the 1263 /* Now CH1 is a character which ends a word and FROM is the
1254 position of it. */ 1264 position of it. */
1255 while (1) 1265 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1266 if (! NILP (Ffboundp (func)))
1267 {
1268 pos = call2 (func, make_number (from), make_number (beg));
1269 if (INTEGERP (pos) && XINT (pos) < from)
1270 {
1271 from = XINT (pos);
1272 from_byte = CHAR_TO_BYTE (from);
1273 }
1274 }
1275 else
1256 { 1276 {
1257 int temp_byte; 1277 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1278 while (1)
1279 {
1280 int temp_byte;
1258 1281
1259 if (from == beg) 1282 if (from == beg)
1260 break; 1283 break;
1261 temp_byte = dec_bytepos (from_byte); 1284 temp_byte = dec_bytepos (from_byte);
1262 UPDATE_SYNTAX_TABLE_BACKWARD (from); 1285 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1263 ch0 = FETCH_CHAR (temp_byte); 1286 ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
1264 code = SYNTAX (ch0); 1287 code = SYNTAX (ch0);
1265 if (!(words_include_escapes 1288 if ((code != Sword
1266 && (code == Sescape || code == Scharquote))) 1289 && (! words_include_escapes
1267 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1)) 1290 || (code != Sescape && code != Scharquote)))
1268 break; 1291 || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
1269 DEC_BOTH (from, from_byte); 1292 break;
1270 ch1 = ch0; 1293 DEC_BOTH (from, from_byte);
1294 ch1 = ch0;
1295 }
1271 } 1296 }
1272 count++; 1297 count++;
1273 } 1298 }
@@ -1316,7 +1341,7 @@ they will be treated as literals. */)
1316 (string, lim) 1341 (string, lim)
1317 Lisp_Object string, lim; 1342 Lisp_Object string, lim;
1318{ 1343{
1319 return skip_chars (1, 0, string, lim); 1344 return skip_chars (1, string, lim);
1320} 1345}
1321 1346
1322DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0, 1347DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1326,7 +1351,7 @@ Returns the distance traveled, either zero or negative. */)
1326 (string, lim) 1351 (string, lim)
1327 Lisp_Object string, lim; 1352 Lisp_Object string, lim;
1328{ 1353{
1329 return skip_chars (0, 0, string, lim); 1354 return skip_chars (0, string, lim);
1330} 1355}
1331 1356
1332DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0, 1357DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1338,7 +1363,7 @@ This function returns the distance traveled, either zero or positive. */)
1338 (syntax, lim) 1363 (syntax, lim)
1339 Lisp_Object syntax, lim; 1364 Lisp_Object syntax, lim;
1340{ 1365{
1341 return skip_chars (1, 1, syntax, lim); 1366 return skip_syntaxes (1, syntax, lim);
1342} 1367}
1343 1368
1344DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0, 1369DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
@@ -1350,54 +1375,32 @@ This function returns the distance traveled, either zero or negative. */)
1350 (syntax, lim) 1375 (syntax, lim)
1351 Lisp_Object syntax, lim; 1376 Lisp_Object syntax, lim;
1352{ 1377{
1353 return skip_chars (0, 1, syntax, lim); 1378 return skip_syntaxes (0, syntax, lim);
1354} 1379}
1355 1380
1356static Lisp_Object 1381static Lisp_Object
1357skip_chars (forwardp, syntaxp, string, lim) 1382skip_chars (forwardp, string, lim)
1358 int forwardp, syntaxp; 1383 int forwardp;
1359 Lisp_Object string, lim; 1384 Lisp_Object string, lim;
1360{ 1385{
1361 register unsigned int c; 1386 register unsigned int c;
1362 unsigned char fastmap[0400]; 1387 unsigned char fastmap[0400];
1363 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters 1388 /* Store the ranges of non-ASCII characters. */
1364 of which codes don't fit in FASTMAP. In that case, set the
1365 ranges of characters in CHAR_RANGES. */
1366 int *char_ranges; 1389 int *char_ranges;
1367 int n_char_ranges = 0; 1390 int n_char_ranges = 0;
1368 int negate = 0; 1391 int negate = 0;
1369 register int i, i_byte; 1392 register int i, i_byte;
1370 int multibyte = !NILP (current_buffer->enable_multibyte_characters); 1393 /* Set to 1 if the current buffer is multibyte and the region
1394 contains non-ASCII chars. */
1395 int multibyte;
1396 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1397 chars. */
1371 int string_multibyte; 1398 int string_multibyte;
1372 int size_byte; 1399 int size_byte;
1373 const unsigned char *str; 1400 const unsigned char *str;
1374 int len; 1401 int len;
1375 1402
1376 CHECK_STRING (string); 1403 CHECK_STRING (string);
1377 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1378 string_multibyte = STRING_MULTIBYTE (string);
1379 str = SDATA (string);
1380 size_byte = SBYTES (string);
1381
1382 /* Adjust the multibyteness of the string to that of the buffer. */
1383 if (multibyte != string_multibyte)
1384 {
1385 int nbytes;
1386
1387 if (multibyte)
1388 nbytes = count_size_as_multibyte (SDATA (string),
1389 SCHARS (string));
1390 else
1391 nbytes = SCHARS (string);
1392 if (nbytes != size_byte)
1393 {
1394 unsigned char *tmp = (unsigned char *) alloca (nbytes);
1395 copy_text (SDATA (string), tmp, size_byte,
1396 string_multibyte, multibyte);
1397 size_byte = nbytes;
1398 str = tmp;
1399 }
1400 }
1401 1404
1402 if (NILP (lim)) 1405 if (NILP (lim))
1403 XSETINT (lim, forwardp ? ZV : BEGV); 1406 XSETINT (lim, forwardp ? ZV : BEGV);
@@ -1410,10 +1413,18 @@ skip_chars (forwardp, syntaxp, string, lim)
1410 if (XINT (lim) < BEGV) 1413 if (XINT (lim) < BEGV)
1411 XSETFASTINT (lim, BEGV); 1414 XSETFASTINT (lim, BEGV);
1412 1415
1416 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1417 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1418 string_multibyte = SBYTES (string) > SCHARS (string);
1419
1413 bzero (fastmap, sizeof fastmap); 1420 bzero (fastmap, sizeof fastmap);
1421 if (multibyte)
1422 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1414 1423
1415 i_byte = 0; 1424 str = SDATA (string);
1425 size_byte = SBYTES (string);
1416 1426
1427 i_byte = 0;
1417 if (i_byte < size_byte 1428 if (i_byte < size_byte
1418 && SREF (string, 0) == '^') 1429 && SREF (string, 0) == '^')
1419 { 1430 {
@@ -1421,23 +1432,110 @@ skip_chars (forwardp, syntaxp, string, lim)
1421 } 1432 }
1422 1433
1423 /* Find the characters specified and set their elements of fastmap. 1434 /* Find the characters specified and set their elements of fastmap.
1424 If syntaxp, each character counts as itself. 1435 Handle backslashes and ranges specially.
1425 Otherwise, handle backslashes and ranges specially. */
1426 1436
1427 while (i_byte < size_byte) 1437 If STRING contains non-ASCII characters, setup char_ranges for
1438 them and use fastmap only for their leading codes. */
1439
1440 if (! string_multibyte)
1428 { 1441 {
1429 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len); 1442 int string_has_eight_bit = 0;
1430 i_byte += len;
1431 1443
1432 if (syntaxp) 1444 /* At first setup fastmap. */
1433 fastmap[syntax_spec_code[c & 0377]] = 1; 1445 while (i_byte < size_byte)
1434 else 1446 {
1447 c = str[i_byte++];
1448
1449 if (c == '\\')
1450 {
1451 if (i_byte == size_byte)
1452 break;
1453
1454 c = str[i_byte++];
1455 }
1456 if (i_byte < size_byte
1457 && str[i_byte] == '-')
1458 {
1459 unsigned int c2;
1460
1461 /* Skip over the dash. */
1462 i_byte++;
1463
1464 if (i_byte == size_byte)
1465 break;
1466
1467 /* Get the end of the range. */
1468 c2 = str[i_byte++];
1469 if (c2 == '\\'
1470 && i_byte < size_byte)
1471 c2 = str[i_byte++];
1472
1473 if (c <= c2)
1474 {
1475 while (c <= c2)
1476 fastmap[c++] = 1;
1477 if (! ASCII_CHAR_P (c2))
1478 string_has_eight_bit = 1;
1479 }
1480 }
1481 else
1482 {
1483 fastmap[c] = 1;
1484 if (! ASCII_CHAR_P (c))
1485 string_has_eight_bit = 1;
1486 }
1487 }
1488
1489 /* If the current range is multibyte and STRING contains
1490 eight-bit chars, arrange fastmap and setup char_ranges for
1491 the corresponding multibyte chars. */
1492 if (multibyte && string_has_eight_bit)
1493 {
1494 unsigned char fastmap2[0400];
1495 int range_start_byte, range_start_char;
1496
1497 bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
1498 bzero (fastmap + 0200, 0200);
1499 /* We are sure that this loop stops. */
1500 for (i = 0200; ! fastmap2[i]; i++);
1501 c = unibyte_char_to_multibyte (i);
1502 fastmap[CHAR_LEADING_CODE (c)] = 1;
1503 range_start_byte = i;
1504 range_start_char = c;
1505 for (i = 129; i < 0400; i++)
1506 {
1507 c = unibyte_char_to_multibyte (i);
1508 fastmap[CHAR_LEADING_CODE (c)] = 1;
1509 if (i - range_start_byte != c - range_start_char)
1510 {
1511 char_ranges[n_char_ranges++] = range_start_char;
1512 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1513 + range_start_char);
1514 range_start_byte = i;
1515 range_start_char = c;
1516 }
1517 }
1518 char_ranges[n_char_ranges++] = range_start_char;
1519 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1520 + range_start_char);
1521 }
1522 }
1523 else
1524 {
1525 while (i_byte < size_byte)
1435 { 1526 {
1527 unsigned char leading_code;
1528
1529 leading_code = str[i_byte];
1530 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1531 i_byte += len;
1532
1436 if (c == '\\') 1533 if (c == '\\')
1437 { 1534 {
1438 if (i_byte == size_byte) 1535 if (i_byte == size_byte)
1439 break; 1536 break;
1440 1537
1538 leading_code = str[i_byte];
1441 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len); 1539 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1442 i_byte += len; 1540 i_byte += len;
1443 } 1541 }
@@ -1445,6 +1543,7 @@ skip_chars (forwardp, syntaxp, string, lim)
1445 && str[i_byte] == '-') 1543 && str[i_byte] == '-')
1446 { 1544 {
1447 unsigned int c2; 1545 unsigned int c2;
1546 unsigned char leading_code2;
1448 1547
1449 /* Skip over the dash. */ 1548 /* Skip over the dash. */
1450 i_byte++; 1549 i_byte++;
@@ -1453,55 +1552,81 @@ skip_chars (forwardp, syntaxp, string, lim)
1453 break; 1552 break;
1454 1553
1455 /* Get the end of the range. */ 1554 /* Get the end of the range. */
1456 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len); 1555 leading_code2 = str[i_byte];
1556 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1457 i_byte += len; 1557 i_byte += len;
1458 1558
1459 if (SINGLE_BYTE_CHAR_P (c)) 1559 if (c2 == '\\'
1560 && i_byte < size_byte)
1561 {
1562 leading_code2 = str[i_byte];
1563 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1564 i_byte += len;
1565 }
1566
1567 if (ASCII_CHAR_P (c))
1460 { 1568 {
1461 if (! SINGLE_BYTE_CHAR_P (c2)) 1569 while (c <= c2 && c < 0x80)
1570 fastmap[c++] = 1;
1571 leading_code = CHAR_LEADING_CODE (c);
1572 }
1573 if (! ASCII_CHAR_P (c))
1574 {
1575 while (leading_code <= leading_code2)
1576 fastmap[leading_code++] = 1;
1577 if (c <= c2)
1462 { 1578 {
1463 /* Handle a range starting with a character of 1579 char_ranges[n_char_ranges++] = c;
1464 less than 256, and ending with a character of
1465 not less than 256. Split that into two
1466 ranges, the low one ending at 0377, and the
1467 high one starting at the smallest character
1468 in the charset of C2 and ending at C2. */
1469 int charset = CHAR_CHARSET (c2);
1470 int c1 = MAKE_CHAR (charset, 0, 0);
1471
1472 char_ranges[n_char_ranges++] = c1;
1473 char_ranges[n_char_ranges++] = c2; 1580 char_ranges[n_char_ranges++] = c2;
1474 c2 = 0377;
1475 }
1476 while (c <= c2)
1477 {
1478 fastmap[c] = 1;
1479 c++;
1480 } 1581 }
1481 } 1582 }
1482 else if (c <= c2) /* Both C and C2 are multibyte char. */
1483 {
1484 char_ranges[n_char_ranges++] = c;
1485 char_ranges[n_char_ranges++] = c2;
1486 }
1487 } 1583 }
1488 else 1584 else
1489 { 1585 {
1490 if (SINGLE_BYTE_CHAR_P (c)) 1586 if (ASCII_CHAR_P (c))
1491 fastmap[c] = 1; 1587 fastmap[c] = 1;
1492 else 1588 else
1493 { 1589 {
1590 fastmap[leading_code] = 1;
1494 char_ranges[n_char_ranges++] = c; 1591 char_ranges[n_char_ranges++] = c;
1495 char_ranges[n_char_ranges++] = c; 1592 char_ranges[n_char_ranges++] = c;
1496 } 1593 }
1497 } 1594 }
1498 } 1595 }
1596
1597 /* If the current range is unibyte and STRING contains non-ASCII
1598 chars, arrange fastmap for the corresponding unibyte
1599 chars. */
1600
1601 if (! multibyte && n_char_ranges > 0)
1602 {
1603 bzero (fastmap + 0200, 0200);
1604 for (i = 0; i < n_char_ranges; i += 2)
1605 {
1606 int c1 = char_ranges[i];
1607 int c2 = char_ranges[i + 1];
1608
1609 for (; c1 <= c2; c1++)
1610 fastmap[CHAR_TO_BYTE8 (c1)] = 1;
1611 }
1612 }
1499 } 1613 }
1500 1614
1501 /* If ^ was the first character, complement the fastmap. */ 1615 /* If ^ was the first character, complement the fastmap. */
1502 if (negate) 1616 if (negate)
1503 for (i = 0; i < sizeof fastmap; i++) 1617 {
1504 fastmap[i] ^= 1; 1618 if (! multibyte)
1619 for (i = 0; i < sizeof fastmap; i++)
1620 fastmap[i] ^= 1;
1621 else
1622 {
1623 for (i = 0; i < 0200; i++)
1624 fastmap[i] ^= 1;
1625 /* All non-ASCII chars possibly match. */
1626 for (; i < sizeof fastmap; i++)
1627 fastmap[i] = 1;
1628 }
1629 }
1505 1630
1506 { 1631 {
1507 int start_point = PT; 1632 int start_point = PT;
@@ -1511,224 +1636,283 @@ skip_chars (forwardp, syntaxp, string, lim)
1511 1636
1512 if (forwardp) 1637 if (forwardp)
1513 { 1638 {
1514 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim)); 1639 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1515 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp; 1640 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1516 } 1641 }
1517 else 1642 else
1518 { 1643 {
1519 endp = CHAR_POS_ADDR (XINT (lim)); 1644 endp = CHAR_POS_ADDR (XINT (lim));
1520 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp; 1645 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1521 } 1646 }
1522 1647
1523 immediate_quit = 1; 1648 immediate_quit = 1;
1524 if (syntaxp) 1649 if (forwardp)
1525 { 1650 {
1526 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1); 1651 if (multibyte)
1527 if (forwardp) 1652 while (1)
1528 { 1653 {
1529 if (multibyte) 1654 int nbytes;
1530 while (1)
1531 {
1532 int nbytes;
1533 1655
1534 if (p >= stop) 1656 if (p >= stop)
1535 {
1536 if (p >= endp)
1537 break;
1538 p = GAP_END_ADDR;
1539 stop = endp;
1540 }
1541 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1542 if (! fastmap[(int) SYNTAX (c)])
1543 break;
1544 p += nbytes, pos++, pos_byte += nbytes;
1545 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1546 }
1547 else
1548 while (1)
1549 { 1657 {
1550 if (p >= stop) 1658 if (p >= endp)
1551 {
1552 if (p >= endp)
1553 break;
1554 p = GAP_END_ADDR;
1555 stop = endp;
1556 }
1557 if (! fastmap[(int) SYNTAX (*p)])
1558 break; 1659 break;
1559 p++, pos++; 1660 p = GAP_END_ADDR;
1560 UPDATE_SYNTAX_TABLE_FORWARD (pos); 1661 stop = endp;
1561 } 1662 }
1562 } 1663 if (! fastmap[*p])
1563 else 1664 break;
1564 { 1665 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1565 if (multibyte) 1666 if (! ASCII_CHAR_P (c))
1566 while (1)
1567 { 1667 {
1568 unsigned char *prev_p; 1668 /* As we are looking at a multibyte character, we
1569 int nbytes; 1669 must look up the character in the table
1570 1670 CHAR_RANGES. If there's no data in the table,
1571 if (p <= stop) 1671 that character is not what we want to skip. */
1572 { 1672
1573 if (p <= endp) 1673 /* The following code do the right thing even if
1574 break; 1674 n_char_ranges is zero (i.e. no data in
1575 p = GPT_ADDR; 1675 CHAR_RANGES). */
1576 stop = endp; 1676 for (i = 0; i < n_char_ranges; i += 2)
1577 } 1677 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1578 prev_p = p;
1579 while (--p >= stop && ! CHAR_HEAD_P (*p));
1580 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1581 if (prev_p - p > nbytes)
1582 p = prev_p - 1, c = *p, nbytes = 1;
1583 else
1584 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1585 pos--, pos_byte -= nbytes;
1586 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1587 if (! fastmap[(int) SYNTAX (c)])
1588 {
1589 pos++;
1590 pos_byte += nbytes;
1591 break; 1678 break;
1592 } 1679 if (!(negate ^ (i < n_char_ranges)))
1680 break;
1593 } 1681 }
1594 else 1682 p += nbytes, pos++, pos_byte += nbytes;
1595 while (1) 1683 }
1684 else
1685 while (1)
1686 {
1687 if (p >= stop)
1596 { 1688 {
1597 if (p <= stop) 1689 if (p >= endp)
1598 {
1599 if (p <= endp)
1600 break;
1601 p = GPT_ADDR;
1602 stop = endp;
1603 }
1604 if (! fastmap[(int) SYNTAX (p[-1])])
1605 break; 1690 break;
1606 p--, pos--; 1691 p = GAP_END_ADDR;
1607 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1); 1692 stop = endp;
1608 } 1693 }
1609 } 1694 if (!fastmap[*p])
1695 break;
1696 p++, pos++, pos_byte++;
1697 }
1610 } 1698 }
1611 else 1699 else
1612 { 1700 {
1613 if (forwardp) 1701 if (multibyte)
1614 { 1702 while (1)
1615 if (multibyte) 1703 {
1616 while (1) 1704 unsigned char *prev_p;
1617 {
1618 int nbytes;
1619 1705
1620 if (p >= stop) 1706 if (p <= stop)
1621 {
1622 if (p >= endp)
1623 break;
1624 p = GAP_END_ADDR;
1625 stop = endp;
1626 }
1627 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1628 if (SINGLE_BYTE_CHAR_P (c))
1629 {
1630 if (!fastmap[c])
1631 break;
1632 }
1633 else
1634 {
1635 /* If we are looking at a multibyte character,
1636 we must look up the character in the table
1637 CHAR_RANGES. If there's no data in the
1638 table, that character is not what we want to
1639 skip. */
1640
1641 /* The following code do the right thing even if
1642 n_char_ranges is zero (i.e. no data in
1643 CHAR_RANGES). */
1644 for (i = 0; i < n_char_ranges; i += 2)
1645 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1646 break;
1647 if (!(negate ^ (i < n_char_ranges)))
1648 break;
1649 }
1650 p += nbytes, pos++, pos_byte += nbytes;
1651 }
1652 else
1653 while (1)
1654 { 1707 {
1655 if (p >= stop) 1708 if (p <= endp)
1656 {
1657 if (p >= endp)
1658 break;
1659 p = GAP_END_ADDR;
1660 stop = endp;
1661 }
1662 if (!fastmap[*p])
1663 break; 1709 break;
1664 p++, pos++; 1710 p = GPT_ADDR;
1711 stop = endp;
1665 } 1712 }
1666 } 1713 prev_p = p;
1667 else 1714 while (--p >= stop && ! CHAR_HEAD_P (*p));
1668 { 1715 if (! fastmap[*p])
1669 if (multibyte) 1716 break;
1670 while (1) 1717 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1718 if (! ASCII_CHAR_P (c))
1671 { 1719 {
1672 unsigned char *prev_p; 1720 /* See the comment in the previous similar code. */
1673 int nbytes; 1721 for (i = 0; i < n_char_ranges; i += 2)
1674 1722 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1675 if (p <= stop) 1723 break;
1676 { 1724 if (!(negate ^ (i < n_char_ranges)))
1677 if (p <= endp) 1725 break;
1678 break;
1679 p = GPT_ADDR;
1680 stop = endp;
1681 }
1682 prev_p = p;
1683 while (--p >= stop && ! CHAR_HEAD_P (*p));
1684 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1685 if (prev_p - p > nbytes)
1686 p = prev_p - 1, c = *p, nbytes = 1;
1687 else
1688 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1689 if (SINGLE_BYTE_CHAR_P (c))
1690 {
1691 if (!fastmap[c])
1692 break;
1693 }
1694 else
1695 {
1696 /* See the comment in the previous similar code. */
1697 for (i = 0; i < n_char_ranges; i += 2)
1698 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1699 break;
1700 if (!(negate ^ (i < n_char_ranges)))
1701 break;
1702 }
1703 pos--, pos_byte -= nbytes;
1704 } 1726 }
1705 else 1727 pos--, pos_byte -= prev_p - p;
1706 while (1) 1728 }
1729 else
1730 while (1)
1731 {
1732 if (p <= stop)
1707 { 1733 {
1708 if (p <= stop) 1734 if (p <= endp)
1709 {
1710 if (p <= endp)
1711 break;
1712 p = GPT_ADDR;
1713 stop = endp;
1714 }
1715 if (!fastmap[p[-1]])
1716 break; 1735 break;
1717 p--, pos--; 1736 p = GPT_ADDR;
1737 stop = endp;
1718 } 1738 }
1719 } 1739 if (!fastmap[p[-1]])
1740 break;
1741 p--, pos--, pos_byte--;
1742 }
1720 } 1743 }
1721 1744
1722#if 0 /* Not needed now that a position in mid-character 1745 SET_PT_BOTH (pos, pos_byte);
1723 cannot be specified in Lisp. */ 1746 immediate_quit = 0;
1724 if (multibyte 1747
1725 /* INC_POS or DEC_POS might have moved POS over LIM. */ 1748 return make_number (PT - start_point);
1726 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim)))) 1749 }
1727 pos = XINT (lim); 1750}
1728#endif
1729 1751
1730 if (! multibyte) 1752
1731 pos_byte = pos; 1753static Lisp_Object
1754skip_syntaxes (forwardp, string, lim)
1755 int forwardp;
1756 Lisp_Object string, lim;
1757{
1758 register unsigned int c;
1759 unsigned char fastmap[0400];
1760 int negate = 0;
1761 register int i, i_byte;
1762 int multibyte;
1763 int size_byte;
1764 unsigned char *str;
1765
1766 CHECK_STRING (string);
1767
1768 if (NILP (lim))
1769 XSETINT (lim, forwardp ? ZV : BEGV);
1770 else
1771 CHECK_NUMBER_COERCE_MARKER (lim);
1772
1773 /* In any case, don't allow scan outside bounds of buffer. */
1774 if (XINT (lim) > ZV)
1775 XSETFASTINT (lim, ZV);
1776 if (XINT (lim) < BEGV)
1777 XSETFASTINT (lim, BEGV);
1778
1779 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1780 return Qnil;
1781
1782 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1783 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1784
1785 bzero (fastmap, sizeof fastmap);
1786
1787 if (SBYTES (string) > SCHARS (string))
1788 /* As this is very rare case (syntax spec is ASCII only), don't
1789 consider efficiency. */
1790 string = string_make_unibyte (string);
1791
1792 str = SDATA (string);
1793 size_byte = SBYTES (string);
1794
1795 i_byte = 0;
1796 if (i_byte < size_byte
1797 && SREF (string, 0) == '^')
1798 {
1799 negate = 1; i_byte++;
1800 }
1801
1802 /* Find the syntaxes specified and set their elements of fastmap. */
1803
1804 while (i_byte < size_byte)
1805 {
1806 c = str[i_byte++];
1807 fastmap[syntax_spec_code[c]] = 1;
1808 }
1809
1810 /* If ^ was the first character, complement the fastmap. */
1811 if (negate)
1812 for (i = 0; i < sizeof fastmap; i++)
1813 fastmap[i] ^= 1;
1814
1815 {
1816 int start_point = PT;
1817 int pos = PT;
1818 int pos_byte = PT_BYTE;
1819 unsigned char *p = PT_ADDR, *endp, *stop;
1820
1821 if (forwardp)
1822 {
1823 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1824 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1825 }
1826 else
1827 {
1828 endp = CHAR_POS_ADDR (XINT (lim));
1829 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1830 }
1831
1832 immediate_quit = 1;
1833 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1834 if (forwardp)
1835 {
1836 if (multibyte)
1837 {
1838 while (1)
1839 {
1840 int nbytes;
1841
1842 if (p >= stop)
1843 {
1844 if (p >= endp)
1845 break;
1846 p = GAP_END_ADDR;
1847 stop = endp;
1848 }
1849 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1850 if (! fastmap[(int) SYNTAX (c)])
1851 break;
1852 p += nbytes, pos++, pos_byte += nbytes;
1853 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1854 }
1855 }
1856 else
1857 {
1858 while (1)
1859 {
1860 if (p >= stop)
1861 {
1862 if (p >= endp)
1863 break;
1864 p = GAP_END_ADDR;
1865 stop = endp;
1866 }
1867 if (! fastmap[(int) SYNTAX (*p)])
1868 break;
1869 p++, pos++, pos_byte++;
1870 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1871 }
1872 }
1873 }
1874 else
1875 {
1876 if (multibyte)
1877 {
1878 while (1)
1879 {
1880 unsigned char *prev_p;
1881
1882 if (p <= stop)
1883 {
1884 if (p <= endp)
1885 break;
1886 p = GPT_ADDR;
1887 stop = endp;
1888 }
1889 prev_p = p;
1890 while (--p >= stop && ! CHAR_HEAD_P (*p));
1891 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1892 if (! fastmap[(int) SYNTAX (c)])
1893 break;
1894 pos--, pos_byte -= prev_p - p;
1895 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1896 }
1897 }
1898 else
1899 {
1900 while (1)
1901 {
1902 if (p <= stop)
1903 {
1904 if (p <= endp)
1905 break;
1906 p = GPT_ADDR;
1907 stop = endp;
1908 }
1909 if (! fastmap[(int) SYNTAX (p[-1])])
1910 break;
1911 p--, pos--, pos_byte--;
1912 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1913 }
1914 }
1915 }
1732 1916
1733 SET_PT_BOTH (pos, pos_byte); 1917 SET_PT_BOTH (pos, pos_byte);
1734 immediate_quit = 0; 1918 immediate_quit = 0;
@@ -1788,7 +1972,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1788 *bytepos_ptr = from_byte; 1972 *bytepos_ptr = from_byte;
1789 return 0; 1973 return 0;
1790 } 1974 }
1791 c = FETCH_CHAR (from_byte); 1975 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1792 syntax = SYNTAX_WITH_FLAGS (c); 1976 syntax = SYNTAX_WITH_FLAGS (c);
1793 code = syntax & 0xff; 1977 code = syntax & 0xff;
1794 if (code == Sendcomment 1978 if (code == Sendcomment
@@ -1818,7 +2002,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1818 forw_incomment: 2002 forw_incomment:
1819 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax) 2003 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1820 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style 2004 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1821 && (c1 = FETCH_CHAR (from_byte), 2005 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
1822 SYNTAX_COMEND_SECOND (c1)) 2006 SYNTAX_COMEND_SECOND (c1))
1823 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) || 2007 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1824 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0)) 2008 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
@@ -1837,7 +2021,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1837 if (nesting > 0 2021 if (nesting > 0
1838 && from < stop 2022 && from < stop
1839 && SYNTAX_FLAGS_COMSTART_FIRST (syntax) 2023 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1840 && (c1 = FETCH_CHAR (from_byte), 2024 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
1841 SYNTAX_COMMENT_STYLE (c1) == style 2025 SYNTAX_COMMENT_STYLE (c1) == style
1842 && SYNTAX_COMSTART_SECOND (c1)) 2026 && SYNTAX_COMSTART_SECOND (c1))
1843 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) || 2027 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
@@ -1901,7 +2085,7 @@ between them, return t; otherwise return nil. */)
1901 immediate_quit = 0; 2085 immediate_quit = 0;
1902 return Qnil; 2086 return Qnil;
1903 } 2087 }
1904 c = FETCH_CHAR (from_byte); 2088 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1905 code = SYNTAX (c); 2089 code = SYNTAX (c);
1906 comstart_first = SYNTAX_COMSTART_FIRST (c); 2090 comstart_first = SYNTAX_COMSTART_FIRST (c);
1907 comnested = SYNTAX_COMMENT_NESTED (c); 2091 comnested = SYNTAX_COMMENT_NESTED (c);
@@ -1909,7 +2093,7 @@ between them, return t; otherwise return nil. */)
1909 INC_BOTH (from, from_byte); 2093 INC_BOTH (from, from_byte);
1910 UPDATE_SYNTAX_TABLE_FORWARD (from); 2094 UPDATE_SYNTAX_TABLE_FORWARD (from);
1911 if (from < stop && comstart_first 2095 if (from < stop && comstart_first
1912 && (c1 = FETCH_CHAR (from_byte), 2096 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
1913 SYNTAX_COMSTART_SECOND (c1))) 2097 SYNTAX_COMSTART_SECOND (c1)))
1914 { 2098 {
1915 /* We have encountered a comment start sequence and we 2099 /* We have encountered a comment start sequence and we
@@ -1967,7 +2151,7 @@ between them, return t; otherwise return nil. */)
1967 DEC_BOTH (from, from_byte); 2151 DEC_BOTH (from, from_byte);
1968 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */ 2152 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1969 quoted = char_quoted (from, from_byte); 2153 quoted = char_quoted (from, from_byte);
1970 c = FETCH_CHAR (from_byte); 2154 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1971 code = SYNTAX (c); 2155 code = SYNTAX (c);
1972 comstyle = 0; 2156 comstyle = 0;
1973 comnested = SYNTAX_COMMENT_NESTED (c); 2157 comnested = SYNTAX_COMMENT_NESTED (c);
@@ -1984,7 +2168,7 @@ between them, return t; otherwise return nil. */)
1984 code = Sendcomment; 2168 code = Sendcomment;
1985 /* Calling char_quoted, above, set up global syntax position 2169 /* Calling char_quoted, above, set up global syntax position
1986 at the new value of FROM. */ 2170 at the new value of FROM. */
1987 c1 = FETCH_CHAR (from_byte); 2171 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1988 comstyle = SYNTAX_COMMENT_STYLE (c1); 2172 comstyle = SYNTAX_COMMENT_STYLE (c1);
1989 comnested = comnested || SYNTAX_COMMENT_NESTED (c1); 2173 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1990 } 2174 }
@@ -2000,7 +2184,7 @@ between them, return t; otherwise return nil. */)
2000 if (from == stop) 2184 if (from == stop)
2001 break; 2185 break;
2002 UPDATE_SYNTAX_TABLE_BACKWARD (from); 2186 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2003 c = FETCH_CHAR (from_byte); 2187 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2004 if (SYNTAX (c) == Scomment_fence 2188 if (SYNTAX (c) == Scomment_fence
2005 && !char_quoted (from, from_byte)) 2189 && !char_quoted (from, from_byte))
2006 { 2190 {
@@ -2061,11 +2245,11 @@ between them, return t; otherwise return nil. */)
2061 return Qt; 2245 return Qt;
2062} 2246}
2063 2247
2064/* Return syntax code of character C if C is a single byte character 2248/* Return syntax code of character C if C is an ASCII character
2065 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */ 2249 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
2066 2250
2067#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \ 2251#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2068 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \ 2252 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
2069 ? SYNTAX (c) : Ssymbol) 2253 ? SYNTAX (c) : Ssymbol)
2070 2254
2071static Lisp_Object 2255static Lisp_Object
@@ -2108,7 +2292,7 @@ scan_lists (from, count, depth, sexpflag)
2108 { 2292 {
2109 int comstart_first, prefix; 2293 int comstart_first, prefix;
2110 UPDATE_SYNTAX_TABLE_FORWARD (from); 2294 UPDATE_SYNTAX_TABLE_FORWARD (from);
2111 c = FETCH_CHAR (from_byte); 2295 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2112 code = SYNTAX_WITH_MULTIBYTE_CHECK (c); 2296 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2113 comstart_first = SYNTAX_COMSTART_FIRST (c); 2297 comstart_first = SYNTAX_COMSTART_FIRST (c);
2114 comnested = SYNTAX_COMMENT_NESTED (c); 2298 comnested = SYNTAX_COMMENT_NESTED (c);
@@ -2119,7 +2303,7 @@ scan_lists (from, count, depth, sexpflag)
2119 INC_BOTH (from, from_byte); 2303 INC_BOTH (from, from_byte);
2120 UPDATE_SYNTAX_TABLE_FORWARD (from); 2304 UPDATE_SYNTAX_TABLE_FORWARD (from);
2121 if (from < stop && comstart_first 2305 if (from < stop && comstart_first
2122 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte)) 2306 && SYNTAX_COMSTART_SECOND (FETCH_CHAR_AS_MULTIBYTE (from_byte))
2123 && parse_sexp_ignore_comments) 2307 && parse_sexp_ignore_comments)
2124 { 2308 {
2125 /* we have encountered a comment start sequence and we 2309 /* we have encountered a comment start sequence and we
@@ -2128,7 +2312,7 @@ scan_lists (from, count, depth, sexpflag)
2128 only a comment end of the same style actually ends 2312 only a comment end of the same style actually ends
2129 the comment section */ 2313 the comment section */
2130 code = Scomment; 2314 code = Scomment;
2131 c1 = FETCH_CHAR (from_byte); 2315 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2132 comstyle = SYNTAX_COMMENT_STYLE (c1); 2316 comstyle = SYNTAX_COMMENT_STYLE (c1);
2133 comnested = comnested || SYNTAX_COMMENT_NESTED (c1); 2317 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2134 INC_BOTH (from, from_byte); 2318 INC_BOTH (from, from_byte);
@@ -2154,7 +2338,7 @@ scan_lists (from, count, depth, sexpflag)
2154 UPDATE_SYNTAX_TABLE_FORWARD (from); 2338 UPDATE_SYNTAX_TABLE_FORWARD (from);
2155 2339
2156 /* Some compilers can't handle this inside the switch. */ 2340 /* Some compilers can't handle this inside the switch. */
2157 c = FETCH_CHAR (from_byte); 2341 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2158 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c); 2342 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2159 switch (temp) 2343 switch (temp)
2160 { 2344 {
@@ -2197,7 +2381,7 @@ scan_lists (from, count, depth, sexpflag)
2197 case Smath: 2381 case Smath:
2198 if (!sexpflag) 2382 if (!sexpflag)
2199 break; 2383 break;
2200 if (from != stop && c == FETCH_CHAR (from_byte)) 2384 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2201 { 2385 {
2202 INC_BOTH (from, from_byte); 2386 INC_BOTH (from, from_byte);
2203 } 2387 }
@@ -2225,12 +2409,12 @@ scan_lists (from, count, depth, sexpflag)
2225 case Sstring: 2409 case Sstring:
2226 case Sstring_fence: 2410 case Sstring_fence:
2227 temp_pos = dec_bytepos (from_byte); 2411 temp_pos = dec_bytepos (from_byte);
2228 stringterm = FETCH_CHAR (temp_pos); 2412 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2229 while (1) 2413 while (1)
2230 { 2414 {
2231 if (from >= stop) goto lose; 2415 if (from >= stop) goto lose;
2232 UPDATE_SYNTAX_TABLE_FORWARD (from); 2416 UPDATE_SYNTAX_TABLE_FORWARD (from);
2233 c = FETCH_CHAR (from_byte); 2417 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2234 if (code == Sstring 2418 if (code == Sstring
2235 ? (c == stringterm 2419 ? (c == stringterm
2236 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring) 2420 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
@@ -2273,7 +2457,7 @@ scan_lists (from, count, depth, sexpflag)
2273 { 2457 {
2274 DEC_BOTH (from, from_byte); 2458 DEC_BOTH (from, from_byte);
2275 UPDATE_SYNTAX_TABLE_BACKWARD (from); 2459 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2276 c = FETCH_CHAR (from_byte); 2460 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2277 code = SYNTAX_WITH_MULTIBYTE_CHECK (c); 2461 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2278 if (depth == min_depth) 2462 if (depth == min_depth)
2279 last_good = from; 2463 last_good = from;
@@ -2291,7 +2475,7 @@ scan_lists (from, count, depth, sexpflag)
2291 DEC_BOTH (from, from_byte); 2475 DEC_BOTH (from, from_byte);
2292 UPDATE_SYNTAX_TABLE_BACKWARD (from); 2476 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2293 code = Sendcomment; 2477 code = Sendcomment;
2294 c1 = FETCH_CHAR (from_byte); 2478 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2295 comstyle = SYNTAX_COMMENT_STYLE (c1); 2479 comstyle = SYNTAX_COMMENT_STYLE (c1);
2296 comnested = comnested || SYNTAX_COMMENT_NESTED (c1); 2480 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2297 } 2481 }
@@ -2324,7 +2508,7 @@ scan_lists (from, count, depth, sexpflag)
2324 else 2508 else
2325 temp_pos--; 2509 temp_pos--;
2326 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1); 2510 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2327 c1 = FETCH_CHAR (temp_pos); 2511 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2328 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1); 2512 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2329 /* Don't allow comment-end to be quoted. */ 2513 /* Don't allow comment-end to be quoted. */
2330 if (temp_code == Sendcomment) 2514 if (temp_code == Sendcomment)
@@ -2336,7 +2520,7 @@ scan_lists (from, count, depth, sexpflag)
2336 temp_pos = dec_bytepos (temp_pos); 2520 temp_pos = dec_bytepos (temp_pos);
2337 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1); 2521 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2338 } 2522 }
2339 c1 = FETCH_CHAR (temp_pos); 2523 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2340 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1); 2524 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2341 if (! (quoted || temp_code == Sword 2525 if (! (quoted || temp_code == Sword
2342 || temp_code == Ssymbol 2526 || temp_code == Ssymbol
@@ -2351,7 +2535,7 @@ scan_lists (from, count, depth, sexpflag)
2351 break; 2535 break;
2352 temp_pos = dec_bytepos (from_byte); 2536 temp_pos = dec_bytepos (from_byte);
2353 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1); 2537 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2354 if (from != stop && c == FETCH_CHAR (temp_pos)) 2538 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2355 DEC_BOTH (from, from_byte); 2539 DEC_BOTH (from, from_byte);
2356 if (mathexit) 2540 if (mathexit)
2357 { 2541 {
@@ -2397,7 +2581,7 @@ scan_lists (from, count, depth, sexpflag)
2397 if (from == stop) goto lose; 2581 if (from == stop) goto lose;
2398 UPDATE_SYNTAX_TABLE_BACKWARD (from); 2582 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2399 if (!char_quoted (from, from_byte) 2583 if (!char_quoted (from, from_byte)
2400 && (c = FETCH_CHAR (from_byte), 2584 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2401 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code)) 2585 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2402 break; 2586 break;
2403 } 2587 }
@@ -2405,7 +2589,7 @@ scan_lists (from, count, depth, sexpflag)
2405 break; 2589 break;
2406 2590
2407 case Sstring: 2591 case Sstring:
2408 stringterm = FETCH_CHAR (from_byte); 2592 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2409 while (1) 2593 while (1)
2410 { 2594 {
2411 if (from == stop) goto lose; 2595 if (from == stop) goto lose;
@@ -2416,7 +2600,7 @@ scan_lists (from, count, depth, sexpflag)
2416 temp_pos--; 2600 temp_pos--;
2417 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1); 2601 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2418 if (!char_quoted (from - 1, temp_pos) 2602 if (!char_quoted (from - 1, temp_pos)
2419 && stringterm == (c = FETCH_CHAR (temp_pos)) 2603 && stringterm == (c = FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2420 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring) 2604 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2421 break; 2605 break;
2422 DEC_BOTH (from, from_byte); 2606 DEC_BOTH (from, from_byte);
@@ -2524,7 +2708,7 @@ This includes chars with "quote" or "prefix" syntax (' or p). */)
2524 2708
2525 while (!char_quoted (pos, pos_byte) 2709 while (!char_quoted (pos, pos_byte)
2526 /* Previous statement updates syntax table. */ 2710 /* Previous statement updates syntax table. */
2527 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote) 2711 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2528 || SYNTAX_PREFIX (c))) 2712 || SYNTAX_PREFIX (c)))
2529 { 2713 {
2530 opoint = pos; 2714 opoint = pos;
@@ -2552,7 +2736,8 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2552 stopbefore, oldstate, commentstop) 2736 stopbefore, oldstate, commentstop)
2553 struct lisp_parse_state *stateptr; 2737 struct lisp_parse_state *stateptr;
2554 register int from; 2738 register int from;
2555 int end, targetdepth, stopbefore, from_byte; 2739 int from_byte;
2740 int end, targetdepth, stopbefore;
2556 Lisp_Object oldstate; 2741 Lisp_Object oldstate;
2557 int commentstop; 2742 int commentstop;
2558{ 2743{
@@ -2590,7 +2775,7 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2590do { prev_from = from; \ 2775do { prev_from = from; \
2591 prev_from_byte = from_byte; \ 2776 prev_from_byte = from_byte; \
2592 prev_from_syntax \ 2777 prev_from_syntax \
2593 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \ 2778 = SYNTAX_WITH_FLAGS (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)); \
2594 INC_BOTH (from, from_byte); \ 2779 INC_BOTH (from, from_byte); \
2595 if (from < end) \ 2780 if (from < end) \
2596 UPDATE_SYNTAX_TABLE_FORWARD (from); \ 2781 UPDATE_SYNTAX_TABLE_FORWARD (from); \
@@ -2706,7 +2891,7 @@ do { prev_from = from; \
2706 } 2891 }
2707 else if (from < end) 2892 else if (from < end)
2708 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)) 2893 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2709 if (c1 = FETCH_CHAR (from_byte), 2894 if (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2710 SYNTAX_COMSTART_SECOND (c1)) 2895 SYNTAX_COMSTART_SECOND (c1))
2711 /* Duplicate code to avoid a complex if-expression 2896 /* Duplicate code to avoid a complex if-expression
2712 which causes trouble for the SGI compiler. */ 2897 which causes trouble for the SGI compiler. */
@@ -2744,7 +2929,7 @@ do { prev_from = from; \
2744 while (from < end) 2929 while (from < end)
2745 { 2930 {
2746 /* Some compilers can't handle this inside the switch. */ 2931 /* Some compilers can't handle this inside the switch. */
2747 temp = SYNTAX (FETCH_CHAR (from_byte)); 2932 temp = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (from_byte));
2748 switch (temp) 2933 switch (temp)
2749 { 2934 {
2750 case Scharquote: 2935 case Scharquote:
@@ -2817,7 +3002,7 @@ do { prev_from = from; \
2817 if (stopbefore) goto stop; /* this arg means stop at sexp start */ 3002 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2818 curlevel->last = prev_from; 3003 curlevel->last = prev_from;
2819 state.instring = (code == Sstring 3004 state.instring = (code == Sstring
2820 ? (FETCH_CHAR (prev_from_byte)) 3005 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
2821 : ST_STRING_STYLE); 3006 : ST_STRING_STYLE);
2822 if (boundary_stop) goto done; 3007 if (boundary_stop) goto done;
2823 startinstring: 3008 startinstring:
@@ -2829,7 +3014,7 @@ do { prev_from = from; \
2829 int c; 3014 int c;
2830 3015
2831 if (from >= end) goto done; 3016 if (from >= end) goto done;
2832 c = FETCH_CHAR (from_byte); 3017 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2833 /* Some compilers can't handle this inside the switch. */ 3018 /* Some compilers can't handle this inside the switch. */
2834 temp = SYNTAX (c); 3019 temp = SYNTAX (c);
2835 3020
@@ -3041,8 +3226,7 @@ init_syntax_once ()
3041 3226
3042 /* All multibyte characters have syntax `word' by default. */ 3227 /* All multibyte characters have syntax `word' by default. */
3043 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword]; 3228 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3044 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++) 3229 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3045 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
3046} 3230}
3047 3231
3048void 3232void
@@ -3082,6 +3266,25 @@ See the info node `(elisp)Syntax Properties' for a description of the
3082 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */); 3266 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
3083 open_paren_in_column_0_is_defun_start = 1; 3267 open_paren_in_column_0_is_defun_start = 1;
3084 3268
3269
3270 DEFVAR_LISP ("find-word-boundary-function-table",
3271 &Vfind_word_boundary_function_table,
3272 doc: /*
3273Char table of functions to search for the word boundary.
3274Each function is called with two arguments; POS and LIMIT.
3275POS and LIMIT are character positions in the current buffer.
3276
3277If POS is less than LIMIT, POS is at the first character of a word,
3278and the return value of a function is a position after the last
3279character of that word.
3280
3281If POS is not less than LIMIT, POS is at the last character of a word,
3282and the return value of a function is a position at the first
3283character of that word.
3284
3285In both cases, LIMIT bounds the search. */);
3286 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3287
3085 defsubr (&Ssyntax_table_p); 3288 defsubr (&Ssyntax_table_p);
3086 defsubr (&Ssyntax_table); 3289 defsubr (&Ssyntax_table);
3087 defsubr (&Sstandard_syntax_table); 3290 defsubr (&Sstandard_syntax_table);