aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlan Mackenzie2018-07-01 11:39:03 +0000
committerAlan Mackenzie2018-07-01 11:39:03 +0000
commit4a9b24e1780c980d033b44f3c86133bbab691ebe (patch)
tree400c23dcd89d63c01c9f4a39c330efb90bf93951
parent76eda952b09db6d79342b7ddfcae45c7c836ab62 (diff)
downloademacs-scratch/fontify-open-string.tar.gz
emacs-scratch/fontify-open-string.zip
Initial commit. Allow wanted fontification of open string in any mode.scratch/fontify-open-string
The wanted fontification is for the string face to end at the first unescaped newline. This is achieved by a new syntax flag `s' on NL, which means "terminate any open string". src/syntax.c (SYNTAX_FLAGS_CLOSE_STRING, back_maybe_string): New functions. (Fstring_to_syntax, Finternal_describe_syntax_value, scan_lists) (scan_sexps_forward): Adapt to handle the `s' flag. lisp/font-lock.el (font-lock-warn-open-string): New defcustom. (font-lock-fontify-syntactically-region): Enhance to fontify " with warning-face. lisp/progmodes/sh-script.el (sh-mode-syntax-table): Add flag `s' to syntax entry for \n.
-rw-r--r--lisp/font-lock.el24
-rw-r--r--lisp/progmodes/sh-script.el2
-rw-r--r--src/syntax.c193
3 files changed, 208 insertions, 11 deletions
diff --git a/lisp/font-lock.el b/lisp/font-lock.el
index be9fb4dc93f..f2b7fef5c23 100644
--- a/lisp/font-lock.el
+++ b/lisp/font-lock.el
@@ -287,6 +287,16 @@ If a number, only buffers greater than this size have fontification messages."
287 (integer :tag "size")) 287 (integer :tag "size"))
288 :group 'font-lock 288 :group 'font-lock
289 :version "24.1") 289 :version "24.1")
290
291(defcustom font-lock-warn-open-string t
292 "Fontify the opening quote of an unterminated string with warning face?
293This is done when this variable is non-nil.
294
295This works only when the syntax-table entry for newline contains the flag `s'
296\(see page \"xxx\" in the Elisp manual)."
297 :type 'boolean
298 :group 'font-lock
299 :version "27.1")
290 300
291 301
292;; Originally these variable values were face names such as `bold' etc. 302;; Originally these variable values were face names such as `bold' etc.
@@ -1597,18 +1607,30 @@ START should be at the beginning of a line."
1597 (replace-regexp-in-string "^ *" "" comment-end)))) 1607 (replace-regexp-in-string "^ *" "" comment-end))))
1598 ;; Find the `start' state. 1608 ;; Find the `start' state.
1599 (state (syntax-ppss start)) 1609 (state (syntax-ppss start))
1600 face beg) 1610 face beg in-string s-c-start)
1601 (if loudly (message "Fontifying %s... (syntactically...)" (buffer-name))) 1611 (if loudly (message "Fontifying %s... (syntactically...)" (buffer-name)))
1602 ;; 1612 ;;
1603 ;; Find each interesting place between here and `end'. 1613 ;; Find each interesting place between here and `end'.
1604 (while 1614 (while
1605 (progn 1615 (progn
1606 (when (or (nth 3 state) (nth 4 state)) 1616 (when (or (nth 3 state) (nth 4 state))
1617 (setq s-c-start (nth 8 state))
1618 (setq in-string (nth 3 state))
1607 (setq face (funcall font-lock-syntactic-face-function state)) 1619 (setq face (funcall font-lock-syntactic-face-function state))
1608 (setq beg (max (nth 8 state) start)) 1620 (setq beg (max (nth 8 state) start))
1609 (setq state (parse-partial-sexp (point) end nil nil state 1621 (setq state (parse-partial-sexp (point) end nil nil state
1610 'syntax-table)) 1622 'syntax-table))
1611 (when face (put-text-property beg (point) 'face face)) 1623 (when face (put-text-property beg (point) 'face face))
1624;;;; NEW STOUGH, 2018-06-29
1625 (put-text-property s-c-start (1+ s-c-start)
1626 'face
1627 (if (and font-lock-warn-open-string
1628 in-string
1629 (not (nth 3 state))
1630 (not (eq in-string (char-before))))
1631 'font-lock-warning-face
1632 face))
1633;;;; END OF NEW STOUGH
1612 (when (and (eq face 'font-lock-comment-face) 1634 (when (and (eq face 'font-lock-comment-face)
1613 (or font-lock-comment-start-skip 1635 (or font-lock-comment-start-skip
1614 comment-start-skip)) 1636 comment-start-skip))
diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index aaa86b5816f..bf760e0a6cc 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -429,7 +429,7 @@ name symbol."
429(defvar sh-mode-syntax-table 429(defvar sh-mode-syntax-table
430 (sh-mode-syntax-table () 430 (sh-mode-syntax-table ()
431 ?\# "<" 431 ?\# "<"
432 ?\n ">#" 432 ?\n ">#s"
433 ?\" "\"\"" 433 ?\" "\"\""
434 ?\' "\"'" 434 ?\' "\"'"
435 ?\` "\"`" 435 ?\` "\"`"
diff --git a/src/syntax.c b/src/syntax.c
index c5a4b03955b..b82b091ced2 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -33,7 +33,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
33#define SYNTAX_ENTRY(c) syntax_property_entry (c, 1) 33#define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34#define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1) 34#define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
35 35
36/* Eight single-bit flags have the following meanings: 36/* Nine single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence. 37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence. 38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence. 39 3. This character is the first of a two-character comment-end sequence.
@@ -42,6 +42,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
42 6. The char is part of a delimiter for comments of style "b". 42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence. 43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c". 44 8. The char is part of a delimiter for comments of style "c".
45 9. The char will close an open string (except one opened by a string-fence).
45 Note that any two-character sequence whose first character has flag 1 46 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start. 47 and whose second character has flag 2 will be interpreted as a comment start.
47 48
@@ -108,7 +109,11 @@ SYNTAX_FLAGS_COMMENT_NESTED (int flags)
108{ 109{
109 return (flags >> 22) & 1; 110 return (flags >> 22) & 1;
110} 111}
111 112static bool
113SYNTAX_FLAGS_CLOSE_STRING (int flags)
114{
115 return (flags >> 24) & 1;
116}
112/* FLAGS should be the flags of the main char of the comment marker, e.g. 117/* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */ 118 the second for comstart and the first for comend. */
114static int 119static int
@@ -1206,6 +1211,10 @@ the value of a `syntax-table' text property. */)
1206 case 'c': 1211 case 'c':
1207 val |= 1 << 23; 1212 val |= 1 << 23;
1208 break; 1213 break;
1214
1215 case 's':
1216 val |= 1 << 24;
1217 break;
1209 } 1218 }
1210 1219
1211 if (val < ASIZE (Vsyntax_code_object) && NILP (match)) 1220 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
@@ -1257,6 +1266,8 @@ c (on any of its chars) using this flag:
1257 p means CHAR is a prefix character for `backward-prefix-chars'; 1266 p means CHAR is a prefix character for `backward-prefix-chars';
1258 such characters are treated as whitespace when they occur 1267 such characters are treated as whitespace when they occur
1259 between expressions. 1268 between expressions.
1269 s means CHAR will terminate any open string (except one started by a
1270 character with generic string fence syntax).
1260usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */) 1271usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1261 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table) 1272 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1262{ 1273{
@@ -1294,7 +1305,8 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1294 (Lisp_Object syntax) 1305 (Lisp_Object syntax)
1295{ 1306{
1296 int code, syntax_code; 1307 int code, syntax_code;
1297 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested; 1308 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested,
1309 strclose;
1298 char str[2]; 1310 char str[2];
1299 Lisp_Object first, match_lisp, value = syntax; 1311 Lisp_Object first, match_lisp, value = syntax;
1300 1312
@@ -1335,6 +1347,7 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1335 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code); 1347 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1336 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code); 1348 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1337 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code); 1349 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1350 strclose = SYNTAX_FLAGS_CLOSE_STRING (syntax_code);
1338 1351
1339 if (Smax <= code) 1352 if (Smax <= code)
1340 { 1353 {
@@ -1368,6 +1381,8 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1368 insert ("c", 1); 1381 insert ("c", 1);
1369 if (comnested) 1382 if (comnested)
1370 insert ("n", 1); 1383 insert ("n", 1);
1384 if (strclose)
1385 insert ("s", 1);
1371 1386
1372 insert_string ("\twhich means: "); 1387 insert_string ("\twhich means: ");
1373 1388
@@ -1439,6 +1454,9 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1439 insert1 (Fsubstitute_command_keys (prefixdoc)); 1454 insert1 (Fsubstitute_command_keys (prefixdoc));
1440 } 1455 }
1441 1456
1457 if (strclose)
1458 insert_string (",\n\t will close any string started by a char with \" syntax");
1459
1442 return syntax; 1460 return syntax;
1443} 1461}
1444 1462
@@ -2637,6 +2655,144 @@ syntax_multibyte (int c, bool multibyte_symbol_p)
2637 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol; 2655 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2638} 2656}
2639 2657
2658static bool
2659back_maybe_string (ptrdiff_t *from, ptrdiff_t *from_byte,
2660 ptrdiff_t stop, bool multibyte_symbol_p)
2661{
2662 unsigned short int quit_count = 0;
2663 enum syntaxcode code = Smax;
2664 int syntax = Smax, prev_syntax;
2665 ptrdiff_t at = *from, at_byte = *from_byte;
2666 ptrdiff_t targ, targ_byte;
2667 int c, stringterm;
2668 ptrdiff_t defun_start;
2669 ptrdiff_t defun_start_byte;
2670
2671#define DEC_AT \
2672 do { \
2673 rarely_quit (++quit_count); \
2674 prev_syntax = syntax; \
2675 DEC_BOTH (at, at_byte); \
2676 if (at >= stop) \
2677 UPDATE_SYNTAX_TABLE_BACKWARD (at); \
2678 if (char_quoted (at, at_byte)) \
2679 { \
2680 DEC_BOTH (at, at_byte); \
2681 syntax = code = Sword; \
2682 } \
2683 else \
2684 { \
2685 c = FETCH_CHAR_AS_MULTIBYTE (at_byte); \
2686 syntax = SYNTAX_WITH_FLAGS (c); \
2687 code = syntax_multibyte (c, multibyte_symbol_p); \
2688 } \
2689 if (SYNTAX_FLAGS_COMSTART_FIRST (syntax) \
2690 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)) \
2691 code = Scomment; \
2692 } while (0)
2693
2694 /* Find the alleged string opener. */
2695 while ((at > stop)
2696 && (code != Sstring)
2697 && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
2698 {
2699 DEC_AT;
2700 }
2701 if (code != Sstring)
2702 goto lose;
2703 stringterm = c;
2704 targ = at;
2705 targ_byte = at_byte;
2706
2707 /* Now go back over paired delimiters which are STRINGTERM. */
2708 while (true) /* One quoted string per iteration. */
2709 {
2710 DEC_AT;
2711 /* Search back for a terminating string delimiter: */
2712 while ((at > stop)
2713 && (code != Sstring)
2714 && (code != Sstring_fence)
2715 && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
2716 {
2717 DEC_AT;
2718 /* Check for comment and "other" strings. */
2719 }
2720 if ((at <= stop)
2721 || SYNTAX_FLAGS_CLOSE_STRING (syntax))
2722 goto done;
2723 if (code == Sstring_fence)
2724 stringterm = ST_STRING_STYLE;
2725 else if (code == Sstring)
2726 stringterm = c;
2727 /* Now search back for the matching opening string delimiter: */
2728 DEC_AT;
2729 while ((at > stop)
2730 && !((stringterm == ST_STRING_STYLE)
2731 && (syntax == Sstring_fence))
2732 && !((c == stringterm)
2733 && (syntax == Sstring))
2734 && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
2735 {
2736 if ((syntax == Sstring_fence)
2737 || (syntax == Sstring)
2738 || (syntax == Scomment))
2739 goto lossage;
2740 DEC_AT;
2741 }
2742 if ((at <= stop)
2743 || SYNTAX_FLAGS_CLOSE_STRING (syntax))
2744 goto lose; /* Even number of string delims in line. */
2745 }
2746
2747 done:
2748 UPDATE_SYNTAX_TABLE_FORWARD (targ);
2749 *from = targ;
2750 *from_byte = targ_byte;
2751 return true;
2752 lose:
2753 UPDATE_SYNTAX_TABLE_FORWARD (*from);
2754 return false;
2755
2756 lossage:
2757 /* We've encountered possible comments or strings with mixed
2758 delimiters. Bail out and scan forward from a safe position. */
2759 {
2760 struct lisp_parse_state state;
2761 bool adjusted = true;
2762
2763 defun_start = find_defun_start (*from, *from_byte);
2764 defun_start_byte = find_start_value_byte;
2765 adjusted = (defun_start > BEGV);
2766 internalize_parse_state (Qnil, &state);
2767 scan_sexps_forward (&state,
2768 defun_start, defun_start_byte,
2769 *from, TYPE_MINIMUM (EMACS_INT),
2770 0, 0);
2771 if (!adjusted)
2772 {
2773 adjusted = true;
2774 find_start_value
2775 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
2776 : state.thislevelstart >= 0 ? state.thislevelstart
2777 : find_start_value;
2778 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
2779 }
2780
2781 if ((state.instring != -1)
2782 && (state.instring != ST_STRING_STYLE)
2783 && (state.comstr_start >= stop))
2784 {
2785 UPDATE_SYNTAX_TABLE_BACKWARD (state.comstr_start);
2786 *from = state.comstr_start;
2787 *from_byte = CHAR_TO_BYTE (*from);
2788 return true;
2789 }
2790 /* Syntax table is already valid at *FROM, after the
2791 `scan_sexps_forward' */
2792 return false;
2793 }
2794}
2795
2640static Lisp_Object 2796static Lisp_Object
2641scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag) 2797scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2642{ 2798{
@@ -2803,13 +2959,16 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2803 while (1) 2959 while (1)
2804 { 2960 {
2805 enum syntaxcode c_code; 2961 enum syntaxcode c_code;
2962 int c_code_flags;
2806 if (from >= stop) 2963 if (from >= stop)
2807 goto lose; 2964 goto lose;
2808 UPDATE_SYNTAX_TABLE_FORWARD (from); 2965 UPDATE_SYNTAX_TABLE_FORWARD (from);
2809 c = FETCH_CHAR_AS_MULTIBYTE (from_byte); 2966 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2810 c_code = syntax_multibyte (c, multibyte_symbol_p); 2967 c_code = syntax_multibyte (c, multibyte_symbol_p);
2968 c_code_flags = SYNTAX_WITH_FLAGS (c);
2811 if (code == Sstring 2969 if (code == Sstring
2812 ? c == stringterm && c_code == Sstring 2970 ? (c == stringterm && c_code == Sstring)
2971 || SYNTAX_FLAGS_CLOSE_STRING (c_code_flags)
2813 : c_code == Sstring_fence) 2972 : c_code == Sstring_fence)
2814 break; 2973 break;
2815 2974
@@ -2965,6 +3124,10 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2965 for very little gain, so we don't bother either. -sm */ 3124 for very little gain, so we don't bother either. -sm */
2966 if (found) 3125 if (found)
2967 from = out_charpos, from_byte = out_bytepos; 3126 from = out_charpos, from_byte = out_bytepos;
3127 else if (SYNTAX_FLAGS_CLOSE_STRING (syntax)
3128 && back_maybe_string (&from, &from_byte, stop,
3129 multibyte_symbol_p))
3130 goto done2;
2968 break; 3131 break;
2969 3132
2970 case Scomment_fence: 3133 case Scomment_fence:
@@ -3006,7 +3169,14 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
3006 } 3169 }
3007 if (!depth && sexpflag) goto done2; 3170 if (!depth && sexpflag) goto done2;
3008 break; 3171 break;
3009 default: 3172 case Swhitespace:
3173 case Spunct:
3174 if (SYNTAX_FLAGS_CLOSE_STRING (syntax)
3175 && back_maybe_string (&from, &from_byte, stop,
3176 multibyte_symbol_p))
3177 goto done2;
3178 break;
3179 default:
3010 /* Ignore whitespace, punctuation, quote, endcomment. */ 3180 /* Ignore whitespace, punctuation, quote, endcomment. */
3011 break; 3181 break;
3012 } 3182 }
@@ -3046,7 +3216,7 @@ function scans over parentheses until the depth goes to zero COUNT
3046times. Hence, positive DEPTH moves out that number of levels of 3216times. Hence, positive DEPTH moves out that number of levels of
3047parentheses, while negative DEPTH moves to a deeper level. 3217parentheses, while negative DEPTH moves to a deeper level.
3048 3218
3049Comments are ignored if `parse-sexp-ignore-comments' is non-nil. 3219Comments are skipped over if `parse-sexp-ignore-comments' is non-nil.
3050 3220
3051If we reach the beginning or end of the accessible part of the buffer 3221If we reach the beginning or end of the accessible part of the buffer
3052before we have scanned over COUNT lists, return nil if the depth at 3222before we have scanned over COUNT lists, return nil if the depth at
@@ -3065,7 +3235,7 @@ DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3065If COUNT is negative, scan backwards. 3235If COUNT is negative, scan backwards.
3066Returns the character number of the position thus found. 3236Returns the character number of the position thus found.
3067 3237
3068Comments are ignored if `parse-sexp-ignore-comments' is non-nil. 3238Comments are skipped over if `parse-sexp-ignore-comments' is non-nil.
3069 3239
3070If the beginning or end of (the accessible part of) the buffer is reached 3240If the beginning or end of (the accessible part of) the buffer is reached
3071in the middle of a parenthetical grouping, an error is signaled. 3241in the middle of a parenthetical grouping, an error is signaled.
@@ -3396,10 +3566,12 @@ do { prev_from = from; \
3396 { 3566 {
3397 int c; 3567 int c;
3398 enum syntaxcode c_code; 3568 enum syntaxcode c_code;
3569 int c_code_flags;
3399 3570
3400 if (from >= end) goto done; 3571 if (from >= end) goto done;
3401 c = FETCH_CHAR_AS_MULTIBYTE (from_byte); 3572 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3402 c_code = SYNTAX (c); 3573 c_code = SYNTAX (c);
3574 c_code_flags = SYNTAX_WITH_FLAGS (c);
3403 3575
3404 /* Check C_CODE here so that if the char has 3576 /* Check C_CODE here so that if the char has
3405 a syntax-table property which says it is NOT 3577 a syntax-table property which says it is NOT
@@ -3421,9 +3593,12 @@ do { prev_from = from; \
3421 break; 3593 break;
3422 3594
3423 default: 3595 default:
3424 break; 3596 if (nofence
3597 && SYNTAX_FLAGS_CLOSE_STRING (c_code_flags))
3598 goto string_end;
3599 break;
3425 } 3600 }
3426 INC_FROM; 3601 INC_FROM;
3427 rarely_quit (++quit_count); 3602 rarely_quit (++quit_count);
3428 } 3603 }
3429 } 3604 }