aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlWarne2022-08-06 15:01:38 +0200
committerLars Ingebrigtsen2022-08-06 15:01:38 +0200
commitd8abff398bc45a791783c5c463838ba6fa3f030e (patch)
treeb9d1fea79c715cc881a83a2201343f50194d78ce
parent83496888aaed10de35b3cbce695600300a30af84 (diff)
downloademacs-d8abff398bc45a791783c5c463838ba6fa3f030e.tar.gz
emacs-d8abff398bc45a791783c5c463838ba6fa3f030e.zip
Fontify python escape sequences in literals
* lisp/progmodes/python.el (python-rx): Add regular expressions matching escape sequences in string and bytes literals. (python--string-bytes-literal-matcher): New function. (python--not-raw-bytes-literal-start-regexp): New constant. (python--not-raw-string-literal-start-regexp): New constant. * test/lisp/progmodes/python-tests.el: Add tests for the new fontification (bug#57004).
-rw-r--r--lisp/progmodes/python.el56
-rw-r--r--test/lisp/progmodes/python-tests.el77
2 files changed, 131 insertions, 2 deletions
diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el
index b8fc7d4c546..27bdbae3113 100644
--- a/lisp/progmodes/python.el
+++ b/lisp/progmodes/python.el
@@ -427,7 +427,19 @@ This variant of `rx' supports common Python named REGEXPS."
427 (: "vim:" (* space) "set" (+ space) 427 (: "vim:" (* space) "set" (+ space)
428 "fileencoding" (* space) ?= (* space) 428 "fileencoding" (* space) ?= (* space)
429 (group-n 1 (+ (or word ?-))) 429 (group-n 1 (+ (or word ?-)))
430 (* space) ":"))))) 430 (* space) ":"))))
431 (bytes-escape-sequence
432 (seq (not "\\")
433 (group (or "\\\\" "\\'" "\\a" "\\b" "\\f"
434 "\\n" "\\r" "\\t" "\\v"
435 (seq "\\" (= 3 (in "0-7")))
436 (seq "\\x" hex hex)))))
437 (string-escape-sequence
438 (or bytes-escape-sequence
439 (seq (not "\\")
440 (or (group-n 1 "\\u" (= 4 hex))
441 (group-n 1 "\\U" (= 8 hex))
442 (group-n 1 "\\N{" (*? anychar) "}"))))))
431 (rx ,@regexps))) 443 (rx ,@regexps)))
432 444
433 445
@@ -539,6 +551,29 @@ the {...} holes that appear within f-strings."
539 (goto-char (min limit (1+ send))) 551 (goto-char (min limit (1+ send)))
540 (setq ppss (syntax-ppss)))))) 552 (setq ppss (syntax-ppss))))))
541 553
(defconst python--not-raw-bytes-literal-start-regexp
  (rx (or bos (not alnum))
      (or "b" "B")
      (or "\"" "\"\"\"" "'" "'''")
      eos)
  "Regexp matching the opening of a bytes literal that is not raw.
It is meant to be matched against a short string that ends with
the opening quote of a literal.  It matches when that quote is
directly preceded by a \"b\" or \"B\" prefix which itself is at
the start of the string or follows a non-alphanumeric character.
A two-letter prefix such as \"rb\" therefore does not match.")
557
(defconst python--not-raw-string-literal-start-regexp
  (rx (or bos (not alnum))
      (? (or "u" "U" "F" "f"))
      (or "\"" "\"\"\"" "'" "'''")
      eos)
  "Regexp matching the opening of a string literal that is not raw.
It is meant to be matched against a short string that ends with
the opening quote of a literal.  It matches when that quote has
no prefix, or a single \"u\", \"U\", \"f\" or \"F\" prefix, and
the quote or prefix is at the start of the string or follows a
non-alphanumeric character.  Prefixes such as \"r\", \"b\" or
\"rb\" therefore do not match.")
561
(defun python--string-bytes-literal-matcher (regexp start-regexp)
  "Return a font-lock matcher for REGEXP inside selected literals.
The returned function takes one argument, LIMIT, and searches
forward for REGEXP up to LIMIT.  A match is accepted only when
the syntactic construct enclosing the end of the match (element 8
of the `syntax-ppss' state) starts with text matching
START-REGEXP; rejected matches are skipped and the search
continues.

On success the function returns the position of the end of the
match, with the match data describing the match of REGEXP.  It
returns nil when no acceptable match exists before LIMIT."
  (lambda (limit)
    (cl-loop for result = (re-search-forward regexp limit t)
             for result-valid
             = (and
                result
                ;; Element 8 of the parse state is nil when point is in
                ;; neither a string nor a comment.  Such a match must be
                ;; rejected instead of doing arithmetic on nil.
                (let ((pos (nth 8 (syntax-ppss))))
                  (when pos
                    (let ((before-quote
                           ;; The opening delimiter plus the few characters
                           ;; before it, where a literal prefix would be.
                           (buffer-substring-no-properties
                            (max (- pos 5) (point-min))
                            (min (+ pos 1) (point-max)))))
                      ;; The escape-sequence regexps consume one
                      ;; non-backslash character before the escape.  Step
                      ;; back so that the last character of this match can
                      ;; play that role for an immediately following
                      ;; escape (as in "\r\n").  Only do so when the
                      ;; search still makes progress.
                      (when (> (- (match-end 0) (match-beginning 0)) 1)
                        (backward-char))
                      ;; `string-match-p' leaves the match data intact.
                      (string-match-p start-regexp before-quote)))))
             until (or (not result) result-valid)
             finally return (and result-valid result))))
576
542(defvar python-font-lock-keywords-level-1 577(defvar python-font-lock-keywords-level-1
543 `((,(python-rx symbol-start "def" (1+ space) (group symbol-name)) 578 `((,(python-rx symbol-start "def" (1+ space) (group symbol-name))
544 (1 font-lock-function-name-face)) 579 (1 font-lock-function-name-face))
@@ -716,7 +751,24 @@ sign in chained assignment."
716 grouped-assignment-target (* space) 751 grouped-assignment-target (* space)
717 (or ")" "]") (* space) 752 (or ")" "]") (* space)
718 assignment-operator)) 753 assignment-operator))
719 (1 font-lock-variable-name-face))) 754 (1 font-lock-variable-name-face))
755 ;; escape sequences within bytes literals
756 ;; "\\" "\'" "\a" "\b" "\f" "\n" "\r" "\t" "\v"
757 ;; "\ooo" character with octal value ooo
758 ;; "\xhh" character with hex value hh
759 (,(python--string-bytes-literal-matcher
760 (python-rx bytes-escape-sequence)
761 python--not-raw-bytes-literal-start-regexp)
762 (1 font-lock-constant-face t))
763 ;; escape sequences within string literals, the same as appear in bytes
764 ;; literals in addition to:
765 ;; "\uxxxx" Character with 16-bit hex value xxxx
766 ;; "\Uxxxxxxxx" Character with 32-bit hex value xxxxxxxx
767 ;; "\N{name}" Character named name in the Unicode database
768 (,(python--string-bytes-literal-matcher
769 (python-rx string-escape-sequence)
770 python--not-raw-string-literal-start-regexp)
771 (1 'font-lock-constant-face t)))
720 "Font lock keywords to use in `python-mode' for maximum decoration. 772 "Font lock keywords to use in `python-mode' for maximum decoration.
721 773
722This decoration level includes everything in 774This decoration level includes everything in
diff --git a/test/lisp/progmodes/python-tests.el b/test/lisp/progmodes/python-tests.el
index 6f2ad87f81a..07f2c4f09a3 100644
--- a/test/lisp/progmodes/python-tests.el
+++ b/test/lisp/progmodes/python-tests.el
@@ -380,6 +380,83 @@ def f(x: CustomInt) -> CustomInt:
380 (128 . font-lock-builtin-face) (131) 380 (128 . font-lock-builtin-face) (131)
381 (144 . font-lock-keyword-face) (150)))) 381 (144 . font-lock-keyword-face) (150))))
382 382
(ert-deftest python-font-lock-escape-sequence-string-newline ()
  "Check the faces of a newline escape in unprefixed, f- and u-strings."
  (python-tests-assert-faces
   "'\\n'
\"\\n\"
f'\\n'
f\"\\n\"
u'\\n'
u\"\\n\""
   '(;; '\n'
     (1 . font-lock-doc-face)
     (2 . font-lock-constant-face)
     (4 . font-lock-doc-face) (5)
     ;; "\n"
     (6 . font-lock-doc-face)
     (7 . font-lock-constant-face)
     (9 . font-lock-doc-face) (10)
     ;; f'\n'
     (12 . font-lock-string-face)
     (13 . font-lock-constant-face)
     (15 . font-lock-string-face) (16)
     ;; f"\n"
     (18 . font-lock-string-face)
     (19 . font-lock-constant-face)
     (21 . font-lock-string-face) (22)
     ;; u'\n'
     (24 . font-lock-string-face)
     (25 . font-lock-constant-face)
     (27 . font-lock-string-face) (28)
     ;; u"\n"
     (30 . font-lock-string-face)
     (31 . font-lock-constant-face)
     (33 . font-lock-string-face))))
409
(ert-deftest python-font-lock-escape-sequence-bytes-newline ()
  "Check the faces of a newline escape in bytes literals."
  (python-tests-assert-faces
   "b'\\n'
b\"\\n\""
   '(;; b'\n'
     (1)
     (2 . font-lock-doc-face)
     (3 . font-lock-constant-face)
     (5 . font-lock-doc-face) (6)
     ;; b"\n"
     (8 . font-lock-doc-face)
     (9 . font-lock-constant-face)
     (11 . font-lock-doc-face))))
421
(ert-deftest python-font-lock-escape-sequence-hex-octal ()
  "Check the faces of hex and octal escapes in bytes and string literals."
  (python-tests-assert-faces
   "b'\\x12 \\777'
'\\x12 \\777'"
   '(;; b'\x12 \777'
     (1)
     (2 . font-lock-doc-face)
     (3 . font-lock-constant-face)
     (7 . font-lock-doc-face)
     (8 . font-lock-constant-face)
     (12 . font-lock-doc-face) (13)
     ;; '\x12 \777'
     (14 . font-lock-doc-face)
     (15 . font-lock-constant-face)
     (19 . font-lock-doc-face)
     (20 . font-lock-constant-face)
     (24 . font-lock-doc-face))))
437
(ert-deftest python-font-lock-escape-sequence-unicode ()
  "Check that \\u, \\U and \\N{} get the escape face in strings only.
The bytes literal on the first line is expected to carry no
escape face at all."
  (python-tests-assert-faces
   "b'\\u1234 \\U00010348 \\N{Plus-Minus Sign}'
'\\u1234 \\U00010348 \\N{Plus-Minus Sign}'"
   '(;; Bytes literal: one uniform face from quote to quote.
     (1)
     (2 . font-lock-doc-face) (41)
     ;; String literal: each of the three escapes is highlighted.
     (42 . font-lock-doc-face)
     (43 . font-lock-constant-face)
     (49 . font-lock-doc-face)
     (50 . font-lock-constant-face)
     (60 . font-lock-doc-face)
     (61 . font-lock-constant-face)
     (80 . font-lock-doc-face))))
451
(ert-deftest python-font-lock-raw-escape-sequence ()
  "Check that raw literals get no escape-sequence fontification.
Every backslash in the Python text is written as \"\\\\\" in the
Lisp string, the octal escape included.  A bare \"\\123\" would
be read by the Lisp reader as the single character \"S\", and
the raw literals would then contain no octal escape to test."
  (python-tests-assert-faces
   "rb'\\x12 \\123 \\n'
r'\\x12 \\123 \\n \\u1234 \\U00010348 \\N{Plus-Minus Sign}'"
   '(;; rb'\x12 \123 \n' -- quotes at 3 and 16, newline at 17.
     (1)
     (3 . font-lock-doc-face) (17)
     ;; r'...' -- the opening quote is at 19, after the "r" prefix at 18.
     (19 . font-lock-doc-face))))
459
383 460
384;;; Indentation 461;;; Indentation
385 462