diff options
| author | lWarne | 2022-08-06 15:01:38 +0200 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2022-08-06 15:01:38 +0200 |
| commit | d8abff398bc45a791783c5c463838ba6fa3f030e (patch) | |
| tree | b9d1fea79c715cc881a83a2201343f50194d78ce | |
| parent | 83496888aaed10de35b3cbce695600300a30af84 (diff) | |
| download | emacs-d8abff398bc45a791783c5c463838ba6fa3f030e.tar.gz emacs-d8abff398bc45a791783c5c463838ba6fa3f030e.zip | |
Fontify Python escape sequences in literals
* lisp/progmodes/python.el (python-rx): Add regular expressions
matching escape sequences in string and bytes literals.
(python--string-bytes-literal-matcher): New function.
(python--not-raw-bytes-literal-start-regexp): New constant.
(python--not-raw-string-literal-start-regexp): New constant.
* test/lisp/progmodes/python-tests.el: Add tests for the new
fontification (bug#57004).
| -rw-r--r-- | lisp/progmodes/python.el | 56 | ||||
| -rw-r--r-- | test/lisp/progmodes/python-tests.el | 77 |
2 files changed, 131 insertions, 2 deletions
diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index b8fc7d4c546..27bdbae3113 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el | |||
| @@ -427,7 +427,19 @@ This variant of `rx' supports common Python named REGEXPS." | |||
| 427 | (: "vim:" (* space) "set" (+ space) | 427 | (: "vim:" (* space) "set" (+ space) |
| 428 | "fileencoding" (* space) ?= (* space) | 428 | "fileencoding" (* space) ?= (* space) |
| 429 | (group-n 1 (+ (or word ?-))) | 429 | (group-n 1 (+ (or word ?-))) |
| 430 | (* space) ":"))))) | 430 | (* space) ":")))) |
| 431 | (bytes-escape-sequence | ||
| 432 | (seq (not "\\") | ||
| 433 | (group (or "\\\\" "\\'" "\\a" "\\b" "\\f" | ||
| 434 | "\\n" "\\r" "\\t" "\\v" | ||
| 435 | (seq "\\" (= 3 (in "0-7"))) | ||
| 436 | (seq "\\x" hex hex))))) | ||
| 437 | (string-escape-sequence | ||
| 438 | (or bytes-escape-sequence | ||
| 439 | (seq (not "\\") | ||
| 440 | (or (group-n 1 "\\u" (= 4 hex)) | ||
| 441 | (group-n 1 "\\U" (= 8 hex)) | ||
| 442 | (group-n 1 "\\N{" (*? anychar) "}")))))) | ||
| 431 | (rx ,@regexps))) | 443 | (rx ,@regexps))) |
| 432 | 444 | ||
| 433 | 445 | ||
| @@ -539,6 +551,29 @@ the {...} holes that appear within f-strings." | |||
| 539 | (goto-char (min limit (1+ send))) | 551 | (goto-char (min limit (1+ send))) |
| 540 | (setq ppss (syntax-ppss)))))) | 552 | (setq ppss (syntax-ppss)))))) |
| 541 | 553 | ||
| 554 | (defconst python--not-raw-bytes-literal-start-regexp | ||
| 555 | (rx (or bos (not alnum)) (or "b" "B") (or "\"" "\"\"\"" "'" "'''") eos) | ||
| 556 | "A regular expression matching the start of a not-raw bytes literal.") | ||
| 557 | |||
| 558 | (defconst python--not-raw-string-literal-start-regexp | ||
| 559 | (rx (or bos (not alnum)) (? (or "u" "U" "F" "f")) (or "\"" "\"\"\"" "'" "'''") eos) | ||
| 560 | "A regular expression matching the start of a not-raw string literal.") | ||
| 561 | |||
| 562 | (defun python--string-bytes-literal-matcher (regexp start-regexp) | ||
| 563 | "Match REGEXP within a string or bytes literal whose start matches START-REGEXP." | ||
| 564 | (lambda (limit) | ||
| 565 | (cl-loop for result = (re-search-forward regexp limit t) | ||
| 566 | for result-valid = (and | ||
| 567 | result | ||
| 568 | (let* ((pos (nth 8 (syntax-ppss))) | ||
| 569 | (before-quote | ||
| 570 | (buffer-substring-no-properties | ||
| 571 | (max (- pos 5) (point-min)) | ||
| 572 | (min (+ pos 1) (point-max))))) | ||
| 573 | (string-match-p start-regexp before-quote))) | ||
| 574 | until (or (not result) result-valid) | ||
| 575 | finally return (and result-valid result)))) | ||
| 576 | |||
| 542 | (defvar python-font-lock-keywords-level-1 | 577 | (defvar python-font-lock-keywords-level-1 |
| 543 | `((,(python-rx symbol-start "def" (1+ space) (group symbol-name)) | 578 | `((,(python-rx symbol-start "def" (1+ space) (group symbol-name)) |
| 544 | (1 font-lock-function-name-face)) | 579 | (1 font-lock-function-name-face)) |
| @@ -716,7 +751,24 @@ sign in chained assignment." | |||
| 716 | grouped-assignment-target (* space) | 751 | grouped-assignment-target (* space) |
| 717 | (or ")" "]") (* space) | 752 | (or ")" "]") (* space) |
| 718 | assignment-operator)) | 753 | assignment-operator)) |
| 719 | (1 font-lock-variable-name-face))) | 754 | (1 font-lock-variable-name-face)) |
| 755 | ;; escape sequences within bytes literals | ||
| 756 | ;; "\\" "\'" "\a" "\b" "\f" "\n" "\r" "\t" "\v" | ||
| 757 | ;; "\ooo" character with octal value ooo | ||
| 758 | ;; "\xhh" character with hex value hh | ||
| 759 | (,(python--string-bytes-literal-matcher | ||
| 760 | (python-rx bytes-escape-sequence) | ||
| 761 | python--not-raw-bytes-literal-start-regexp) | ||
| 762 | (1 font-lock-constant-face t)) | ||
| 763 | ;; escape sequences within string literals, the same as appear in bytes | ||
| 764 | ;; literals in addition to: | ||
| 765 | ;; "\uxxxx" Character with 16-bit hex value xxxx | ||
| 766 | ;; "\Uxxxxxxxx" Character with 32-bit hex value xxxxxxxx | ||
| 767 | ;; "\N{name}" Character named name in the Unicode database | ||
| 768 | (,(python--string-bytes-literal-matcher | ||
| 769 | (python-rx string-escape-sequence) | ||
| 770 | python--not-raw-string-literal-start-regexp) | ||
| 771 | (1 'font-lock-constant-face t))) | ||
| 720 | "Font lock keywords to use in `python-mode' for maximum decoration. | 772 | "Font lock keywords to use in `python-mode' for maximum decoration. |
| 721 | 773 | ||
| 722 | This decoration level includes everything in | 774 | This decoration level includes everything in |
diff --git a/test/lisp/progmodes/python-tests.el b/test/lisp/progmodes/python-tests.el index 6f2ad87f81a..07f2c4f09a3 100644 --- a/test/lisp/progmodes/python-tests.el +++ b/test/lisp/progmodes/python-tests.el | |||
| @@ -380,6 +380,83 @@ def f(x: CustomInt) -> CustomInt: | |||
| 380 | (128 . font-lock-builtin-face) (131) | 380 | (128 . font-lock-builtin-face) (131) |
| 381 | (144 . font-lock-keyword-face) (150)))) | 381 | (144 . font-lock-keyword-face) (150)))) |
| 382 | 382 | ||
| 383 | (ert-deftest python-font-lock-escape-sequence-string-newline () | ||
| 384 | (python-tests-assert-faces | ||
| 385 | "'\\n' | ||
| 386 | \"\\n\" | ||
| 387 | f'\\n' | ||
| 388 | f\"\\n\" | ||
| 389 | u'\\n' | ||
| 390 | u\"\\n\"" | ||
| 391 | '((1 . font-lock-doc-face) | ||
| 392 | (2 . font-lock-constant-face) | ||
| 393 | (4 . font-lock-doc-face) (5) | ||
| 394 | (6 . font-lock-doc-face) | ||
| 395 | (7 . font-lock-constant-face) | ||
| 396 | (9 . font-lock-doc-face) (10) | ||
| 397 | (12 . font-lock-string-face) | ||
| 398 | (13 . font-lock-constant-face) | ||
| 399 | (15 . font-lock-string-face) (16) | ||
| 400 | (18 . font-lock-string-face) | ||
| 401 | (19 . font-lock-constant-face) | ||
| 402 | (21 . font-lock-string-face) (22) | ||
| 403 | (24 . font-lock-string-face) | ||
| 404 | (25 . font-lock-constant-face) | ||
| 405 | (27 . font-lock-string-face) (28) | ||
| 406 | (30 . font-lock-string-face) | ||
| 407 | (31 . font-lock-constant-face) | ||
| 408 | (33 . font-lock-string-face)))) | ||
| 409 | |||
| 410 | (ert-deftest python-font-lock-escape-sequence-bytes-newline () | ||
| 411 | (python-tests-assert-faces | ||
| 412 | "b'\\n' | ||
| 413 | b\"\\n\"" | ||
| 414 | '((1) | ||
| 415 | (2 . font-lock-doc-face) | ||
| 416 | (3 . font-lock-constant-face) | ||
| 417 | (5 . font-lock-doc-face) (6) | ||
| 418 | (8 . font-lock-doc-face) | ||
| 419 | (9 . font-lock-constant-face) | ||
| 420 | (11 . font-lock-doc-face)))) | ||
| 421 | |||
| 422 | (ert-deftest python-font-lock-escape-sequence-hex-octal () | ||
| 423 | (python-tests-assert-faces | ||
| 424 | "b'\\x12 \\777' | ||
| 425 | '\\x12 \\777'" | ||
| 426 | '((1) | ||
| 427 | (2 . font-lock-doc-face) | ||
| 428 | (3 . font-lock-constant-face) | ||
| 429 | (7 . font-lock-doc-face) | ||
| 430 | (8 . font-lock-constant-face) | ||
| 431 | (12 . font-lock-doc-face) (13) | ||
| 432 | (14 . font-lock-doc-face) | ||
| 433 | (15 . font-lock-constant-face) | ||
| 434 | (19 . font-lock-doc-face) | ||
| 435 | (20 . font-lock-constant-face) | ||
| 436 | (24 . font-lock-doc-face)))) | ||
| 437 | |||
| 438 | (ert-deftest python-font-lock-escape-sequence-unicode () | ||
| 439 | (python-tests-assert-faces | ||
| 440 | "b'\\u1234 \\U00010348 \\N{Plus-Minus Sign}' | ||
| 441 | '\\u1234 \\U00010348 \\N{Plus-Minus Sign}'" | ||
| 442 | '((1) | ||
| 443 | (2 . font-lock-doc-face) (41) | ||
| 444 | (42 . font-lock-doc-face) | ||
| 445 | (43 . font-lock-constant-face) | ||
| 446 | (49 . font-lock-doc-face) | ||
| 447 | (50 . font-lock-constant-face) | ||
| 448 | (60 . font-lock-doc-face) | ||
| 449 | (61 . font-lock-constant-face) | ||
| 450 | (80 . font-lock-doc-face)))) | ||
| 451 | |||
| 452 | (ert-deftest python-font-lock-raw-escape-sequence () | ||
| 453 | (python-tests-assert-faces | ||
| 454 | "rb'\\x12 \123 \\n' | ||
| 455 | r'\\x12 \123 \\n \\u1234 \\U00010348 \\N{Plus-Minus Sign}'" | ||
| 456 | '((1) | ||
| 457 | (3 . font-lock-doc-face) (14) | ||
| 458 | (16 . font-lock-doc-face)))) | ||
| 459 | |||
| 383 | 460 | ||
| 384 | ;;; Indentation | 461 | ;;; Indentation |
| 385 | 462 | ||