diff options
| author | Paul Eggert | 2016-04-25 10:41:29 -0700 |
|---|---|---|
| committer | Paul Eggert | 2016-04-25 10:42:48 -0700 |
| commit | 86d083438dba60dc00e9e96414bf7e832720c05a (patch) | |
| tree | 9ca5fac163acf4b1a3bca0e1e8b5c87af26e5a89 /lisp | |
| parent | f069d854508946bcc03e4c77ceb430748e3ab6d7 (diff) | |
| download | emacs-86d083438dba60dc00e9e96414bf7e832720c05a.tar.gz emacs-86d083438dba60dc00e9e96414bf7e832720c05a.zip | |
New function ‘char-from-name’
This also fixes the mishandling of "\N{CJK COMPATIBILITY
IDEOGRAPH-F900}", "\N{VARIATION SELECTOR-1}", etc.
Problem reported by Eli Zaretskii in:
http://lists.gnu.org/archive/html/emacs-devel/2016-04/msg00614.html
* doc/lispref/nonascii.texi (Character Codes), etc/NEWS: Document this.
* lisp/international/mule-cmds.el (char-from-name): New function.
(read-char-by-name): Use it. Document that "BED" is treated as
a name, not as a hexadecimal number. Reject out-of-range integers,
floating-point numbers, and strings with trailing junk.
* src/lread.c (character_name_to_code): Call char-from-name
instead of inspecting ucs-names directly, so that we handle
computed names like "VARIATION SELECTOR-1". Do not use an auto
string, since char-from-name might GC.
* test/src/lread-tests.el: Add tests for new behavior, and
fix some old tests that were wrong.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/international/mule-cmds.el | 43 |
1 files changed, 33 insertions, 10 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 8eb320acea5..2ce21a88731 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el | |||
| @@ -2978,6 +2978,27 @@ on encoding." | |||
| 2978 | (let ((char (assoc name ucs-names))) | 2978 | (let ((char (assoc name ucs-names))) |
| 2979 | (when char (format " (%c)" (cdr char))))) | 2979 | (when char (format " (%c)" (cdr char))))) |
| 2980 | 2980 | ||
(defun char-from-name (string &optional ignore-case)
  "Return a character as a number from its Unicode name STRING.
If optional IGNORE-CASE is non-nil, ignore case in STRING.
Return nil if STRING does not name a character.

Names listed by `ucs-names' are looked up there.  A name that ends
in a hyphen followed by a number, such as \"VARIATION SELECTOR-17\"
or \"CJK COMPATIBILITY IDEOGRAPH-F900\", is instead computed from
that number, and is accepted only if the `name' property of the
resulting character matches STRING."
  (or (cdr (assoc-string string (ucs-names) ignore-case))
      ;; Bind `case-fold-search' around *both* regexp matches, so that
      ;; recognition of a lowercase hex suffix depends only on
      ;; IGNORE-CASE and never on the caller's ambient setting.
      (let* ((case-fold-search ignore-case)
             (minus (string-match-p "-[0-9A-F]+\\'" string)))
        (when minus
          ;; Parse names like "VARIATION SELECTOR-17" and "CJK
          ;; COMPATIBILITY IDEOGRAPH-F900" that are not in ucs-names.
          (ignore-errors
            (let* ((vs (string-match-p "\\`VARIATION SELECTOR-" string))
                   ;; The substring starts at the hyphen, so the suffix
                   ;; is read as a *negative* number: decimal for
                   ;; variation selectors, hexadecimal otherwise.
                   (minus-num (string-to-number (substring string minus)
                                                (if vs 10 16)))
                   ;; Offsets chosen so that selector 1 yields #xFE00
                   ;; and selector 17 yields #xE0100.
                   (vs-offset (if vs (if (< minus-num -16) #xE00EF #xFDFF) 0))
                   ;; Subtracting the negative number adds its magnitude.
                   (code (- vs-offset minus-num))
                   (name (get-char-code-property code 'name)))
              ;; Accept CODE only when its name matches STRING in full;
              ;; any error raised above is swallowed and yields nil.
              (when (eq t (compare-strings string nil nil name nil nil
                                           ignore-case))
                code)))))))
| 3001 | |||
| 2981 | (defun read-char-by-name (prompt) | 3002 | (defun read-char-by-name (prompt) |
| 2982 | "Read a character by its Unicode name or hex number string. | 3003 | "Read a character by its Unicode name or hex number string. |
| 2983 | Display PROMPT and read a string that represents a character by its | 3004 | Display PROMPT and read a string that represents a character by its |
| @@ -2991,9 +3012,11 @@ preceded by an asterisk `*' and use completion, it will show all | |||
| 2991 | the characters whose names include that substring, not necessarily | 3012 | the characters whose names include that substring, not necessarily |
| 2992 | at the beginning of the name. | 3013 | at the beginning of the name. |
| 2993 | 3014 | ||
| 2994 | This function also accepts a hexadecimal number of Unicode code | 3015 | Accept a name like \"CIRCULATION FUNCTION\", a hexadecimal |
| 2995 | point or a number in hash notation, e.g. #o21430 for octal, | 3016 | number like \"2A10\", or a number in hash notation (e.g., |
| 2996 | #x2318 for hex, or #10r8984 for decimal." | 3017 | \"#x2a10\" for hex, \"#10r10768\" for decimal, or \"#o25020\" for |
| 3018 | octal). Treat otherwise-ambiguous strings like \"BED\" (U+1F6CF) | ||
| 3019 | as names, not numbers." | ||
| 2997 | (let* ((enable-recursive-minibuffers t) | 3020 | (let* ((enable-recursive-minibuffers t) |
| 2998 | (completion-ignore-case t) | 3021 | (completion-ignore-case t) |
| 2999 | (input | 3022 | (input |
| @@ -3006,13 +3029,13 @@ point or a number in hash notation, e.g. #o21430 for octal, | |||
| 3006 | (category . unicode-name)) | 3029 | (category . unicode-name)) |
| 3007 | (complete-with-action action (ucs-names) string pred))))) | 3030 | (complete-with-action action (ucs-names) string pred))))) |
| 3008 | (char | 3031 | (char |
| 3009 | (cond | 3032 | (cond |
| 3010 | ((string-match-p "\\`[0-9a-fA-F]+\\'" input) | 3033 | ((char-from-name input t)) |
| 3011 | (string-to-number input 16)) | 3034 | ((string-match-p "\\`[0-9a-fA-F]+\\'" input) |
| 3012 | ((string-match-p "\\`#" input) | 3035 | (ignore-errors (string-to-number input 16))) |
| 3013 | (read input)) | 3036 | ((string-match-p "\\`#\\([bBoOxX]\\|[0-9]+[rR]\\)[0-9a-zA-Z]+\\'" |
| 3014 | (t | 3037 | input) |
| 3015 | (cdr (assoc-string input (ucs-names) t)))))) | 3038 | (ignore-errors (read input)))))) |
| 3016 | (unless (characterp char) | 3039 | (unless (characterp char) |
| 3017 | (error "Invalid character")) | 3040 | (error "Invalid character")) |
| 3018 | char)) | 3041 | char)) |