aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
author: Paul Eggert, 2016-04-25 10:41:29 -0700
committer: Paul Eggert, 2016-04-25 10:42:48 -0700
commit: 86d083438dba60dc00e9e96414bf7e832720c05a (patch)
tree: 9ca5fac163acf4b1a3bca0e1e8b5c87af26e5a89 /lisp
parent: f069d854508946bcc03e4c77ceb430748e3ab6d7 (diff)
download: emacs-86d083438dba60dc00e9e96414bf7e832720c05a.tar.gz
download: emacs-86d083438dba60dc00e9e96414bf7e832720c05a.zip
New function ‘char-from-name’
This also fixes the mishandling of "\N{CJK COMPATIBILITY IDEOGRAPH-F900}", "\N{VARIATION SELECTOR-1}", etc.
Problem reported by Eli Zaretskii in:
http://lists.gnu.org/archive/html/emacs-devel/2016-04/msg00614.html
* doc/lispref/nonascii.texi (Character Codes), etc/NEWS: Document this.
* lisp/international/mule-cmds.el (char-from-name): New function.
(read-char-by-name): Use it. Document that "BED" is treated as a name, not as a hexadecimal number. Reject out-of-range integers, floating-point numbers, and strings with trailing junk.
* src/lread.c (character_name_to_code): Call char-from-name instead of inspecting ucs-names directly, so that we handle computed names like "VARIATION SELECTOR-1". Do not use an auto string, since char-from-name might GC.
* test/src/lread-tests.el: Add tests for new behavior, and fix some old tests that were wrong.
Diffstat (limited to 'lisp')
-rw-r--r-- lisp/international/mule-cmds.el | 43
1 files changed, 33 insertions, 10 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 8eb320acea5..2ce21a88731 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -2978,6 +2978,27 @@ on encoding."
2978 (let ((char (assoc name ucs-names))) 2978 (let ((char (assoc name ucs-names)))
2979 (when char (format " (%c)" (cdr char))))) 2979 (when char (format " (%c)" (cdr char)))))
2980 2980
2981(defun char-from-name (string &optional ignore-case)
2982 "Return a character as a number from its Unicode name STRING.
2983If optional IGNORE-CASE is non-nil, ignore case in STRING.
2984Return nil if STRING does not name a character."
2985 (or (cdr (assoc-string string (ucs-names) ignore-case))
2986 (let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
2987 (when minus
2988 ;; Parse names like "VARIATION SELECTOR-17" and "CJK
2989 ;; COMPATIBILITY IDEOGRAPH-F900" that are not in ucs-names.
2990 (ignore-errors
2991 (let* ((case-fold-search ignore-case)
2992 (vs (string-match-p "\\`VARIATION SELECTOR-" string))
2993 (minus-num (string-to-number (substring string minus)
2994 (if vs 10 16)))
2995 (vs-offset (if vs (if (< minus-num -16) #xE00EF #xFDFF) 0))
2996 (code (- vs-offset minus-num))
2997 (name (get-char-code-property code 'name)))
2998 (when (eq t (compare-strings string nil nil name nil nil
2999 ignore-case))
3000 code)))))))
3001
2981(defun read-char-by-name (prompt) 3002(defun read-char-by-name (prompt)
2982 "Read a character by its Unicode name or hex number string. 3003 "Read a character by its Unicode name or hex number string.
2983Display PROMPT and read a string that represents a character by its 3004Display PROMPT and read a string that represents a character by its
@@ -2991,9 +3012,11 @@ preceded by an asterisk `*' and use completion, it will show all
2991the characters whose names include that substring, not necessarily 3012the characters whose names include that substring, not necessarily
2992at the beginning of the name. 3013at the beginning of the name.
2993 3014
2994This function also accepts a hexadecimal number of Unicode code 3015Accept a name like \"CIRCULATION FUNCTION\", a hexadecimal
2995point or a number in hash notation, e.g. #o21430 for octal, 3016number like \"2A10\", or a number in hash notation (e.g.,
2996#x2318 for hex, or #10r8984 for decimal." 3017\"#x2a10\" for hex, \"10r10768\" for decimal, or \"#o25020\" for
3018octal). Treat otherwise-ambiguous strings like \"BED\" (U+1F6CF)
3019as names, not numbers."
2997 (let* ((enable-recursive-minibuffers t) 3020 (let* ((enable-recursive-minibuffers t)
2998 (completion-ignore-case t) 3021 (completion-ignore-case t)
2999 (input 3022 (input
@@ -3006,13 +3029,13 @@ point or a number in hash notation, e.g. #o21430 for octal,
3006 (category . unicode-name)) 3029 (category . unicode-name))
3007 (complete-with-action action (ucs-names) string pred))))) 3030 (complete-with-action action (ucs-names) string pred)))))
3008 (char 3031 (char
3009 (cond 3032 (cond
3010 ((string-match-p "\\`[0-9a-fA-F]+\\'" input) 3033 ((char-from-name input t))
3011 (string-to-number input 16)) 3034 ((string-match-p "\\`[0-9a-fA-F]+\\'" input)
3012 ((string-match-p "\\`#" input) 3035 (ignore-errors (string-to-number input 16)))
3013 (read input)) 3036 ((string-match-p "\\`#\\([bBoOxX]\\|[0-9]+[rR]\\)[0-9a-zA-Z]+\\'"
3014 (t 3037 input)
3015 (cdr (assoc-string input (ucs-names) t)))))) 3038 (ignore-errors (read input))))))
3016 (unless (characterp char) 3039 (unless (characterp char)
3017 (error "Invalid character")) 3040 (error "Invalid character"))
3018 char)) 3041 char))