aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lars Ingebrigtsen 2015-12-29 17:46:00 +0100
committer Lars Ingebrigtsen 2015-12-29 17:46:00 +0100
commit d259328fb87db8cc67d52771efcfa653e52c5b71 (patch)
tree e80af1fce52d19babcd090e8f4dc56f56c738d65
parent d181366dc39620eb0f249fc3f1d58b6199b9e44d (diff)
downloademacs-d259328fb87db8cc67d52771efcfa653e52c5b71.tar.gz
emacs-d259328fb87db8cc67d52771efcfa653e52c5b71.zip
Further Unicode restrictive fixups
* puny.el (puny-highly-restrictive-p): Include the extra identifier characters from table 3.
-rw-r--r--lisp/net/puny.el31
1 files changed, 27 insertions, 4 deletions
diff --git a/lisp/net/puny.el b/lisp/net/puny.el
index 08da51b587f..ac47e13c97d 100644
--- a/lisp/net/puny.el
+++ b/lisp/net/puny.el
@@ -191,13 +191,36 @@ For instance \"xn--bcher-kva\" => \"bücher\"."
191 (buffer-string))) 191 (buffer-string)))
192 192
193;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection 193;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection
194;; http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
194 195
195(defun puny-highly-restrictive-p (string) 196(defun puny-highly-restrictive-p (string)
196 (let ((scripts 197 (let ((scripts
197 (seq-uniq 198 (delq
198 (seq-map (lambda (char) 199 t
199 (aref char-script-table char)) 200 (seq-uniq
200 string)))) 201 (seq-map (lambda (char)
202 (if (memq char
203 ;; These characters are always allowed
204 ;; in any string.
205 '(#x0027 ; APOSTROPHE
206 #x002D ; HYPHEN-MINUS
207 #x002E ; FULL STOP
208 #x003A ; COLON
209 #x00B7 ; MIDDLE DOT
210 #x058A ; ARMENIAN HYPHEN
211 #x05F3 ; HEBREW PUNCTUATION GERESH
212 #x05F4 ; HEBREW PUNCTUATION GERSHAYIM
213 #x0F0B ; TIBETAN MARK INTERSYLLABIC TSHEG
214 #x200C ; ZERO WIDTH NON-JOINER*
215 #x200D ; ZERO WIDTH JOINER*
216 #x2010 ; HYPHEN
217 #x2019 ; RIGHT SINGLE QUOTATION MARK
218 #x2027 ; HYPHENATION POINT
219 #x30A0 ; KATAKANA-HIRAGANA DOUBLE HYPHEN
220 #x30FB)) ; KATAKANA MIDDLE DOT
221 t
222 (aref char-script-table char)))
223 string)))))
201 (or 224 (or
202 ;; Every character uses the same script. 225 ;; Every character uses the same script.
203 (= (length scripts) 1) 226 (= (length scripts) 1)