diff options
| author | Lars Ingebrigtsen | 2015-12-29 17:46:00 +0100 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2015-12-29 17:46:00 +0100 |
| commit | d259328fb87db8cc67d52771efcfa653e52c5b71 (patch) | |
| tree | e80af1fce52d19babcd090e8f4dc56f56c738d65 | |
| parent | d181366dc39620eb0f249fc3f1d58b6199b9e44d (diff) | |
| download | emacs-d259328fb87db8cc67d52771efcfa653e52c5b71.tar.gz emacs-d259328fb87db8cc67d52771efcfa653e52c5b71.zip | |
Further Unicode restrictive fixups
* puny.el (puny-highly-restrictive-p): Include the extra
identifier characters from table 3.
| -rw-r--r-- | lisp/net/puny.el | 31 |
1 file changed, 27 insertions, 4 deletions
diff --git a/lisp/net/puny.el b/lisp/net/puny.el index 08da51b587f..ac47e13c97d 100644 --- a/lisp/net/puny.el +++ b/lisp/net/puny.el | |||
| @@ -191,13 +191,36 @@ For instance \"xn--bcher-kva\" => \"bücher\"." | |||
| 191 | (buffer-string))) | 191 | (buffer-string))) |
| 192 | 192 | ||
| 193 | ;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection | 193 | ;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection |
| 194 | ;; http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers | ||
| 194 | 195 | ||
| 195 | (defun puny-highly-restrictive-p (string) | 196 | (defun puny-highly-restrictive-p (string) |
| 196 | (let ((scripts | 197 | (let ((scripts |
| 197 | (seq-uniq | 198 | (delq |
| 198 | (seq-map (lambda (char) | 199 | t |
| 199 | (aref char-script-table char)) | 200 | (seq-uniq |
| 200 | string)))) | 201 | (seq-map (lambda (char) |
| 202 | (if (memq char | ||
| 203 | ;; These characters are always allowed | ||
| 204 | ;; in any string. | ||
| 205 | '(#x0027 ; APOSTROPHE | ||
| 206 | #x002D ; HYPHEN-MINUS | ||
| 207 | #x002E ; FULL STOP | ||
| 208 | #x003A ; COLON | ||
| 209 | #x00B7 ; MIDDLE DOT | ||
| 210 | #x058A ; ARMENIAN HYPHEN | ||
| 211 | #x05F3 ; HEBREW PUNCTUATION GERESH | ||
| 212 | #x05F4 ; HEBREW PUNCTUATION GERSHAYIM | ||
| 213 | #x0F0B ; TIBETAN MARK INTERSYLLABIC TSHEG | ||
| 214 | #x200C ; ZERO WIDTH NON-JOINER* | ||
| 215 | #x200D ; ZERO WIDTH JOINER* | ||
| 216 | #x2010 ; HYPHEN | ||
| 217 | #x2019 ; RIGHT SINGLE QUOTATION MARK | ||
| 218 | #x2027 ; HYPHENATION POINT | ||
| 219 | #x30A0 ; KATAKANA-HIRAGANA DOUBLE HYPHEN | ||
| 220 | #x30FB)) ; KATAKANA MIDDLE DOT | ||
| 221 | t | ||
| 222 | (aref char-script-table char))) | ||
| 223 | string))))) | ||
| 201 | (or | 224 | (or |
| 202 | ;; Every character uses the same script. | 225 | ;; Every character uses the same script. |
| 203 | (= (length scripts) 1) | 226 | (= (length scripts) 1) |