diff options
| author | Lars Ingebrigtsen | 2015-12-28 18:41:13 +0100 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2015-12-28 18:41:13 +0100 |
| commit | 1f11b33a780ca4adeff7560cf347ea41cd31bc43 (patch) | |
| tree | 63f6571f241c59781ad6feafba54eaa601754d08 | |
| parent | ad1f24f96b204e6e61051f896a713b03708391a0 (diff) | |
| download | emacs-1f11b33a780ca4adeff7560cf347ea41cd31bc43.tar.gz emacs-1f11b33a780ca4adeff7560cf347ea41cd31bc43.zip | |
Add IDNA domain encode/decode functions
* puny.el (puny-decode-domain): New function.
(puny-encode-domain): Ditto.
(puny-decode-digit): Fix digit decoding error.
| -rw-r--r-- | lisp/net/puny.el | 109 |
1 files changed, 59 insertions, 50 deletions
diff --git a/lisp/net/puny.el b/lisp/net/puny.el index 474ecda3c0a..5874871a90d 100644 --- a/lisp/net/puny.el +++ b/lisp/net/puny.el | |||
| @@ -29,6 +29,11 @@ | |||
| 29 | 29 | ||
| 30 | (require 'seq) | 30 | (require 'seq) |
| 31 | 31 | ||
| 32 | (defun puny-encode-domain (domain) | ||
| 33 | "Encode DOMAIN according to the IDNA/punycode algorith. | ||
| 34 | For instance, \"fśf.org\" => \"xn--ff-2sa.org\"." | ||
| 35 | (mapconcat 'puny-encode-string (split-string domain "[.]") ".")) | ||
| 36 | |||
| 32 | (defun puny-encode-string (string) | 37 | (defun puny-encode-string (string) |
| 33 | "Encode STRING according to the IDNA/punycode algorithm. | 38 | "Encode STRING according to the IDNA/punycode algorithm. |
| 34 | This is used to encode non-ASCII domain names. | 39 | This is used to encode non-ASCII domain names. |
| @@ -40,10 +45,15 @@ For instance, \"bücher\" => \"xn--bcher-kva\"." | |||
| 40 | string | 45 | string |
| 41 | (concat "xn--" ascii "-" (puny-encode-complex (length ascii) string))))) | 46 | (concat "xn--" ascii "-" (puny-encode-complex (length ascii) string))))) |
| 42 | 47 | ||
| 48 | (defun puny-decode-domain (domain) | ||
| 49 | "Decode DOMAIN according to the IDNA/punycode algorith. | ||
| 50 | For instance, \"xn--ff-2sa.org\" => \"fśf.org\"." | ||
| 51 | (mapconcat 'puny-decode-string (split-string domain "[.]") ".")) | ||
| 52 | |||
| 43 | (defun puny-decode-string (string) | 53 | (defun puny-decode-string (string) |
| 44 | "Decode an IDNA/punycode-encoded string. | 54 | "Decode an IDNA/punycode-encoded string. |
| 45 | For instance \"xn--bcher-kva\" => \"bücher\"." | 55 | For instance \"xn--bcher-kva\" => \"bücher\"." |
| 46 | (if (string-match "\\`xn--.*-" string) | 56 | (if (string-match "\\`xn--" string) |
| 47 | (puny-decode-string-internal (substring string 4)) | 57 | (puny-decode-string-internal (substring string 4)) |
| 48 | string)) | 58 | string)) |
| 49 | 59 | ||
| @@ -55,17 +65,6 @@ For instance \"xn--bcher-kva\" => \"bücher\"." | |||
| 55 | (defconst puny-tmax 26) | 65 | (defconst puny-tmax 26) |
| 56 | (defconst puny-skew 28) | 66 | (defconst puny-skew 28) |
| 57 | 67 | ||
| 58 | (defun puny-decode-digit (cp) | ||
| 59 | (cond | ||
| 60 | ((<= cp ?9) | ||
| 61 | (- cp ?0)) | ||
| 62 | ((<= cp ?Z) | ||
| 63 | (- cp ?A)) | ||
| 64 | ((<= cp ?z) | ||
| 65 | (- cp ?a)) | ||
| 66 | (t | ||
| 67 | puny-base))) | ||
| 68 | |||
| 69 | ;; 0-25 a-z | 68 | ;; 0-25 a-z |
| 70 | ;; 26-36 0-9 | 69 | ;; 26-36 0-9 |
| 71 | (defun puny-encode-digit (d) | 70 | (defun puny-encode-digit (d) |
| @@ -129,48 +128,58 @@ For instance \"xn--bcher-kva\" => \"bücher\"." | |||
| 129 | (cl-incf n)) | 128 | (cl-incf n)) |
| 130 | (nreverse result))) | 129 | (nreverse result))) |
| 131 | 130 | ||
| 131 | (defun puny-decode-digit (cp) | ||
| 132 | (cond | ||
| 133 | ((<= cp ?9) | ||
| 134 | (+ (- cp ?0) 26)) | ||
| 135 | ((<= cp ?Z) | ||
| 136 | (- cp ?A)) | ||
| 137 | ((<= cp ?z) | ||
| 138 | (- cp ?a)) | ||
| 139 | (t | ||
| 140 | puny-base))) | ||
| 141 | |||
| 132 | (defun puny-decode-string-internal (string) | 142 | (defun puny-decode-string-internal (string) |
| 133 | (with-temp-buffer | 143 | (with-temp-buffer |
| 134 | (insert string) | 144 | (insert string) |
| 135 | (goto-char (point-max)) | 145 | (goto-char (point-max)) |
| 136 | (if (not (search-backward "-" nil t)) | 146 | (search-backward "-" nil (point-min)) |
| 137 | (error "Invalid PUNY string") | 147 | ;; The encoded chars are after the final dash. |
| 138 | ;; The encoded chars are after the final dash. | 148 | (let ((encoded (buffer-substring (1+ (point)) (point-max))) |
| 139 | (let ((encoded (buffer-substring (1+ (point)) (point-max))) | 149 | (ic 0) |
| 140 | (ic 0) | 150 | (i 0) |
| 141 | (i 0) | 151 | (bias puny-initial-bias) |
| 142 | (bias puny-initial-bias) | 152 | (n puny-initial-n) |
| 143 | (n puny-initial-n) | 153 | out) |
| 144 | out) | 154 | (delete-region (point) (point-max)) |
| 145 | (delete-region (point) (point-max)) | 155 | (while (< ic (length encoded)) |
| 146 | (while (< ic (length encoded)) | 156 | (let ((old-i i) |
| 147 | (let ((old-i i) | 157 | (w 1) |
| 148 | (w 1) | 158 | (k puny-base) |
| 149 | (k puny-base) | 159 | digit t1) |
| 150 | digit t1) | 160 | (cl-loop do (progn |
| 151 | (cl-loop do (progn | 161 | (setq digit (puny-decode-digit (aref encoded ic))) |
| 152 | (setq digit (puny-decode-digit (aref encoded ic))) | 162 | (cl-incf ic) |
| 153 | (cl-incf ic) | 163 | (cl-incf i (* digit w)) |
| 154 | (cl-incf i (* digit w)) | 164 | (setq t1 (cond |
| 155 | (setq t1 (cond | 165 | ((<= k bias) |
| 156 | ((<= k bias) | 166 | puny-tmin) |
| 157 | puny-tmin) | 167 | ((>= k (+ bias puny-tmax)) |
| 158 | ((>= k (+ bias puny-tmax)) | 168 | puny-tmax) |
| 159 | puny-tmax) | 169 | (t |
| 160 | (t | 170 | (- k bias))))) |
| 161 | (- k bias))))) | 171 | while (>= digit t1) |
| 162 | while (>= digit t1) | 172 | do (setq w (* w (- puny-base t1)) |
| 163 | do (setq w (* w (- puny-base t1)) | 173 | k (+ k puny-base))) |
| 164 | k (+ k puny-base))) | 174 | (setq out (1+ (buffer-size))) |
| 165 | (setq out (1+ (buffer-size))) | 175 | (setq bias (puny-adapt (- i old-i) out (= old-i 0)))) |
| 166 | (setq bias (puny-adapt (- i old-i) out (= old-i 0)))) | 176 | |
| 167 | 177 | (setq n (+ n (/ i out)) | |
| 168 | (setq n (+ n (/ i out)) | 178 | i (mod i out)) |
| 169 | i (mod i out)) | 179 | (goto-char (point-min)) |
| 170 | (goto-char (point-min)) | 180 | (forward-char i) |
| 171 | (forward-char i) | 181 | (insert (format "%c" n)) |
| 172 | (insert (format "%c" n)) | 182 | (cl-incf i))) |
| 173 | (cl-incf i)))) | ||
| 174 | (buffer-string))) | 183 | (buffer-string))) |
| 175 | 184 | ||
| 176 | (provide 'puny) | 185 | (provide 'puny) |