diff options
| author | Dave Love | 2003-05-08 17:58:17 +0000 |
|---|---|---|
| committer | Dave Love | 2003-05-08 17:58:17 +0000 |
| commit | 4b971341ff5ad247dccd762f13cd0a0360bc0510 (patch) | |
| tree | a30f349e1e50c5e5ebc15763c7cfc9edb9942880 | |
| parent | e20f36df99957bc9f29b6b9a56bcf887e6c92885 (diff) | |
| download | emacs-4b971341ff5ad247dccd762f13cd0a0360bc0510.tar.gz emacs-4b971341ff5ad247dccd762f13cd0a0360bc0510.zip | |
*** empty log message ***
| -rw-r--r-- | lisp/ChangeLog | 6 | ||||
| -rw-r--r-- | lisp/international/utf-7.el | 143 |
2 files changed, 149 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index d16f2b1f6b6..68ce460fb27 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,9 @@ | |||
| 1 | 2003-05-08 Dave Love <fx@gnu.org> | ||
| 2 | |||
| 3 | * international/utf-7.el: New file. | ||
| 4 | |||
| 5 | * international/mule-conf.el (utf-7): New. | ||
| 6 | |||
| 1 | 2003-05-06 Kenichi Handa <handa@m17n.org> | 7 | 2003-05-06 Kenichi Handa <handa@m17n.org> |
| 2 | 8 | ||
| 3 | * international/mule-conf.el (utf-16-be) | 9 | * international/mule-conf.el (utf-16-be) |
diff --git a/lisp/international/utf-7.el b/lisp/international/utf-7.el new file mode 100644 index 00000000000..f45c7cd905a --- /dev/null +++ b/lisp/international/utf-7.el | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | ;;; utf-7.el --- utf-7 coding system | ||
| 2 | |||
| 3 | ;; Copyright (C) 2003 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Author: Dave Love <fx@gnu.org> | ||
| 6 | ;; Keywords: i18n, mail | ||
| 7 | |||
| 8 | ;; This file is free software; you can redistribute it and/or modify | ||
| 9 | ;; it under the terms of the GNU General Public License as published by | ||
| 10 | ;; the Free Software Foundation; either version 2, or (at your option) | ||
| 11 | ;; any later version. | ||
| 12 | |||
| 13 | ;; This file is distributed in the hope that it will be useful, | ||
| 14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | ;; GNU General Public License for more details. | ||
| 17 | |||
| 18 | ;; You should have received a copy of the GNU General Public License | ||
| 19 | ;; along with GNU Emacs; see the file COPYING. If not, write to | ||
| 20 | ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | ;; Boston, MA 02111-1307, USA. | ||
| 22 | |||
| 23 | ;;; Commentary: | ||
| 24 | |||
| 25 | ;; Defines a coding system for UTF-7, defined in RFC 2152. Non-ASCII | ||
| 26 | ;; segments are encoded as base64-encoded big endian UTF-16. Also | ||
| 27 | ;; defines a variation required for IMAP (RFC 2060). | ||
| 28 | |||
| 29 | ;; The encoding and decoding was originally taken from Jon K Hellan's | ||
| 30 | ;; implementation in Gnus, but has been substantially re-done. | ||
| 31 | |||
| 32 | ;; This probably needs more attention. In particular, it's not | ||
| 33 | ;; completely consistent with iconv's behaviour. It's arguable | ||
| 34 | ;; whether the IMAP version should be a coding system since it's | ||
| 35 | ;; apparently only used for IMAP mailbox names. | ||
| 36 | |||
| 37 | ;;; Code: | ||
| 38 | |||
| 39 | ;; See mule-conf. | ||
| 40 | ;; (define-coding-system 'utf-7 | ||
| 41 | ;; "UTF-7 encoding of Unicode (RFC 2152)" | ||
| 42 | ;; :coding-type 'utf-8 | ||
| 43 | ;; :mnemonic ?U | ||
| 44 | ;; :mime-charset 'utf-7 | ||
| 45 | ;; :charset-list '(unicode) | ||
| 46 | ;; :pre-write-conversion 'utf-7-pre-write-conversion | ||
| 47 | ;; :post-read-conversion 'utf-7-post-read-conversion) | ||
| 48 | |||
| 49 | ;; (define-coding-system 'utf-7-imap | ||
| 50 | ;; "UTF-7 encoding of Unicode, IMAP version (RFC 2060)" | ||
| 51 | ;; :coding-type 'utf-8 | ||
| 52 | ;; :mnemonic ?U | ||
| 53 | ;; :mime-charset 'utf-7 | ||
| 54 | ;; :charset-list '(unicode) | ||
| 55 | ;; :pre-write-conversion 'utf-7-imap-pre-write-conversion | ||
| 56 | ;; :post-read-conversion 'utf-7-imap-post-read-conversion) | ||
| 57 | |||
| 58 | ;;;###autoload | ||
(defun utf-7-decode (len imap)
  "Decode LEN bytes of UTF-7 at point.
IMAP non-nil means use the IMAP version, in which the escape
character is `&' instead of `+' and `,' replaces `/' in the base64
alphabet.
Point is preserved.  Return the length of the decoded text."
  (save-excursion
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((not-esc (if imap "^&" "^+"))
            (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/")))
        (while (not (eobp))
          ;; Text up to the next escape character is left untouched.
          (skip-chars-forward not-esc)
          (unless (eobp)
            ;; Step over the escape character; P is the start of the
            ;; base64 run that follows it.
            (forward-char)
            (let ((p (point))
                  (run-length (skip-chars-forward skip-chars)))
              ;; An explicit `-' terminator is not part of the text.
              (if (eq ?- (char-after))
                  (delete-char 1))
              ;; A zero-length run is an encoded lone esc-char (e.g.
              ;; `+-'): the escape character itself is the text, so
              ;; keep it.
              (unless (= run-length 0)
                ;; The run is unpadded base64; restore the `=' padding
                ;; before handing it to `base64-decode-region'.
                (let ((pl (mod (- run-length) 4)))
                  (insert-char ?= pl)
                  (if imap
                      (subst-char-in-region p (point) ?, ?/))
                  (base64-decode-region p (point)))
                (decode-coding-region p (point) 'utf-16-be)
                ;; Remove the escape character preceding the run.
                ;; Use `delete-char', not the interactive-only command
                ;; `delete-backward-char', which may act on an active
                ;; region rather than on a single character.
                (save-excursion
                  (goto-char p)
                  (delete-char -1)))))))
      (- (point-max) (point-min)))))
| 86 | |||
;;;###autoload
(defun utf-7-post-read-conversion (len)
  "Decode LEN bytes of standard (RFC 2152) UTF-7 at point.
This is the function named as `:post-read-conversion' for the `utf-7'
coding system, so it is autoloaded: the coding system can be used
before this library has been loaded.
Return the value of `utf-7-decode', the length of the decoded text."
  (utf-7-decode len nil))
| 89 | |||
;;;###autoload
(defun utf-7-imap-post-read-conversion (len)
  "Decode LEN bytes of IMAP-style (RFC 2060) UTF-7 at point.
This is the function named as `:post-read-conversion' for the
`utf-7-imap' coding system, so it is autoloaded: the coding system can
be used before this library has been loaded.
Return the value of `utf-7-decode', the length of the decoded text."
  (utf-7-decode len t))
| 92 | |||
| 93 | ;;;###autoload | ||
(defun utf-7-encode (from to imap)
  "Encode the text between FROM and TO as UTF-7.
FROM may also be a string, in which case it is the text to encode and
TO is ignored.
IMAP non-nil means use the IMAP version (RFC 2060): the escape
character is `&' instead of `+', `,' replaces `/' in the base64
alphabet, and a different set of characters is encoded as is.
The encoded text is left in a newly generated buffer, which is made
current.  Return nil."
  (let* ((old-buf (current-buffer))
         (esc (if imap ?& ?+))
         ;; These are characters which can be encoded as is.
         (skip-chars (if imap
                         "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060
                       ;; This includes the rfc2152 optional set.
                       ;; Perhaps it shouldn't (like iconv).
                       "\t\n\r -*,-[]-}"))
         ;; Matches a run of characters that must be base64-encoded:
         ;; anything that is neither directly encodable nor ESC.
         (not-skip-chars (format "^%s%c" skip-chars esc)))
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
        (insert from)
      (insert-buffer-substring old-buf from to))
    (goto-char (point-min))
    (while (not (eobp))
      (skip-chars-forward skip-chars)
      ;; A literal escape character is written as ESC followed by `-'.
      ;; The test must be against ESC rather than `+': in the IMAP
      ;; version a literal `&' would otherwise never be consumed and
      ;; the loop would not terminate.
      (if (eq esc (char-after))
          (progn (forward-char)
                 (insert ?-))
        (unless (eobp)
          (insert esc)
          (let ((p (point)))
            (skip-chars-forward not-skip-chars)
            (save-restriction
              ;; encode-coding-region doesn't preserve point
              (narrow-to-region p (point))
              (encode-coding-region p (point-max) 'utf-16-be)
              ;; Suppress line breaks: a newline in the base64 text
              ;; would make the padding removal below throw away
              ;; everything after the first line.
              (base64-encode-region p (point-max) t)
              (if imap
                  (subst-char-in-region p (point-max) ?/ ?,))
              (goto-char p)
              ;; As I read the RFC, this isn't correct, but it's
              ;; consistent with iconv, at least regarding `='.
              (skip-chars-forward "^= \t\n")
              (delete-region (point) (point-max))))
          ;; Terminate the shifted sequence.  At the end of the text
          ;; this is optional for plain UTF-7, but RFC 2060 requires
          ;; a name ending in an encoded run to end with `-'.
          (if (or imap (not (eobp)))
              (insert ?-)))))
    nil))
| 135 | |||
;;;###autoload
(defun utf-7-pre-write-conversion (from to)
  "Encode the text between FROM and TO as standard (RFC 2152) UTF-7.
FROM may also be a string to encode, in which case TO is ignored.
This is the function named as `:pre-write-conversion' for the `utf-7'
coding system, so it is autoloaded: the coding system can be used
before this library has been loaded.
The work is done by `utf-7-encode', which leaves the encoded text in a
new current buffer and returns nil."
  (utf-7-encode from to nil))
| 138 | |||
;;;###autoload
(defun utf-7-imap-pre-write-conversion (from to)
  "Encode the text between FROM and TO as IMAP-style (RFC 2060) UTF-7.
FROM may also be a string to encode, in which case TO is ignored.
This is the function named as `:pre-write-conversion' for the
`utf-7-imap' coding system, so it is autoloaded: the coding system can
be used before this library has been loaded.
The work is done by `utf-7-encode', which leaves the encoded text in a
new current buffer and returns nil."
  (utf-7-encode from to t))
| 141 | |||
| 142 | (provide 'utf-7) | ||
| 143 | ;;; utf-7.el ends here | ||