diff options
| author | Dave Love | 2002-09-05 17:43:48 +0000 |
|---|---|---|
| committer | Dave Love | 2002-09-05 17:43:48 +0000 |
| commit | 6ec99eb279d5ca0f588cf4236f52e0978971abf8 (patch) | |
| tree | f8c946c8e8b1e19f135bbd580b14f5a286d39beb | |
| parent | 0c129bca32a03adc6f12b088ba4132b52d5dcf94 (diff) | |
| download | emacs-6ec99eb279d5ca0f588cf4236f52e0978971abf8.tar.gz emacs-6ec99eb279d5ca0f588cf4236f52e0978971abf8.zip | |
(message-posting-charset): defvar when compiling.
(rfc2047-header-encoding-alist): Add `address-mime' part.
(rfc2047-charset-encoding-alist): Use B for iso-8859-7. Doc fix.
(rfc2047-q-encoding-alist): Augment header list.
(rfc2047-encodable-p): Use mm-find-mime-charset-region.
(rfc2047-special-chars, rfc2047-non-special-chars): New.
(rfc2047-dissect-region, rfc2047-encode-region, rfc2047-encode):
Rewritten to avoid charset stuff and to take account of rfc2822
tokens.
(rfc2047-encode-message-header): Don't include header name field
in encoding. Add `address-mime' case and bind
rfc2047-special-chars for `mime' case.
| -rw-r--r-- | lisp/gnus/rfc2047.el | 225 |
1 files changed, 121 insertions, 104 deletions
diff --git a/lisp/gnus/rfc2047.el b/lisp/gnus/rfc2047.el index c1dad4197dc..570681cc0ab 100644 --- a/lisp/gnus/rfc2047.el +++ b/lisp/gnus/rfc2047.el | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
| 2 | ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | 2 | ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. |
| 3 | 3 | ||
| 4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | 4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
| 5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | 5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> |
| @@ -27,7 +27,9 @@ | |||
| 27 | 27 | ||
| 28 | ;;; Code: | 28 | ;;; Code: |
| 29 | 29 | ||
| 30 | (eval-when-compile (require 'cl)) | 30 | (eval-when-compile |
| 31 | (require 'cl) | ||
| 32 | (defvar message-posting-charset)) | ||
| 31 | 33 | ||
| 32 | (require 'qp) | 34 | (require 'qp) |
| 33 | (require 'mm-util) | 35 | (require 'mm-util) |
| @@ -41,6 +43,8 @@ | |||
| 41 | (defvar rfc2047-header-encoding-alist | 43 | (defvar rfc2047-header-encoding-alist |
| 42 | '(("Newsgroups" . nil) | 44 | '(("Newsgroups" . nil) |
| 43 | ("Message-ID" . nil) | 45 | ("Message-ID" . nil) |
| 46 | ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . | ||
| 47 | address-mime) | ||
| 44 | (t . mime)) | 48 | (t . mime)) |
| 45 | "*Header/encoding method alist. | 49 | "*Header/encoding method alist. |
| 46 | The list is traversed sequentially. The keys can either be | 50 | The list is traversed sequentially. The keys can either be |
| @@ -50,8 +54,10 @@ The values can be: | |||
| 50 | 54 | ||
| 51 | 1) nil, in which case no encoding is done; | 55 | 1) nil, in which case no encoding is done; |
| 52 | 2) `mime', in which case the header will be encoded according to RFC2047; | 56 | 2) `mime', in which case the header will be encoded according to RFC2047; |
| 53 | 3) a charset, in which case it will be encoded as that charset; | 57 | 3) `address-mime', like `mime', but takes account of the rules for address |
| 54 | 4) `default', in which case the field will be encoded as the rest | 58 | fields (where quoted strings and comments must be treated separately); |
| 59 | 4) a charset, in which case it will be encoded as that charset; | ||
| 60 | 5) `default', in which case the field will be encoded as the rest | ||
| 55 | of the article.") | 61 | of the article.") |
| 56 | 62 | ||
| 57 | (defvar rfc2047-charset-encoding-alist | 63 | (defvar rfc2047-charset-encoding-alist |
| @@ -62,7 +68,7 @@ The values can be: | |||
| 62 | (iso-8859-4 . Q) | 68 | (iso-8859-4 . Q) |
| 63 | (iso-8859-5 . B) | 69 | (iso-8859-5 . B) |
| 64 | (koi8-r . B) | 70 | (koi8-r . B) |
| 65 | (iso-8859-7 . Q) | 71 | (iso-8859-7 . B) |
| 66 | (iso-8859-8 . B) | 72 | (iso-8859-8 . B) |
| 67 | (iso-8859-9 . Q) | 73 | (iso-8859-9 . Q) |
| 68 | (iso-8859-14 . Q) | 74 | (iso-8859-14 . Q) |
| @@ -88,7 +94,8 @@ quoted-printable and base64 respectively.") | |||
| 88 | "Alist of RFC2047 encodings to encoding functions.") | 94 | "Alist of RFC2047 encodings to encoding functions.") |
| 89 | 95 | ||
| 90 | (defvar rfc2047-q-encoding-alist | 96 | (defvar rfc2047-q-encoding-alist |
| 91 | '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") | 97 | '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" |
| 98 | . "-A-Za-z0-9!*+/" ) | ||
| 92 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | 99 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. |
| 93 | ;; Avoid using 8bit characters. | 100 | ;; Avoid using 8bit characters. |
| 94 | ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | 101 | ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" |
| @@ -142,21 +149,26 @@ Should be called narrowed to the head of the message." | |||
| 142 | (eq (car elem) t)) | 149 | (eq (car elem) t)) |
| 143 | (setq alist nil | 150 | (setq alist nil |
| 144 | method (cdr elem)))) | 151 | method (cdr elem)))) |
| 152 | (goto-char (point-min)) | ||
| 153 | (re-search-forward "^[^:]+: *" nil t) | ||
| 145 | (cond | 154 | (cond |
| 155 | ((eq method 'address-mime) | ||
| 156 | (rfc2047-encode-region (point) (point-max))) | ||
| 146 | ((eq method 'mime) | 157 | ((eq method 'mime) |
| 147 | (rfc2047-encode-region (point-min) (point-max))) | 158 | (let (rfc2047-special-chars) |
| 159 | (rfc2047-encode-region (point) (point-max)))) | ||
| 148 | ((eq method 'default) | 160 | ((eq method 'default) |
| 149 | (if (and (featurep 'mule) | 161 | (if (and (featurep 'mule) |
| 150 | (if (boundp 'default-enable-multibyte-characters) | 162 | (if (boundp 'default-enable-multibyte-characters) |
| 151 | default-enable-multibyte-characters) | 163 | default-enable-multibyte-characters) |
| 152 | mail-parse-charset) | 164 | mail-parse-charset) |
| 153 | (mm-encode-coding-region (point-min) (point-max) | 165 | (mm-encode-coding-region (point) (point-max) |
| 154 | mail-parse-charset))) | 166 | mail-parse-charset))) |
| 155 | ((mm-coding-system-p method) | 167 | ((mm-coding-system-p method) |
| 156 | (if (and (featurep 'mule) | 168 | (if (and (featurep 'mule) |
| 157 | (if (boundp 'default-enable-multibyte-characters) | 169 | (if (boundp 'default-enable-multibyte-characters) |
| 158 | default-enable-multibyte-characters)) | 170 | default-enable-multibyte-characters)) |
| 159 | (mm-encode-coding-region (point-min) (point-max) method))) | 171 | (mm-encode-coding-region (point) (point-max) method))) |
| 160 | ;; Hm. | 172 | ;; Hm. |
| 161 | (t))) | 173 | (t))) |
| 162 | (goto-char (point-max))))))) | 174 | (goto-char (point-max))))))) |
| @@ -173,74 +185,72 @@ The buffer may be narrowed." | |||
| 173 | (mm-find-mime-charset-region (point-min) (point-max)))) | 185 | (mm-find-mime-charset-region (point-min) (point-max)))) |
| 174 | (and charsets (not (equal charsets (list message-posting-charset)))))) | 186 | (and charsets (not (equal charsets (list message-posting-charset)))))) |
| 175 | 187 | ||
| 176 | (defun rfc2047-dissect-region (b e) | 188 | ;; ietf-drums-specials-token less \ . @ |
| 177 | "Dissect the region between B and E into words." | 189 | (defconst rfc2047-special-chars (append "()<>[]:;,\"" nil) |
| 178 | (let ((word-chars "-A-Za-z0-9!*+/") | 190 | "List of characters treated as special when rfc207-encoding address fields. |
| 179 | ;; Not using ietf-drums-specials-token makes life simple. | 191 | When encoding other sorts of fields, bin it to nil to avoid treating |
| 180 | mail-parse-mule-charset | 192 | RFC 2822 quoted words and comments specially.") |
| 181 | words point nonascii | 193 | |
| 182 | result word) | 194 | (defconst rfc2047-non-special-chars (concat "^" rfc2047-special-chars)) |
| 183 | (save-restriction | ||
| 184 | (narrow-to-region b e) | ||
| 185 | (goto-char (point-min)) | ||
| 186 | (skip-chars-forward "\000-\177") | ||
| 187 | ;; Fixme: This loop used to check charsets when it found | ||
| 188 | ;; non-ASCII characters. That's removed, since it doesn't make | ||
| 189 | ;; much sense in Emacs 22 and doesn't seem necessary in Emacs | ||
| 190 | ;; 21, even. I'm not sure exactly what it should be doing, and | ||
| 191 | ;; it needs another look, especially for efficiency's sake. -- fx | ||
| 192 | (while (not (eobp)) | ||
| 193 | (setq point (point) | ||
| 194 | nonascii nil) | ||
| 195 | (skip-chars-backward word-chars b) | ||
| 196 | (unless (eq b (point)) | ||
| 197 | (push (cons (buffer-substring b (point)) nil) words)) | ||
| 198 | (setq b (point) | ||
| 199 | nonascii t) | ||
| 200 | (goto-char point) | ||
| 201 | (forward-char 1) | ||
| 202 | (skip-chars-forward word-chars) | ||
| 203 | (while (not (eobp)) | ||
| 204 | (forward-char 1) | ||
| 205 | (skip-chars-forward word-chars)) | ||
| 206 | (unless (eq b (point)) | ||
| 207 | (push (cons (buffer-substring b (point)) nonascii) words)) | ||
| 208 | (setq b (point)) | ||
| 209 | (skip-chars-forward "\000-\177")) | ||
| 210 | (unless (eq b (point)) | ||
| 211 | (push (cons (buffer-substring b (point)) nil) words))) | ||
| 212 | ;; merge adjacent words | ||
| 213 | (setq word (pop words)) | ||
| 214 | (while word | ||
| 215 | (if (and (cdr word) | ||
| 216 | (caar words) | ||
| 217 | (not (cdar words)) | ||
| 218 | (not (string-match "[^ \t]" (caar words)))) | ||
| 219 | (if (eq (cdr (nth 1 words)) (cdr word)) | ||
| 220 | (progn | ||
| 221 | (setq word (cons (concat | ||
| 222 | (car (nth 1 words)) (caar words) | ||
| 223 | (car word)) | ||
| 224 | (cdr word))) | ||
| 225 | (pop words) | ||
| 226 | (pop words)) | ||
| 227 | (push (cons (concat (caar words) (car word)) (cdr word)) | ||
| 228 | result) | ||
| 229 | (pop words) | ||
| 230 | (setq word (pop words))) | ||
| 231 | (push word result) | ||
| 232 | (setq word (pop words)))) | ||
| 233 | result)) | ||
| 234 | 195 | ||
| 196 | (defun rfc2047-dissect-region (b e) | ||
| 197 | "Dissect the region between B and E into tokens. | ||
| 198 | The tokens comprise sequences of atoms, quoted strings, special | ||
| 199 | characters and whitespace." | ||
| 200 | (save-restriction | ||
| 201 | (narrow-to-region b e) | ||
| 202 | (if (null rfc2047-special-chars) | ||
| 203 | ;; simple `mime' case -- no need to tokenize | ||
| 204 | (list (buffer-substring b e)) | ||
| 205 | ;; `address-mime' case -- take care of quoted words, comments | ||
| 206 | (with-syntax-table ietf-drums-syntax-table | ||
| 207 | (let ((start (point)) | ||
| 208 | words) | ||
| 209 | (goto-char (point-min)) | ||
| 210 | (condition-case nil ; in case of unbalanced specials | ||
| 211 | ;; Dissect into: sequences of atoms, quoted strings, | ||
| 212 | ;; specials, whitespace. (Specials mustn't be encoded.) | ||
| 213 | (while (not (eobp)) | ||
| 214 | (setq start (point)) | ||
| 215 | (unless (= 0 (skip-chars-forward ietf-drums-wsp-token)) | ||
| 216 | (push (buffer-substring start (point)) words) | ||
| 217 | (setq start (point))) | ||
| 218 | (cond | ||
| 219 | ((memq (char-after) rfc2047-special-chars) | ||
| 220 | ;; Grab string or special char. | ||
| 221 | (if (eq ?\" (char-after)) | ||
| 222 | (progn | ||
| 223 | (forward-sexp) | ||
| 224 | (push (buffer-substring start (point)) words)) | ||
| 225 | (push (string (char-after)) words) | ||
| 226 | (forward-char))) | ||
| 227 | ((not (char-after))) ; eob | ||
| 228 | (t ; normal token/whitespace sequence | ||
| 229 | (skip-chars-forward rfc2047-non-special-chars) | ||
| 230 | (skip-chars-backward ietf-drums-wsp-token) | ||
| 231 | (push (buffer-substring start (point)) words)))) | ||
| 232 | (error (error "Invalid data for rfc2047 encoding: %s" | ||
| 233 | (buffer-substring b e)))) | ||
| 234 | (nreverse words)))))) | ||
| 235 | |||
| 236 | ;; Fixme: why does this cons a list of words and insert them, rather | ||
| 237 | ;; than encoding in place? | ||
| 235 | (defun rfc2047-encode-region (b e) | 238 | (defun rfc2047-encode-region (b e) |
| 236 | "Encode all encodable words in region B to E." | 239 | "Encode all encodable words in region B to E. |
| 240 | By default, the region is treated as containing addresses (see | ||
| 241 | `rfc2047-special-chars')." | ||
| 237 | (let ((words (rfc2047-dissect-region b e)) word) | 242 | (let ((words (rfc2047-dissect-region b e)) word) |
| 238 | (save-restriction | 243 | (save-restriction |
| 239 | (narrow-to-region b e) | 244 | (narrow-to-region b e) |
| 240 | (delete-region (point-min) (point-max)) | 245 | (delete-region (point-min) (point-max)) |
| 241 | (while (setq word (pop words)) | 246 | (dolist (word words) |
| 242 | (if (not (cdr word)) | 247 | ;; Quoted strings can't contain encoded words. Strip the |
| 243 | (insert (car word)) | 248 | ;; quotes. |
| 249 | (if rfc2047-special-chars | ||
| 250 | (if (eq ?\" (aref word 0)) | ||
| 251 | (setq word (substring word 1 -1)))) | ||
| 252 | (if (string-match "\\`[\0-\177]*\\'" word) ; including whitespace | ||
| 253 | (insert word) | ||
| 244 | (rfc2047-fold-region (gnus-point-at-bol) (point)) | 254 | (rfc2047-fold-region (gnus-point-at-bol) (point)) |
| 245 | (goto-char (point-max)) | 255 | (goto-char (point-max)) |
| 246 | (if (> (- (point) (save-restriction | 256 | (if (> (- (point) (save-restriction |
| @@ -250,56 +260,63 @@ The buffer may be narrowed." | |||
| 250 | ;; Insert blank between encoded words | 260 | ;; Insert blank between encoded words |
| 251 | (if (eq (char-before) ?=) (insert " ")) | 261 | (if (eq (char-before) ?=) (insert " ")) |
| 252 | (rfc2047-encode (point) | 262 | (rfc2047-encode (point) |
| 253 | (progn (insert (car word)) (point))))) | 263 | (progn (insert word) (point))))) |
| 254 | (rfc2047-fold-region (point-min) (point-max))))) | 264 | (rfc2047-fold-region (point-min) (point-max))))) |
| 255 | 265 | ||
| 256 | (defun rfc2047-encode-string (string) | 266 | (defun rfc2047-encode-string (string) |
| 257 | "Encode words in STRING." | 267 | "Encode words in STRING. |
| 268 | By default, the string is treated as containing addresses (see | ||
| 269 | `rfc2047-special-chars')." | ||
| 258 | (with-temp-buffer | 270 | (with-temp-buffer |
| 259 | (insert string) | 271 | (insert string) |
| 260 | (rfc2047-encode-region (point-min) (point-max)) | 272 | (rfc2047-encode-region (point-min) (point-max)) |
| 261 | (buffer-string))) | 273 | (buffer-string))) |
| 262 | 274 | ||
| 263 | (defun rfc2047-encode (b e) | 275 | (defun rfc2047-encode (b e) |
| 264 | "Encode the word in the region B to E." | 276 | "Encode the word(s) in the region B to E. |
| 265 | (let* ((buff (current-buffer)) | 277 | By default, the region is treated as containing addresses (see |
| 266 | (mime-charset (with-temp-buffer | 278 | `rfc2047-special-chars')." |
| 267 | (insert-buffer-substring buff b e) | 279 | (let* ((mime-charset (mm-find-mime-charset-region b e)) |
| 268 | (mm-find-mime-charset-region 1 (point-max)))) | ||
| 269 | (cs (if (> (length mime-charset) 1) | 280 | (cs (if (> (length mime-charset) 1) |
| 270 | (error "Can't encode word: %s" (buffer-substring b e)) | 281 | ;; Fixme: instead of this, try to break region into |
| 282 | ;; parts that can be encoded separately. | ||
| 283 | (error "Can't rfc2047-encode `%s'" | ||
| 284 | (buffer-substring b e)) | ||
| 271 | (setq mime-charset (car mime-charset)) | 285 | (setq mime-charset (car mime-charset)) |
| 272 | (mm-charset-to-coding-system mime-charset))) | 286 | (mm-charset-to-coding-system mime-charset))) |
| 273 | (encoding (or (cdr (assq mime-charset | 287 | (encoding (if (assq mime-charset |
| 288 | rfc2047-charset-encoding-alist) | ||
| 289 | (cdr (assq mime-charset | ||
| 274 | rfc2047-charset-encoding-alist)) | 290 | rfc2047-charset-encoding-alist)) |
| 275 | 'B)) | 291 | 'B)) |
| 276 | (start (concat | 292 | (start (concat |
| 277 | "=?" (downcase (symbol-name mime-charset)) "?" | 293 | "=?" (downcase (symbol-name mime-charset)) "?" |
| 278 | (downcase (symbol-name encoding)) "?")) | 294 | (downcase (symbol-name encoding)) "?")) |
| 279 | (first t)) | 295 | (first t)) |
| 280 | (save-restriction | 296 | (if mime-charset |
| 281 | (narrow-to-region b e) | 297 | (save-restriction |
| 282 | (when (eq encoding 'B) | 298 | (narrow-to-region b e) |
| 283 | ;; break into lines before encoding | 299 | (when (eq encoding 'B) |
| 284 | (goto-char (point-min)) | 300 | ;; break into lines before encoding |
| 285 | (while (not (eobp)) | 301 | (goto-char (point-min)) |
| 286 | (goto-char (min (point-max) (+ 15 (point)))) | 302 | (while (not (eobp)) |
| 287 | (unless (eobp) | 303 | (goto-char (min (point-max) (+ 15 (point)))) |
| 288 | (insert "\n")))) | 304 | (unless (eobp) |
| 289 | (if (and (mm-multibyte-p) | 305 | (insert "\n")))) |
| 290 | (mm-coding-system-p cs)) | 306 | (if (and (mm-multibyte-p) |
| 291 | (mm-encode-coding-region (point-min) (point-max) cs)) | 307 | (mm-coding-system-p cs)) |
| 292 | (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) | 308 | (mm-encode-coding-region (point-min) (point-max) cs)) |
| 293 | (point-min) (point-max)) | 309 | (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) |
| 294 | (goto-char (point-min)) | 310 | (point-min) (point-max)) |
| 295 | (while (not (eobp)) | 311 | (goto-char (point-min)) |
| 296 | (unless first | 312 | (while (not (eobp)) |
| 297 | (insert " ")) | 313 | (unless first |
| 298 | (setq first nil) | 314 | (insert " ")) |
| 299 | (insert start) | 315 | (setq first nil) |
| 300 | (end-of-line) | 316 | (insert start) |
| 301 | (insert "?=") | 317 | (end-of-line) |
| 302 | (forward-line 1))))) | 318 | (insert "?=") |
| 319 | (forward-line 1)))))) | ||
| 303 | 320 | ||
| 304 | (defun rfc2047-fold-region (b e) | 321 | (defun rfc2047-fold-region (b e) |
| 305 | "Fold long lines in region B to E." | 322 | "Fold long lines in region B to E." |