aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Love2002-09-05 17:43:48 +0000
committerDave Love2002-09-05 17:43:48 +0000
commit6ec99eb279d5ca0f588cf4236f52e0978971abf8 (patch)
treef8c946c8e8b1e19f135bbd580b14f5a286d39beb
parent0c129bca32a03adc6f12b088ba4132b52d5dcf94 (diff)
downloademacs-6ec99eb279d5ca0f588cf4236f52e0978971abf8.tar.gz
emacs-6ec99eb279d5ca0f588cf4236f52e0978971abf8.zip
(message-posting-charset): defvar when compiling.
(rfc2047-header-encoding-alist): Add `address-mime' part. (rfc2047-charset-encoding-alist): Use B for iso-8859-7. Doc fix. (rfc2047-q-encoding-alist): Augment header list. (rfc2047-encodable-p): Use mm-find-mime-charset-region. (rfc2047-special-chars, rfc2047-non-special-chars): New. (rfc2047-dissect-region, rfc2047-encode-region, rfc2047-encode): Rewritten to avoid charset stuff and to take account of rfc2822 tokens. (rfc2047-encode-message-header): Don't include header name field in encoding. Add `address-mime' case and bind rfc2047-special-chars for `mime' case.
-rw-r--r--lisp/gnus/rfc2047.el225
1 files changed, 121 insertions, 104 deletions
diff --git a/lisp/gnus/rfc2047.el b/lisp/gnus/rfc2047.el
index c1dad4197dc..570681cc0ab 100644
--- a/lisp/gnus/rfc2047.el
+++ b/lisp/gnus/rfc2047.el
@@ -1,5 +1,5 @@
1;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages 1;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
2;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. 2;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
3 3
4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5;; MORIOKA Tomohiko <morioka@jaist.ac.jp> 5;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
@@ -27,7 +27,9 @@
27 27
28;;; Code: 28;;; Code:
29 29
30(eval-when-compile (require 'cl)) 30(eval-when-compile
31 (require 'cl)
32 (defvar message-posting-charset))
31 33
32(require 'qp) 34(require 'qp)
33(require 'mm-util) 35(require 'mm-util)
@@ -41,6 +43,8 @@
41(defvar rfc2047-header-encoding-alist 43(defvar rfc2047-header-encoding-alist
42 '(("Newsgroups" . nil) 44 '(("Newsgroups" . nil)
43 ("Message-ID" . nil) 45 ("Message-ID" . nil)
46 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" .
47 address-mime)
44 (t . mime)) 48 (t . mime))
45 "*Header/encoding method alist. 49 "*Header/encoding method alist.
46The list is traversed sequentially. The keys can either be 50The list is traversed sequentially. The keys can either be
@@ -50,8 +54,10 @@ The values can be:
50 54
511) nil, in which case no encoding is done; 551) nil, in which case no encoding is done;
522) `mime', in which case the header will be encoded according to RFC2047; 562) `mime', in which case the header will be encoded according to RFC2047;
533) a charset, in which case it will be encoded as that charset; 573) `address-mime', like `mime', but takes account of the rules for address
544) `default', in which case the field will be encoded as the rest 58 fields (where quoted strings and comments must be treated separately);
594) a charset, in which case it will be encoded as that charset;
605) `default', in which case the field will be encoded as the rest
55 of the article.") 61 of the article.")
56 62
57(defvar rfc2047-charset-encoding-alist 63(defvar rfc2047-charset-encoding-alist
@@ -62,7 +68,7 @@ The values can be:
62 (iso-8859-4 . Q) 68 (iso-8859-4 . Q)
63 (iso-8859-5 . B) 69 (iso-8859-5 . B)
64 (koi8-r . B) 70 (koi8-r . B)
65 (iso-8859-7 . Q) 71 (iso-8859-7 . B)
66 (iso-8859-8 . B) 72 (iso-8859-8 . B)
67 (iso-8859-9 . Q) 73 (iso-8859-9 . Q)
68 (iso-8859-14 . Q) 74 (iso-8859-14 . Q)
@@ -88,7 +94,8 @@ quoted-printable and base64 respectively.")
88 "Alist of RFC2047 encodings to encoding functions.") 94 "Alist of RFC2047 encodings to encoding functions.")
89 95
90(defvar rfc2047-q-encoding-alist 96(defvar rfc2047-q-encoding-alist
91 '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") 97 '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):"
98 . "-A-Za-z0-9!*+/" )
92 ;; = (\075), _ (\137), ? (\077) are used in the encoded word. 99 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
93 ;; Avoid using 8bit characters. 100 ;; Avoid using 8bit characters.
94 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" 101 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
@@ -142,21 +149,26 @@ Should be called narrowed to the head of the message."
142 (eq (car elem) t)) 149 (eq (car elem) t))
143 (setq alist nil 150 (setq alist nil
144 method (cdr elem)))) 151 method (cdr elem))))
152 (goto-char (point-min))
153 (re-search-forward "^[^:]+: *" nil t)
145 (cond 154 (cond
155 ((eq method 'address-mime)
156 (rfc2047-encode-region (point) (point-max)))
146 ((eq method 'mime) 157 ((eq method 'mime)
147 (rfc2047-encode-region (point-min) (point-max))) 158 (let (rfc2047-special-chars)
159 (rfc2047-encode-region (point) (point-max))))
148 ((eq method 'default) 160 ((eq method 'default)
149 (if (and (featurep 'mule) 161 (if (and (featurep 'mule)
150 (if (boundp 'default-enable-multibyte-characters) 162 (if (boundp 'default-enable-multibyte-characters)
151 default-enable-multibyte-characters) 163 default-enable-multibyte-characters)
152 mail-parse-charset) 164 mail-parse-charset)
153 (mm-encode-coding-region (point-min) (point-max) 165 (mm-encode-coding-region (point) (point-max)
154 mail-parse-charset))) 166 mail-parse-charset)))
155 ((mm-coding-system-p method) 167 ((mm-coding-system-p method)
156 (if (and (featurep 'mule) 168 (if (and (featurep 'mule)
157 (if (boundp 'default-enable-multibyte-characters) 169 (if (boundp 'default-enable-multibyte-characters)
158 default-enable-multibyte-characters)) 170 default-enable-multibyte-characters))
159 (mm-encode-coding-region (point-min) (point-max) method))) 171 (mm-encode-coding-region (point) (point-max) method)))
160 ;; Hm. 172 ;; Hm.
161 (t))) 173 (t)))
162 (goto-char (point-max))))))) 174 (goto-char (point-max)))))))
@@ -173,74 +185,72 @@ The buffer may be narrowed."
173 (mm-find-mime-charset-region (point-min) (point-max)))) 185 (mm-find-mime-charset-region (point-min) (point-max))))
174 (and charsets (not (equal charsets (list message-posting-charset)))))) 186 (and charsets (not (equal charsets (list message-posting-charset))))))
175 187
176(defun rfc2047-dissect-region (b e) 188;; ietf-drums-specials-token less \ . @
177 "Dissect the region between B and E into words." 189(defconst rfc2047-special-chars (append "()<>[]:;,\"" nil)
178 (let ((word-chars "-A-Za-z0-9!*+/") 190 "List of characters treated as special when rfc207-encoding address fields.
179 ;; Not using ietf-drums-specials-token makes life simple. 191When encoding other sorts of fields, bin it to nil to avoid treating
180 mail-parse-mule-charset 192RFC 2822 quoted words and comments specially.")
181 words point nonascii 193
182 result word) 194(defconst rfc2047-non-special-chars (concat "^" rfc2047-special-chars))
183 (save-restriction
184 (narrow-to-region b e)
185 (goto-char (point-min))
186 (skip-chars-forward "\000-\177")
187 ;; Fixme: This loop used to check charsets when it found
188 ;; non-ASCII characters. That's removed, since it doesn't make
189 ;; much sense in Emacs 22 and doesn't seem necessary in Emacs
190 ;; 21, even. I'm not sure exactly what it should be doing, and
191 ;; it needs another look, especially for efficiency's sake. -- fx
192 (while (not (eobp))
193 (setq point (point)
194 nonascii nil)
195 (skip-chars-backward word-chars b)
196 (unless (eq b (point))
197 (push (cons (buffer-substring b (point)) nil) words))
198 (setq b (point)
199 nonascii t)
200 (goto-char point)
201 (forward-char 1)
202 (skip-chars-forward word-chars)
203 (while (not (eobp))
204 (forward-char 1)
205 (skip-chars-forward word-chars))
206 (unless (eq b (point))
207 (push (cons (buffer-substring b (point)) nonascii) words))
208 (setq b (point))
209 (skip-chars-forward "\000-\177"))
210 (unless (eq b (point))
211 (push (cons (buffer-substring b (point)) nil) words)))
212 ;; merge adjacent words
213 (setq word (pop words))
214 (while word
215 (if (and (cdr word)
216 (caar words)
217 (not (cdar words))
218 (not (string-match "[^ \t]" (caar words))))
219 (if (eq (cdr (nth 1 words)) (cdr word))
220 (progn
221 (setq word (cons (concat
222 (car (nth 1 words)) (caar words)
223 (car word))
224 (cdr word)))
225 (pop words)
226 (pop words))
227 (push (cons (concat (caar words) (car word)) (cdr word))
228 result)
229 (pop words)
230 (setq word (pop words)))
231 (push word result)
232 (setq word (pop words))))
233 result))
234 195
196(defun rfc2047-dissect-region (b e)
197 "Dissect the region between B and E into tokens.
198The tokens comprise sequences of atoms, quoted strings, special
199characters and whitespace."
200 (save-restriction
201 (narrow-to-region b e)
202 (if (null rfc2047-special-chars)
203 ;; simple `mime' case -- no need to tokenize
204 (list (buffer-substring b e))
205 ;; `address-mime' case -- take care of quoted words, comments
206 (with-syntax-table ietf-drums-syntax-table
207 (let ((start (point))
208 words)
209 (goto-char (point-min))
210 (condition-case nil ; in case of unbalanced specials
211 ;; Dissect into: sequences of atoms, quoted strings,
212 ;; specials, whitespace. (Specials mustn't be encoded.)
213 (while (not (eobp))
214 (setq start (point))
215 (unless (= 0 (skip-chars-forward ietf-drums-wsp-token))
216 (push (buffer-substring start (point)) words)
217 (setq start (point)))
218 (cond
219 ((memq (char-after) rfc2047-special-chars)
220 ;; Grab string or special char.
221 (if (eq ?\" (char-after))
222 (progn
223 (forward-sexp)
224 (push (buffer-substring start (point)) words))
225 (push (string (char-after)) words)
226 (forward-char)))
227 ((not (char-after))) ; eob
228 (t ; normal token/whitespace sequence
229 (skip-chars-forward rfc2047-non-special-chars)
230 (skip-chars-backward ietf-drums-wsp-token)
231 (push (buffer-substring start (point)) words))))
232 (error (error "Invalid data for rfc2047 encoding: %s"
233 (buffer-substring b e))))
234 (nreverse words))))))
235
236;; Fixme: why does this cons a list of words and insert them, rather
237;; than encoding in place?
235(defun rfc2047-encode-region (b e) 238(defun rfc2047-encode-region (b e)
236 "Encode all encodable words in region B to E." 239 "Encode all encodable words in region B to E.
240By default, the region is treated as containing addresses (see
241`rfc2047-special-chars')."
237 (let ((words (rfc2047-dissect-region b e)) word) 242 (let ((words (rfc2047-dissect-region b e)) word)
238 (save-restriction 243 (save-restriction
239 (narrow-to-region b e) 244 (narrow-to-region b e)
240 (delete-region (point-min) (point-max)) 245 (delete-region (point-min) (point-max))
241 (while (setq word (pop words)) 246 (dolist (word words)
242 (if (not (cdr word)) 247 ;; Quoted strings can't contain encoded words. Strip the
243 (insert (car word)) 248 ;; quotes.
249 (if rfc2047-special-chars
250 (if (eq ?\" (aref word 0))
251 (setq word (substring word 1 -1))))
252 (if (string-match "\\`[\0-\177]*\\'" word) ; including whitespace
253 (insert word)
244 (rfc2047-fold-region (gnus-point-at-bol) (point)) 254 (rfc2047-fold-region (gnus-point-at-bol) (point))
245 (goto-char (point-max)) 255 (goto-char (point-max))
246 (if (> (- (point) (save-restriction 256 (if (> (- (point) (save-restriction
@@ -250,56 +260,63 @@ The buffer may be narrowed."
250 ;; Insert blank between encoded words 260 ;; Insert blank between encoded words
251 (if (eq (char-before) ?=) (insert " ")) 261 (if (eq (char-before) ?=) (insert " "))
252 (rfc2047-encode (point) 262 (rfc2047-encode (point)
253 (progn (insert (car word)) (point))))) 263 (progn (insert word) (point)))))
254 (rfc2047-fold-region (point-min) (point-max))))) 264 (rfc2047-fold-region (point-min) (point-max)))))
255 265
256(defun rfc2047-encode-string (string) 266(defun rfc2047-encode-string (string)
257 "Encode words in STRING." 267 "Encode words in STRING.
268By default, the string is treated as containing addresses (see
269`rfc2047-special-chars')."
258 (with-temp-buffer 270 (with-temp-buffer
259 (insert string) 271 (insert string)
260 (rfc2047-encode-region (point-min) (point-max)) 272 (rfc2047-encode-region (point-min) (point-max))
261 (buffer-string))) 273 (buffer-string)))
262 274
263(defun rfc2047-encode (b e) 275(defun rfc2047-encode (b e)
264 "Encode the word in the region B to E." 276 "Encode the word(s) in the region B to E.
265 (let* ((buff (current-buffer)) 277By default, the region is treated as containing addresses (see
266 (mime-charset (with-temp-buffer 278`rfc2047-special-chars')."
267 (insert-buffer-substring buff b e) 279 (let* ((mime-charset (mm-find-mime-charset-region b e))
268 (mm-find-mime-charset-region 1 (point-max))))
269 (cs (if (> (length mime-charset) 1) 280 (cs (if (> (length mime-charset) 1)
270 (error "Can't encode word: %s" (buffer-substring b e)) 281 ;; Fixme: instead of this, try to break region into
282 ;; parts that can be encoded separately.
283 (error "Can't rfc2047-encode `%s'"
284 (buffer-substring b e))
271 (setq mime-charset (car mime-charset)) 285 (setq mime-charset (car mime-charset))
272 (mm-charset-to-coding-system mime-charset))) 286 (mm-charset-to-coding-system mime-charset)))
273 (encoding (or (cdr (assq mime-charset 287 (encoding (if (assq mime-charset
288 rfc2047-charset-encoding-alist)
289 (cdr (assq mime-charset
274 rfc2047-charset-encoding-alist)) 290 rfc2047-charset-encoding-alist))
275 'B)) 291 'B))
276 (start (concat 292 (start (concat
277 "=?" (downcase (symbol-name mime-charset)) "?" 293 "=?" (downcase (symbol-name mime-charset)) "?"
278 (downcase (symbol-name encoding)) "?")) 294 (downcase (symbol-name encoding)) "?"))
279 (first t)) 295 (first t))
280 (save-restriction 296 (if mime-charset
281 (narrow-to-region b e) 297 (save-restriction
282 (when (eq encoding 'B) 298 (narrow-to-region b e)
283 ;; break into lines before encoding 299 (when (eq encoding 'B)
284 (goto-char (point-min)) 300 ;; break into lines before encoding
285 (while (not (eobp)) 301 (goto-char (point-min))
286 (goto-char (min (point-max) (+ 15 (point)))) 302 (while (not (eobp))
287 (unless (eobp) 303 (goto-char (min (point-max) (+ 15 (point))))
288 (insert "\n")))) 304 (unless (eobp)
289 (if (and (mm-multibyte-p) 305 (insert "\n"))))
290 (mm-coding-system-p cs)) 306 (if (and (mm-multibyte-p)
291 (mm-encode-coding-region (point-min) (point-max) cs)) 307 (mm-coding-system-p cs))
292 (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) 308 (mm-encode-coding-region (point-min) (point-max) cs))
293 (point-min) (point-max)) 309 (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
294 (goto-char (point-min)) 310 (point-min) (point-max))
295 (while (not (eobp)) 311 (goto-char (point-min))
296 (unless first 312 (while (not (eobp))
297 (insert " ")) 313 (unless first
298 (setq first nil) 314 (insert " "))
299 (insert start) 315 (setq first nil)
300 (end-of-line) 316 (insert start)
301 (insert "?=") 317 (end-of-line)
302 (forward-line 1))))) 318 (insert "?=")
319 (forward-line 1))))))
303 320
304(defun rfc2047-fold-region (b e) 321(defun rfc2047-fold-region (b e)
305 "Fold long lines in region B to E." 322 "Fold long lines in region B to E."