diff options
| author | Dave Love | 2002-10-18 10:52:56 +0000 |
|---|---|---|
| committer | Dave Love | 2002-10-18 10:52:56 +0000 |
| commit | 7f0321ff13dab15e3e6f5020bb66549ed20ad4b3 (patch) | |
| tree | 595937a5b2d5d82cce821ed68d2bedca257586ba | |
| parent | 5f8dd322a925dd06b357adb73d9e68ef358e9c76 (diff) | |
| download | emacs-7f0321ff13dab15e3e6f5020bb66549ed20ad4b3.tar.gz emacs-7f0321ff13dab15e3e6f5020bb66549ed20ad4b3.zip | |
(message-posting-charset): defvar when compiling.
(ietf-drums, gnus-util): Don't require.
(rfc2047-header-encoding-alist): Add `address-mime' part. Doc
fixes.
(rfc2047-charset-encoding-alist): Use B for iso-8859-7,
iso-8859-8.
(rfc2047-q-encoding-alist): Augment header list.
(rfc2047-encoding-type): New.
(rfc2047-dissect-region): Deleted.
(rfc2047-encode-region, rfc2047-encode): Rewritten to take
account of rfc2047 rules with respect to rfc2822 tokens and to do
encoding in place rather than by passing strings.
(rfc2047-encode-message-header): Don't include header name field
in encoding. Add `address-mime' case and bind
rfc2047-encoding-type for `mime' case.
(rfc2047-encode-string): Doc fix.
(rfc2047-encode): Use longer chunks for base64.
(rfc2047-fold-region): Insert single characters, not strings.
(rfc2047-encoded-word-regexp): Wrap in eval-and-compile.
(rfc2047-decode-region): Avoid consing regexp in loop.
| -rw-r--r-- | lisp/gnus/rfc2047.el | 452 |
1 files changed, 257 insertions, 195 deletions
diff --git a/lisp/gnus/rfc2047.el b/lisp/gnus/rfc2047.el index a5f3151d436..d0fed235c9b 100644 --- a/lisp/gnus/rfc2047.el +++ b/lisp/gnus/rfc2047.el | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
| 2 | ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | 2 | ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. |
| 3 | 3 | ||
| 4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | 4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
| 5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | 5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> |
| @@ -27,20 +27,22 @@ | |||
| 27 | 27 | ||
| 28 | ;;; Code: | 28 | ;;; Code: |
| 29 | 29 | ||
| 30 | (eval-when-compile (require 'cl)) | 30 | (eval-when-compile |
| 31 | (require 'cl) | ||
| 32 | (defvar message-posting-charset)) | ||
| 31 | 33 | ||
| 32 | (require 'qp) | 34 | (require 'qp) |
| 33 | (require 'mm-util) | 35 | (require 'mm-util) |
| 34 | (require 'ietf-drums) | 36 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
| 35 | (require 'mail-prsvr) | 37 | (require 'mail-prsvr) |
| 36 | (require 'base64) | 38 | (require 'base64) |
| 37 | ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus. | ||
| 38 | (require 'gnus-util) | ||
| 39 | (autoload 'mm-body-7-or-8 "mm-bodies") | 39 | (autoload 'mm-body-7-or-8 "mm-bodies") |
| 40 | 40 | ||
| 41 | (defvar rfc2047-header-encoding-alist | 41 | (defvar rfc2047-header-encoding-alist |
| 42 | '(("Newsgroups" . nil) | 42 | '(("Newsgroups" . nil) |
| 43 | ("Message-ID" . nil) | 43 | ("Message-ID" . nil) |
| 44 | ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . | ||
| 45 | address-mime) | ||
| 44 | (t . mime)) | 46 | (t . mime)) |
| 45 | "*Header/encoding method alist. | 47 | "*Header/encoding method alist. |
| 46 | The list is traversed sequentially. The keys can either be | 48 | The list is traversed sequentially. The keys can either be |
| @@ -50,8 +52,10 @@ The values can be: | |||
| 50 | 52 | ||
| 51 | 1) nil, in which case no encoding is done; | 53 | 1) nil, in which case no encoding is done; |
| 52 | 2) `mime', in which case the header will be encoded according to RFC2047; | 54 | 2) `mime', in which case the header will be encoded according to RFC2047; |
| 53 | 3) a charset, in which case it will be encoded as that charset; | 55 | 3) `address-mime', like `mime', but takes account of the rules for address |
| 54 | 4) `default', in which case the field will be encoded as the rest | 56 | fields (where quoted strings and comments must be treated separately); |
| 57 | 4) a charset, in which case it will be encoded as that charset; | ||
| 58 | 5) `default', in which case the field will be encoded as the rest | ||
| 55 | of the article.") | 59 | of the article.") |
| 56 | 60 | ||
| 57 | (defvar rfc2047-charset-encoding-alist | 61 | (defvar rfc2047-charset-encoding-alist |
| @@ -62,8 +66,8 @@ The values can be: | |||
| 62 | (iso-8859-4 . Q) | 66 | (iso-8859-4 . Q) |
| 63 | (iso-8859-5 . B) | 67 | (iso-8859-5 . B) |
| 64 | (koi8-r . B) | 68 | (koi8-r . B) |
| 65 | (iso-8859-7 . Q) | 69 | (iso-8859-7 . B) |
| 66 | (iso-8859-8 . Q) | 70 | (iso-8859-8 . B) |
| 67 | (iso-8859-9 . Q) | 71 | (iso-8859-9 . Q) |
| 68 | (iso-8859-14 . Q) | 72 | (iso-8859-14 . Q) |
| 69 | (iso-8859-15 . Q) | 73 | (iso-8859-15 . Q) |
| @@ -78,7 +82,8 @@ The values can be: | |||
| 78 | (iso-2022-jp-2 . B) | 82 | (iso-2022-jp-2 . B) |
| 79 | (iso-2022-int-1 . B)) | 83 | (iso-2022-int-1 . B)) |
| 80 | "Alist of MIME charsets to RFC2047 encodings. | 84 | "Alist of MIME charsets to RFC2047 encodings. |
| 81 | Valid encodings are nil, `Q' and `B'.") | 85 | Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
| 86 | quoted-printable and base64 respectively.") | ||
| 82 | 87 | ||
| 83 | (defvar rfc2047-encoding-function-alist | 88 | (defvar rfc2047-encoding-function-alist |
| 84 | '((Q . rfc2047-q-encode-region) | 89 | '((Q . rfc2047-q-encode-region) |
| @@ -87,7 +92,8 @@ Valid encodings are nil, `Q' and `B'.") | |||
| 87 | "Alist of RFC2047 encodings to encoding functions.") | 92 | "Alist of RFC2047 encodings to encoding functions.") |
| 88 | 93 | ||
| 89 | (defvar rfc2047-q-encoding-alist | 94 | (defvar rfc2047-q-encoding-alist |
| 90 | '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") | 95 | '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" |
| 96 | . "-A-Za-z0-9!*+/" ) | ||
| 91 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | 97 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. |
| 92 | ;; Avoid using 8bit characters. | 98 | ;; Avoid using 8bit characters. |
| 93 | ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | 99 | ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" |
| @@ -112,6 +118,12 @@ Valid encodings are nil, `Q' and `B'.") | |||
| 112 | (point-max)))) | 118 | (point-max)))) |
| 113 | (goto-char (point-min))) | 119 | (goto-char (point-min))) |
| 114 | 120 | ||
| 121 | (defvar rfc2047-encoding-type 'address-mime | ||
| 122 | "The type of encoding done by `rfc2047-encode-region'. | ||
| 123 | This should be dynamically bound around calls to | ||
| 124 | `rfc2047-encode-region' to either `mime' or `address-mime'. See | ||
| 125 | `rfc2047-header-encoding-alist', for definitions.") | ||
| 126 | |||
| 115 | (defun rfc2047-encode-message-header () | 127 | (defun rfc2047-encode-message-header () |
| 116 | "Encode the message header according to `rfc2047-header-encoding-alist'. | 128 | "Encode the message header according to `rfc2047-header-encoding-alist'. |
| 117 | Should be called narrowed to the head of the message." | 129 | Should be called narrowed to the head of the message." |
| @@ -141,21 +153,26 @@ Should be called narrowed to the head of the message." | |||
| 141 | (eq (car elem) t)) | 153 | (eq (car elem) t)) |
| 142 | (setq alist nil | 154 | (setq alist nil |
| 143 | method (cdr elem)))) | 155 | method (cdr elem)))) |
| 156 | (goto-char (point-min)) | ||
| 157 | (re-search-forward "^[^:]+: *" nil t) | ||
| 144 | (cond | 158 | (cond |
| 159 | ((eq method 'address-mime) | ||
| 160 | (rfc2047-encode-region (point) (point-max))) | ||
| 145 | ((eq method 'mime) | 161 | ((eq method 'mime) |
| 146 | (rfc2047-encode-region (point-min) (point-max))) | 162 | (let (rfc2047-encoding-type) |
| 163 | (rfc2047-encode-region (point) (point-max)))) | ||
| 147 | ((eq method 'default) | 164 | ((eq method 'default) |
| 148 | (if (and (featurep 'mule) | 165 | (if (and (featurep 'mule) |
| 149 | (if (boundp 'default-enable-multibyte-characters) | 166 | (if (boundp 'default-enable-multibyte-characters) |
| 150 | default-enable-multibyte-characters) | 167 | default-enable-multibyte-characters) |
| 151 | mail-parse-charset) | 168 | mail-parse-charset) |
| 152 | (mm-encode-coding-region (point-min) (point-max) | 169 | (mm-encode-coding-region (point) (point-max) |
| 153 | mail-parse-charset))) | 170 | mail-parse-charset))) |
| 154 | ((mm-coding-system-p method) | 171 | ((mm-coding-system-p method) |
| 155 | (if (and (featurep 'mule) | 172 | (if (and (featurep 'mule) |
| 156 | (if (boundp 'default-enable-multibyte-characters) | 173 | (if (boundp 'default-enable-multibyte-characters) |
| 157 | default-enable-multibyte-characters)) | 174 | default-enable-multibyte-characters)) |
| 158 | (mm-encode-coding-region (point-min) (point-max) method))) | 175 | (mm-encode-coding-region (point) (point-max) method))) |
| 159 | ;; Hm. | 176 | ;; Hm. |
| 160 | (t))) | 177 | (t))) |
| 161 | (goto-char (point-max))))))) | 178 | (goto-char (point-max))))))) |
| @@ -169,133 +186,176 @@ Should be called narrowed to the head of the message." | |||
| 169 | The buffer may be narrowed." | 186 | The buffer may be narrowed." |
| 170 | (require 'message) ; for message-posting-charset | 187 | (require 'message) ; for message-posting-charset |
| 171 | (let ((charsets | 188 | (let ((charsets |
| 172 | (mapcar | 189 | (mm-find-mime-charset-region (point-min) (point-max)))) |
| 173 | 'mm-mime-charset | 190 | (and charsets (not (equal charsets (list message-posting-charset)))))) |
| 174 | (mm-find-charset-region (point-min) (point-max)))) | 191 | |
| 175 | (cs (list 'us-ascii (car message-posting-charset))) | 192 | ;; Use this syntax table when parsing into regions that may need |
| 176 | found) | 193 | ;; encoding. Double quotes are string delimiters, backslash is |
| 177 | (while charsets | 194 | ;; character quoting, and all other RFC 2822 special characters are |
| 178 | (unless (memq (pop charsets) cs) | 195 | ;; treated as punctuation so we can use forward-sexp/forward-word to |
| 179 | (setq found t))) | 196 | ;; skip to the end of regions appropriately. Nb. ietf-drums does |
| 180 | found)) | 197 | ;; things differently. |
| 181 | 198 | (defconst rfc2047-syntax-table | |
| 182 | (defun rfc2047-dissect-region (b e) | 199 | (let ((table (make-char-table 'syntax-table '(2)))) |
| 183 | "Dissect the region between B and E into words." | 200 | (modify-syntax-entry ?\\ "\\" table) |
| 184 | (let ((word-chars "-A-Za-z0-9!*+/") | 201 | (modify-syntax-entry ?\" "\"" table) |
| 185 | ;; Not using ietf-drums-specials-token makes life simple. | 202 | (modify-syntax-entry ?\( "." table) |
| 186 | mail-parse-mule-charset | 203 | (modify-syntax-entry ?\) "." table) |
| 187 | words point current | 204 | (modify-syntax-entry ?\< "." table) |
| 188 | result word) | 205 | (modify-syntax-entry ?\> "." table) |
| 189 | (save-restriction | 206 | (modify-syntax-entry ?\[ "." table) |
| 190 | (narrow-to-region b e) | 207 | (modify-syntax-entry ?\] "." table) |
| 191 | (goto-char (point-min)) | 208 | (modify-syntax-entry ?: "." table) |
| 192 | (skip-chars-forward "\000-\177") | 209 | (modify-syntax-entry ?\; "." table) |
| 193 | (while (not (eobp)) | 210 | (modify-syntax-entry ?, "." table) |
| 194 | (setq point (point)) | 211 | (modify-syntax-entry ?@ "." table) |
| 195 | (skip-chars-backward word-chars b) | 212 | table)) |
| 196 | (unless (eq b (point)) | ||
| 197 | (push (cons (buffer-substring b (point)) nil) words)) | ||
| 198 | (setq b (point)) | ||
| 199 | (goto-char point) | ||
| 200 | (setq current (mm-charset-after)) | ||
| 201 | (forward-char 1) | ||
| 202 | (skip-chars-forward word-chars) | ||
| 203 | (while (and (not (eobp)) | ||
| 204 | (eq (mm-charset-after) current)) | ||
| 205 | (forward-char 1) | ||
| 206 | (skip-chars-forward word-chars)) | ||
| 207 | (unless (eq b (point)) | ||
| 208 | (push (cons (buffer-substring b (point)) current) words)) | ||
| 209 | (setq b (point)) | ||
| 210 | (skip-chars-forward "\000-\177")) | ||
| 211 | (unless (eq b (point)) | ||
| 212 | (push (cons (buffer-substring b (point)) nil) words))) | ||
| 213 | ;; merge adjacent words | ||
| 214 | (setq word (pop words)) | ||
| 215 | (while word | ||
| 216 | (if (and (cdr word) | ||
| 217 | (caar words) | ||
| 218 | (not (cdar words)) | ||
| 219 | (not (string-match "[^ \t]" (caar words)))) | ||
| 220 | (if (eq (cdr (nth 1 words)) (cdr word)) | ||
| 221 | (progn | ||
| 222 | (setq word (cons (concat | ||
| 223 | (car (nth 1 words)) (caar words) | ||
| 224 | (car word)) | ||
| 225 | (cdr word))) | ||
| 226 | (pop words) | ||
| 227 | (pop words)) | ||
| 228 | (push (cons (concat (caar words) (car word)) (cdr word)) | ||
| 229 | result) | ||
| 230 | (pop words) | ||
| 231 | (setq word (pop words))) | ||
| 232 | (push word result) | ||
| 233 | (setq word (pop words)))) | ||
| 234 | result)) | ||
| 235 | 213 | ||
| 236 | (defun rfc2047-encode-region (b e) | 214 | (defun rfc2047-encode-region (b e) |
| 237 | "Encode all encodable words in region B to E." | 215 | "Encode words in region B to E that need encoding. |
| 238 | (let ((words (rfc2047-dissect-region b e)) word) | 216 | By default, the region is treated as containing RFC2822 addresses. |
| 239 | (save-restriction | 217 | Dynamically bind `rfc2047-encoding-type' to change that." |
| 240 | (narrow-to-region b e) | 218 | (save-restriction |
| 241 | (delete-region (point-min) (point-max)) | 219 | (narrow-to-region b e) |
| 242 | (while (setq word (pop words)) | 220 | (if (eq 'mime rfc2047-encoding-type) |
| 243 | (if (not (cdr word)) | 221 | ;; Simple case -- treat as single word. |
| 244 | (insert (car word)) | 222 | (progn |
| 245 | (rfc2047-fold-region (gnus-point-at-bol) (point)) | 223 | (goto-char (point-min)) |
| 246 | (goto-char (point-max)) | 224 | ;; Does it need encoding? |
| 247 | (if (> (- (point) (save-restriction | 225 | (skip-chars-forward "\000-\177" e) |
| 248 | (widen) | 226 | (unless (eobp) |
| 249 | (gnus-point-at-bol))) 76) | 227 | (rfc2047-encode b e))) |
| 250 | (insert "\n ")) | 228 | ;; `address-mime' case -- take care of quoted words, comments. |
| 251 | ;; Insert blank between encoded words | 229 | (with-syntax-table rfc2047-syntax-table |
| 252 | (if (eq (char-before) ?=) (insert " ")) | 230 | (let ((start (point)) ; start of current token |
| 253 | (rfc2047-encode (point) | 231 | end ; end of current token |
| 254 | (progn (insert (car word)) (point)) | 232 | ;; Whether there's an encoded word before the current |
| 255 | (cdr word)))) | 233 | ;; tpken, either immediately or separated by space. |
| 256 | (rfc2047-fold-region (point-min) (point-max))))) | 234 | last-encoded) |
| 235 | (goto-char (point-min)) | ||
| 236 | (condition-case nil ; in case of unbalanced quotes | ||
| 237 | ;; Look for rfc2822-style: sequences of atoms, quoted | ||
| 238 | ;; strings, specials, whitespace. (Specials mustn't be | ||
| 239 | ;; encoded.) | ||
| 240 | (while (not (eobp)) | ||
| 241 | (setq start (point)) | ||
| 242 | ;; Skip whitespace. | ||
| 243 | (unless (= 0 (skip-chars-forward " \t")) | ||
| 244 | (setq start (point))) | ||
| 245 | (cond | ||
| 246 | ((not (char-after))) ; eob | ||
| 247 | ;; else token start | ||
| 248 | ((eq ?\" (char-syntax (char-after))) | ||
| 249 | ;; Quoted word. | ||
| 250 | (forward-sexp) | ||
| 251 | (setq end (point)) | ||
| 252 | ;; Does it need encoding? | ||
| 253 | (goto-char start) | ||
| 254 | (skip-chars-forward "\000-\177" end) | ||
| 255 | (if (= end (point)) | ||
| 256 | (setq last-encoded nil) | ||
| 257 | ;; It needs encoding. Strip the quotes first, | ||
| 258 | ;; since encoded words can't occur in quotes. | ||
| 259 | (goto-char end) | ||
| 260 | (delete-backward-char 1) | ||
| 261 | (goto-char start) | ||
| 262 | (delete-char 1) | ||
| 263 | (when last-encoded | ||
| 264 | ;; There was a preceding quoted word. We need | ||
| 265 | ;; to include any separating whitespace in this | ||
| 266 | ;; word to avoid it getting lost. | ||
| 267 | (skip-chars-backward " \t") | ||
| 268 | ;; A space is needed between the encoded words. | ||
| 269 | (insert ? ) | ||
| 270 | (setq start (point) | ||
| 271 | end (1+ end))) | ||
| 272 | ;; Adjust the end position for the deleted quotes. | ||
| 273 | (rfc2047-encode start (- end 2)) | ||
| 274 | (setq last-encoded t))) ; record that it was encoded | ||
| 275 | ((eq ?. (char-syntax (char-after))) | ||
| 276 | ;; Skip other delimiters, but record that they've | ||
| 277 | ;; potentially separated quoted words. | ||
| 278 | (forward-char) | ||
| 279 | (setq last-encoded nil)) | ||
| 280 | (t ; normal token/whitespace sequence | ||
| 281 | ;; Find the end. | ||
| 282 | (forward-word 1) | ||
| 283 | (skip-chars-backward " \t") | ||
| 284 | (setq end (point)) | ||
| 285 | ;; Deal with encoding and leading space as for | ||
| 286 | ;; quoted words. | ||
| 287 | (goto-char start) | ||
| 288 | (skip-chars-forward "\000-\177" end) | ||
| 289 | (if (= end (point)) | ||
| 290 | (setq last-encoded nil) | ||
| 291 | (when last-encoded | ||
| 292 | (goto-char start) | ||
| 293 | (skip-chars-backward " \t") | ||
| 294 | (insert ? ) | ||
| 295 | (setq start (point) | ||
| 296 | end (1+ end))) | ||
| 297 | (rfc2047-encode start end) | ||
| 298 | (setq last-encoded t))))) | ||
| 299 | (error (error "Invalid data for rfc2047 encoding: %s" | ||
| 300 | (buffer-substring b e))))))) | ||
| 301 | (rfc2047-fold-region b (point)))) | ||
| 257 | 302 | ||
| 258 | (defun rfc2047-encode-string (string) | 303 | (defun rfc2047-encode-string (string) |
| 259 | "Encode words in STRING." | 304 | "Encode words in STRING. |
| 305 | By default, the string is treated as containing addresses (see | ||
| 306 | `rfc2047-special-chars')." | ||
| 260 | (with-temp-buffer | 307 | (with-temp-buffer |
| 261 | (insert string) | 308 | (insert string) |
| 262 | (rfc2047-encode-region (point-min) (point-max)) | 309 | (rfc2047-encode-region (point-min) (point-max)) |
| 263 | (buffer-string))) | 310 | (buffer-string))) |
| 264 | 311 | ||
| 265 | (defun rfc2047-encode (b e charset) | 312 | (defun rfc2047-encode (b e) |
| 266 | "Encode the word in the region B to E with CHARSET." | 313 | "Encode the word(s) in the region B to E. |
| 267 | (let* ((mime-charset (mm-mime-charset charset)) | 314 | By default, the region is treated as containing addresses (see |
| 268 | (cs (mm-charset-to-coding-system mime-charset)) | 315 | `rfc2047-special-chars')." |
| 269 | (encoding (or (cdr (assq mime-charset | 316 | (let* ((mime-charset (mm-find-mime-charset-region b e)) |
| 317 | (cs (if (> (length mime-charset) 1) | ||
| 318 | ;; Fixme: Instead of this, try to break region into | ||
| 319 | ;; parts that can be encoded separately. | ||
| 320 | (error "Can't rfc2047-encode `%s'" | ||
| 321 | (buffer-substring b e)) | ||
| 322 | (setq mime-charset (car mime-charset)) | ||
| 323 | (mm-charset-to-coding-system mime-charset))) | ||
| 324 | ;; Fixme: Better, calculate the number of non-ASCII | ||
| 325 | ;; characters, at least for 8-bit charsets. | ||
| 326 | (encoding (if (assq mime-charset | ||
| 327 | rfc2047-charset-encoding-alist) | ||
| 328 | (cdr (assq mime-charset | ||
| 270 | rfc2047-charset-encoding-alist)) | 329 | rfc2047-charset-encoding-alist)) |
| 271 | 'B)) | 330 | 'B)) |
| 272 | (start (concat | 331 | (start (concat |
| 273 | "=?" (downcase (symbol-name mime-charset)) "?" | 332 | "=?" (downcase (symbol-name mime-charset)) "?" |
| 274 | (downcase (symbol-name encoding)) "?")) | 333 | (downcase (symbol-name encoding)) "?")) |
| 275 | (first t)) | 334 | (first t)) |
| 276 | (save-restriction | 335 | (if mime-charset |
| 277 | (narrow-to-region b e) | 336 | (save-restriction |
| 278 | (when (eq encoding 'B) | 337 | (narrow-to-region b e) |
| 279 | ;; break into lines before encoding | 338 | (when (eq encoding 'B) |
| 280 | (goto-char (point-min)) | 339 | ;; break into lines before encoding |
| 281 | (while (not (eobp)) | 340 | (goto-char (point-min)) |
| 282 | (goto-char (min (point-max) (+ 15 (point)))) | 341 | (while (not (eobp)) |
| 283 | (unless (eobp) | 342 | (goto-char (min (point-max) (+ 15 (point)))) |
| 284 | (insert "\n")))) | 343 | (unless (eobp) |
| 285 | (if (and (mm-multibyte-p) | 344 | (insert ?\n)))) |
| 286 | (mm-coding-system-p cs)) | 345 | (if (and (mm-multibyte-p) |
| 287 | (mm-encode-coding-region (point-min) (point-max) cs)) | 346 | (mm-coding-system-p cs)) |
| 288 | (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) | 347 | (mm-encode-coding-region (point-min) (point-max) cs)) |
| 289 | (point-min) (point-max)) | 348 | (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) |
| 290 | (goto-char (point-min)) | 349 | (point-min) (point-max)) |
| 291 | (while (not (eobp)) | 350 | (goto-char (point-min)) |
| 292 | (unless first | 351 | (while (not (eobp)) |
| 293 | (insert " ")) | 352 | (unless first |
| 294 | (setq first nil) | 353 | (insert ? )) |
| 295 | (insert start) | 354 | (setq first nil) |
| 296 | (end-of-line) | 355 | (insert start) |
| 297 | (insert "?=") | 356 | (end-of-line) |
| 298 | (forward-line 1))))) | 357 | (insert "?=") |
| 358 | (forward-line 1)))))) | ||
| 299 | 359 | ||
| 300 | (defun rfc2047-fold-region (b e) | 360 | (defun rfc2047-fold-region (b e) |
| 301 | "Fold long lines in region B to E." | 361 | "Fold long lines in region B to E." |
| @@ -306,14 +366,14 @@ The buffer may be narrowed." | |||
| 306 | (qword-break nil) | 366 | (qword-break nil) |
| 307 | (bol (save-restriction | 367 | (bol (save-restriction |
| 308 | (widen) | 368 | (widen) |
| 309 | (gnus-point-at-bol)))) | 369 | (mm-point-at-bol)))) |
| 310 | (while (not (eobp)) | 370 | (while (not (eobp)) |
| 311 | (when (and (or break qword-break) (> (- (point) bol) 76)) | 371 | (when (and (or break qword-break) (> (- (point) bol) 76)) |
| 312 | (goto-char (or break qword-break)) | 372 | (goto-char (or break qword-break)) |
| 313 | (setq break nil | 373 | (setq break nil |
| 314 | qword-break nil) | 374 | qword-break nil) |
| 315 | (if (looking-at " \t") | 375 | (if (looking-at " \t") |
| 316 | (insert "\n") | 376 | (insert ?\n) |
| 317 | (insert "\n ")) | 377 | (insert "\n ")) |
| 318 | (setq bol (1- (point))) | 378 | (setq bol (1- (point))) |
| 319 | ;; Don't break before the first non-LWSP characters. | 379 | ;; Don't break before the first non-LWSP characters. |
| @@ -347,7 +407,7 @@ The buffer may be narrowed." | |||
| 347 | (setq break nil | 407 | (setq break nil |
| 348 | qword-break nil) | 408 | qword-break nil) |
| 349 | (if (looking-at " \t") | 409 | (if (looking-at " \t") |
| 350 | (insert "\n") | 410 | (insert ?\n) |
| 351 | (insert "\n ")) | 411 | (insert "\n ")) |
| 352 | (setq bol (1- (point))) | 412 | (setq bol (1- (point))) |
| 353 | ;; Don't break before the first non-LWSP characters. | 413 | ;; Don't break before the first non-LWSP characters. |
| @@ -361,21 +421,21 @@ The buffer may be narrowed." | |||
| 361 | (goto-char (point-min)) | 421 | (goto-char (point-min)) |
| 362 | (let ((bol (save-restriction | 422 | (let ((bol (save-restriction |
| 363 | (widen) | 423 | (widen) |
| 364 | (gnus-point-at-bol))) | 424 | (mm-point-at-bol))) |
| 365 | (eol (gnus-point-at-eol)) | 425 | (eol (mm-point-at-eol)) |
| 366 | leading) | 426 | leading) |
| 367 | (forward-line 1) | 427 | (forward-line 1) |
| 368 | (while (not (eobp)) | 428 | (while (not (eobp)) |
| 369 | (looking-at "[ \t]*") | 429 | (looking-at "[ \t]*") |
| 370 | (setq leading (- (match-end 0) (match-beginning 0))) | 430 | (setq leading (- (match-end 0) (match-beginning 0))) |
| 371 | (if (< (- (gnus-point-at-eol) bol leading) 76) | 431 | (if (< (- (mm-point-at-eol) bol leading) 76) |
| 372 | (progn | 432 | (progn |
| 373 | (goto-char eol) | 433 | (goto-char eol) |
| 374 | (delete-region eol (progn | 434 | (delete-region eol (progn |
| 375 | (skip-chars-forward "[ \t\n\r]+") | 435 | (skip-chars-forward "[ \t\n\r]+") |
| 376 | (1- (point))))) | 436 | (1- (point))))) |
| 377 | (setq bol (gnus-point-at-bol))) | 437 | (setq bol (mm-point-at-bol))) |
| 378 | (setq eol (gnus-point-at-eol)) | 438 | (setq eol (mm-point-at-eol)) |
| 379 | (forward-line 1))))) | 439 | (forward-line 1))))) |
| 380 | 440 | ||
| 381 | (defun rfc2047-b-encode-region (b e) | 441 | (defun rfc2047-b-encode-region (b e) |
| @@ -396,7 +456,7 @@ The buffer may be narrowed." | |||
| 396 | (let ((alist rfc2047-q-encoding-alist) | 456 | (let ((alist rfc2047-q-encoding-alist) |
| 397 | (bol (save-restriction | 457 | (bol (save-restriction |
| 398 | (widen) | 458 | (widen) |
| 399 | (gnus-point-at-bol)))) | 459 | (mm-point-at-bol)))) |
| 400 | (while alist | 460 | (while alist |
| 401 | (when (looking-at (caar alist)) | 461 | (when (looking-at (caar alist)) |
| 402 | (quoted-printable-encode-region b e nil (cdar alist)) | 462 | (quoted-printable-encode-region b e nil (cdar alist)) |
| @@ -413,51 +473,51 @@ The buffer may be narrowed." | |||
| 413 | (goto-char (min (point-max) (+ 56 bol))) | 473 | (goto-char (min (point-max) (+ 56 bol))) |
| 414 | (search-backward "=" (- (point) 2) t) | 474 | (search-backward "=" (- (point) 2) t) |
| 415 | (unless (or (bobp) (eobp)) | 475 | (unless (or (bobp) (eobp)) |
| 416 | (insert "\n") | 476 | (insert ?\n) |
| 417 | (setq bol (point))))))))) | 477 | (setq bol (point))))))))) |
| 418 | 478 | ||
| 419 | ;;; | 479 | ;;; |
| 420 | ;;; Functions for decoding RFC2047 messages | 480 | ;;; Functions for decoding RFC2047 messages |
| 421 | ;;; | 481 | ;;; |
| 422 | 482 | ||
| 423 | (defvar rfc2047-encoded-word-regexp | 483 | (eval-and-compile |
| 424 | "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=") | 484 | (defvar rfc2047-encoded-word-regexp |
| 485 | "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\ | ||
| 486 | \\([!->@-~ +]+\\)\\?=")) | ||
| 425 | 487 | ||
| 426 | (defun rfc2047-decode-region (start end) | 488 | (defun rfc2047-decode-region (start end) |
| 427 | "Decode MIME-encoded words in region between START and END." | 489 | "Decode MIME-encoded words in region between START and END." |
| 428 | (interactive "r") | 490 | (interactive "r") |
| 429 | (let ((case-fold-search t) | 491 | (let ((case-fold-search t) |
| 492 | (undoing (not (eq t buffer-undo-list))) | ||
| 430 | b e) | 493 | b e) |
| 431 | (save-excursion | 494 | (unwind-protect |
| 432 | (save-restriction | 495 | (save-excursion |
| 433 | (narrow-to-region start end) | 496 | (save-restriction |
| 434 | (goto-char (point-min)) | 497 | (buffer-enable-undo) |
| 435 | ;; Remove whitespace between encoded words. | 498 | (narrow-to-region start end) |
| 436 | (while (re-search-forward | 499 | (goto-char (point-min)) |
| 437 | (concat "\\(" rfc2047-encoded-word-regexp "\\)" | 500 | ;; Remove whitespace between encoded words. |
| 438 | "\\(\n?[ \t]\\)+" | 501 | (while (re-search-forward |
| 439 | "\\(" rfc2047-encoded-word-regexp "\\)") | 502 | (eval-when-compile |
| 440 | nil t) | 503 | (concat "\\(" rfc2047-encoded-word-regexp "\\)" |
| 441 | (delete-region (goto-char (match-end 1)) (match-beginning 6))) | 504 | "\\(\n?[ \t]\\)+" |
| 442 | ;; Decode the encoded words. | 505 | "\\(" rfc2047-encoded-word-regexp "\\)")) |
| 443 | (setq b (goto-char (point-min))) | 506 | nil t) |
| 444 | (while (re-search-forward rfc2047-encoded-word-regexp nil t) | 507 | (delete-region (goto-char (match-end 1)) (match-beginning 6))) |
| 445 | (setq e (match-beginning 0)) | 508 | ;; Decode the encoded words. |
| 446 | (insert (rfc2047-parse-and-decode | 509 | (setq b (goto-char (point-min))) |
| 447 | (prog1 | 510 | (while (re-search-forward rfc2047-encoded-word-regexp nil t) |
| 448 | (match-string 0) | 511 | (setq e (match-beginning 0)) |
| 449 | (delete-region (match-beginning 0) (match-end 0))))) | 512 | (rfc2047-parse-and-decode (match-beginning 0) (match-end 0))) |
| 450 | (when (and (mm-multibyte-p) | 513 | (when (and (mm-multibyte-p) |
| 451 | mail-parse-charset | 514 | mail-parse-charset |
| 452 | (not (eq mail-parse-charset 'gnus-decoded))) | 515 | (not (eq mail-parse-charset 'us-ascii)) |
| 453 | (mm-decode-coding-region b e mail-parse-charset)) | 516 | (not (eq mail-parse-charset 'gnus-decoded))) |
| 454 | (setq b (point))) | 517 | (mm-decode-coding-region b (point-max) mail-parse-charset)) |
| 455 | (when (and (mm-multibyte-p) | 518 | (rfc2047-unfold-region (point-min) (point-max)))) |
| 456 | mail-parse-charset | 519 | (unless undoing |
| 457 | (not (eq mail-parse-charset 'us-ascii)) | 520 | (buffer-disable-undo))))) |
| 458 | (not (eq mail-parse-charset 'gnus-decoded))) | ||
| 459 | (mm-decode-coding-region b (point-max) mail-parse-charset)) | ||
| 460 | (rfc2047-unfold-region (point-min) (point-max)))))) | ||
| 461 | 521 | ||
| 462 | (defun rfc2047-decode-string (string) | 522 | (defun rfc2047-decode-string (string) |
| 463 | "Decode the quoted-printable-encoded STRING and return the results." | 523 | "Decode the quoted-printable-encoded STRING and return the results." |
| @@ -470,22 +530,26 @@ The buffer may be narrowed." | |||
| 470 | (rfc2047-decode-region (point-min) (point-max))) | 530 | (rfc2047-decode-region (point-min) (point-max))) |
| 471 | (buffer-string)))) | 531 | (buffer-string)))) |
| 472 | 532 | ||
| 473 | (defun rfc2047-parse-and-decode (word) | 533 | (defun rfc2047-parse-and-decode (b e) |
| 474 | "Decode WORD and return it if it is an encoded word. | 534 | "Decode WORD and return it if it is an encoded word. |
| 475 | Return WORD if not." | 535 | Return WORD if not." |
| 476 | (if (not (string-match rfc2047-encoded-word-regexp word)) | 536 | (save-restriction |
| 477 | word | 537 | (narrow-to-region b e) |
| 478 | (or | 538 | (goto-char b) |
| 479 | (condition-case nil | 539 | (when (looking-at (eval-when-compile |
| 480 | (rfc2047-decode | 540 | (concat "\\`" rfc2047-encoded-word-regexp "\\'"))) |
| 481 | (match-string 1 word) | 541 | (condition-case nil |
| 482 | (upcase (match-string 2 word)) | 542 | (let ((charset (match-string 1)) |
| 483 | (match-string 3 word)) | 543 | (encoding (upcase (match-string 2)))) |
| 484 | (error word)) | 544 | (undo-boundary) |
| 485 | word))) | 545 | (delete-region (match-beginning 0) (1+ (match-end 2))) |
| 486 | 546 | (delete-region (- (point-max) 2) (point-max)) | |
| 487 | (defun rfc2047-decode (charset encoding string) | 547 | (rfc2047-decode charset encoding (point-min) (point-max))) |
| 488 | "Decode STRING from the given MIME CHARSET in the given ENCODING. | 548 | ;; If we get an error, undo the change |
| 549 | (error (undo)))))) | ||
| 550 | |||
| 551 | (defun rfc2047-decode (charset encoding b e) | ||
| 552 | "Decode from the given MIME CHARSET in the given ENCODING in region B to E. | ||
| 489 | Valid ENCODINGs are \"B\" and \"Q\". | 553 | Valid ENCODINGs are \"B\" and \"Q\". |
| 490 | If your Emacs implementation can't decode CHARSET, return nil." | 554 | If your Emacs implementation can't decode CHARSET, return nil." |
| 491 | (if (stringp charset) | 555 | (if (stringp charset) |
| @@ -504,18 +568,16 @@ If your Emacs implementation can't decode CHARSET, return nil." | |||
| 504 | (when (and (eq cs 'ascii) | 568 | (when (and (eq cs 'ascii) |
| 505 | mail-parse-charset) | 569 | mail-parse-charset) |
| 506 | (setq cs mail-parse-charset)) | 570 | (setq cs mail-parse-charset)) |
| 507 | ;; Ensure unibyte result in Emacs 20. | 571 | (save-restriction |
| 508 | (let (default-enable-multibyte-characters) | 572 | (narrow-to-region b e) |
| 509 | (with-temp-buffer | 573 | (cond |
| 510 | (mm-decode-coding-string | 574 | ((equal "B" encoding) |
| 511 | (cond | 575 | (base64-decode-region b e)) |
| 512 | ((equal "B" encoding) | 576 | ((equal "Q" encoding) |
| 513 | (base64-decode-string string)) | 577 | (subst-char-in-region b e ?_ ? t) |
| 514 | ((equal "Q" encoding) | 578 | (quoted-printable-decode-region b e)) |
| 515 | (quoted-printable-decode-string | 579 | (t (error "Invalid encoding: %s" encoding))) |
| 516 | (mm-replace-chars-in-string string ?_ ? ))) | 580 | (mm-decode-coding-region (point-min) (point-max) cs))))) |
| 517 | (t (error "Invalid encoding: %s" encoding))) | ||
| 518 | cs)))))) | ||
| 519 | 581 | ||
| 520 | (provide 'rfc2047) | 582 | (provide 'rfc2047) |
| 521 | 583 | ||