aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard M. Stallman1994-04-24 03:51:13 +0000
committerRichard M. Stallman1994-04-24 03:51:13 +0000
commit154b3e397fc0a8da4c569d54b60a11ddf205bff3 (patch)
tree6862b48c7dd6049bae679b968cd512c8ce346456
parentab01d0a826d84064055007907f93bd4c8698cf93 (diff)
downloademacs-154b3e397fc0a8da4c569d54b60a11ddf205bff3.tar.gz
emacs-154b3e397fc0a8da4c569d54b60a11ddf205bff3.zip
(mail-extr-all-top-level-domains): Renamed from all-top-level-domains.
Major changes by jwz and drw.
-rw-r--r--lisp/mail/mail-extr.el2022
1 files changed, 1226 insertions, 796 deletions
diff --git a/lisp/mail/mail-extr.el b/lisp/mail/mail-extr.el
index 3d5a0f3ce1d..77e43d2d55d 100644
--- a/lisp/mail/mail-extr.el
+++ b/lisp/mail/mail-extr.el
@@ -1,17 +1,17 @@
1;;; mail-extr.el --- extract full name and address from RFC 822 mail header. 1;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
2 2
3;; Copyright (C) 1992 Free Software Foundation, Inc. 3;; Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
4 4
5;; Author: Joe Wells <jbw@cs.bu.edu> 5;; Author: Joe Wells <jbw@cs.bu.edu>
6;; Version: 1.0 6;; Maintainer: Jamie Zawinski <jwz@lucid.com>
7;; Adapted-By: ESR 7;; Version: 1.8
8;; Keywords: mail 8;; Keywords: mail
9 9
10;; This file is part of GNU Emacs. 10;; This file is part of GNU Emacs.
11 11
12;; GNU Emacs is free software; you can redistribute it and/or modify 12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by 13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 1, or (at your option) 14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version. 15;; any later version.
16 16
17;; GNU Emacs is distributed in the hope that it will be useful, 17;; GNU Emacs is distributed in the hope that it will be useful,
@@ -25,51 +25,62 @@
25 25
26;;; Commentary: 26;;; Commentary:
27 27
28;; Here is `mail-extr', a package for extracting full names and canonical 28;; The entry point of this code is
29;; addresses from RFC 822 mail headers. It is intended to be hooked into 29;;
30;; other Emacs Lisp packages that deal with RFC 822 format messages, such as 30;; mail-extract-address-components: (address)
31;; Gnews, GNUS, RMAIL, MH-E, BBDB, VM, Supercite, etc. Thus, this release is 31;;
32;; mainly for Emacs Lisp developers. 32;; Given an RFC-822 ADDRESS, extract full name and canonical address.
33 33;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
34;; If no name can be extracted, FULL-NAME will be nil.
35;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
36;; (narrowed) portion of the buffer will be interpreted as the address.
37;; (This feature exists so that the clever caller might be able to avoid
38;; consing a string.)
39;; If ADDRESS contains more than one RFC-822 address, only the first is
40;; returned.
41;;
42;; This code is more correct (and more heuristic) parser than the code in
43;; rfc822.el. And despite its size, it's fairly fast.
44;;
34;; There are two main benefits: 45;; There are two main benefits:
35 46;;
36;; 1. Higher probability of getting the correct full name for a human than 47;; 1. Higher probability of getting the correct full name for a human than
37;; any other package I know of. (On the other hand, it will cheerfully 48;; any other package we know of. (On the other hand, it will cheerfully
38;; mangle non-human names/comments.) 49;; mangle non-human names/comments.)
39;; 2. Address part is put in a canonical form. 50;; 2. Address part is put in a canonical form.
40 51;;
41;; The interface is not yet carved in stone; please give me suggestions. 52;; The interface is not yet carved in stone; please give us suggestions.
42 53;;
43;; I have an extensive test-case collection of funny addresses if you want to 54;; We have an extensive test-case collection of funny addresses if you want to
44;; work with the code. Developing this code requires frequent testing to 55;; work with the code. Developing this code requires frequent testing to
45;; make sure you're not breaking functionality. I'm not posting the 56;; make sure you're not breaking functionality. The test cases aren't included
46;; test-cases because they take over 100K. 57;; because they are over 100K.
47 58;;
48;; If you find an address that mail-extr fails on, please send it to me along 59;; If you find an address that mail-extr fails on, please send it to the
49;; with what you think the correct results should be. I do not consider it a 60;; maintainer along with what you think the correct results should be. We do
50;; bug if mail-extr mangles a comment that does not correspond to a real 61;; not consider it a bug if mail-extr mangles a comment that does not
51;; human full name, although I would prefer that mail-extr would return the 62;; correspond to a real human full name, although we would prefer that
52;; comment as-is. 63;; mail-extr would return the comment as-is.
53 64;;
54;; Features: 65;; Features:
55 66;;
56;; * Full name handling: 67;; * Full name handling:
57 68;;
58;; * knows where full names can be found in an address. 69;; * knows where full names can be found in an address.
59;; * avoids using empty comments and quoted text. 70;; * avoids using empty comments and quoted text.
60;; * extracts full names from mailbox names. 71;; * extracts full names from mailbox names.
61;; * recognizes common formats for comments after a full name. 72;; * recognizes common formats for comments after a full name.
62;; * puts a period and a space after each initial. 73;; * puts a period and a space after each initial.
63;; * understands & referring to the mailbox name capitalized. 74;; * understands & referring to the mailbox name, capitalized.
64;; * strips name prefixes like "Prof.", etc.. 75;; * strips name prefixes like "Prof.", etc.
65;; * understands what characters can occur in names (not just letters). 76;; * understands what characters can occur in names (not just letters).
66;; * figures out middle initial from mailbox name. 77;; * figures out middle initial from mailbox name.
67;; * removes funny nicknames. 78;; * removes funny nicknames.
68;; * keeps suffixes such as Jr., Sr., III, etc. 79;; * keeps suffixes such as Jr., Sr., III, etc.
69;; * reorders "Last, First" type names. 80;; * reorders "Last, First" type names.
70 81;;
71;; * Address handling: 82;; * Address handling:
72 83;;
73;; * parses rfc822 quoted text, comments, and domain literals. 84;; * parses rfc822 quoted text, comments, and domain literals.
74;; * parses rfc822 multi-line headers. 85;; * parses rfc822 multi-line headers.
75;; * does something reasonable with rfc822 GROUP addresses. 86;; * does something reasonable with rfc822 GROUP addresses.
@@ -79,13 +90,13 @@
79;; * converts rfc822 ROUTE addresses to %-style addresses. 90;; * converts rfc822 ROUTE addresses to %-style addresses.
80;; * truncates %-style addresses at leftmost fully qualified domain name. 91;; * truncates %-style addresses at leftmost fully qualified domain name.
81;; * handles local relative precedence of ! vs. % and @ (untested). 92;; * handles local relative precedence of ! vs. % and @ (untested).
82 93;;
83;; It does almost no string creation. It primarily uses the built-in 94;; It does almost no string creation. It primarily uses the built-in
84;; parsing routines with the appropriate syntax tables. This should 95;; parsing routines with the appropriate syntax tables. This should
85;; result in greater speed. 96;; result in greater speed.
86 97;;
87;; TODO: 98;; TODO:
88 99;;
89;; * handle all test cases. (This will take forever.) 100;; * handle all test cases. (This will take forever.)
90;; * software to pick the correct header to use (eg., "Senders-Name:"). 101;; * software to pick the correct header to use (eg., "Senders-Name:").
91;; * multiple addresses in the "From:" header (almost all of the necessary 102;; * multiple addresses in the "From:" header (almost all of the necessary
@@ -102,12 +113,76 @@
102;; * delete unused variables. 113;; * delete unused variables.
103;; * arrange for testing with different relative precedences of ! vs. @ 114;; * arrange for testing with different relative precedences of ! vs. @
104;; and %. 115;; and %.
105;; * put mail-variant-method back into mail-extract-address-components.
106;; * insert documentation strings! 116;; * insert documentation strings!
107;; * handle X.400-gatewayed addresses according to RFC 1148. 117;; * handle X.400-gatewayed addresses according to RFC 1148.
108 118
109;;; Change Log: 119;;; Change Log:
110;; 120;;
121;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com)
122;;
123;; * merged with jbw's latest version
124;;
125;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com)
126;;
127;; * high-bit chars in comments weren't treated as word syntax
128;;
129;; Sat Feb 5 03:13:40 1994 Jamie Zawinski (jwz@lucid.com)
130;;
131;; * call replace-match with fixed-case arg
132;;
133;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com)
134;;
135;; * some more cleanup, doc, added provide
136;;
137;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
138;;
139;; * Made mail-full-name-prefixes a user-customizable variable.
140;; Allow passing the address as a buffer as well as as a string.
141;; Allow [ and ] as name characters (Finnish character set).
142;;
143;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
144;;
145;; * Handle "null" addresses. Handle = used for spacing in mailbox
146;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
147;; missing their brackets. Handle uppercase "JR". Extract full
148;; names from X.400 addresses encoded in RFC-822. Fix bug in
149;; handling of multiple addresses where first has trailing comment.
150;; Handle more kinds of telephone extension lead-ins.
151;;
152;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
153;;
154;; * Handle HZ encoding for embedding GB encoded chinese characters.
155;;
156;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
157;;
158;; * Fixed too broad matching of ham radio call signs. Fixed bug in
159;; handling an unmatched ' in a name string. Enhanced recognition
160;; of when . in the mailbox name terminates the name portion.
161;; Narrowed conversion of . to space to only the necessary
162;; situation. Deal with VMS's stupid date stamps. Handle a unique
163;; way of introducing an alternate address. Fixed spacing bug I
164;; introduced in switching last name order. Fixed bug in handling
165;; address with ! and % but no @. Narrowed the cases in which
166;; certain trailing words are discarded.
167;;
168;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
169;;
170;; * Fixed bugs in handling GROUP addresses. Certain words in the
171;; middle of a name no longer terminate it. Handle LISTSERV list
172;; names. Ignore comment field containing mailbox name.
173;;
174;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
175;;
176;; * Moved variant-method code back into main function. Handle
177;; underscores as spaces in comments. Handle leading nickname. Add
178;; flag to ignore single-word names. Other changes.
179;;
180;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
181;;
182;; * Added in changes by Rod Whitby and Jamie Zawinski. This
183;; includes the flag mail-extr-guess-middle-initial and the fix for
184;; handling multiple addresses correctly.
185;;
111;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu) 186;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
112;; 187;;
113;; * Cleaned up some more. Release version 1.0 to world. 188;; * Cleaned up some more. Release version 1.0 to world.
@@ -127,9 +202,37 @@
127 202
128;;; Code: 203;;; Code:
129 204
130;; Variable definitions.
131 205
132(defvar mail-@-binds-tighter-than-! nil) 206;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
207;;
208;; User configuration variable definitions.
209;;
210
211(defvar mail-extr-guess-middle-initial nil
212 "*Whether to try to guess middle initial from mail address.
213If true, then when we see an address like \"John Smith <jqs@host.com>\"
214we will assume that \"John Q. Smith\" is the fellow's name.")
215
216(defvar mail-extr-ignore-single-names t
217 "*Whether to ignore a name that is just a single word.
218If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
219we will act as though we couldn't find a full name in the address.")
220
221;; Matches a leading title that is not part of the name (does not
222;; contribute to uniquely identifying the person).
223(defvar mail-extr-full-name-prefixes
224 (purecopy
225 "\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]")
226 "*Matches prefixes to the full name that identify a person's position.
227These are stripped from the full name because they do not contribute to
228uniquely identifying the person.")
229
230(defvar mail-extr-@-binds-tighter-than-! nil
231 "*Whether the local mail transport agent looks at ! before @.")
232
233(defvar mail-extr-mangle-uucp nil
234 "*Whether to throw away information in UUCP addresses
235by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
133 236
134;;---------------------------------------------------------------------- 237;;----------------------------------------------------------------------
135;; what orderings are meaningful????? 238;; what orderings are meaningful?????
@@ -142,120 +245,253 @@
142;; arbitrary address. 245;; arbitrary address.
143;;---------------------------------------------------------------------- 246;;----------------------------------------------------------------------
144 247
145(defconst mail-space-char 32) 248
146
147(defconst mail-whitespace " \t\n")
148 249
149;; Any character that can occur in a name in an RFC822 address. 250;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
150;; Yes, there are weird people with digits in their names. 251;;
151(defconst mail-all-letters "A-Za-z---{|}'~0-9`.") 252;; Constant definitions.
253;;
254
255;; Codes in
256;; Names in ISO 8859-1 Name
257;; ISO 10XXX ISO 8859-2 in
258;; ISO 6937 ISO 10646 RFC Swedish
259;; etc. Hex Oct 1345 TeX Split ASCII Description
260;; --------- ---------- ---- --- ----- ----- -------------------------------
261;; %a E4 344 a: \"a ae { latin small a + diaeresis d
262;; %o F6 366 o: \"o oe | latin small o + diaeresis v
263;; @a E5 345 aa \oa aa } latin small a + ring above e
264;; %u FC 374 u: \"u ue ~ latin small u + diaeresis |
265;; /e E9 351 e' \'e ` latin small e + acute i
266;; %A C4 304 A: \"A AE [ latin capital a + diaeresis D
267;; %O D6 326 O: \"O OE \ latin capital o + diaeresis V
268;; @A C5 305 AA \oA AA ] latin capital a + ring above E
269;; %U DC 334 U: \"U UE ^ latin capital u + diaeresis \
270;; /E C9 311 E' \'E @ latin capital e + acute I
271
272;; NOTE: @a and @A are not in ISO 8859-2 (the codes mentioned above invoke
273;; /l and /L). Some of this data was retrieved from
274;; listserv@jhuvm.hcf.jhu.edu.
152 275
153;; Any character that can occur in a name, not counting characters that 276;; Any character that can occur in a name, not counting characters that
154;; separate parts of a multipart name. 277;; separate parts of a multipart name (hyphen and period).
155(defconst mail-all-letters-but-separators "A-Za-z{|}'~0-9`") 278;; Yes, there are weird people with digits in their names.
156 279;; You will also notice the consideration for the
157;; Any character that can start a name 280;; Swedish/Finnish/Norwegian character set.
158(defconst mail-first-letters "A-Za-z") 281;; #### (go to \376 instead of \377 to work around bug in search.c...)
282(defconst mail-extr-all-letters-but-separators
283 (purecopy "][A-Za-z{|}'~0-9`\200-\376"))
284
285;; Any character that can occur in a name in an RFC822 address including
286;; the separator (hyphen and possibly period) for multipart names.
287;; #### should . be in here?
288(defconst mail-extr-all-letters
289 (purecopy (concat mail-extr-all-letters-but-separators "---")))
290
291;; Any character that can start a name.
292;; Keep this set as minimal as possible.
293(defconst mail-extr-first-letters (purecopy "A-Za-z"))
159 294
160;; Any character that can end a name. 295;; Any character that can end a name.
161(defconst mail-last-letters "A-Za-z`'.") 296;; Keep this set as minimal as possible.
297(defconst mail-extr-last-letters (purecopy "[A-Za-z`'."))
162 298
163;; Matches an initial not followed by both a period and a space. 299(defconst mail-extr-leading-garbage
164(defconst mail-bad-initials-pattern 300 (purecopy (format "[^%s]+" mail-extr-first-letters)))
165 (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
166 mail-all-letters mail-first-letters mail-all-letters))
167 301
168(defconst mail-non-name-chars (concat "^" mail-all-letters ".")) 302;; (defconst mail-extr-non-name-chars
303;; (purecopy (concat "^" mail-extr-all-letters ".")))
304;; (defconst mail-extr-non-begin-name-chars
305;; (purecopy (concat "^" mail-extr-first-letters)))
306;; (defconst mail-extr-non-end-name-chars
307;; (purecopy (concat "^" mail-extr-last-letters)))
169 308
170(defconst mail-non-begin-name-chars (concat "^" mail-first-letters)) 309;; Matches an initial not followed by both a period and a space.
171 310;; (defconst mail-extr-bad-initials-pattern
172(defconst mail-non-end-name-chars (concat "^" mail-last-letters)) 311;; (purecopy
312;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
313;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
173 314
174;; Matches periods used instead of spaces. Must not match the period 315;; Matches periods used instead of spaces. Must not match the period
175;; following an initial. 316;; following an initial.
176(defconst mail-bad-\.-pattern 317(defconst mail-extr-bad-dot-pattern
177 (format "\\([%s][%s]\\)\\.+\\([%s]\\)" 318 (purecopy
178 mail-all-letters mail-last-letters mail-first-letters)) 319 (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
320 mail-extr-all-letters
321 mail-extr-last-letters
322 mail-extr-first-letters)))
179 323
180;; Matches an embedded or leading nickname that should be removed. 324;; Matches an embedded or leading nickname that should be removed.
181(defconst mail-nickname-pattern 325;; (defconst mail-extr-nickname-pattern
182 (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] " 326;; (purecopy
183 mail-all-letters)) 327;; (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
184 328;; mail-extr-all-letters)))
185;; Matches a leading title that is not part of the name (does not
186;; contribute to uniquely identifying the person).
187(defconst mail-full-name-prefixes
188 '"\\` *\\(Prof\\|Dr\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.? ")
189 329
190;; Matches the occurrence of a generational name suffix, and the last 330;; Matches the occurrence of a generational name suffix, and the last
191;; character of the preceding name. 331;; character of the preceding name. This is important because we want to
192(defconst mail-full-name-suffix-pattern 332;; keep such suffixes: they help to uniquely identify the person.
193 (format 333;; *** Perhaps this should be a user-customizable variable. However, the
194 "\\(,? ?\\([JjSs]r\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)" 334;; *** regular expression is fairly tricky to alter, so maybe not.
195 mail-all-letters mail-all-letters)) 335(defconst mail-extr-full-name-suffix-pattern
196 336 (purecopy
197(defconst mail-roman-numeral-pattern 337 (format
198 "V?I+V?\\b") 338 "\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
339 mail-extr-all-letters mail-extr-all-letters)))
340
341(defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b"))
199 342
200;; Matches a trailing uppercase (with other characters possible) acronym. 343;; Matches a trailing uppercase (with other characters possible) acronym.
201;; Must not match a trailing uppercase last name or trailing initial 344;; Must not match a trailing uppercase last name or trailing initial
202(defconst mail-weird-acronym-pattern "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)") 345(defconst mail-extr-weird-acronym-pattern
346 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
203 347
204;; Matches a mixed-case or lowercase name (not an initial). 348;; Matches a mixed-case or lowercase name (not an initial).
205(defconst mail-mixed-case-name-pattern 349;; #### Match Latin1 lower case letters here too?
206 (format 350;; (defconst mail-extr-mixed-case-name-pattern
207 "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)" 351;; (purecopy
208 mail-all-letters mail-last-letters 352;; (format
209 mail-first-letters mail-all-letters mail-all-letters mail-last-letters 353;; "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
210 mail-first-letters mail-all-letters)) 354;; mail-extr-all-letters mail-extr-last-letters
355;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters
356;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters)))
211 357
212;; Matches a trailing alternative address. 358;; Matches a trailing alternative address.
213(defconst mail-alternative-address-pattern "[a-zA-Z.]+[!@][a-zA-Z.]") 359;; #### Match Latin1 letters here too?
360;; #### Match _ before @ here too?
361(defconst mail-extr-alternative-address-pattern
362 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
214 363
215;; Matches a variety of trailing comments not including comma-delimited 364;; Matches a variety of trailing comments not including comma-delimited
216;; comments. 365;; comments.
217(defconst mail-trailing-comment-start-pattern " [-{]\\|--\\|[+@#></\;]") 366(defconst mail-extr-trailing-comment-start-pattern
367 (purecopy " [-{]\\|--\\|[+@#></\;]"))
218 368
219;; Matches a name (not an initial). 369;; Matches a name (not an initial).
220;; This doesn't force a word boundary at the end because sometimes a 370;; This doesn't force a word boundary at the end because sometimes a
221;; comment is separated by a `-' with no preceding space. 371;; comment is separated by a `-' with no preceding space.
222(defconst mail-name-pattern 372(defconst mail-extr-name-pattern
223 (format 373 (purecopy (format "\\b[%s][%s]*[%s]"
224 "\\b[%s][%s]*[%s]" 374 mail-extr-first-letters
225 mail-first-letters mail-all-letters mail-last-letters)) 375 mail-extr-all-letters
376 mail-extr-last-letters)))
226 377
227(defconst mail-initial-pattern 378(defconst mail-extr-initial-pattern
228 (format "\\b[%s]\\([. ]\\|\\b\\)" mail-first-letters)) 379 (purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters)))
229 380
230;; Matches a single name before a comma. 381;; Matches a single name before a comma.
231(defconst mail-last-name-first-pattern 382;; (defconst mail-extr-last-name-first-pattern
232 (concat "\\`" mail-name-pattern ",")) 383;; (purecopy (concat "\\`" mail-extr-name-pattern ",")))
233 384
234;; Matches telephone extensions. 385;; Matches telephone extensions.
235(defconst mail-telephone-extension-pattern 386(defconst mail-extr-telephone-extension-pattern
236 "\\(\\([Ee]xt\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+") 387 (purecopy
388 "\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+"))
237 389
238;; Matches ham radio call signs. 390;; Matches ham radio call signs.
239(defconst mail-ham-call-sign-pattern 391;; Help from: Mat Maessen N2NJZ <maessm@rpi.edu>, Mark Feit
240 "\\b[A-Z]+[0-9][A-Z0-9]*") 392;; <mark@era.com>, Michael Covington <mcovingt@ai.uga.edu>.
393;; Examples: DX504 DX515 K5MRU K8DHK KA9WGN KA9WGN KD3FU KD6EUI KD6HBW
394;; KE9TV KF0NV N1API N3FU N3GZE N3IGS N4KCC N7IKQ N9HHU W4YHF W6ANK WA2SUH
395;; WB7VZI N2NJZ NR3G KJ4KK AB4UM AL7NI KH6OH WN3KBT N4TMI W1A N0NZO
396(defconst mail-extr-ham-call-sign-pattern
397 (purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)"))
398
399;; Possible trailing suffixes: "\\(/\\(KT\\|A[AEG]\\|[R0-9]\\)\\)?"
400;; /KT == Temporary Technician (has CSC but not "real" license)
401;; /AA == Temporary Advanced
402;; /AE == Temporary Extra
403;; /AG == Temporary General
404;; /R == repeater
405;; /# == stations operating out of home district
406;; I don't include these in the regexp above because I can't imagine
407;; anyone putting them with their name in an e-mail address.
241 408
242;; Matches normal single-part name 409;; Matches normal single-part name
243(defconst mail-normal-name-pattern 410(defconst mail-extr-normal-name-pattern
244 (format 411 (purecopy (format "\\b[%s][%s]+[%s]"
245 "\\b[%s][%s]+[%s]" 412 mail-extr-first-letters
246 mail-first-letters mail-all-letters-but-separators mail-last-letters)) 413 mail-extr-all-letters-but-separators
247 414 mail-extr-last-letters)))
415
416;; Matches a single word name.
417;; (defconst mail-extr-one-name-pattern
418;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
419
248;; Matches normal two names with missing middle initial 420;; Matches normal two names with missing middle initial
249(defconst mail-two-name-pattern 421;; The first name is not allowed to have a hyphen because this can cause
250 (concat "\\`\\(" mail-normal-name-pattern 422;; false matches where the "middle initial" is actually the first letter
251 "\\|" mail-initial-pattern 423;; of the second part of the first name.
252 "\\) +\\(" mail-normal-name-pattern "\\)\\(,\\|\\'\\)")) 424(defconst mail-extr-two-name-pattern
253 425 (purecopy
254(defvar address-syntax-table (make-syntax-table)) 426 (concat "\\`\\(" mail-extr-normal-name-pattern
255(defvar address-comment-syntax-table (make-syntax-table)) 427 "\\|" mail-extr-initial-pattern
256(defvar address-domain-literal-syntax-table (make-syntax-table)) 428 "\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)")))
257(defvar address-text-comment-syntax-table (make-syntax-table)) 429
258(defvar address-text-syntax-table (make-syntax-table)) 430(defconst mail-extr-listserv-list-name-pattern
431 (purecopy "Multiple recipients of list \\([-A-Z]+\\)"))
432
433(defconst mail-extr-stupid-vms-date-stamp-pattern
434 (purecopy
435 "[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *"))
436
437;;; HZ -- GB (PRC Chinese character encoding) in ASCII embedding protocol
438;;
439;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is
440;; encountered. The character '~' is an escape character. By convention, it
441;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
442;; following special meaning.
443;;
444;; o The escape sequence '~~' is interpreted as a '~'.
445;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
446;; o The escape sequence '~\n' is a line-continuation marker to be consumed
447;; with no output produced.
448;;
449;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
450;; codes until the escape-from-GB code '~}' is read. This code switches the
451;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
452;; ($7E7D) is outside the defined GB range.)
453(defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
454 (purecopy "~{\\([^~].\\|~[^\}]\\)+~}"))
455
456;; The leading optional lowercase letters are for a bastardized version of
457;; the encoding, as is the optional nature of the final slash.
458(defconst mail-extr-x400-encoded-address-pattern
459 (purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'"))
460
461(defconst mail-extr-x400-encoded-address-field-pattern-format
462 (purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)"))
463
464(defconst mail-extr-x400-encoded-address-surname-pattern
465 ;; S stands for Surname (family name).
466 (purecopy
467 (format mail-extr-x400-encoded-address-field-pattern-format "[Ss]")))
468
469(defconst mail-extr-x400-encoded-address-given-name-pattern
470 ;; G stands for Given name.
471 (purecopy
472 (format mail-extr-x400-encoded-address-field-pattern-format "[Gg]")))
473
474(defconst mail-extr-x400-encoded-address-full-name-pattern
475 ;; PN stands for Personal Name. When used it represents the combination
476 ;; of the G and S fields.
477 ;; "The one system I used having this field asked it with the prompt
478 ;; `Personal Name'. But they mapped it into G and S on outgoing real
479 ;; X.400 addresses. As they mapped G and S into PN on incoming..."
480 (purecopy
481 (format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]")))
482
483
484
485;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
486;;
487;; Syntax tables used for quick parsing.
488;;
489
490(defconst mail-extr-address-syntax-table (make-syntax-table))
491(defconst mail-extr-address-comment-syntax-table (make-syntax-table))
492(defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table))
493(defconst mail-extr-address-text-comment-syntax-table (make-syntax-table))
494(defconst mail-extr-address-text-syntax-table (make-syntax-table))
259(mapcar 495(mapcar
260 (function 496 (function
261 (lambda (pair) 497 (lambda (pair)
@@ -264,20 +500,23 @@
264 (function 500 (function
265 (lambda (item) 501 (lambda (item)
266 (if (eq 2 (length item)) 502 (if (eq 2 (length item))
503 ;; modifying syntax of a single character
267 (modify-syntax-entry (car item) (car (cdr item)) syntax-table) 504 (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
268 (let ((char (car item)) 505 ;; modifying syntax of a range of characters
269 (bound (car (cdr item))) 506 (let ((char (nth 0 item))
270 (syntax (car (cdr (cdr item))))) 507 (bound (nth 1 item))
508 (syntax (nth 2 item)))
271 (while (<= char bound) 509 (while (<= char bound)
272 (modify-syntax-entry char syntax syntax-table) 510 (modify-syntax-entry char syntax syntax-table)
273 (setq char (1+ char))))))) 511 (setq char (1+ char)))))))
274 (cdr pair))))) 512 (cdr pair)))))
275 '((address-syntax-table 513 '((mail-extr-address-syntax-table
276 (0 31 "w") ;control characters 514 (?\000 ?\037 "w") ;control characters
277 (32 " ") ;SPC 515 (?\040 " ") ;SPC
278 (?! ?~ "w") ;printable characters 516 (?! ?~ "w") ;printable characters
279 (127 "w") ;DEL 517 (?\177 "w") ;DEL
280 (128 255 "w") ;high-bit-on characters 518 (?\200 ?\377 "w") ;high-bit-on characters
519 (?\240 " ") ;nobreakspace
281 (?\t " ") 520 (?\t " ")
282 (?\r " ") 521 (?\r " ")
283 (?\n " ") 522 (?\n " ")
@@ -296,20 +535,35 @@
296 (?\] ".") 535 (?\] ".")
297 ;; % and ! aren't RFC822 characters, but it is convenient to pretend 536 ;; % and ! aren't RFC822 characters, but it is convenient to pretend
298 (?% ".") 537 (?% ".")
299 (?! ".") 538 (?! ".") ;; this needs to be word-constituent when not in .UUCP mode
300 ) 539 )
301 (address-comment-syntax-table 540 (mail-extr-address-comment-syntax-table
302 (0 255 "w") 541 (?\000 ?\377 "w")
542 (?\040 " ")
543 (?\240 " ")
544 (?\t " ")
545 (?\r " ")
546 (?\n " ")
303 (?\( "\(\)") 547 (?\( "\(\)")
304 (?\) "\)\(") 548 (?\) "\)\(")
305 (?\\ "\\")) 549 (?\\ "\\"))
306 (address-domain-literal-syntax-table 550 (mail-extr-address-domain-literal-syntax-table
307 (0 255 "w") 551 (?\000 ?\377 "w")
552 (?\040 " ")
553 (?\240 " ")
554 (?\t " ")
555 (?\r " ")
556 (?\n " ")
308 (?\[ "\(\]") ;?????? 557 (?\[ "\(\]") ;??????
309 (?\] "\)\[") ;?????? 558 (?\] "\)\[") ;??????
310 (?\\ "\\")) 559 (?\\ "\\"))
311 (address-text-comment-syntax-table 560 (mail-extr-address-text-comment-syntax-table
312 (0 255 "w") 561 (?\000 ?\377 "w")
562 (?\040 " ")
563 (?\240 " ")
564 (?\t " ")
565 (?\r " ")
566 (?\n " ")
313 (?\( "\(\)") 567 (?\( "\(\)")
314 (?\) "\)\(") 568 (?\) "\)\(")
315 (?\[ "\(\]") 569 (?\[ "\(\]")
@@ -321,8 +575,13 @@
321 ;; (?\' "\)\`") 575 ;; (?\' "\)\`")
322 ;; (?\` "\(\'") 576 ;; (?\` "\(\'")
323 ) 577 )
324 (address-text-syntax-table 578 (mail-extr-address-text-syntax-table
325 (0 255 ".") 579 (?\000 ?\177 ".")
580 (?\200 ?\377 "w")
581 (?\040 " ")
582 (?\t " ")
583 (?\r " ")
584 (?\n " ")
326 (?A ?Z "w") 585 (?A ?Z "w")
327 (?a ?z "w") 586 (?a ?z "w")
328 (?- "w") 587 (?- "w")
@@ -335,110 +594,172 @@
335 )) 594 ))
336 595
337 596
597;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
598;;
338;; Utility functions and macros. 599;; Utility functions and macros.
600;;
601
602(defmacro mail-extr-delete-char (n)
603 ;; in v19, delete-char is compiled as a function call, but delete-region
604 ;; is byte-coded, so it's much much faster.
605 (list 'delete-region '(point) (list '+ '(point) n)))
606
607(defmacro mail-extr-skip-whitespace-forward ()
608 ;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded.
609 '(skip-chars-forward " \t\n\r\240"))
610
611(defmacro mail-extr-skip-whitespace-backward ()
612 ;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded.
613 '(skip-chars-backward " \t\n\r\240"))
614
339 615
340(defmacro mail-undo-backslash-quoting (beg end) 616(defmacro mail-extr-undo-backslash-quoting (beg end)
341 (`(save-excursion 617 (`(save-excursion
342 (save-restriction 618 (save-restriction
343 (narrow-to-region (, beg) (, end)) 619 (narrow-to-region (, beg) (, end))
344 (goto-char (point-min)) 620 (goto-char (point-min))
345 ;; undo \ quoting 621 ;; undo \ quoting
346 (while (re-search-forward "\\\\\\(.\\)" nil t) 622 (while (search-forward "\\" nil t)
347 (replace-match "\\1") 623 (mail-extr-delete-char -1)
348 ;; CHECK: does this leave point after the replacement? 624 (or (eobp)
625 (forward-char 1))
349 ))))) 626 )))))
350 627
351(defmacro mail-nuke-char-at (pos) 628(defmacro mail-extr-nuke-char-at (pos)
352 (` (save-excursion 629 (` (save-excursion
353 (goto-char (, pos)) 630 (goto-char (, pos))
354 (delete-char 1) 631 (mail-extr-delete-char 1)
355 (insert mail-space-char)))) 632 (insert ?\ ))))
356 633
357(defmacro mail-nuke-elements-outside-range (list-symbol beg-symbol end-symbol 634(put 'mail-extr-nuke-outside-range
358 &optional no-replace) 635 'edebug-form-spec '(symbolp &optional form form atom))
359 (` (progn 636
360 (setq temp (, list-symbol)) 637(defmacro mail-extr-nuke-outside-range (list-symbol
638 beg-symbol end-symbol
639 &optional no-replace)
640 ;; LIST-SYMBOL names a variable holding a list of buffer positions
641 ;; BEG-SYMBOL and END-SYMBOL name variables delimiting a range
642 ;; Each element of LIST-SYMBOL which lies outside of the range is
643 ;; deleted from the list.
644 ;; Unless NO-REPLACE is true, at each of the positions in LIST-SYMBOL
645 ;; which lie outside of the range, one character at that position is
646 ;; replaced with a SPC.
647 (or (memq no-replace '(t nil))
648 (error "no-replace must be t or nil, evalable at macroexpand-time."))
649 (` (let ((temp (, list-symbol))
650 ch)
361 (while temp 651 (while temp
362 (cond ((or (> (car temp) (, end-symbol)) 652 (setq ch (car temp))
363 (< (car temp) (, beg-symbol))) 653 (cond ((or (> ch (, end-symbol))
364 (, (or no-replace 654 (< ch (, beg-symbol)))
365 (` (mail-nuke-char-at (car temp))))) 655 (,@ (if no-replace
656 nil
657 (` ((mail-extr-nuke-char-at ch)))))
366 (setcar temp nil))) 658 (setcar temp nil)))
367 (setq temp (cdr temp))) 659 (setq temp (cdr temp)))
368 (setq (, list-symbol) (delq nil (, list-symbol)))))) 660 (setq (, list-symbol) (delq nil (, list-symbol))))))
369 661
370(defun mail-demarkerize (marker) 662(defun mail-extr-demarkerize (marker)
371 (and marker 663 ;; if arg is a marker, destroys the marker, then returns the old value.
372 (if (markerp marker) 664 ;; otherwise returns the arg.
373 (let ((temp (marker-position marker))) 665 (if (markerp marker)
374 (set-marker marker nil) 666 (let ((temp (marker-position marker)))
375 temp) 667 (set-marker marker nil)
376 marker))) 668 temp)
377 669 marker))
378(defun mail-markerize (pos) 670
379 (and pos 671(defun mail-extr-markerize (pos)
380 (if (markerp pos) 672 ;; coerces pos to a marker if non-nil.
381 pos 673 (if (or (markerp pos) (null pos))
382 (copy-marker pos)))) 674 pos
383 675 (copy-marker pos)))
384(defmacro mail-last-element (list) 676
385 "Return last element of LIST." 677(defmacro mail-extr-last (list)
678 ;; Returns last element of LIST.
679 ;; Could be a subst.
386 (` (let ((list (, list))) 680 (` (let ((list (, list)))
387 (while (not (null (cdr list))) 681 (while (not (null (cdr list)))
388 (setq list (cdr list))) 682 (setq list (cdr list)))
389 (car list)))) 683 (car list))))
390 684
391(defmacro mail-safe-move-sexp (arg) 685(defmacro mail-extr-safe-move-sexp (arg)
392 "Safely skip over one balanced sexp, if there is one. Return t if success." 686 ;; Safely skip over one balanced sexp, if there is one. Return t if success.
393 (` (condition-case error 687 (` (condition-case error
394 (progn 688 (progn
395 (goto-char (scan-sexps (point) (, arg))) 689 (goto-char (scan-sexps (point) (, arg)))
396 t) 690 t)
397 (error 691 (error
692 ;; #### kludge kludge kludge kludge kludge kludge kludge !!!
398 (if (string-equal (nth 1 error) "Unbalanced parentheses") 693 (if (string-equal (nth 1 error) "Unbalanced parentheses")
399 nil 694 nil
400 (while t 695 (while t
401 (signal (car error) (cdr error)))))))) 696 (signal (car error) (cdr error))))))))
402
403 697
698;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
699;;
404;; The main function to grind addresses 700;; The main function to grind addresses
701;;
702
703(defvar disable-initial-guessing-flag) ; dynamic assignment
704(defvar cbeg) ; dynamic assignment
705(defvar cend) ; dynamic assignment
405 706
707;;;###autoload
406(defun mail-extract-address-components (address) 708(defun mail-extract-address-components (address)
407 "Given an rfc 822 ADDRESS, extract full name and canonical address. 709 "Given an RFC-822 ADDRESS, extract full name and canonical address.
408Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)." 710Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
409 (let ((canonicalization-buffer (get-buffer-create "*canonical address*")) 711If no name can be extracted, FULL-NAME will be nil.
410 (extraction-buffer (get-buffer-create "*extract address components*")) 712ADDRESS may be a string or a buffer. If it is a buffer, the visible
411 (foo 'bar) 713 (narrowed) portion of the buffer will be interpreted as the address.
714 (This feature exists so that the clever caller might be able to avoid
715 consing a string.)
716If ADDRESS contains more than one RFC-822 address, only the first is
717 returned. Some day this function may be extended to extract multiple
718 addresses, or perhaps return the position at which parsing stopped."
719 (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
720 (extraction-buffer (get-buffer-create " *extract address components*"))
412 char 721 char
413 multiple-addresses 722;; multiple-addresses
414 <-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos 723 <-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos
415 group-:-pos group-\;-pos route-addr-:-pos 724 group-:-pos group-\;-pos route-addr-:-pos
416 record-pos-symbol 725 record-pos-symbol
417 first-real-pos last-real-pos 726 first-real-pos last-real-pos
418 phrase-beg phrase-end 727 phrase-beg phrase-end
419 comment-beg comment-end 728 cbeg cend ; dynamically set from -voodoo
420 quote-beg quote-end 729 quote-beg quote-end
421 atom-beg atom-end 730 atom-beg atom-end
422 mbox-beg mbox-end 731 mbox-beg mbox-end
423 \.-ends-name 732 \.-ends-name
424 temp 733 temp
425 name-suffix 734;; name-suffix
426 saved-point 735 fi mi li ; first, middle, last initial
427 fi mi li
428 saved-%-pos saved-!-pos saved-@-pos 736 saved-%-pos saved-!-pos saved-@-pos
429 domain-pos \.-pos insert-point) 737 domain-pos \.-pos insert-point
738;; mailbox-name-processed-flag
739 disable-initial-guessing-flag ; dynamically set from -voodoo
740 )
430 741
431 (save-excursion 742 (save-excursion
432 (set-buffer extraction-buffer) 743 (set-buffer extraction-buffer)
744 (fundamental-mode)
745 (kill-all-local-variables)
433 (buffer-disable-undo extraction-buffer) 746 (buffer-disable-undo extraction-buffer)
434 (set-syntax-table address-syntax-table) 747 (set-syntax-table mail-extr-address-syntax-table)
435 (widen) 748 (widen)
436 (erase-buffer) 749 (erase-buffer)
437 (setq case-fold-search nil) 750 (setq case-fold-search nil)
438 751
439 ;; Insert extra space at beginning to allow later replacement with < 752 ;; Insert extra space at beginning to allow later replacement with <
440 ;; without having to move markers. 753 ;; without having to move markers.
441 (insert mail-space-char address) 754 (insert ?\ )
755
756 ;; Insert the address itself.
757 (cond ((stringp address)
758 (insert address))
759 ((bufferp address)
760 (insert-buffer-substring address))
761 (t
762 (error "Illegal address: %s" address)))
442 763
443 ;; stolen from rfc822.el 764 ;; stolen from rfc822.el
444 ;; Unfold multiple lines. 765 ;; Unfold multiple lines.
@@ -449,7 +770,7 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
449 ;; first pass grabs useful information about address 770 ;; first pass grabs useful information about address
450 (goto-char (point-min)) 771 (goto-char (point-min))
451 (while (progn 772 (while (progn
452 (skip-chars-forward mail-whitespace) 773 (mail-extr-skip-whitespace-forward)
453 (not (eobp))) 774 (not (eobp)))
454 (setq char (char-after (point))) 775 (setq char (char-after (point)))
455 (or first-real-pos 776 (or first-real-pos
@@ -458,51 +779,61 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
458 (cond 779 (cond
459 ;; comment 780 ;; comment
460 ((eq char ?\() 781 ((eq char ?\()
461 (set-syntax-table address-comment-syntax-table) 782 (set-syntax-table mail-extr-address-comment-syntax-table)
462 ;; only record the first non-empty comment's position 783 ;; only record the first non-empty comment's position
463 (if (and (not comment-beg) 784 (if (and (not cbeg)
464 (save-excursion 785 (save-excursion
465 (forward-char 1) 786 (forward-char 1)
466 (skip-chars-forward mail-whitespace) 787 (mail-extr-skip-whitespace-forward)
467 (not (eq ?\) (char-after (point)))))) 788 (not (eq ?\) (char-after (point))))))
468 (setq comment-beg (point))) 789 (setq cbeg (point)))
469 ;; TODO: don't record if unbalanced 790 ;; TODO: don't record if unbalanced
470 (or (mail-safe-move-sexp 1) 791 (or (mail-extr-safe-move-sexp 1)
471 (forward-char 1)) 792 (forward-char 1))
472 (set-syntax-table address-syntax-table) 793 (set-syntax-table mail-extr-address-syntax-table)
473 (if (and comment-beg 794 (if (and cbeg
474 (not comment-end)) 795 (not cend))
475 (setq comment-end (point)))) 796 (setq cend (point))))
476 ;; quoted text 797 ;; quoted text
477 ((eq char ?\") 798 ((eq char ?\")
478 ;; only record the first non-empty quote's position 799 ;; only record the first non-empty quote's position
479 (if (and (not quote-beg) 800 (if (and (not quote-beg)
480 (save-excursion 801 (save-excursion
481 (forward-char 1) 802 (forward-char 1)
482 (skip-chars-forward mail-whitespace) 803 (mail-extr-skip-whitespace-forward)
483 (not (eq ?\" (char-after (point)))))) 804 (not (eq ?\" (char-after (point))))))
484 (setq quote-beg (point))) 805 (setq quote-beg (point)))
485 ;; TODO: don't record if unbalanced 806 ;; TODO: don't record if unbalanced
486 (or (mail-safe-move-sexp 1) 807 (or (mail-extr-safe-move-sexp 1)
487 (forward-char 1)) 808 (forward-char 1))
488 (if (and quote-beg 809 (if (and quote-beg
489 (not quote-end)) 810 (not quote-end))
490 (setq quote-end (point)))) 811 (setq quote-end (point))))
491 ;; domain literals 812 ;; domain literals
492 ((eq char ?\[) 813 ((eq char ?\[)
493 (set-syntax-table address-domain-literal-syntax-table) 814 (set-syntax-table mail-extr-address-domain-literal-syntax-table)
494 (or (mail-safe-move-sexp 1) 815 (or (mail-extr-safe-move-sexp 1)
495 (forward-char 1)) 816 (forward-char 1))
496 (set-syntax-table address-syntax-table)) 817 (set-syntax-table mail-extr-address-syntax-table))
497 ;; commas delimit addresses when outside < > pairs. 818 ;; commas delimit addresses when outside < > pairs.
498 ((and (eq char ?,) 819 ((and (eq char ?,)
499 (or (null <-pos) 820 (or (and (null <-pos)
821 ;; Handle ROUTE-ADDR address that is missing its <.
822 (not (eq ?@ (char-after (1+ (point))))))
500 (and >-pos 823 (and >-pos
501 ;; handle weird munged addresses 824 ;; handle weird munged addresses
502 (> (mail-last-element <-pos) (car >-pos))))) 825 ;; BUG FIX: This test was reversed. Thanks to the
503 (setq multiple-addresses t) 826 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
504 (delete-char 1) 827 ;; for discovering this!
505 (narrow-to-region (point-min) (point))) 828 (< (mail-extr-last <-pos) (car >-pos)))))
829;; It'd be great if some day this worked, but for now, punt.
830;; (setq multiple-addresses t)
831;; ;; *** Why do I want this:
832;; (mail-extr-delete-char 1)
833;; (narrow-to-region (point-min) (point))
834 (delete-region (point) (point-max))
835 (setq char ?\() ; HAVE I NO SHAME??
836 )
506 ;; record the position of various interesting chars, determine 837 ;; record the position of various interesting chars, determine
507 ;; legality later. 838 ;; legality later.
508 ((setq record-pos-symbol 839 ((setq record-pos-symbol
@@ -524,30 +855,35 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
524 ;; domain literals, and comments 855 ;; domain literals, and comments
525 ?\\ 856 ?\\
526 )) 857 ))
527 (mail-nuke-char-at (point)) 858 (mail-extr-nuke-char-at (point))
528 (forward-char 1)) 859 (forward-char 1))
529 (t 860 (t
530 (forward-word 1))) 861 (forward-word 1)))
531 (or (eq char ?\() 862 (or (eq char ?\()
863 ;; At the end of first address of a multiple address header.
864 (and (eq char ?,)
865 (eobp))
532 (setq last-real-pos (point)))) 866 (setq last-real-pos (point))))
533 867
534 ;; Use only the leftmost <, if any. Replace all others with spaces. 868 ;; Use only the leftmost <, if any. Replace all others with spaces.
535 (while (cdr <-pos) 869 (while (cdr <-pos)
536 (mail-nuke-char-at (car <-pos)) 870 (mail-extr-nuke-char-at (car <-pos))
537 (setq <-pos (cdr <-pos))) 871 (setq <-pos (cdr <-pos)))
538 872
539 ;; Use only the rightmost >, if any. Replace all others with spaces. 873 ;; Use only the rightmost >, if any. Replace all others with spaces.
540 (while (cdr >-pos) 874 (while (cdr >-pos)
541 (mail-nuke-char-at (nth 1 >-pos)) 875 (mail-extr-nuke-char-at (nth 1 >-pos))
542 (setcdr >-pos (nthcdr 2 >-pos))) 876 (setcdr >-pos (nthcdr 2 >-pos)))
543 877
544 ;; If multiple @s and a :, but no < and >, insert around buffer. 878 ;; If multiple @s and a :, but no < and >, insert around buffer.
879 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
545 ;; This commonly happens on the UUCP "From " line. Ugh. 880 ;; This commonly happens on the UUCP "From " line. Ugh.
546 (cond ((and (> (length @-pos) 1) 881 (cond ((and (> (length @-pos) 1)
547 :-pos ;TODO: check if between @s 882 (eq 1 (length :-pos)) ;TODO: check if between last two @s
883 (not \;-pos)
548 (not <-pos)) 884 (not <-pos))
549 (goto-char (point-min)) 885 (goto-char (point-min))
550 (delete-char 1) 886 (mail-extr-delete-char 1)
551 (setq <-pos (list (point))) 887 (setq <-pos (list (point)))
552 (insert ?<))) 888 (insert ?<)))
553 889
@@ -561,7 +897,7 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
561 ;; If > but no <, replace > with space. 897 ;; If > but no <, replace > with space.
562 (cond ((and >-pos 898 (cond ((and >-pos
563 (null <-pos)) 899 (null <-pos))
564 (mail-nuke-char-at (car >-pos)) 900 (mail-extr-nuke-char-at (car >-pos))
565 (setq >-pos nil))) 901 (setq >-pos nil)))
566 902
567 ;; Turn >-pos and <-pos into non-lists 903 ;; Turn >-pos and <-pos into non-lists
@@ -573,12 +909,12 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
573 (cond (<-pos ; don't need to check >-pos also 909 (cond (<-pos ; don't need to check >-pos also
574 ;; handle bozo software that violates RFC 822 by sticking 910 ;; handle bozo software that violates RFC 822 by sticking
575 ;; punctuation marks outside of a < > pair 911 ;; punctuation marks outside of a < > pair
576 (mail-nuke-elements-outside-range @-pos <-pos >-pos t) 912 (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
577 ;; RFC 822 says nothing about these two outside < >, but 913 ;; RFC 822 says nothing about these two outside < >, but
578 ;; remove those positions from the lists to make things 914 ;; remove those positions from the lists to make things
579 ;; easier. 915 ;; easier.
580 (mail-nuke-elements-outside-range !-pos <-pos >-pos t) 916 (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
581 (mail-nuke-elements-outside-range %-pos <-pos >-pos t))) 917 (mail-extr-nuke-outside-range %-pos <-pos >-pos t)))
582 918
583 ;; Check for : that indicates GROUP list and for : part of 919 ;; Check for : that indicates GROUP list and for : part of
584 ;; ROUTE-ADDR spec. 920 ;; ROUTE-ADDR spec.
@@ -593,7 +929,7 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
593 (< (length @-pos) 2) 929 (< (length @-pos) 2)
594 (> temp (car @-pos)) 930 (> temp (car @-pos))
595 (< temp (nth 1 @-pos))) 931 (< temp (nth 1 @-pos)))
596 (mail-nuke-char-at temp) 932 (mail-extr-nuke-char-at temp)
597 (setq route-addr-:-pos temp))) 933 (setq route-addr-:-pos temp)))
598 ((or (not <-pos) 934 ((or (not <-pos)
599 (and <-pos 935 (and <-pos
@@ -608,39 +944,57 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
608 (cond ((and <-pos >-pos 944 (cond ((and <-pos >-pos
609 (> temp <-pos) 945 (> temp <-pos)
610 (< temp >-pos)) 946 (< temp >-pos))
611 (mail-nuke-char-at temp)) 947 (mail-extr-nuke-char-at temp))
612 ((and (or (not group-:-pos) 948 ((and (or (not group-:-pos)
613 (> temp group-:-pos)) 949 (> temp group-:-pos))
614 (not group-\;-pos)) 950 (not group-\;-pos))
615 (setq group-\;-pos temp)))) 951 (setq group-\;-pos temp))))
616 952
953 ;; Nuke unmatched GROUP syntax characters.
954 (cond ((and group-:-pos (not group-\;-pos))
955 ;; *** Do I really need to erase it?
956 (mail-extr-nuke-char-at group-:-pos)
957 (setq group-:-pos nil)))
958 (cond ((and group-\;-pos (not group-:-pos))
959 ;; *** Do I really need to erase it?
960 (mail-extr-nuke-char-at group-\;-pos)
961 (setq group-\;-pos nil)))
962
617 ;; Handle junk like ";@host.company.dom" that sendmail adds. 963 ;; Handle junk like ";@host.company.dom" that sendmail adds.
618 ;; **** should I remember comment positions? 964 ;; **** should I remember comment positions?
619 (and group-\;-pos 965 (cond
620 ;; this is fine for now 966 (group-\;-pos
621 (mail-nuke-elements-outside-range !-pos group-:-pos group-\;-pos t) 967 ;; this is fine for now
622 (mail-nuke-elements-outside-range @-pos group-:-pos group-\;-pos t) 968 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
623 (mail-nuke-elements-outside-range %-pos group-:-pos group-\;-pos t) 969 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
624 (mail-nuke-elements-outside-range ,-pos group-:-pos group-\;-pos t) 970 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
625 (and last-real-pos 971 (mail-extr-nuke-outside-range ,-pos group-:-pos group-\;-pos t)
626 (> last-real-pos (1+ group-\;-pos)) 972 (and last-real-pos
627 (setq last-real-pos (1+ group-\;-pos))) 973 (> last-real-pos (1+ group-\;-pos))
628 (and comment-end 974 (setq last-real-pos (1+ group-\;-pos)))
629 (> comment-end group-\;-pos) 975 ;; *** This may be wrong:
630 (setq comment-end nil 976 (and cend
631 comment-beg nil)) 977 (> cend group-\;-pos)
632 (and quote-end 978 (setq cend nil
633 (> quote-end group-\;-pos) 979 cbeg nil))
634 (setq quote-end nil 980 (and quote-end
635 quote-beg nil)) 981 (> quote-end group-\;-pos)
636 (narrow-to-region (point-min) group-\;-pos)) 982 (setq quote-end nil
983 quote-beg nil))
984 ;; This was both wrong and unnecessary:
985 ;;(narrow-to-region (point-min) group-\;-pos)
986
987 ;; *** The entire handling of GROUP addresses seems rather lame.
988 ;; *** It deserves a complete rethink, except that these addresses
989 ;; *** are hardly ever seen.
990 ))
637 991
638 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any 992 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
639 ;; others. 993 ;; others.
640 ;; Hell, go ahead an nuke all of the commas. 994 ;; Hell, go ahead an nuke all of the commas.
641 ;; **** This will cause problems when we start handling commas in 995 ;; **** This will cause problems when we start handling commas in
642 ;; the PHRASE part .... no it won't ... yes it will ... ????? 996 ;; the PHRASE part .... no it won't ... yes it will ... ?????
643 (mail-nuke-elements-outside-range ,-pos 1 1) 997 (mail-extr-nuke-outside-range ,-pos 1 1)
644 998
645 ;; can only have multiple @s inside < >. The fact that some MTAs 999 ;; can only have multiple @s inside < >. The fact that some MTAs
646 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is 1000 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
@@ -649,11 +1003,11 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
649 ;; Locate PHRASE part of ROUTE-ADDR. 1003 ;; Locate PHRASE part of ROUTE-ADDR.
650 (cond (<-pos 1004 (cond (<-pos
651 (goto-char <-pos) 1005 (goto-char <-pos)
652 (skip-chars-backward mail-whitespace) 1006 (mail-extr-skip-whitespace-backward)
653 (setq phrase-end (point)) 1007 (setq phrase-end (point))
654 (goto-char (or ;;group-:-pos 1008 (goto-char (or ;;group-:-pos
655 (point-min))) 1009 (point-min)))
656 (skip-chars-forward mail-whitespace) 1010 (mail-extr-skip-whitespace-forward)
657 (if (< (point) phrase-end) 1011 (if (< (point) phrase-end)
658 (setq phrase-beg (point)) 1012 (setq phrase-beg (point))
659 (setq phrase-end nil)))) 1013 (setq phrase-end nil))))
@@ -671,7 +1025,7 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
671 (insert-before-markers ?X) 1025 (insert-before-markers ?X)
672 (goto-char (car @-pos)) 1026 (goto-char (car @-pos))
673 (while (setq @-pos (cdr @-pos)) 1027 (while (setq @-pos (cdr @-pos))
674 (delete-char 1) 1028 (mail-extr-delete-char 1)
675 (setq %-pos (cons (point-marker) %-pos)) 1029 (setq %-pos (cons (point-marker) %-pos))
676 (insert "%") 1030 (insert "%")
677 (goto-char (1- >-pos)) 1031 (goto-char (1- >-pos))
@@ -683,12 +1037,12 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
683 (setq saved-@-pos (list (point))))) 1037 (setq saved-@-pos (list (point)))))
684 (setq @-pos saved-@-pos) 1038 (setq @-pos saved-@-pos)
685 (goto-char >-pos) 1039 (goto-char >-pos)
686 (delete-char -1) 1040 (mail-extr-delete-char -1)
687 (mail-nuke-char-at route-addr-:-pos) 1041 (mail-extr-nuke-char-at route-addr-:-pos)
688 (mail-demarkerize route-addr-:-pos) 1042 (mail-extr-demarkerize route-addr-:-pos)
689 (setq route-addr-:-pos nil 1043 (setq route-addr-:-pos nil
690 >-pos (mail-demarkerize >-pos) 1044 >-pos (mail-extr-demarkerize >-pos)
691 %-pos (mapcar 'mail-demarkerize %-pos)))) 1045 %-pos (mapcar 'mail-extr-demarkerize %-pos))))
692 1046
693 ;; de-listify @-pos 1047 ;; de-listify @-pos
694 (setq @-pos (car @-pos)) 1048 (setq @-pos (car @-pos))
@@ -696,9 +1050,10 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
696 ;; TODO: remove comments in the middle of an address 1050 ;; TODO: remove comments in the middle of an address
697 1051
698 (set-buffer canonicalization-buffer) 1052 (set-buffer canonicalization-buffer)
699 1053 (fundamental-mode)
1054 (kill-all-local-variables)
700 (buffer-disable-undo canonicalization-buffer) 1055 (buffer-disable-undo canonicalization-buffer)
701 (set-syntax-table address-syntax-table) 1056 (set-syntax-table mail-extr-address-syntax-table)
702 (setq case-fold-search nil) 1057 (setq case-fold-search nil)
703 1058
704 (widen) 1059 (widen)
@@ -708,43 +1063,57 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
708 (if <-pos 1063 (if <-pos
709 (narrow-to-region (progn 1064 (narrow-to-region (progn
710 (goto-char (1+ <-pos)) 1065 (goto-char (1+ <-pos))
711 (skip-chars-forward mail-whitespace) 1066 (mail-extr-skip-whitespace-forward)
712 (point)) 1067 (point))
713 >-pos) 1068 >-pos)
714 ;; ****** Oh no! What if the address is completely empty! 1069 (if (and first-real-pos last-real-pos)
715 (narrow-to-region first-real-pos last-real-pos)) 1070 (narrow-to-region first-real-pos last-real-pos)
1071 ;; ****** Oh no! What if the address is completely empty!
1072 ;; *** Is this correct?
1073 (narrow-to-region (point-max) (point-max))
1074 ))
716 1075
717 (and @-pos %-pos 1076 (and @-pos %-pos
718 (mail-nuke-elements-outside-range %-pos (point-min) @-pos)) 1077 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
719 (and %-pos !-pos 1078 (and %-pos !-pos
720 (mail-nuke-elements-outside-range !-pos (point-min) (car %-pos))) 1079 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
721 (and @-pos !-pos (not %-pos) 1080 (and @-pos !-pos (not %-pos)
722 (mail-nuke-elements-outside-range !-pos (point-min) @-pos)) 1081 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
723 1082
724 ;; Error condition:?? (and %-pos (not @-pos)) 1083 ;; Error condition:?? (and %-pos (not @-pos))
1084
1085 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
1086 (cond ((and %-pos
1087 (not @-pos))
1088 (goto-char (car %-pos))
1089 (mail-extr-delete-char 1)
1090 (setq @-pos (point))
1091 (insert "@")
1092 (setq %-pos (cdr %-pos))))
725 1093
1094 (if mail-extr-mangle-uucp
726 (cond (!-pos 1095 (cond (!-pos
727 ;; **** I don't understand this save-restriction and the 1096 ;; **** I don't understand this save-restriction and the
728 ;; narrow-to-region inside it. Why did I do that? 1097 ;; narrow-to-region inside it. Why did I do that?
729 (save-restriction 1098 (save-restriction
730 (cond ((and @-pos 1099 (cond ((and @-pos
731 mail-@-binds-tighter-than-!) 1100 mail-extr-@-binds-tighter-than-!)
732 (goto-char @-pos) 1101 (goto-char @-pos)
733 (setq %-pos (cons (point) %-pos) 1102 (setq %-pos (cons (point) %-pos)
734 @-pos nil) 1103 @-pos nil)
735 (delete-char 1) 1104 (mail-extr-delete-char 1)
736 (insert "%") 1105 (insert "%")
737 (setq insert-point (point-max))) 1106 (setq insert-point (point-max)))
738 (mail-@-binds-tighter-than-! 1107 (mail-extr-@-binds-tighter-than-!
739 (setq insert-point (point-max))) 1108 (setq insert-point (point-max)))
740 (%-pos 1109 (%-pos
741 (setq insert-point (mail-last-element %-pos) 1110 (setq insert-point (mail-extr-last %-pos)
742 saved-%-pos (mapcar 'mail-markerize %-pos) 1111 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
743 %-pos nil 1112 %-pos nil
744 @-pos (mail-markerize @-pos))) 1113 @-pos (mail-extr-markerize @-pos)))
745 (@-pos 1114 (@-pos
746 (setq insert-point @-pos) 1115 (setq insert-point @-pos)
747 (setq @-pos (mail-markerize @-pos))) 1116 (setq @-pos (mail-extr-markerize @-pos)))
748 (t 1117 (t
749 (setq insert-point (point-max)))) 1118 (setq insert-point (point-max))))
750 (narrow-to-region (point-min) insert-point) 1119 (narrow-to-region (point-min) insert-point)
@@ -765,31 +1134,35 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
765 (1+ (nth 1 !-pos)) 1134 (1+ (nth 1 !-pos))
766 (point-min)) 1135 (point-min))
767 (car !-pos)) 1136 (car !-pos))
768 (delete-char 1) 1137 (mail-extr-delete-char 1)
769 (or (save-excursion 1138 (or (save-excursion
770 (mail-safe-move-sexp -1) 1139 (mail-extr-safe-move-sexp -1)
771 (skip-chars-backward mail-whitespace) 1140 (mail-extr-skip-whitespace-backward)
772 (eq ?. (preceding-char))) 1141 (eq ?. (preceding-char)))
773 (insert-before-markers 1142 (insert-before-markers
774 (if (save-excursion 1143 (if (save-excursion
775 (skip-chars-backward mail-whitespace) 1144 (mail-extr-skip-whitespace-backward)
776 (eq ?. (preceding-char))) 1145 (eq ?. (preceding-char)))
777 "" 1146 ""
778 ".") 1147 ".")
779 "uucp")) 1148 "uucp"))
780 (setq !-pos (cdr !-pos)))) 1149 (setq !-pos (cdr !-pos))))
781 (and saved-%-pos 1150 (and saved-%-pos
782 (setq %-pos (append (mapcar 'mail-demarkerize saved-%-pos) 1151 (setq %-pos (append (mapcar 'mail-extr-demarkerize
783 %-pos))) 1152 saved-%-pos)
784 (setq @-pos (mail-demarkerize @-pos)) 1153 %-pos)))
785 (narrow-to-region (1+ saved-!-pos) (point-max)))) 1154 (setq @-pos (mail-extr-demarkerize @-pos))
1155 (narrow-to-region (1+ saved-!-pos) (point-max)))))
1156
1157 ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
786 (cond ((and %-pos 1158 (cond ((and %-pos
787 (not @-pos)) 1159 (not @-pos))
788 (goto-char (car %-pos)) 1160 (goto-char (car %-pos))
789 (delete-char 1) 1161 (mail-extr-delete-char 1)
790 (setq @-pos (point)) 1162 (setq @-pos (point))
791 (insert "@") 1163 (insert "@")
792 (setq %-pos (cdr %-pos)))) 1164 (setq %-pos (cdr %-pos))))
1165
793 (setq %-pos (nreverse %-pos)) 1166 (setq %-pos (nreverse %-pos))
794 ;; RFC 1034 doesn't approve of this, oh well: 1167 ;; RFC 1034 doesn't approve of this, oh well:
795 (downcase-region (or (car %-pos) @-pos (point-max)) (point-max)) 1168 (downcase-region (or (car %-pos) @-pos (point-max)) (point-max))
@@ -799,20 +1172,21 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
799 (while temp 1172 (while temp
800 (goto-char (or (nth 1 temp) 1173 (goto-char (or (nth 1 temp)
801 @-pos)) 1174 @-pos))
802 (skip-chars-backward mail-whitespace) 1175 (mail-extr-skip-whitespace-backward)
803 (save-excursion 1176 (save-excursion
804 (mail-safe-move-sexp -1) 1177 (mail-extr-safe-move-sexp -1)
805 (setq domain-pos (point)) 1178 (setq domain-pos (point))
806 (skip-chars-backward mail-whitespace) 1179 (mail-extr-skip-whitespace-backward)
807 (setq \.-pos (eq ?. (preceding-char)))) 1180 (setq \.-pos (eq ?. (preceding-char))))
808 (cond ((and \.-pos 1181 (cond ((and \.-pos
809 (get 1182 ;; #### string consing
810 (intern 1183 (let ((s (intern-soft
811 (buffer-substring domain-pos (point))) 1184 (buffer-substring domain-pos (point))
812 'domain-name)) 1185 mail-extr-all-top-level-domains)))
1186 (and s (get s 'domain-name))))
813 (narrow-to-region (point-min) (point)) 1187 (narrow-to-region (point-min) (point))
814 (goto-char (car temp)) 1188 (goto-char (car temp))
815 (delete-char 1) 1189 (mail-extr-delete-char 1)
816 (setq @-pos (point)) 1190 (setq @-pos (point))
817 (setcdr temp nil) 1191 (setcdr temp nil)
818 (setq %-pos (delq @-pos %-pos)) 1192 (setq %-pos (delq @-pos %-pos))
@@ -828,214 +1202,181 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
828 1202
829 (set-buffer extraction-buffer) 1203 (set-buffer extraction-buffer)
830 1204
831 ;; Find the full name 1205 ;; Decide what part of the address to search to find the full name.
832 1206 (cond (
833 (cond ((and phrase-beg 1207 ;; Example: "First M. Last" <fml@foo.bar.dom>
1208 (and phrase-beg
834 (eq quote-beg phrase-beg) 1209 (eq quote-beg phrase-beg)
835 (<= quote-end phrase-end)) 1210 (<= quote-end phrase-end))
836 (narrow-to-region (1+ quote-beg) (1- quote-end)) 1211 (narrow-to-region (1+ quote-beg) (1- quote-end))
837 (mail-undo-backslash-quoting (point-min) (point-max))) 1212 (mail-extr-undo-backslash-quoting (point-min) (point-max)))
1213
1214 ;; Example: First Last <fml@foo.bar.dom>
838 (phrase-beg 1215 (phrase-beg
839 (narrow-to-region phrase-beg phrase-end)) 1216 (narrow-to-region phrase-beg phrase-end))
840 (comment-beg 1217
841 (narrow-to-region (1+ comment-beg) (1- comment-end)) 1218 ;; Example: fml@foo.bar.dom (First M. Last)
842 (mail-undo-backslash-quoting (point-min) (point-max))) 1219 (cbeg
1220 (narrow-to-region (1+ cbeg) (1- cend))
1221 (mail-extr-undo-backslash-quoting (point-min) (point-max))
1222
1223 ;; Deal with spacing problems
1224 (goto-char (point-min))
1225; (cond ((not (search-forward " " nil t))
1226; (goto-char (point-min))
1227; (cond ((search-forward "_" nil t)
1228; ;; Handle the *idiotic* use of underlines as spaces.
1229; ;; Example: fml@foo.bar.dom (First_M._Last)
1230; (goto-char (point-min))
1231; (while (search-forward "_" nil t)
1232; (replace-match " " t)))
1233; ((search-forward "." nil t)
1234; ;; Fix . used as space
1235; ;; Example: danj1@cb.att.com (daniel.jacobson)
1236; (goto-char (point-min))
1237; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1238; (replace-match "\\1 \\2" t))))))
1239 )
1240
1241 ;; Otherwise we try to get the name from the mailbox portion
1242 ;; of the address.
1243 ;; Example: First_M_Last@foo.bar.dom
843 (t 1244 (t
844 ;; *** Work in canon buffer instead? No, can't. Hmm. 1245 ;; *** Work in canon buffer instead? No, can't. Hmm.
845 (delete-region (point-min) (point-max)) 1246 (goto-char (point-max))
1247 (narrow-to-region (point) (point))
846 (insert-buffer-substring canonicalization-buffer 1248 (insert-buffer-substring canonicalization-buffer
847 mbox-beg mbox-end) 1249 mbox-beg mbox-end)
848 (goto-char (point-min)) 1250 (goto-char (point-min))
849 (setq \.-ends-name (search-forward "_" nil t)) 1251
1252 ;; Example: First_Last.XXX@foo.bar.dom
1253 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1254
850 (goto-char (point-min)) 1255 (goto-char (point-min))
1256
1257 (if (not mail-extr-mangle-uucp)
1258 (modify-syntax-entry ?! "w" (syntax-table)))
1259
851 (while (progn 1260 (while (progn
852 (skip-chars-forward mail-whitespace) 1261 (mail-extr-skip-whitespace-forward)
853 (not (eobp))) 1262 (not (eobp)))
854 (setq char (char-after (point))) 1263 (setq char (char-after (point)))
855 (cond 1264 (cond
856 ((eq char ?\") 1265 ((eq char ?\")
857 (setq quote-beg (point)) 1266 (setq quote-beg (point))
858 (or (mail-safe-move-sexp 1) 1267 (or (mail-extr-safe-move-sexp 1)
859 ;; TODO: handle this error condition!!!!! 1268 ;; TODO: handle this error condition!!!!!
860 (forward-char 1)) 1269 (forward-char 1))
861 ;; take into account deletions 1270 ;; take into account deletions
862 (setq quote-end (- (point) 2)) 1271 (setq quote-end (- (point) 2))
863 (save-excursion 1272 (save-excursion
864 (backward-char 1) 1273 (backward-char 1)
865 (delete-char 1) 1274 (mail-extr-delete-char 1)
866 (goto-char quote-beg) 1275 (goto-char quote-beg)
867 (delete-char 1)) 1276 (mail-extr-delete-char 1))
868 (mail-undo-backslash-quoting quote-beg quote-end) 1277 (mail-extr-undo-backslash-quoting quote-beg quote-end)
869 (or (eq mail-space-char (char-after (point))) 1278 (or (eq ?\ (char-after (point)))
870 (insert " ")) 1279 (insert " "))
1280;; (setq mailbox-name-processed-flag t)
871 (setq \.-ends-name t)) 1281 (setq \.-ends-name t))
872 ((eq char ?.) 1282 ((eq char ?.)
873 (if (eq (char-after (1+ (point))) ?_) 1283 (if (memq (char-after (1+ (point))) '(?_ ?=))
874 (progn 1284 (progn
875 (forward-char 1) 1285 (forward-char 1)
876 (delete-char 1) 1286 (mail-extr-delete-char 1)
877 (insert mail-space-char)) 1287 (insert ?\ ))
878 (if \.-ends-name 1288 (if \.-ends-name
879 (narrow-to-region (point-min) (point)) 1289 (narrow-to-region (point-min) (point))
880 (delete-char 1) 1290 (mail-extr-delete-char 1)
881 (insert " ")))) 1291 (insert " ")))
1292;; (setq mailbox-name-processed-flag t)
1293 )
882 ((memq (char-syntax char) '(?. ?\\)) 1294 ((memq (char-syntax char) '(?. ?\\))
883 (delete-char 1) 1295 (mail-extr-delete-char 1)
884 (insert " ")) 1296 (insert " ")
1297;; (setq mailbox-name-processed-flag t)
1298 )
885 (t 1299 (t
886 (setq atom-beg (point)) 1300 (setq atom-beg (point))
887 (forward-word 1) 1301 (forward-word 1)
888 (setq atom-end (point)) 1302 (setq atom-end (point))
1303 (goto-char atom-beg)
889 (save-restriction 1304 (save-restriction
890 (narrow-to-region atom-beg atom-end) 1305 (narrow-to-region atom-beg atom-end)
891 (goto-char (point-min)) 1306 (cond
892 (while (re-search-forward "\\([^_]+\\)_" nil t) 1307
893 (replace-match "\\1 ")) 1308 ;; Handle X.400 addresses encoded in RFC-822.
894 (goto-char (point-max)))))))) 1309 ;; *** Shit! This has to handle the case where it is
1310 ;; *** embedded in a quote too!
1311 ;; *** Shit! The input is being broken up into atoms
1312 ;; *** by periods!
1313 ((looking-at mail-extr-x400-encoded-address-pattern)
1314
1315 ;; Copy the contents of the individual fields that
1316 ;; might hold name data to the beginning.
1317 (mapcar
1318 (function
1319 (lambda (field-pattern)
1320 (cond
1321 ((save-excursion
1322 (re-search-forward field-pattern nil t))
1323 (insert-buffer-substring (current-buffer)
1324 (match-beginning 1)
1325 (match-end 1))
1326 (insert " ")))))
1327 (list mail-extr-x400-encoded-address-given-name-pattern
1328 mail-extr-x400-encoded-address-surname-pattern
1329 mail-extr-x400-encoded-address-full-name-pattern))
1330
1331 ;; Discard the rest, since it contains stuff like
1332 ;; routing information, not part of a name.
1333 (mail-extr-skip-whitespace-backward)
1334 (delete-region (point) (point-max))
1335
1336 ;; Handle periods used for spacing.
1337 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1338 (replace-match "\\1 \\2" t))
1339
1340;; (setq mailbox-name-processed-flag t)
1341 )
1342
1343 ;; Handle normal addresses.
1344 (t
1345 (goto-char (point-min))
1346 ;; Handle _ and = used for spacing.
1347 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
1348 (replace-match "\\1 " t)
1349;; (setq mailbox-name-processed-flag t)
1350 )
1351 (goto-char (point-max))))))))
1352
1353 ;; undo the dirty deed
1354 (if (not mail-extr-mangle-uucp)
1355 (modify-syntax-entry ?! "." (syntax-table)))
1356 ;;
1357 ;; If we derived the name from the mailbox part of the address,
1358 ;; and we only got one word out of it, don't treat that as a
1359 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
1360 ;; (if (not mailbox-name-processed-flag)
1361 ;; (delete-region (point-min) (point-max)))
1362 ))
895 1363
896 (set-syntax-table address-text-syntax-table) 1364 (set-syntax-table mail-extr-address-text-syntax-table)
897 1365
898 (setq xxx (mail-variant-method (buffer-string))) 1366 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
899 (delete-region (point-min) (point-max))
900 (insert xxx)
901 (goto-char (point-min)) 1367 (goto-char (point-min))
902 1368
903;; ;; Compress whitespace
904;; (goto-char (point-min))
905;; (while (re-search-forward "[ \t\n]+" nil t)
906;; (replace-match " "))
907;;
908;; ;; Fix . used as space
909;; (goto-char (point-min))
910;; (while (re-search-forward mail-bad-\.-pattern nil t)
911;; (replace-match "\\1 \\2"))
912;;
913;; ;; Delete trailing parenthesized comment
914;; (goto-char (point-max))
915;; (skip-chars-backward mail-whitespace)
916;; (cond ((memq (char-after (1- (point))) '(?\) ?\} ?\]))
917;; (setq comment-end (point))
918;; (set-syntax-table address-text-comment-syntax-table)
919;; (or (mail-safe-move-sexp -1)
920;; (backward-char 1))
921;; (set-syntax-table address-text-syntax-table)
922;; (setq comment-beg (point))
923;; (skip-chars-backward mail-whitespace)
924;; (if (bobp)
925;; (narrow-to-region (1+ comment-beg) (1- comment-end))
926;; (narrow-to-region (point-min) (point)))))
927;;
928;; ;; Find, save, and delete any name suffix
929;; ;; *** Broken!
930;; (goto-char (point-min))
931;; (cond ((re-search-forward mail-full-name-suffix-pattern nil t)
932;; (setq name-suffix (buffer-substring (match-beginning 3)
933;; (match-end 3)))
934;; (replace-match "\\1 \\4")))
935;;
936;; ;; Delete ALL CAPS words and after, if preceded by mixed-case or
937;; ;; lowercase words. Eg. XT-DEM.
938;; (goto-char (point-min))
939;; ;; ## This will lose on something like "SMITH MAX".
940;; ;; ## maybe it should be
941;; ;; ## " \\([A-Z]+[-_/][A-Z]+\\|[A-Z][A-Z][A-Z]\\)\\b.*[^A-Z \t]"
942;; ;; ## that is, three-letter-upper-case-word with non-upper-case
943;; ;; ## characters following it.
944;; (if (re-search-forward mail-mixed-case-name-pattern nil t)
945;; (if (re-search-forward mail-weird-acronym-pattern nil t)
946;; (narrow-to-region (point-min) (match-beginning 0))))
947;;
948;; ;; Delete trailing alternative address
949;; (goto-char (point-min))
950;; (if (re-search-forward mail-alternative-address-pattern nil t)
951;; (narrow-to-region (point-min) (match-beginning 0)))
952;;
953;; ;; Delete trailing comment
954;; (goto-char (point-min))
955;; (if (re-search-forward mail-trailing-comment-start-pattern nil t)
956;; (or (progn
957;; (goto-char (match-beginning 0))
958;; (skip-chars-backward mail-whitespace)
959;; (bobp))
960;; (narrow-to-region (point-min) (match-beginning 0))))
961;;
962;; ;; Delete trailing comma-separated comment
963;; (goto-char (point-min))
964;; ;; ## doesn't this break "Smith, John"? Yes.
965;; (re-search-forward mail-last-name-first-pattern nil t)
966;; (while (search-forward "," nil t)
967;; (or (save-excursion
968;; (backward-char 2)
969;; (looking-at mail-full-name-suffix-pattern))
970;; (narrow-to-region (point-min) (1- (point)))))
971;;
972;; ;; Delete telephone numbers and ham radio call signs
973;; (goto-char (point-min))
974;; (if (re-search-forward mail-telephone-extension-pattern nil t)
975;; (narrow-to-region (point-min) (match-beginning 0)))
976;; (goto-char (point-min))
977;; (if (re-search-forward mail-ham-call-sign-pattern nil t)
978;; (if (eq (match-beginning 0) (point-min))
979;; (narrow-to-region (match-end 0) (point-max))
980;; (narrow-to-region (point-min) (match-beginning 0))))
981;;
982;; ;; Delete trailing word followed immediately by .
983;; (goto-char (point-min))
984;; ;; ## what's this for? doesn't it mess up "Public, Harry Q."? No.
985;; (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
986;; (narrow-to-region (point-min) (match-beginning 0)))
987;;
988;; ;; Handle & substitution
989;; ;; TODO: remember to disable middle initial guessing
990;; (goto-char (point-min))
991;; (cond ((re-search-forward "\\( \\|\\`\\)&\\( \\|\\'\\)" nil t)
992;; (goto-char (match-end 1))
993;; (delete-char 1)
994;; (capitalize-region
995;; (point)
996;; (progn
997;; (insert-buffer-substring canonicalization-buffer
998;; mbox-beg mbox-end)
999;; (point)))))
1000;;
1001;; ;; Delete nickname
1002;; (goto-char (point-min))
1003;; (if (re-search-forward mail-nickname-pattern nil t)
1004;; (replace-match (if (eq (match-beginning 2) (1- (match-end 2)))
1005;; " \\2 "
1006;; " ")))
1007;;
1008;; ;; Fixup initials
1009;; (while (progn
1010;; (goto-char (point-min))
1011;; (re-search-forward mail-bad-initials-pattern nil t))
1012;; (replace-match
1013;; (if (match-beginning 4)
1014;; "\\1. \\4"
1015;; (if (match-beginning 5)
1016;; "\\1. \\5"
1017;; "\\1. "))))
1018;;
1019;; ;; Delete title
1020;; (goto-char (point-min))
1021;; (if (re-search-forward mail-full-name-prefixes nil t)
1022;; (narrow-to-region (point) (point-max)))
1023;;
1024;; ;; Delete trailing and preceding non-name characters
1025;; (goto-char (point-min))
1026;; (skip-chars-forward mail-non-begin-name-chars)
1027;; (narrow-to-region (point) (point-max))
1028;; (goto-char (point-max))
1029;; (skip-chars-backward mail-non-end-name-chars)
1030;; (narrow-to-region (point-min) (point))
1031
1032 ;; If name is "First Last" and userid is "F?L", then assume 1369 ;; If name is "First Last" and userid is "F?L", then assume
1033 ;; the middle initial is the second letter in the userid. 1370 ;; the middle initial is the second letter in the userid.
1034 ;; initially by Jamie Zawinski <jwz@lucid.com> 1371 ;; Initial code by Jamie Zawinski <jwz@lucid.com>
1035 (cond ((and (eq 3 (- mbox-end mbox-beg)) 1372 ;; *** Make it work when there's a suffix as well.
1373 (goto-char (point-min))
1374 (cond ((and mail-extr-guess-middle-initial
1375 (not disable-initial-guessing-flag)
1376 (eq 3 (- mbox-end mbox-beg))
1036 (progn 1377 (progn
1037 (goto-char (point-min)) 1378 (goto-char (point-min))
1038 (looking-at mail-two-name-pattern))) 1379 (looking-at mail-extr-two-name-pattern)))
1039 (setq fi (char-after (match-beginning 0)) 1380 (setq fi (char-after (match-beginning 0))
1040 li (char-after (match-beginning 3))) 1381 li (char-after (match-beginning 3)))
1041 (save-excursion 1382 (save-excursion
@@ -1052,417 +1393,506 @@ Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
1052 (goto-char (match-beginning 3)) 1393 (goto-char (match-beginning 3))
1053 (insert (upcase mi) ". "))))) 1394 (insert (upcase mi) ". ")))))
1054 1395
1055;; ;; Restore suffix 1396 ;; Nuke name if it is the same as mailbox name.
1056;; (cond (name-suffix 1397 (let ((buffer-length (- (point-max) (point-min)))
1057;; (goto-char (point-max)) 1398 (i 0)
1058;; (insert ", " name-suffix) 1399 (names-match-flag t))
1059;; (backward-word 1) 1400 (cond ((and (> buffer-length 0)
1060;; (cond ((memq (following-char) '(?j ?J ?s ?S)) 1401 (eq buffer-length (- mbox-end mbox-beg)))
1061;; (capitalize-word 1) 1402 (goto-char (point-max))
1062;; (or (eq (following-char) ?.) 1403 (insert-buffer-substring canonicalization-buffer
1063;; (insert ?.))) 1404 mbox-beg mbox-end)
1064;; (t 1405 (while (and names-match-flag
1065;; (upcase-word 1))))) 1406 (< i buffer-length))
1407 (or (eq (downcase (char-after (+ i (point-min))))
1408 (downcase
1409 (char-after (+ i buffer-length (point-min)))))
1410 (setq names-match-flag nil))
1411 (setq i (1+ i)))
1412 (delete-region (+ (point-min) buffer-length) (point-max))
1413 (if names-match-flag
1414 (narrow-to-region (point) (point))))))
1415
1416 ;; Nuke name if it's just one word.
1417 (goto-char (point-min))
1418 (and mail-extr-ignore-single-names
1419 (not (re-search-forward "[- ]" nil t))
1420 (narrow-to-region (point) (point)))
1066 1421
1067 ;; Result 1422 ;; Result
1068 (list (buffer-string) 1423 (list (if (not (= (point-min) (point-max)))
1424 (buffer-string))
1069 (progn 1425 (progn
1070 (set-buffer canonicalization-buffer) 1426 (set-buffer canonicalization-buffer)
1071 (buffer-string))) 1427 (if (not (= (point-min) (point-max)))
1428 (buffer-string))))
1072 ))) 1429 )))
1073 1430
1074;; TODO: put this back in the above function now that it's proven: 1431(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
1075(defun mail-variant-method (string) 1432 (let ((word-count 0)
1076 (let ((variant-buffer (get-buffer-create "*variant method buffer*")) 1433 (case-fold-search nil)
1077 (word-count 0) 1434 mixed-case-flag lower-case-flag ;;upper-case-flag
1078 mixed-case-flag lower-case-flag upper-case-flag
1079 suffix-flag last-name-comma-flag 1435 suffix-flag last-name-comma-flag
1080 comment-beg comment-end initial beg end 1436 ;;cbeg cend
1437 initial
1438 begin-again-flag
1439 drop-this-word-if-trailing-flag
1440 drop-last-word-if-trailing-flag
1441 word-found-flag
1442 this-word-beg last-word-beg
1443 name-beg name-end
1444 name-done-flag
1081 ) 1445 )
1082 (save-excursion 1446 (save-excursion
1083 (set-buffer variant-buffer) 1447 (set-syntax-table mail-extr-address-text-syntax-table)
1084 (buffer-disable-undo variant-buffer)
1085 (set-syntax-table address-text-syntax-table)
1086 (widen)
1087 (erase-buffer)
1088 (setq case-fold-search nil)
1089
1090 (insert string)
1091 1448
1449 ;; This was moved above.
1092 ;; Fix . used as space 1450 ;; Fix . used as space
1093 (goto-char (point-min)) 1451 ;; But it belongs here because it occurs not only as
1094 (while (re-search-forward mail-bad-\.-pattern nil t) 1452 ;; rypens@reks.uia.ac.be (Piet.Rypens)
1095 (replace-match "\\1 \\2")) 1453 ;; but also as
1454 ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
1455 ;;(goto-char (point-min))
1456 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
1457 ;; (replace-match "\\1 \\2" t))
1458
1459 (cond ((not (search-forward " " nil t))
1460 (goto-char (point-min))
1461 (cond ((search-forward "_" nil t)
1462 ;; Handle the *idiotic* use of underlines as spaces.
1463 ;; Example: fml@foo.bar.dom (First_M._Last)
1464 (goto-char (point-min))
1465 (while (search-forward "_" nil t)
1466 (replace-match " " t)))
1467 ((search-forward "." nil t)
1468 ;; Fix . used as space
1469 ;; Example: danj1@cb.att.com (daniel.jacobson)
1470 (goto-char (point-min))
1471 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1472 (replace-match "\\1 \\2" t))))))
1473
1096 1474
1097 ;; Skip any initial garbage. 1475 ;; Loop over the words (and other junk) in the name.
1098 (goto-char (point-min)) 1476 (goto-char (point-min))
1099 (skip-chars-forward mail-non-begin-name-chars) 1477 (while (not name-done-flag)
1100 (skip-chars-backward "& \"") 1478
1101 (narrow-to-region (point) (point-max)) 1479 (cond (word-found-flag
1102 1480 ;; Last time through this loop we skipped over a word.
1103 (catch 'stop 1481 (setq last-word-beg this-word-beg)
1104 (while t 1482 (setq drop-last-word-if-trailing-flag
1105 (skip-chars-forward mail-whitespace) 1483 drop-this-word-if-trailing-flag)
1106 1484 (setq word-found-flag nil)))
1485
1486 (cond (begin-again-flag
1487 ;; Last time through the loop we found something that
1488 ;; indicates we should pretend we are beginning again from
1489 ;; the start.
1490 (setq word-count 0)
1491 (setq last-word-beg nil)
1492 (setq drop-last-word-if-trailing-flag nil)
1493 (setq mixed-case-flag nil)
1494 (setq lower-case-flag nil)
1495;; (setq upper-case-flag nil)
1496 (setq begin-again-flag nil)
1497 ))
1498
1499 ;; Initialize for this iteration of the loop.
1500 (mail-extr-skip-whitespace-forward)
1501 (if (eq word-count 0) (narrow-to-region (point) (point-max)))
1502 (setq this-word-beg (point))
1503 (setq drop-this-word-if-trailing-flag nil)
1504
1505 ;; Decide what to do based on what we are looking at.
1506 (cond
1507
1508 ;; Delete title
1509 ((and (eq word-count 0)
1510 (looking-at mail-extr-full-name-prefixes))
1511 (goto-char (match-end 0))
1512 (narrow-to-region (point) (point-max)))
1513
1514 ;; Stop after name suffix
1515 ((and (>= word-count 2)
1516 (looking-at mail-extr-full-name-suffix-pattern))
1517 (mail-extr-skip-whitespace-backward)
1518 (setq suffix-flag (point))
1519 (if (eq ?, (following-char))
1520 (forward-char 1)
1521 (insert ?,))
1522 ;; Enforce at least one space after comma
1523 (or (eq ?\ (following-char))
1524 (insert ?\ ))
1525 (mail-extr-skip-whitespace-forward)
1526 (cond ((memq (following-char) '(?j ?J ?s ?S))
1527 (capitalize-word 1)
1528 (if (eq (following-char) ?.)
1529 (forward-char 1)
1530 (insert ?.)))
1531 (t
1532 (upcase-word 1)))
1533 (setq word-found-flag t)
1534 (setq name-done-flag t))
1535
1536 ;; Handle SCA names
1537 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1538 (goto-char (match-beginning 1))
1539 (narrow-to-region (point) (point-max))
1540 (setq begin-again-flag t))
1541
1542 ;; Check for initial last name followed by comma
1543 ((and (eq ?, (following-char))
1544 (eq word-count 1))
1545 (forward-char 1)
1546 (setq last-name-comma-flag t)
1547 (or (eq ?\ (following-char))
1548 (insert ?\ )))
1549
1550 ;; Stop before trailing comma-separated comment
1551 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1552 ;; *** This case is redundant???
1553 ;;((eq ?, (following-char))
1554 ;; (setq name-done-flag t))
1555
1556 ;; Delete parenthesized/quoted comment/nickname
1557 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
1558 (setq cbeg (point))
1559 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1560 (cond ((memq (following-char) '(?\' ?\`))
1561 (or (search-forward "'" nil t
1562 (if (eq ?\' (following-char)) 2 1))
1563 (mail-extr-delete-char 1)))
1564 (t
1565 (or (mail-extr-safe-move-sexp 1)
1566 (goto-char (point-max)))))
1567 (set-syntax-table mail-extr-address-text-syntax-table)
1568 (setq cend (point))
1107 (cond 1569 (cond
1108 1570 ;; Handle case of entire name being quoted
1109 ;; Delete title
1110 ((and (eq word-count 0) 1571 ((and (eq word-count 0)
1111 (looking-at mail-full-name-prefixes)) 1572 (looking-at " *\\'")
1112 (goto-char (match-end 0)) 1573 (>= (- cend cbeg) 2))
1113 (narrow-to-region (point) (point-max))) 1574 (narrow-to-region (1+ cbeg) (1- cend))
1114 1575 (goto-char (point-min)))
1115 ;; Stop after name suffix 1576 (t
1116 ((and (>= word-count 2) 1577 ;; Handle case of quoted initial
1117 (looking-at mail-full-name-suffix-pattern)) 1578 (if (and (or (= 3 (- cend cbeg))
1118 (skip-chars-backward mail-whitespace) 1579 (and (= 4 (- cend cbeg))
1119 (setq suffix-flag (point)) 1580 (eq ?. (char-after (+ 2 cbeg)))))
1120 (if (eq ?, (following-char)) 1581 (not (looking-at " *\\'")))
1121 (forward-char 1) 1582 (setq initial (char-after (1+ cbeg)))
1122 (insert ?,)) 1583 (setq initial nil))
1123 ;; Enforce at least one space after comma 1584 (delete-region cbeg cend)
1124 (or (eq mail-space-char (following-char)) 1585 (if initial
1125 (insert mail-space-char)) 1586 (insert initial ". ")))))
1126 (skip-chars-forward mail-whitespace) 1587
1127 (cond ((memq (following-char) '(?j ?J ?s ?S)) 1588 ;; Handle & substitution
1128 (capitalize-word 1) 1589 ((and (or (bobp)
1129 (if (eq (following-char) ?.) 1590 (eq ?\ (preceding-char)))
1130 (forward-char 1) 1591 (looking-at "&\\( \\|\\'\\)"))
1131 (insert ?.))) 1592 (mail-extr-delete-char 1)
1132 (t 1593 (capitalize-region
1133 (upcase-word 1))) 1594 (point)
1134 (setq word-count (1+ word-count)) 1595 (progn
1135 (throw 'stop t)) 1596 (insert-buffer-substring canonicalization-buffer
1136 1597 mbox-beg mbox-end)
1137 ;; Handle SCA names 1598 (point)))
1138 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" 1599 (setq disable-initial-guessing-flag t)
1139 (setq word-count 0) 1600 (setq word-found-flag t))
1140 (goto-char (match-beginning 1)) 1601
1141 (narrow-to-region (point) (point-max))) 1602 ;; Handle *Stupid* VMS date stamps
1142 1603 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
1143 ;; Various stopping points 1604 (replace-match "" t))
1144 ((or 1605
1145 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case or 1606 ;; Handle Chinese characters.
1146 ;; lowercase words. Eg. XT-DEM. 1607 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
1147 (and (>= word-count 2) 1608 (goto-char (match-end 0))
1148 (or mixed-case-flag lower-case-flag) 1609 (setq word-found-flag t))
1149 (looking-at mail-weird-acronym-pattern) 1610
1150 (not (looking-at mail-roman-numeral-pattern))) 1611 ;; Skip initial garbage characters.
1151 ;; Stop before 4-or-more letter lowercase words preceded by 1612 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1152 ;; mixed case or uppercase words. 1613 ((and (eq word-count 0)
1153 (and (>= word-count 2) 1614 (looking-at mail-extr-leading-garbage))
1154 (or upper-case-flag mixed-case-flag) 1615 (goto-char (match-end 0))
1155 (looking-at "[a-z][a-z][a-z][a-z]+\\b")) 1616 ;; *** Skip backward over these???
1156 ;; Stop before trailing alternative address 1617 ;; (skip-chars-backward "& \"")
1157 (looking-at mail-alternative-address-pattern) 1618 (narrow-to-region (point) (point-max)))
1158 ;; Stop before trailing comment not introduced by comma 1619
1159 (looking-at mail-trailing-comment-start-pattern) 1620 ;; Various stopping points
1160 ;; Stop before telephone numbers 1621 ((or
1161 (looking-at mail-telephone-extension-pattern))
1162 (throw 'stop t))
1163
1164 ;; Check for initial last name followed by comma
1165 ((and (eq ?, (following-char))
1166 (eq word-count 1))
1167 (forward-char 1)
1168 (setq last-name-comma-flag t)
1169 (or (eq mail-space-char (following-char))
1170 (insert mail-space-char)))
1171
1172 ;; Stop before trailing comma-separated comment
1173 ((eq ?, (following-char))
1174 (throw 'stop t))
1175 1622
1176 ;; Delete parenthesized/quoted comment/nickname 1623 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
1177 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) 1624 ;; words. Example: XT-DEM.
1178 (setq comment-beg (point)) 1625 (and (>= word-count 2)
1179 (set-syntax-table address-text-comment-syntax-table) 1626 mixed-case-flag
1180 (cond ((memq (following-char) '(?\' ?\`)) 1627 (looking-at mail-extr-weird-acronym-pattern)
1181 (if (eq ?\' (following-char)) 1628 (not (looking-at mail-extr-roman-numeral-pattern)))
1182 (forward-char 1))
1183 (or (search-forward "'" nil t)
1184 (delete-char 1)))
1185 (t
1186 (or (mail-safe-move-sexp 1)
1187 (goto-char (point-max)))))
1188 (set-syntax-table address-text-syntax-table)
1189 (setq comment-end (point))
1190 (cond
1191 ;; Handle case of entire name being quoted
1192 ((and (eq word-count 0)
1193 (looking-at " *\\'")
1194 (>= (- comment-end comment-beg) 2))
1195 (narrow-to-region (1+ comment-beg) (1- comment-end))
1196 (goto-char (point-min)))
1197 (t
1198 ;; Handle case of quoted initial
1199 (if (and (or (= 3 (- comment-end comment-beg))
1200 (and (= 4 (- comment-end comment-beg))
1201 (eq ?. (char-after (+ 2 comment-beg)))))
1202 (not (looking-at " *\\'")))
1203 (setq initial (char-after (1+ comment-beg)))
1204 (setq initial nil))
1205 (delete-region comment-beg comment-end)
1206 (if initial
1207 (insert initial ". ")))))
1208 1629
1209 ;; Delete ham radio call signs 1630 ;; Stop before trailing alternative address
1210 ((looking-at mail-ham-call-sign-pattern) 1631 (looking-at mail-extr-alternative-address-pattern)
1211 (delete-region (match-beginning 0) (match-end 0)))
1212 1632
1213 ;; Handle & substitution 1633 ;; Stop before trailing comment not introduced by comma
1214 ;; TODO: remember to disable middle initial guessing 1634 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
1215 ((and (or (bobp) 1635 (looking-at mail-extr-trailing-comment-start-pattern)
1216 (eq mail-space-char (preceding-char)))
1217 (looking-at "&\\( \\|\\'\\)"))
1218 (delete-char 1)
1219 (capitalize-region
1220 (point)
1221 (progn
1222 (insert-buffer-substring canonicalization-buffer
1223 mbox-beg mbox-end)
1224 (point))))
1225 1636
1226 ;; Fixup initials 1637 ;; Stop before telephone numbers
1227 ((looking-at mail-initial-pattern) 1638 (looking-at mail-extr-telephone-extension-pattern))
1228 (or (eq (following-char) (upcase (following-char))) 1639 (setq name-done-flag t))
1640
1641 ;; Delete ham radio call signs
1642 ((looking-at mail-extr-ham-call-sign-pattern)
1643 (delete-region (match-beginning 0) (match-end 0)))
1644
1645 ;; Fixup initials
1646 ((looking-at mail-extr-initial-pattern)
1647 (or (eq (following-char) (upcase (following-char)))
1648 (setq lower-case-flag t))
1649 (forward-char 1)
1650 (if (eq ?. (following-char))
1651 (forward-char 1)
1652 (insert ?.))
1653 (or (eq ?\ (following-char))
1654 (insert ?\ ))
1655 (setq word-found-flag t))
1656
1657 ;; Handle BITNET LISTSERV list names.
1658 ((and (eq word-count 0)
1659 (looking-at mail-extr-listserv-list-name-pattern))
1660 (narrow-to-region (match-beginning 1) (match-end 1))
1661 (setq word-found-flag t)
1662 (setq name-done-flag t))
1663
1664 ;; Regular name words
1665 ((looking-at mail-extr-name-pattern)
1666 (setq name-beg (point))
1667 (setq name-end (match-end 0))
1668
1669 ;; Certain words will be dropped if they are at the end.
1670 (and (>= word-count 2)
1671 (not lower-case-flag)
1672 (or
1673 ;; A trailing 4-or-more letter lowercase words preceded by
1674 ;; mixed case or uppercase words will be dropped.
1675 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'")
1676 ;; Drop a trailing word which is terminated with a period.
1677 (eq ?. (char-after (1- name-end))))
1678 (setq drop-this-word-if-trailing-flag t))
1679
1680 ;; Set the flags that indicate whether we have seen a lowercase
1681 ;; word, a mixed case word, and an uppercase word.
1682 (if (re-search-forward "[a-z]" name-end t)
1683 (if (progn
1684 (goto-char name-beg)
1685 (re-search-forward "[A-Z]" name-end t))
1686 (setq mixed-case-flag t)
1229 (setq lower-case-flag t)) 1687 (setq lower-case-flag t))
1230 (forward-char 1) 1688;; (setq upper-case-flag t)
1231 (if (eq ?. (following-char)) 1689 )
1232 (forward-char 1) 1690
1233 (insert ?.)) 1691 (goto-char name-end)
1234 (or (eq mail-space-char (following-char)) 1692 (setq word-found-flag t))
1235 (insert mail-space-char))
1236 (setq word-count (1+ word-count)))
1237
1238 ;; Regular name words
1239 ((looking-at mail-name-pattern)
1240 (setq beg (point))
1241 (setq end (match-end 0))
1242 (set (if (re-search-forward "[a-z]" end t)
1243 (if (progn
1244 (goto-char beg)
1245 (re-search-forward "[A-Z]" end t))
1246 'mixed-case-flag
1247 'lower-case-flag)
1248 'upper-case-flag) t)
1249 (goto-char end)
1250 (setq word-count (1+ word-count)))
1251 1693
1252 (t 1694 (t
1253 (throw 'stop t))))) 1695 (setq name-done-flag t)
1696 ))
1697
1698 ;; Count any word that we skipped over.
1699 (if word-found-flag
1700 (setq word-count (1+ word-count))))
1254 1701
1255 (narrow-to-region (point-min) (point)) 1702 ;; If the last thing in the name is 2 or more periods, or one or more
1256 1703 ;; other sentence terminators (but not a single period) then keep them
1257 ;; Delete trailing word followed immediately by . 1704 ;; and the preceeding word. This is for the benefit of whole sentences
1705 ;; in the name field: it's better behavior than dropping the last word
1706 ;; of the sentence...
1707 (if (and (not suffix-flag)
1708 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
1709 (goto-char (setq suffix-flag (point-max))))
1710
1711 ;; Drop everything after point and certain trailing words.
1712 (narrow-to-region (point-min)
1713 (or (and drop-last-word-if-trailing-flag
1714 last-word-beg)
1715 (point)))
1716
1717 ;; Xerox's mailers SUCK!!!!!!
1718 ;; We simply refuse to believe that any last name is PARC or ADOC.
1719 ;; If it looks like that is the last name, that there is no meaningful
1720 ;; here at all. Actually I guess it would be best to map patterns
1721 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
1722 ;; actually know that that is what's going on.
1258 (cond ((not suffix-flag) 1723 (cond ((not suffix-flag)
1259 (goto-char (point-min)) 1724 (goto-char (point-min))
1260 (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t) 1725 (let ((case-fold-search t))
1261 (narrow-to-region (point-min) (match-beginning 0))))) 1726 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
1262 1727 (erase-buffer)))))
1728
1263 ;; If last name first put it at end (but before suffix) 1729 ;; If last name first put it at end (but before suffix)
1264 (cond (last-name-comma-flag 1730 (cond (last-name-comma-flag
1265 (goto-char (point-min)) 1731 (goto-char (point-min))
1266 (search-forward ",") 1732 (search-forward ",")
1267 (setq end (1- (point))) 1733 (setq name-end (1- (point)))
1268 (goto-char (or suffix-flag (point-max))) 1734 (goto-char (or suffix-flag (point-max)))
1269 (or (eq mail-space-char (preceding-char)) 1735 (or (eq ?\ (preceding-char))
1270 (insert mail-space-char)) 1736 (insert ?\ ))
1271 (insert-buffer-substring (current-buffer) (point-min) end) 1737 (insert-buffer-substring (current-buffer) (point-min) name-end)
1272 (narrow-to-region (1+ end) (point-max)))) 1738 (goto-char name-end)
1739 (skip-chars-forward "\t ,")
1740 (narrow-to-region (point) (point-max))))
1273 1741
1274 (goto-char (point-max)) 1742 ;; Delete leading and trailing junk characters.
1275 (skip-chars-backward mail-non-end-name-chars) 1743 ;; *** This is probably completly unneeded now.
1276 (if (eq ?. (following-char)) 1744 ;;(goto-char (point-max))
1277 (forward-char 1)) 1745 ;;(skip-chars-backward mail-extr-non-end-name-chars)
1278 (narrow-to-region (point) 1746 ;;(if (eq ?. (following-char))
1279 (progn 1747 ;; (forward-char 1))
1280 (goto-char (point-min)) 1748 ;;(narrow-to-region (point)
1281 (skip-chars-forward mail-non-begin-name-chars) 1749 ;; (progn
1282 (point))) 1750 ;; (goto-char (point-min))
1751 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
1752 ;; (point)))
1283 1753
1284 ;; Compress whitespace 1754 ;; Compress whitespace
1285 (goto-char (point-min)) 1755 (goto-char (point-min))
1286 (while (re-search-forward "[ \t\n]+" nil t) 1756 (while (re-search-forward "[ \t\n]+" nil t)
1287 (replace-match " ")) 1757 (replace-match (if (eobp) "" " ") t))
1288
1289 (buffer-substring (point-min) (point-max))
1290
1291 ))) 1758 )))
1292 1759
1293;; The country names are just in there for show right now, and because 1760
1294;; Jamie thought it would be neat. They aren't used yet.
1295 1761
1762;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1763;;
1764;; Table of top-level domain names.
1765;;
1766;; This is used during address canonicalization; be careful of format changes.
1296;; Keep in mind that the country abbreviations follow ISO-3166. There is 1767;; Keep in mind that the country abbreviations follow ISO-3166. There is
1297;; a U.S. FIPS that specifies a different set of two-letter country 1768;; a U.S. FIPS that specifies a different set of two-letter country
1298;; abbreviations. 1769;; abbreviations.
1299 1770
1300;; TODO: put this in its own obarray, instead of cluttering up the main 1771(defconst mail-extr-all-top-level-domains
1301;; symbol table with junk. 1772 (let ((ob (make-vector 509 0)))
1302 1773 (mapcar
1303(mapcar 1774 (function
1304 (function 1775 (lambda (x)
1305 (lambda (x) 1776 (put (intern (downcase (car x)) ob)
1306 (if (symbolp x) 1777 'domain-name
1307 (put x 'domain-name t) 1778 (if (nth 2 x)
1308 (put (car x) 'domain-name (nth 1 x))))) 1779 (format (nth 2 x) (nth 1 x))
1309 '((ag "Antigua") 1780 (nth 1 x)))))
1310 (ar "Argentina") ; Argentine Republic 1781 '(("ag" "Antigua")
1311 arpa ; Advanced Projects Research Agency 1782 ("ar" "Argentina" "Argentine Republic")
1312 (at "Austria") ; The Republic of _ 1783 ("arpa" t "Advanced Projects Research Agency")
1313 (au "Australia") 1784 ("at" "Austria" "The Republic of %s")
1314 (bb "Barbados") 1785 ("au" "Australia")
1315 (be "Belgium") ; The Kingdom of _ 1786 ("bb" "Barbados")
1316 (bg "Bulgaria") 1787 ("be" "Belgium" "The Kingdom of %s")
1317 bitnet ; Because It's Time NET 1788 ("bg" "Bulgaria")
1318 (bo "Bolivia") ; Republic of _ 1789 ("bitnet" t "Because It's Time NET")
1319 (br "Brazil") ; The Federative Republic of _ 1790 ("bo" "Bolivia" "Republic of %s")
1320 (bs "Bahamas") 1791 ("br" "Brazil" "The Federative Republic of %s")
1321 (bz "Belize") 1792 ("bs" "Bahamas")
1322 (ca "Canada") 1793 ("bz" "Belize")
1323 (ch "Switzerland") ; The Swiss Confederation 1794 ("ca" "Canada")
1324 (cl "Chile") ; The Republic of _ 1795 ("ch" "Switzerland" "The Swiss Confederation")
1325 (cn "China") ; The People's Republic of _ 1796 ("cl" "Chile" "The Republic of %s")
1326 (co "Columbia") 1797 ("cn" "China" "The People's Republic of %s")
1327 com ; Commercial 1798 ("co" "Columbia")
1328 (cr "Costa Rica") ; The Republic of _ 1799 ("com" t "Commercial")
1329 (cs "Czechoslovakia") 1800 ("cr" "Costa Rica" "The Republic of %s")
1330 (de "Germany") 1801 ("cs" "Czechoslovakia")
1331 (dk "Denmark") 1802 ("de" "Germany")
1332 (dm "Dominica") 1803 ("dk" "Denmark")
1333 (do "Dominican Republic") ; The _ 1804 ("dm" "Dominica")
1334 (ec "Ecuador") ; The Republic of _ 1805 ("do" "Dominican Republic" "The %s")
1335 edu ; Educational 1806 ("ec" "Ecuador" "The Republic of %s")
1336 (eg "Egypt") ; The Arab Republic of _ 1807 ("edu" t "Educational")
1337 (es "Spain") ; The Kingdom of _ 1808 ("eg" "Egypt" "The Arab Republic of %s")
1338 (fi "Finland") ; The Republic of _ 1809 ("es" "Spain" "The Kingdom of %s")
1339 (fj "Fiji") 1810 ("fi" "Finland" "The Republic of %s")
1340 (fr "France") 1811 ("fj" "Fiji")
1341 gov ; Government (U.S.A.) 1812 ("fr" "France")
1342 (gr "Greece") ; The Hellenic Republic 1813 ("gov" t "Government (U.S.A.)")
1343 (hk "Hong Kong") 1814 ("gr" "Greece" "The Hellenic Republic (%s)")
1344 (hu "Hungary") ; The Hungarian People's Republic (???) 1815 ("hk" "Hong Kong")
1345 (ie "Ireland") 1816 ("hu" "Hungary" "The Hungarian People's Republic") ;???
1346 (il "Israel") ; The State of _ 1817 ("ie" "Ireland")
1347 (in "India") ; The Republic of _ 1818 ("il" "Israel" "The State of %s")
1348 int ; something British, don't know what 1819 ("in" "India" "The Republic of %s")
1349 (is "Iceland") ; The Republic of _ 1820 ("int" t "(something British, don't know what)")
1350 (it "Italy") ; The Italian Republic 1821 ("is" "Iceland" "The Republic of %s")
1351 (jm "Jamaica") 1822 ("it" "Italy" "The Italian Republic")
1352 (jp "Japan") 1823 ("jm" "Jamaica")
1353 (kn "St. Kitts and Nevis") 1824 ("jp" "Japan")
1354 (kr "South Korea") 1825 ("kn" "St. Kitts and Nevis")
1355 (lc "St. Lucia") 1826 ("kr" "South Korea")
1356 (lk "Sri Lanka") ; The Democratic Socialist Republic of _ 1827 ("lc" "St. Lucia")
1357 mil ; Military (U.S.A.) 1828 ("lk" "Sri Lanka" "The Democratic Socialist Republic of %s")
1358 (mx "Mexico") ; The United Mexican States 1829 ("mil" t "Military (U.S.A.)")
1359 (my "Malaysia") ; changed to Myanmar???? 1830 ("mx" "Mexico" "The United Mexican States")
1360 (na "Namibia") 1831 ("my" "Malaysia" "%s (changed to Myanmar?)") ;???
1361 nato ; North Atlantic Treaty Organization 1832 ("na" "Namibia")
1362 net ; Network 1833 ("nato" t "North Atlantic Treaty Organization")
1363 (ni "Nicaragua") ; The Republic of _ 1834 ("net" t "Network")
1364 (nl "Netherlands") ; The Kingdom of the _ 1835 ("ni" "Nicaragua" "The Republic of %s")
1365 (no "Norway") ; The Kingdom of _ 1836 ("nl" "Netherlands" "The Kingdom of the %s")
1366 (nz "New Zealand") 1837 ("no" "Norway" "The Kingdom of %s")
1367 org ; Organization 1838 ("nz" "New Zealand")
1368 (pe "Peru") 1839 ("org" t "Organization")
1369 (pg "Papua New Guinea") 1840 ("pe" "Peru")
1370 (ph "Philippines") ; The Republic of the _ 1841 ("pg" "Papua New Guinea")
1371 (pl "Poland") 1842 ("ph" "Philippines" "The Republic of the %s")
1372 (pr "Puerto Rico") 1843 ("pl" "Poland")
1373 (pt "Portugal") ; The Portugese Republic 1844 ("pr" "Puerto Rico")
1374 (py "Paraguay") 1845 ("pt" "Portugal" "The Portugese Republic")
1375 (se "Sweden") ; The Kingdom of _ 1846 ("py" "Paraguay")
1376 (sg "Singapore") ; The Republic of _ 1847 ("se" "Sweden" "The Kingdom of %s")
1377 (sr "Suriname") 1848 ("sg" "Singapore" "The Republic of %s")
1378 (su "Soviet Union") 1849 ("sr" "Suriname")
1379 (th "Thailand") ; The Kingdom of _ 1850 ("su" "Soviet Union")
1380 (tn "Tunisia") 1851 ("th" "Thailand" "The Kingdom of %s")
1381 (tr "Turkey") ; The Republic of _ 1852 ("tn" "Tunisia")
1382 (tt "Trinidad and Tobago") 1853 ("tr" "Turkey" "The Republic of %s")
1383 (tw "Taiwan") 1854 ("tt" "Trinidad and Tobago")
1384 (uk "United Kingdom") ; The _ of Great Britain 1855 ("tw" "Taiwan")
1385 unter-dom ; something German 1856 ("uk" "United Kingdom" "The %s of Great Britain")
1386 (us "U.S.A.") ; The United States of America 1857 ("unter-dom" t "(something German)")
1387 uucp ; Unix to Unix CoPy 1858 ("us" "U.S.A." "The United States of America")
1388 (uy "Uruguay") ; The Eastern Republic of _ 1859 ("uucp" t "Unix to Unix CoPy")
1389 (vc "St. Vincent and the Grenadines") 1860 ("uy" "Uruguay" "The Eastern Republic of %s")
1390 (ve "Venezuela") ; The Republic of _ 1861 ("vc" "St. Vincent and the Grenadines")
1391 (yu "Yugoslavia") ; The Socialist Federal Republic of _ 1862 ("ve" "Venezuela" "The Republic of %s")
1392 ;; Also said to be Zambia ... 1863 ("yu" "Yugoslavia" "The Socialist Federal Republic of %s")
1393 (za "South Africa") ; The Republic of _ (why not Zaire???) 1864 ;; Also said to be Zambia ... (why not Zaire???)
1394 (zw "Zimbabwe") ; Republic of _ 1865 ("za" "South Africa" "The Republic of %s (or Zambia? Zaire?)")
1395 )) 1866 ("zw" "Zimbabwe" "Republic of %s")
1396;; fipnet 1867;; fipnet
1868 ))
1869 ob))
1870
1871;;;###autoload
1872(defun what-domain (domain)
1873 "Convert mail domain to country tit corresponds to."
1874 (interactive
1875 (let ((completion-ignore-case t))
1876 (list (completing-read "Domain: "
1877 mail-extr-all-top-level-domains nil t))))
1878 (or (setq domain (intern-soft (downcase domain)
1879 mail-extr-all-top-level-domains))
1880 (error "no such domain"))
1881 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
1397 1882
1398 1883
1399;; Code for testing. 1884;(let ((all nil))
1400 1885; (mapatoms #'(lambda (x)
1401(defun time-extract () 1886; (if (and (boundp x)
1402 (let (times list) 1887; (string-match "^mail-extr-" (symbol-name x)))
1403 (setq times (cons (current-time-string) times) 1888; (setq all (cons x all)))))
1404 list problem-address-alist) 1889; (setq all (sort all #'string-lessp))
1405 (while list 1890; (cons 'setq
1406 (mail-extract-address-components (car (car list))) 1891; (apply 'nconc (mapcar #'(lambda (x)
1407 (setq list (cdr list))) 1892; (list x (symbol-value x)))
1408 (setq times (cons (current-time-string) times)) 1893; all))))
1409 (nreverse times)))
1410
1411(defun test-extract (&optional starting-point)
1412 (interactive)
1413 (set-buffer (get-buffer-create "*Testing*"))
1414 (erase-buffer)
1415 (sit-for 0)
1416 (mapcar 'test-extract-internal
1417 (if starting-point
1418 (memq starting-point problem-address-alist)
1419 problem-address-alist)))
1420
1421(defvar failed-item)
1422(defun test-extract-internal (item)
1423 (setq failed-item item)
1424 (let* ((address (car item))
1425 (correct-name (nth 1 item))
1426 (correct-canon (nth 2 item))
1427 (result (mail-extract-address-components address))
1428 (name (car result))
1429 (canon (nth 1 result))
1430 (name-correct (or (null correct-name)
1431 (string-equal (downcase correct-name)
1432 (downcase name))))
1433 (canon-correct (or (null correct-canon)
1434 (string-equal correct-canon canon))))
1435 (cond ((not (and name-correct canon-correct))
1436 (pop-to-buffer "*Testing*")
1437 (select-window (get-buffer-window (current-buffer)))
1438 (goto-char (point-max))
1439 (insert "Address: " address "\n")
1440 (if (not name-correct)
1441 (insert " Correct Name: [" correct-name
1442 "]\; Result: [" name "]\n"))
1443 (if (not canon-correct)
1444 (insert " Correct Canon: [" correct-canon
1445 "]\; Result: [" canon "]\n"))
1446 (insert "\n")
1447 (sit-for 0))))
1448 (setq failed-item nil))
1449
1450(defun test-continue-extract ()
1451 (interactive)
1452 (test-extract failed-item))
1453 1894
1454 1895
1455;; Assorted junk. 1896(provide 'mail-extr)
1456
1457;; warsaw@nlm.nih.gov (A Bad Dude -- Barry Warsaw)
1458
1459;;'(from
1460;; reply-to
1461;; return-path
1462;; x-uucp-from
1463;; sender
1464;; resent-from
1465;; resent-sender
1466;; resent-reply-to)
1467 1897
1468;;; mail-extr.el ends here 1898;;; mail-extr.el ends here