aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKarl Heuer1997-11-20 21:45:59 +0000
committerKarl Heuer1997-11-20 21:45:59 +0000
commitd1782bd882dc768d06d49234c7af86f344bcf653 (patch)
tree9d230a76805ea5f6d551eba5a2859838312c2be5
parent074028631a165313f89b0d153f28a785477c0e7b (diff)
downloademacs-d1782bd882dc768d06d49234c7af86f344bcf653.tar.gz
emacs-d1782bd882dc768d06d49234c7af86f344bcf653.zip
(mail-extract-address-components):
New arg ALL says return info about all the addresses. Clarify buffer switching logic using save-excursion.
-rw-r--r--lisp/mail/mail-extr.el1393
1 files changed, 713 insertions, 680 deletions
diff --git a/lisp/mail/mail-extr.el b/lisp/mail/mail-extr.el
index 6989e8aefa3..daa50daa8f7 100644
--- a/lisp/mail/mail-extr.el
+++ b/lisp/mail/mail-extr.el
@@ -1,10 +1,9 @@
1;;; mail-extr.el --- extract full name and address from RFC 822 mail header. 1;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
2 2
3;; Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. 3;; Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
4 4
5;; Author: Joe Wells <jbw@cs.bu.edu> 5;; Author: Joe Wells <jbw@cs.bu.edu>
6;; Maintainer: Jamie Zawinski <jwz@lucid.com> 6;; Maintainer: FSF
7;; Version: 1.8
8;; Keywords: mail 7;; Keywords: mail
9 8
10;; This file is part of GNU Emacs. 9;; This file is part of GNU Emacs.
@@ -28,7 +27,7 @@
28 27
29;; The entry point of this code is 28;; The entry point of this code is
30;; 29;;
31;; mail-extract-address-components: (address) 30;; mail-extract-address-components: (address &optional all)
32;; 31;;
33;; Given an RFC-822 ADDRESS, extract full name and canonical address. 32;; Given an RFC-822 ADDRESS, extract full name and canonical address.
34;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). 33;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
@@ -40,6 +39,10 @@
40;; If ADDRESS contains more than one RFC-822 address, only the first is 39;; If ADDRESS contains more than one RFC-822 address, only the first is
41;; returned. 40;; returned.
42;; 41;;
42;; If ALL is non-nil, that means return info about all the addresses
43;; that are found in ADDRESS. The value is a list of elements of
44;; the form (FULL-NAME CANONICAL-ADDRESS), one per address.
45;;
43;; This code is more correct (and more heuristic) parser than the code in 46;; This code is more correct (and more heuristic) parser than the code in
44;; rfc822.el. And despite its size, it's fairly fast. 47;; rfc822.el. And despite its size, it's fairly fast.
45;; 48;;
@@ -706,44 +709,28 @@ by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
706(defvar cend) ; dynamic assignment 709(defvar cend) ; dynamic assignment
707 710
708;;;###autoload 711;;;###autoload
709(defun mail-extract-address-components (address) 712(defun mail-extract-address-components (address &optional all)
710 "Given an RFC-822 ADDRESS, extract full name and canonical address. 713 "Given an RFC-822 address ADDRESS, extract full name and canonical address.
711Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). 714Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
712If no name can be extracted, FULL-NAME will be nil. 715If no name can be extracted, FULL-NAME will be nil.
716
717If the optional argument ALL is non-nil, then ADDRESS can contain zero
718or more recipients, separated by commas, and we return a list of
719the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for
720each recipient. If ALL is nil, then if ADDRESS contains more than
721one recipients, all but the first is ignored.
722
713ADDRESS may be a string or a buffer. If it is a buffer, the visible 723ADDRESS may be a string or a buffer. If it is a buffer, the visible
714 (narrowed) portion of the buffer will be interpreted as the address. 724 (narrowed) portion of the buffer will be interpreted as the address.
715 (This feature exists so that the clever caller might be able to avoid 725 (This feature exists so that the clever caller might be able to avoid
716 consing a string.) 726 consing a string.)"
717If ADDRESS contains more than one RFC-822 address, only the first is
718 returned. Some day this function may be extended to extract multiple
719 addresses, or perhaps return the position at which parsing stopped."
720 (let ((canonicalization-buffer (get-buffer-create " *canonical address*")) 727 (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
721 (extraction-buffer (get-buffer-create " *extract address components*")) 728 (extraction-buffer (get-buffer-create " *extract address components*"))
722 char 729 value-list)
723;; multiple-addresses 730
724 <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos
725 group-:-pos group-\;-pos route-addr-:-pos
726 record-pos-symbol
727 first-real-pos last-real-pos
728 phrase-beg phrase-end
729 cbeg cend ; dynamically set from -voodoo
730 quote-beg quote-end
731 atom-beg atom-end
732 mbox-beg mbox-end
733 \.-ends-name
734 temp
735;; name-suffix
736 fi mi li ; first, middle, last initial
737 saved-%-pos saved-!-pos saved-@-pos
738 domain-pos \.-pos insert-point
739;; mailbox-name-processed-flag
740 disable-initial-guessing-flag ; dynamically set from -voodoo
741 )
742
743 (save-excursion 731 (save-excursion
744 (set-buffer extraction-buffer) 732 (set-buffer extraction-buffer)
745 (fundamental-mode) 733 (fundamental-mode)
746 (kill-all-local-variables)
747 (buffer-disable-undo extraction-buffer) 734 (buffer-disable-undo extraction-buffer)
748 (set-syntax-table mail-extr-address-syntax-table) 735 (set-syntax-table mail-extr-address-syntax-table)
749 (widen) 736 (widen)
@@ -763,672 +750,718 @@ If ADDRESS contains more than one RFC-822 address, only the first is
763 (error "Invalid address: %s" address))) 750 (error "Invalid address: %s" address)))
764 751
765 (set-text-properties (point-min) (point-max) nil) 752 (set-text-properties (point-min) (point-max) nil)
753
754 (save-excursion
755 (set-buffer canonicalization-buffer)
756 (fundamental-mode)
757 (buffer-disable-undo canonicalization-buffer)
758 (set-syntax-table mail-extr-address-syntax-table)
759 (setq case-fold-search nil))
760
766 761
767 ;; stolen from rfc822.el
768 ;; Unfold multiple lines. 762 ;; Unfold multiple lines.
769 (goto-char (point-min)) 763 (goto-char (point-min))
770 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) 764 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
771 (replace-match "\\1 " t)) 765 (replace-match "\\1 " t))
772 766
773 ;; first pass grabs useful information about address 767 ;; Loop over addresses until we have as many as we want.
774 (goto-char (point-min)) 768 (while (and (or all (null value-list))
775 (while (progn 769 (progn (goto-char (point-min))
776 (mail-extr-skip-whitespace-forward) 770 (skip-chars-forward " \t")
777 (not (eobp))) 771 (not (eobp))))
778 (setq char (char-after (point))) 772 (let (char
779 (or first-real-pos 773 end-of-address
780 (if (not (eq char ?\()) 774 <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos
781 (setq first-real-pos (point)))) 775 group-:-pos group-\;-pos route-addr-:-pos
782 (cond 776 record-pos-symbol
783 ;; comment 777 first-real-pos last-real-pos
784 ((eq char ?\() 778 phrase-beg phrase-end
785 (set-syntax-table mail-extr-address-comment-syntax-table) 779 cbeg cend ; dynamically set from -voodoo
786 ;; only record the first non-empty comment's position 780 quote-beg quote-end
787 (if (and (not cbeg) 781 atom-beg atom-end
788 (save-excursion 782 mbox-beg mbox-end
789 (forward-char 1) 783 \.-ends-name
790 (mail-extr-skip-whitespace-forward) 784 temp
791 (not (eq ?\) (char-after (point)))))) 785 ;; name-suffix
792 (setq cbeg (point))) 786 fi mi li ; first, middle, last initial
793 ;; TODO: don't record if unbalanced 787 saved-%-pos saved-!-pos saved-@-pos
794 (or (mail-extr-safe-move-sexp 1) 788 domain-pos \.-pos insert-point
789 ;; mailbox-name-processed-flag
790 disable-initial-guessing-flag) ; dynamically set from -voodoo
791
792 (goto-char (point-min))
793
794 ;; Insert extra space at beginning to allow later replacement with <
795 ;; without having to move markers.
796 (or (eq (following-char) ?\ )
797 (insert ?\ ))
798
799 ;; First pass grabs useful information about address.
800 (while (progn
801 (mail-extr-skip-whitespace-forward)
802 (not (eobp)))
803 (setq char (char-after (point)))
804 (or first-real-pos
805 (if (not (eq char ?\())
806 (setq first-real-pos (point))))
807 (cond
808 ;; comment
809 ((eq char ?\()
810 (set-syntax-table mail-extr-address-comment-syntax-table)
811 ;; only record the first non-empty comment's position
812 (if (and (not cbeg)
813 (save-excursion
814 (forward-char 1)
815 (mail-extr-skip-whitespace-forward)
816 (not (eq ?\) (char-after (point))))))
817 (setq cbeg (point)))
818 ;; TODO: don't record if unbalanced
819 (or (mail-extr-safe-move-sexp 1)
820 (forward-char 1))
821 (set-syntax-table mail-extr-address-syntax-table)
822 (if (and cbeg
823 (not cend))
824 (setq cend (point))))
825 ;; quoted text
826 ((eq char ?\")
827 ;; only record the first non-empty quote's position
828 (if (and (not quote-beg)
829 (save-excursion
830 (forward-char 1)
831 (mail-extr-skip-whitespace-forward)
832 (not (eq ?\" (char-after (point))))))
833 (setq quote-beg (point)))
834 ;; TODO: don't record if unbalanced
835 (or (mail-extr-safe-move-sexp 1)
836 (forward-char 1))
837 (if (and quote-beg
838 (not quote-end))
839 (setq quote-end (point))))
840 ;; domain literals
841 ((eq char ?\[)
842 (set-syntax-table mail-extr-address-domain-literal-syntax-table)
843 (or (mail-extr-safe-move-sexp 1)
844 (forward-char 1))
845 (set-syntax-table mail-extr-address-syntax-table))
846 ;; commas delimit addresses when outside < > pairs.
847 ((and (eq char ?,)
848 (or (and (null <-pos)
849 ;; Handle ROUTE-ADDR address that is missing its <.
850 (not (eq ?@ (char-after (1+ (point))))))
851 (and >-pos
852 ;; handle weird munged addresses
853 ;; BUG FIX: This test was reversed. Thanks to the
854 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
855 ;; for discovering this!
856 (< (mail-extr-last <-pos) (car >-pos)))))
857 ;; The argument contains more than one address.
858 ;; Temporarily hide everything after this one.
859 (setq end-of-address (copy-marker (1+ (point))))
860 (narrow-to-region (point-min) (1+ (point)))
861 (mail-extr-delete-char 1)
862 (setq char ?\() ; HAVE I NO SHAME??
863 )
864 ;; record the position of various interesting chars, determine
865 ;; legality later.
866 ((setq record-pos-symbol
867 (cdr (assq char
868 '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
869 (?: . :-pos) (?, . comma-pos) (?! . !-pos)
870 (?% . %-pos) (?\; . \;-pos)))))
871 (set record-pos-symbol
872 (cons (point) (symbol-value record-pos-symbol)))
795 (forward-char 1)) 873 (forward-char 1))
796 (set-syntax-table mail-extr-address-syntax-table) 874 ((eq char ?.)
797 (if (and cbeg
798 (not cend))
799 (setq cend (point))))
800 ;; quoted text
801 ((eq char ?\")
802 ;; only record the first non-empty quote's position
803 (if (and (not quote-beg)
804 (save-excursion
805 (forward-char 1)
806 (mail-extr-skip-whitespace-forward)
807 (not (eq ?\" (char-after (point))))))
808 (setq quote-beg (point)))
809 ;; TODO: don't record if unbalanced
810 (or (mail-extr-safe-move-sexp 1)
811 (forward-char 1)) 875 (forward-char 1))
812 (if (and quote-beg 876 ((memq char '(
813 (not quote-end)) 877 ;; comment terminator illegal
814 (setq quote-end (point)))) 878 ?\)
815 ;; domain literals 879 ;; domain literal terminator illegal
816 ((eq char ?\[) 880 ?\]
817 (set-syntax-table mail-extr-address-domain-literal-syntax-table) 881 ;; \ allowed only within quoted strings,
818 (or (mail-extr-safe-move-sexp 1) 882 ;; domain literals, and comments
883 ?\\
884 ))
885 (mail-extr-nuke-char-at (point))
819 (forward-char 1)) 886 (forward-char 1))
820 (set-syntax-table mail-extr-address-syntax-table)) 887 (t
821 ;; commas delimit addresses when outside < > pairs. 888 (forward-word 1)))
822 ((and (eq char ?,) 889 (or (eq char ?\()
823 (or (and (null <-pos) 890 ;; At the end of first address of a multiple address header.
824 ;; Handle ROUTE-ADDR address that is missing its <. 891 (and (eq char ?,)
825 (not (eq ?@ (char-after (1+ (point)))))) 892 (eobp))
826 (and >-pos 893 (setq last-real-pos (point))))
827 ;; handle weird munged addresses 894
828 ;; BUG FIX: This test was reversed. Thanks to the 895 ;; Use only the leftmost <, if any. Replace all others with spaces.
829 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au> 896 (while (cdr <-pos)
830 ;; for discovering this! 897 (mail-extr-nuke-char-at (car <-pos))
831 (< (mail-extr-last <-pos) (car >-pos))))) 898 (setq <-pos (cdr <-pos)))
832;; It'd be great if some day this worked, but for now, punt. 899
833;; (setq multiple-addresses t) 900 ;; Use only the rightmost >, if any. Replace all others with spaces.
834;; ;; *** Why do I want this: 901 (while (cdr >-pos)
835;; (mail-extr-delete-char 1) 902 (mail-extr-nuke-char-at (nth 1 >-pos))
836;; (narrow-to-region (point-min) (point)) 903 (setcdr >-pos (nthcdr 2 >-pos)))
837 (delete-region (point) (point-max)) 904
838 (setq char ?\() ; HAVE I NO SHAME?? 905 ;; If multiple @s and a :, but no < and >, insert around buffer.
839 ) 906 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
840 ;; record the position of various interesting chars, determine 907 ;; This commonly happens on the UUCP "From " line. Ugh.
841 ;; legality later. 908 (cond ((and (> (length @-pos) 1)
842 ((setq record-pos-symbol 909 (eq 1 (length :-pos)) ;TODO: check if between last two @s
843 (cdr (assq char 910 (not \;-pos)
844 '((?< . <-pos) (?> . >-pos) (?@ . @-pos) 911 (not <-pos))
845 (?: . :-pos) (?, . comma-pos) (?! . !-pos) 912 (goto-char (point-min))
846 (?% . %-pos) (?\; . \;-pos)))))
847 (set record-pos-symbol
848 (cons (point) (symbol-value record-pos-symbol)))
849 (forward-char 1))
850 ((eq char ?.)
851 (forward-char 1))
852 ((memq char '(
853 ;; comment terminator illegal
854 ?\)
855 ;; domain literal terminator illegal
856 ?\]
857 ;; \ allowed only within quoted strings,
858 ;; domain literals, and comments
859 ?\\
860 ))
861 (mail-extr-nuke-char-at (point))
862 (forward-char 1))
863 (t
864 (forward-word 1)))
865 (or (eq char ?\()
866 ;; At the end of first address of a multiple address header.
867 (and (eq char ?,)
868 (eobp))
869 (setq last-real-pos (point))))
870
871 ;; Use only the leftmost <, if any. Replace all others with spaces.
872 (while (cdr <-pos)
873 (mail-extr-nuke-char-at (car <-pos))
874 (setq <-pos (cdr <-pos)))
875
876 ;; Use only the rightmost >, if any. Replace all others with spaces.
877 (while (cdr >-pos)
878 (mail-extr-nuke-char-at (nth 1 >-pos))
879 (setcdr >-pos (nthcdr 2 >-pos)))
880
881 ;; If multiple @s and a :, but no < and >, insert around buffer.
882 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
883 ;; This commonly happens on the UUCP "From " line. Ugh.
884 (cond ((and (> (length @-pos) 1)
885 (eq 1 (length :-pos)) ;TODO: check if between last two @s
886 (not \;-pos)
887 (not <-pos))
888 (goto-char (point-min))
889 (mail-extr-delete-char 1)
890 (setq <-pos (list (point)))
891 (insert ?<)))
892
893 ;; If < but no >, insert > in rightmost possible position
894 (cond ((and <-pos
895 (null >-pos))
896 (goto-char (point-max))
897 (setq >-pos (list (point)))
898 (insert ?>)))
899
900 ;; If > but no <, replace > with space.
901 (cond ((and >-pos
902 (null <-pos))
903 (mail-extr-nuke-char-at (car >-pos))
904 (setq >-pos nil)))
905
906 ;; Turn >-pos and <-pos into non-lists
907 (setq >-pos (car >-pos)
908 <-pos (car <-pos))
909
910 ;; Trim other punctuation lists of items outside < > pair to handle
911 ;; stupid MTAs.
912 (cond (<-pos ; don't need to check >-pos also
913 ;; handle bozo software that violates RFC 822 by sticking
914 ;; punctuation marks outside of a < > pair
915 (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
916 ;; RFC 822 says nothing about these two outside < >, but
917 ;; remove those positions from the lists to make things
918 ;; easier.
919 (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
920 (mail-extr-nuke-outside-range %-pos <-pos >-pos t)))
921
922 ;; Check for : that indicates GROUP list and for : part of
923 ;; ROUTE-ADDR spec.
924 ;; Can't possibly be more than two :. Nuke any extra.
925 (while :-pos
926 (setq temp (car :-pos)
927 :-pos (cdr :-pos))
928 (cond ((and <-pos >-pos
929 (> temp <-pos)
930 (< temp >-pos))
931 (if (or route-addr-:-pos
932 (< (length @-pos) 2)
933 (> temp (car @-pos))
934 (< temp (nth 1 @-pos)))
935 (mail-extr-nuke-char-at temp)
936 (setq route-addr-:-pos temp)))
937 ((or (not <-pos)
938 (and <-pos
939 (< temp <-pos)))
940 (setq group-:-pos temp))))
941
942 ;; Nuke any ; that is in or to the left of a < > pair or to the left
943 ;; of a GROUP starting :. Also, there may only be one ;.
944 (while \;-pos
945 (setq temp (car \;-pos)
946 \;-pos (cdr \;-pos))
947 (cond ((and <-pos >-pos
948 (> temp <-pos)
949 (< temp >-pos))
950 (mail-extr-nuke-char-at temp))
951 ((and (or (not group-:-pos)
952 (> temp group-:-pos))
953 (not group-\;-pos))
954 (setq group-\;-pos temp))))
955
956 ;; Nuke unmatched GROUP syntax characters.
957 (cond ((and group-:-pos (not group-\;-pos))
958 ;; *** Do I really need to erase it?
959 (mail-extr-nuke-char-at group-:-pos)
960 (setq group-:-pos nil)))
961 (cond ((and group-\;-pos (not group-:-pos))
962 ;; *** Do I really need to erase it?
963 (mail-extr-nuke-char-at group-\;-pos)
964 (setq group-\;-pos nil)))
965
966 ;; Handle junk like ";@host.company.dom" that sendmail adds.
967 ;; **** should I remember comment positions?
968 (cond
969 (group-\;-pos
970 ;; this is fine for now
971 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
972 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
973 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
974 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
975 (and last-real-pos
976 (> last-real-pos (1+ group-\;-pos))
977 (setq last-real-pos (1+ group-\;-pos)))
978 ;; *** This may be wrong:
979 (and cend
980 (> cend group-\;-pos)
981 (setq cend nil
982 cbeg nil))
983 (and quote-end
984 (> quote-end group-\;-pos)
985 (setq quote-end nil
986 quote-beg nil))
987 ;; This was both wrong and unnecessary:
988 ;;(narrow-to-region (point-min) group-\;-pos)
989
990 ;; *** The entire handling of GROUP addresses seems rather lame.
991 ;; *** It deserves a complete rethink, except that these addresses
992 ;; *** are hardly ever seen.
993 ))
994
995 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
996 ;; others.
997 ;; Hell, go ahead an nuke all of the commas.
998 ;; **** This will cause problems when we start handling commas in
999 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1000 (mail-extr-nuke-outside-range comma-pos 1 1)
1001
1002 ;; can only have multiple @s inside < >. The fact that some MTAs
1003 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
1004 ;; handled above.
1005
1006 ;; Locate PHRASE part of ROUTE-ADDR.
1007 (cond (<-pos
1008 (goto-char <-pos)
1009 (mail-extr-skip-whitespace-backward)
1010 (setq phrase-end (point))
1011 (goto-char (or ;;group-:-pos
1012 (point-min)))
1013 (mail-extr-skip-whitespace-forward)
1014 (if (< (point) phrase-end)
1015 (setq phrase-beg (point))
1016 (setq phrase-end nil))))
1017
1018 ;; handle ROUTE-ADDRS with real ROUTEs.
1019 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
1020 ;; any % or ! must be semantically meaningless.
1021 ;; TODO: do this processing into canonicalization buffer
1022 (cond (route-addr-:-pos
1023 (setq !-pos nil
1024 %-pos nil
1025 >-pos (copy-marker >-pos)
1026 route-addr-:-pos (copy-marker route-addr-:-pos))
1027 (goto-char >-pos)
1028 (insert-before-markers ?X)
1029 (goto-char (car @-pos))
1030 (while (setq @-pos (cdr @-pos))
1031 (mail-extr-delete-char 1)
1032 (setq %-pos (cons (point-marker) %-pos))
1033 (insert "%")
1034 (goto-char (1- >-pos))
1035 (save-excursion
1036 (insert-buffer-substring extraction-buffer
1037 (car @-pos) route-addr-:-pos)
1038 (delete-region (car @-pos) route-addr-:-pos))
1039 (or (cdr @-pos)
1040 (setq saved-@-pos (list (point)))))
1041 (setq @-pos saved-@-pos)
1042 (goto-char >-pos)
1043 (mail-extr-delete-char -1)
1044 (mail-extr-nuke-char-at route-addr-:-pos)
1045 (mail-extr-demarkerize route-addr-:-pos)
1046 (setq route-addr-:-pos nil
1047 >-pos (mail-extr-demarkerize >-pos)
1048 %-pos (mapcar 'mail-extr-demarkerize %-pos))))
1049
1050 ;; de-listify @-pos
1051 (setq @-pos (car @-pos))
1052
1053 ;; TODO: remove comments in the middle of an address
1054
1055 (set-buffer canonicalization-buffer)
1056 (fundamental-mode)
1057 (kill-all-local-variables)
1058 (buffer-disable-undo canonicalization-buffer)
1059 (set-syntax-table mail-extr-address-syntax-table)
1060 (setq case-fold-search nil)
1061
1062 (widen)
1063 (erase-buffer)
1064 (insert-buffer-substring extraction-buffer)
1065
1066 (if <-pos
1067 (narrow-to-region (progn
1068 (goto-char (1+ <-pos))
1069 (mail-extr-skip-whitespace-forward)
1070 (point))
1071 >-pos)
1072 (if (and first-real-pos last-real-pos)
1073 (narrow-to-region first-real-pos last-real-pos)
1074 ;; ****** Oh no! What if the address is completely empty!
1075 ;; *** Is this correct?
1076 (narrow-to-region (point-max) (point-max))
1077 ))
1078
1079 (and @-pos %-pos
1080 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
1081 (and %-pos !-pos
1082 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
1083 (and @-pos !-pos (not %-pos)
1084 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
1085
1086 ;; Error condition:?? (and %-pos (not @-pos))
1087
1088 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
1089 (cond ((and %-pos
1090 (not @-pos))
1091 (goto-char (car %-pos))
1092 (mail-extr-delete-char 1)
1093 (setq @-pos (point))
1094 (insert "@")
1095 (setq %-pos (cdr %-pos))))
1096
1097 (if mail-extr-mangle-uucp
1098 (cond (!-pos
1099 ;; **** I don't understand this save-restriction and the
1100 ;; narrow-to-region inside it. Why did I do that?
1101 (save-restriction
1102 (cond ((and @-pos
1103 mail-extr-@-binds-tighter-than-!)
1104 (goto-char @-pos)
1105 (setq %-pos (cons (point) %-pos)
1106 @-pos nil)
1107 (mail-extr-delete-char 1)
1108 (insert "%")
1109 (setq insert-point (point-max)))
1110 (mail-extr-@-binds-tighter-than-!
1111 (setq insert-point (point-max)))
1112 (%-pos
1113 (setq insert-point (mail-extr-last %-pos)
1114 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
1115 %-pos nil
1116 @-pos (mail-extr-markerize @-pos)))
1117 (@-pos
1118 (setq insert-point @-pos)
1119 (setq @-pos (mail-extr-markerize @-pos)))
1120 (t
1121 (setq insert-point (point-max))))
1122 (narrow-to-region (point-min) insert-point)
1123 (setq saved-!-pos (car !-pos))
1124 (while !-pos
1125 (goto-char (point-max))
1126 (cond ((and (not @-pos)
1127 (not (cdr !-pos)))
1128 (setq @-pos (point))
1129 (insert-before-markers "@ "))
1130 (t
1131 (setq %-pos (cons (point) %-pos))
1132 (insert-before-markers "% ")))
1133 (backward-char 1)
1134 (insert-buffer-substring
1135 (current-buffer)
1136 (if (nth 1 !-pos)
1137 (1+ (nth 1 !-pos))
1138 (point-min))
1139 (car !-pos))
1140 (mail-extr-delete-char 1) 913 (mail-extr-delete-char 1)
1141 (or (save-excursion 914 (setq <-pos (list (point)))
1142 (mail-extr-safe-move-sexp -1) 915 (insert ?<)))
1143 (mail-extr-skip-whitespace-backward)
1144 (eq ?. (preceding-char)))
1145 (insert-before-markers
1146 (if (save-excursion
1147 (mail-extr-skip-whitespace-backward)
1148 (eq ?. (preceding-char)))
1149 ""
1150 ".")
1151 "uucp"))
1152 (setq !-pos (cdr !-pos))))
1153 (and saved-%-pos
1154 (setq %-pos (append (mapcar 'mail-extr-demarkerize
1155 saved-%-pos)
1156 %-pos)))
1157 (setq @-pos (mail-extr-demarkerize @-pos))
1158 (narrow-to-region (1+ saved-!-pos) (point-max)))))
1159
1160 ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
1161 (cond ((and %-pos
1162 (not @-pos))
1163 (goto-char (car %-pos))
1164 (mail-extr-delete-char 1)
1165 (setq @-pos (point))
1166 (insert "@")
1167 (setq %-pos (cdr %-pos))))
1168
1169 (setq %-pos (nreverse %-pos))
1170 (cond (%-pos ; implies @-pos valid
1171 (setq temp %-pos)
1172 (catch 'truncated
1173 (while temp
1174 (goto-char (or (nth 1 temp)
1175 @-pos))
1176 (mail-extr-skip-whitespace-backward)
1177 (save-excursion
1178 (mail-extr-safe-move-sexp -1)
1179 (setq domain-pos (point))
1180 (mail-extr-skip-whitespace-backward)
1181 (setq \.-pos (eq ?. (preceding-char))))
1182 (cond ((and \.-pos
1183 ;; #### string consing
1184 (let ((s (intern-soft
1185 (buffer-substring domain-pos (point))
1186 mail-extr-all-top-level-domains)))
1187 (and s (get s 'domain-name))))
1188 (narrow-to-region (point-min) (point))
1189 (goto-char (car temp))
1190 (mail-extr-delete-char 1)
1191 (setq @-pos (point))
1192 (setcdr temp nil)
1193 (setq %-pos (delq @-pos %-pos))
1194 (insert "@")
1195 (throw 'truncated t)))
1196 (setq temp (cdr temp))))))
1197 (setq mbox-beg (point-min)
1198 mbox-end (if %-pos (car %-pos)
1199 (or @-pos
1200 (point-max))))
1201
1202 ;; Done canonicalizing address.
1203
1204 (set-buffer extraction-buffer)
1205
1206 ;; Decide what part of the address to search to find the full name.
1207 (cond (
1208 ;; Example: "First M. Last" <fml@foo.bar.dom>
1209 (and phrase-beg
1210 (eq quote-beg phrase-beg)
1211 (<= quote-end phrase-end))
1212 (narrow-to-region (1+ quote-beg) (1- quote-end))
1213 (mail-extr-undo-backslash-quoting (point-min) (point-max)))
1214
1215 ;; Example: First Last <fml@foo.bar.dom>
1216 (phrase-beg
1217 (narrow-to-region phrase-beg phrase-end))
1218
1219 ;; Example: fml@foo.bar.dom (First M. Last)
1220 (cbeg
1221 (narrow-to-region (1+ cbeg) (1- cend))
1222 (mail-extr-undo-backslash-quoting (point-min) (point-max))
1223
1224 ;; Deal with spacing problems
1225 (goto-char (point-min))
1226; (cond ((not (search-forward " " nil t))
1227; (goto-char (point-min))
1228; (cond ((search-forward "_" nil t)
1229; ;; Handle the *idiotic* use of underlines as spaces.
1230; ;; Example: fml@foo.bar.dom (First_M._Last)
1231; (goto-char (point-min))
1232; (while (search-forward "_" nil t)
1233; (replace-match " " t)))
1234; ((search-forward "." nil t)
1235; ;; Fix . used as space
1236; ;; Example: danj1@cb.att.com (daniel.jacobson)
1237; (goto-char (point-min))
1238; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1239; (replace-match "\\1 \\2" t))))))
1240 )
1241
1242 ;; Otherwise we try to get the name from the mailbox portion
1243 ;; of the address.
1244 ;; Example: First_M_Last@foo.bar.dom
1245 (t
1246 ;; *** Work in canon buffer instead? No, can't. Hmm.
1247 (goto-char (point-max))
1248 (narrow-to-region (point) (point))
1249 (insert-buffer-substring canonicalization-buffer
1250 mbox-beg mbox-end)
1251 (goto-char (point-min))
1252
1253 ;; Example: First_Last.XXX@foo.bar.dom
1254 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1255
1256 (goto-char (point-min))
1257
1258 (if (not mail-extr-mangle-uucp)
1259 (modify-syntax-entry ?! "w" (syntax-table)))
1260 916
1261 (while (progn 917 ;; If < but no >, insert > in rightmost possible position
1262 (mail-extr-skip-whitespace-forward) 918 (cond ((and <-pos
1263 (not (eobp))) 919 (null >-pos))
1264 (setq char (char-after (point))) 920 (goto-char (point-max))
1265 (cond 921 (setq >-pos (list (point)))
1266 ((eq char ?\") 922 (insert ?>)))
1267 (setq quote-beg (point)) 923
1268 (or (mail-extr-safe-move-sexp 1) 924 ;; If > but no <, replace > with space.
1269 ;; TODO: handle this error condition!!!!! 925 (cond ((and >-pos
1270 (forward-char 1)) 926 (null <-pos))
1271 ;; take into account deletions 927 (mail-extr-nuke-char-at (car >-pos))
1272 (setq quote-end (- (point) 2)) 928 (setq >-pos nil)))
1273 (save-excursion 929
1274 (backward-char 1) 930 ;; Turn >-pos and <-pos into non-lists
931 (setq >-pos (car >-pos)
932 <-pos (car <-pos))
933
934 ;; Trim other punctuation lists of items outside < > pair to handle
935 ;; stupid MTAs.
936 (cond (<-pos ; don't need to check >-pos also
937 ;; handle bozo software that violates RFC 822 by sticking
938 ;; punctuation marks outside of a < > pair
939 (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
940 ;; RFC 822 says nothing about these two outside < >, but
941 ;; remove those positions from the lists to make things
942 ;; easier.
943 (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
944 (mail-extr-nuke-outside-range %-pos <-pos >-pos t)))
945
946 ;; Check for : that indicates GROUP list and for : part of
947 ;; ROUTE-ADDR spec.
948 ;; Can't possibly be more than two :. Nuke any extra.
949 (while :-pos
950 (setq temp (car :-pos)
951 :-pos (cdr :-pos))
952 (cond ((and <-pos >-pos
953 (> temp <-pos)
954 (< temp >-pos))
955 (if (or route-addr-:-pos
956 (< (length @-pos) 2)
957 (> temp (car @-pos))
958 (< temp (nth 1 @-pos)))
959 (mail-extr-nuke-char-at temp)
960 (setq route-addr-:-pos temp)))
961 ((or (not <-pos)
962 (and <-pos
963 (< temp <-pos)))
964 (setq group-:-pos temp))))
965
966 ;; Nuke any ; that is in or to the left of a < > pair or to the left
967 ;; of a GROUP starting :. Also, there may only be one ;.
968 (while \;-pos
969 (setq temp (car \;-pos)
970 \;-pos (cdr \;-pos))
971 (cond ((and <-pos >-pos
972 (> temp <-pos)
973 (< temp >-pos))
974 (mail-extr-nuke-char-at temp))
975 ((and (or (not group-:-pos)
976 (> temp group-:-pos))
977 (not group-\;-pos))
978 (setq group-\;-pos temp))))
979
980 ;; Nuke unmatched GROUP syntax characters.
981 (cond ((and group-:-pos (not group-\;-pos))
982 ;; *** Do I really need to erase it?
983 (mail-extr-nuke-char-at group-:-pos)
984 (setq group-:-pos nil)))
985 (cond ((and group-\;-pos (not group-:-pos))
986 ;; *** Do I really need to erase it?
987 (mail-extr-nuke-char-at group-\;-pos)
988 (setq group-\;-pos nil)))
989
990 ;; Handle junk like ";@host.company.dom" that sendmail adds.
991 ;; **** should I remember comment positions?
992 (cond
993 (group-\;-pos
994 ;; this is fine for now
995 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
996 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
997 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
998 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
999 (and last-real-pos
1000 (> last-real-pos (1+ group-\;-pos))
1001 (setq last-real-pos (1+ group-\;-pos)))
1002 ;; *** This may be wrong:
1003 (and cend
1004 (> cend group-\;-pos)
1005 (setq cend nil
1006 cbeg nil))
1007 (and quote-end
1008 (> quote-end group-\;-pos)
1009 (setq quote-end nil
1010 quote-beg nil))
1011 ;; This was both wrong and unnecessary:
1012 ;;(narrow-to-region (point-min) group-\;-pos)
1013
1014 ;; *** The entire handling of GROUP addresses seems rather lame.
1015 ;; *** It deserves a complete rethink, except that these addresses
1016 ;; *** are hardly ever seen.
1017 ))
1018
1019 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
1020 ;; others.
1021 ;; Hell, go ahead an nuke all of the commas.
1022 ;; **** This will cause problems when we start handling commas in
1023 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1024 (mail-extr-nuke-outside-range comma-pos 1 1)
1025
1026 ;; can only have multiple @s inside < >. The fact that some MTAs
1027 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
1028 ;; handled above.
1029
1030 ;; Locate PHRASE part of ROUTE-ADDR.
1031 (cond (<-pos
1032 (goto-char <-pos)
1033 (mail-extr-skip-whitespace-backward)
1034 (setq phrase-end (point))
1035 (goto-char (or ;;group-:-pos
1036 (point-min)))
1037 (mail-extr-skip-whitespace-forward)
1038 (if (< (point) phrase-end)
1039 (setq phrase-beg (point))
1040 (setq phrase-end nil))))
1041
1042 ;; handle ROUTE-ADDRS with real ROUTEs.
1043 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
1044 ;; any % or ! must be semantically meaningless.
1045 ;; TODO: do this processing into canonicalization buffer
1046 (cond (route-addr-:-pos
1047 (setq !-pos nil
1048 %-pos nil
1049 >-pos (copy-marker >-pos)
1050 route-addr-:-pos (copy-marker route-addr-:-pos))
1051 (goto-char >-pos)
1052 (insert-before-markers ?X)
1053 (goto-char (car @-pos))
1054 (while (setq @-pos (cdr @-pos))
1275 (mail-extr-delete-char 1) 1055 (mail-extr-delete-char 1)
1276 (goto-char quote-beg) 1056 (setq %-pos (cons (point-marker) %-pos))
1277 (or (eobp) 1057 (insert "%")
1278 (mail-extr-delete-char 1))) 1058 (goto-char (1- >-pos))
1279 (mail-extr-undo-backslash-quoting quote-beg quote-end) 1059 (save-excursion
1280 (or (eq ?\ (char-after (point))) 1060 (insert-buffer-substring extraction-buffer
1281 (insert " ")) 1061 (car @-pos) route-addr-:-pos)
1282;; (setq mailbox-name-processed-flag t) 1062 (delete-region (car @-pos) route-addr-:-pos))
1283 (setq \.-ends-name t)) 1063 (or (cdr @-pos)
1284 ((eq char ?.) 1064 (setq saved-@-pos (list (point)))))
1285 (if (memq (char-after (1+ (point))) '(?_ ?=)) 1065 (setq @-pos saved-@-pos)
1286 (progn 1066 (goto-char >-pos)
1287 (forward-char 1) 1067 (mail-extr-delete-char -1)
1288 (mail-extr-delete-char 1) 1068 (mail-extr-nuke-char-at route-addr-:-pos)
1289 (insert ?\ )) 1069 (mail-extr-demarkerize route-addr-:-pos)
1290 (if \.-ends-name 1070 (setq route-addr-:-pos nil
1291 (narrow-to-region (point-min) (point)) 1071 >-pos (mail-extr-demarkerize >-pos)
1292 (mail-extr-delete-char 1) 1072 %-pos (mapcar 'mail-extr-demarkerize %-pos))))
1293 (insert " "))) 1073
1294;; (setq mailbox-name-processed-flag t) 1074 ;; de-listify @-pos
1295 ) 1075 (setq @-pos (car @-pos))
1296 ((memq (char-syntax char) '(?. ?\\)) 1076
1297 (mail-extr-delete-char 1) 1077 ;; TODO: remove comments in the middle of an address
1298 (insert " ") 1078
1299;; (setq mailbox-name-processed-flag t) 1079 (save-excursion
1080 (set-buffer canonicalization-buffer)
1081
1082 (widen)
1083 (erase-buffer)
1084 (insert-buffer-substring extraction-buffer)
1085
1086 (if <-pos
1087 (narrow-to-region (progn
1088 (goto-char (1+ <-pos))
1089 (mail-extr-skip-whitespace-forward)
1090 (point))
1091 >-pos)
1092 (if (and first-real-pos last-real-pos)
1093 (narrow-to-region first-real-pos last-real-pos)
1094 ;; ****** Oh no! What if the address is completely empty!
1095 ;; *** Is this correct?
1096 (narrow-to-region (point-max) (point-max))
1097 ))
1098
1099 (and @-pos %-pos
1100 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
1101 (and %-pos !-pos
1102 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
1103 (and @-pos !-pos (not %-pos)
1104 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
1105
1106 ;; Error condition:?? (and %-pos (not @-pos))
1107
1108 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
1109 (cond ((and %-pos
1110 (not @-pos))
1111 (goto-char (car %-pos))
1112 (mail-extr-delete-char 1)
1113 (setq @-pos (point))
1114 (insert "@")
1115 (setq %-pos (cdr %-pos))))
1116
1117 (if mail-extr-mangle-uucp
1118 (cond (!-pos
1119 ;; **** I don't understand this save-restriction and the
1120 ;; narrow-to-region inside it. Why did I do that?
1121 (save-restriction
1122 (cond ((and @-pos
1123 mail-extr-@-binds-tighter-than-!)
1124 (goto-char @-pos)
1125 (setq %-pos (cons (point) %-pos)
1126 @-pos nil)
1127 (mail-extr-delete-char 1)
1128 (insert "%")
1129 (setq insert-point (point-max)))
1130 (mail-extr-@-binds-tighter-than-!
1131 (setq insert-point (point-max)))
1132 (%-pos
1133 (setq insert-point (mail-extr-last %-pos)
1134 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
1135 %-pos nil
1136 @-pos (mail-extr-markerize @-pos)))
1137 (@-pos
1138 (setq insert-point @-pos)
1139 (setq @-pos (mail-extr-markerize @-pos)))
1140 (t
1141 (setq insert-point (point-max))))
1142 (narrow-to-region (point-min) insert-point)
1143 (setq saved-!-pos (car !-pos))
1144 (while !-pos
1145 (goto-char (point-max))
1146 (cond ((and (not @-pos)
1147 (not (cdr !-pos)))
1148 (setq @-pos (point))
1149 (insert-before-markers "@ "))
1150 (t
1151 (setq %-pos (cons (point) %-pos))
1152 (insert-before-markers "% ")))
1153 (backward-char 1)
1154 (insert-buffer-substring
1155 (current-buffer)
1156 (if (nth 1 !-pos)
1157 (1+ (nth 1 !-pos))
1158 (point-min))
1159 (car !-pos))
1160 (mail-extr-delete-char 1)
1161 (or (save-excursion
1162 (mail-extr-safe-move-sexp -1)
1163 (mail-extr-skip-whitespace-backward)
1164 (eq ?. (preceding-char)))
1165 (insert-before-markers
1166 (if (save-excursion
1167 (mail-extr-skip-whitespace-backward)
1168 (eq ?. (preceding-char)))
1169 ""
1170 ".")
1171 "uucp"))
1172 (setq !-pos (cdr !-pos))))
1173 (and saved-%-pos
1174 (setq %-pos (append (mapcar 'mail-extr-demarkerize
1175 saved-%-pos)
1176 %-pos)))
1177 (setq @-pos (mail-extr-demarkerize @-pos))
1178 (narrow-to-region (1+ saved-!-pos) (point-max)))))
1179
1180 ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
1181 (cond ((and %-pos
1182 (not @-pos))
1183 (goto-char (car %-pos))
1184 (mail-extr-delete-char 1)
1185 (setq @-pos (point))
1186 (insert "@")
1187 (setq %-pos (cdr %-pos))))
1188
1189 (setq %-pos (nreverse %-pos))
1190 (cond (%-pos ; implies @-pos valid
1191 (setq temp %-pos)
1192 (catch 'truncated
1193 (while temp
1194 (goto-char (or (nth 1 temp)
1195 @-pos))
1196 (mail-extr-skip-whitespace-backward)
1197 (save-excursion
1198 (mail-extr-safe-move-sexp -1)
1199 (setq domain-pos (point))
1200 (mail-extr-skip-whitespace-backward)
1201 (setq \.-pos (eq ?. (preceding-char))))
1202 (cond ((and \.-pos
1203 ;; #### string consing
1204 (let ((s (intern-soft
1205 (buffer-substring domain-pos (point))
1206 mail-extr-all-top-level-domains)))
1207 (and s (get s 'domain-name))))
1208 (narrow-to-region (point-min) (point))
1209 (goto-char (car temp))
1210 (mail-extr-delete-char 1)
1211 (setq @-pos (point))
1212 (setcdr temp nil)
1213 (setq %-pos (delq @-pos %-pos))
1214 (insert "@")
1215 (throw 'truncated t)))
1216 (setq temp (cdr temp))))))
1217 (setq mbox-beg (point-min)
1218 mbox-end (if %-pos (car %-pos)
1219 (or @-pos
1220 (point-max)))))
1221
1222 ;; Done canonicalizing address.
1223 ;; We are now back in extraction-buffer.
1224
1225 ;; Decide what part of the address to search to find the full name.
1226 (cond (
1227 ;; Example: "First M. Last" <fml@foo.bar.dom>
1228 (and phrase-beg
1229 (eq quote-beg phrase-beg)
1230 (<= quote-end phrase-end))
1231 (narrow-to-region (1+ quote-beg) (1- quote-end))
1232 (mail-extr-undo-backslash-quoting (point-min) (point-max)))
1233
1234 ;; Example: First Last <fml@foo.bar.dom>
1235 (phrase-beg
1236 (narrow-to-region phrase-beg phrase-end))
1237
1238 ;; Example: fml@foo.bar.dom (First M. Last)
1239 (cbeg
1240 (narrow-to-region (1+ cbeg) (1- cend))
1241 (mail-extr-undo-backslash-quoting (point-min) (point-max))
1242
1243 ;; Deal with spacing problems
1244 (goto-char (point-min))
1245;;; (cond ((not (search-forward " " nil t))
1246;;; (goto-char (point-min))
1247;;; (cond ((search-forward "_" nil t)
1248;;; ;; Handle the *idiotic* use of underlines as spaces.
1249;;; ;; Example: fml@foo.bar.dom (First_M._Last)
1250;;; (goto-char (point-min))
1251;;; (while (search-forward "_" nil t)
1252;;; (replace-match " " t)))
1253;;; ((search-forward "." nil t)
1254;;; ;; Fix . used as space
1255;;; ;; Example: danj1@cb.att.com (daniel.jacobson)
1256;;; (goto-char (point-min))
1257;;; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1258;;; (replace-match "\\1 \\2" t))))))
1300 ) 1259 )
1260
1261 ;; Otherwise we try to get the name from the mailbox portion
1262 ;; of the address.
1263 ;; Example: First_M_Last@foo.bar.dom
1301 (t 1264 (t
1302 (setq atom-beg (point)) 1265 ;; *** Work in canon buffer instead? No, can't. Hmm.
1303 (forward-word 1) 1266 (goto-char (point-max))
1304 (setq atom-end (point)) 1267 (narrow-to-region (point) (point))
1305 (goto-char atom-beg) 1268 (insert-buffer-substring canonicalization-buffer
1306 (save-restriction 1269 mbox-beg mbox-end)
1307 (narrow-to-region atom-beg atom-end) 1270 (goto-char (point-min))
1271
1272 ;; Example: First_Last.XXX@foo.bar.dom
1273 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1274
1275 (goto-char (point-min))
1276
1277 (if (not mail-extr-mangle-uucp)
1278 (modify-syntax-entry ?! "w" (syntax-table)))
1279
1280 (while (progn
1281 (mail-extr-skip-whitespace-forward)
1282 (not (eobp)))
1283 (setq char (char-after (point)))
1308 (cond 1284 (cond
1309 1285 ((eq char ?\")
1310 ;; Handle X.400 addresses encoded in RFC-822. 1286 (setq quote-beg (point))
1311 ;; *** Shit! This has to handle the case where it is 1287 (or (mail-extr-safe-move-sexp 1)
1312 ;; *** embedded in a quote too! 1288 ;; TODO: handle this error condition!!!!!
1313 ;; *** Shit! The input is being broken up into atoms 1289 (forward-char 1))
1314 ;; *** by periods! 1290 ;; take into account deletions
1315 ((looking-at mail-extr-x400-encoded-address-pattern) 1291 (setq quote-end (- (point) 2))
1316 1292 (save-excursion
1317 ;; Copy the contents of the individual fields that 1293 (backward-char 1)
1318 ;; might hold name data to the beginning. 1294 (mail-extr-delete-char 1)
1319 (mapcar 1295 (goto-char quote-beg)
1320 (function 1296 (or (eobp)
1321 (lambda (field-pattern) 1297 (mail-extr-delete-char 1)))
1322 (cond 1298 (mail-extr-undo-backslash-quoting quote-beg quote-end)
1323 ((save-excursion 1299 (or (eq ?\ (char-after (point)))
1324 (re-search-forward field-pattern nil t)) 1300 (insert " "))
1325 (insert-buffer-substring (current-buffer) 1301 ;; (setq mailbox-name-processed-flag t)
1326 (match-beginning 1) 1302 (setq \.-ends-name t))
1327 (match-end 1)) 1303 ((eq char ?.)
1328 (insert " "))))) 1304 (if (memq (char-after (1+ (point))) '(?_ ?=))
1329 (list mail-extr-x400-encoded-address-given-name-pattern 1305 (progn
1330 mail-extr-x400-encoded-address-surname-pattern 1306 (forward-char 1)
1331 mail-extr-x400-encoded-address-full-name-pattern)) 1307 (mail-extr-delete-char 1)
1332 1308 (insert ?\ ))
1333 ;; Discard the rest, since it contains stuff like 1309 (if \.-ends-name
1334 ;; routing information, not part of a name. 1310 (narrow-to-region (point-min) (point))
1335 (mail-extr-skip-whitespace-backward) 1311 (mail-extr-delete-char 1)
1336 (delete-region (point) (point-max)) 1312 (insert " ")))
1337 1313 ;; (setq mailbox-name-processed-flag t)
1338 ;; Handle periods used for spacing. 1314 )
1339 (while (re-search-forward mail-extr-bad-dot-pattern nil t) 1315 ((memq (char-syntax char) '(?. ?\\))
1340 (replace-match "\\1 \\2" t)) 1316 (mail-extr-delete-char 1)
1341 1317 (insert " ")
1342;; (setq mailbox-name-processed-flag t) 1318 ;; (setq mailbox-name-processed-flag t)
1343 ) 1319 )
1344
1345 ;; Handle normal addresses.
1346 (t 1320 (t
1347 (goto-char (point-min)) 1321 (setq atom-beg (point))
1348 ;; Handle _ and = used for spacing. 1322 (forward-word 1)
1349 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t) 1323 (setq atom-end (point))
1350 (replace-match "\\1 " t) 1324 (goto-char atom-beg)
1351;; (setq mailbox-name-processed-flag t) 1325 (save-restriction
1352 ) 1326 (narrow-to-region atom-beg atom-end)
1353 (goto-char (point-max)))))))) 1327 (cond
1354 1328
1355 ;; undo the dirty deed 1329 ;; Handle X.400 addresses encoded in RFC-822.
1356 (if (not mail-extr-mangle-uucp) 1330 ;; *** Shit! This has to handle the case where it is
1357 (modify-syntax-entry ?! "." (syntax-table))) 1331 ;; *** embedded in a quote too!
1358 ;; 1332 ;; *** Shit! The input is being broken up into atoms
1359 ;; If we derived the name from the mailbox part of the address, 1333 ;; *** by periods!
1360 ;; and we only got one word out of it, don't treat that as a 1334 ((looking-at mail-extr-x400-encoded-address-pattern)
1361 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar") 1335
1362 ;; (if (not mailbox-name-processed-flag) 1336 ;; Copy the contents of the individual fields that
1363 ;; (delete-region (point-min) (point-max))) 1337 ;; might hold name data to the beginning.
1364 )) 1338 (mapcar
1365 1339 (function
1366 (set-syntax-table mail-extr-address-text-syntax-table) 1340 (lambda (field-pattern)
1367 1341 (cond
1368 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer) 1342 ((save-excursion
1369 (goto-char (point-min)) 1343 (re-search-forward field-pattern nil t))
1344 (insert-buffer-substring (current-buffer)
1345 (match-beginning 1)
1346 (match-end 1))
1347 (insert " ")))))
1348 (list mail-extr-x400-encoded-address-given-name-pattern
1349 mail-extr-x400-encoded-address-surname-pattern
1350 mail-extr-x400-encoded-address-full-name-pattern))
1351
1352 ;; Discard the rest, since it contains stuff like
1353 ;; routing information, not part of a name.
1354 (mail-extr-skip-whitespace-backward)
1355 (delete-region (point) (point-max))
1356
1357 ;; Handle periods used for spacing.
1358 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1359 (replace-match "\\1 \\2" t))
1360
1361 ;; (setq mailbox-name-processed-flag t)
1362 )
1363
1364 ;; Handle normal addresses.
1365 (t
1366 (goto-char (point-min))
1367 ;; Handle _ and = used for spacing.
1368 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
1369 (replace-match "\\1 " t)
1370 ;; (setq mailbox-name-processed-flag t)
1371 )
1372 (goto-char (point-max))))))))
1373
1374 ;; undo the dirty deed
1375 (if (not mail-extr-mangle-uucp)
1376 (modify-syntax-entry ?! "." (syntax-table)))
1377 ;;
1378 ;; If we derived the name from the mailbox part of the address,
1379 ;; and we only got one word out of it, don't treat that as a
1380 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
1381 ;; (if (not mailbox-name-processed-flag)
1382 ;; (delete-region (point-min) (point-max)))
1383 ))
1370 1384
1371 ;; If name is "First Last" and userid is "F?L", then assume 1385 (set-syntax-table mail-extr-address-text-syntax-table)
1372 ;; the middle initial is the second letter in the userid. 1386
1373 ;; Initial code by Jamie Zawinski <jwz@lucid.com> 1387 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
1374 ;; *** Make it work when there's a suffix as well. 1388 (goto-char (point-min))
1375 (goto-char (point-min)) 1389
1376 (cond ((and mail-extr-guess-middle-initial 1390 ;; If name is "First Last" and userid is "F?L", then assume
1377 (not disable-initial-guessing-flag) 1391 ;; the middle initial is the second letter in the userid.
1378 (eq 3 (- mbox-end mbox-beg)) 1392 ;; Initial code by Jamie Zawinski <jwz@lucid.com>
1379 (progn 1393 ;; *** Make it work when there's a suffix as well.
1380 (goto-char (point-min)) 1394 (goto-char (point-min))
1381 (looking-at mail-extr-two-name-pattern))) 1395 (cond ((and mail-extr-guess-middle-initial
1382 (setq fi (char-after (match-beginning 0)) 1396 (not disable-initial-guessing-flag)
1383 li (char-after (match-beginning 3))) 1397 (eq 3 (- mbox-end mbox-beg))
1384 (save-excursion 1398 (progn
1385 (set-buffer canonicalization-buffer) 1399 (goto-char (point-min))
1386 ;; char-equal is ignoring case here, so no need to upcase 1400 (looking-at mail-extr-two-name-pattern)))
1387 ;; or downcase. 1401 (setq fi (char-after (match-beginning 0))
1388 (let ((case-fold-search t)) 1402 li (char-after (match-beginning 3)))
1389 (and (char-equal fi (char-after mbox-beg)) 1403 (save-excursion
1390 (char-equal li (char-after (1- mbox-end))) 1404 (set-buffer canonicalization-buffer)
1391 (setq mi (char-after (1+ mbox-beg)))))) 1405 ;; char-equal is ignoring case here, so no need to upcase
1392 (cond ((and mi 1406 ;; or downcase.
1393 ;; TODO: use better table than syntax table 1407 (let ((case-fold-search t))
1394 (eq ?w (char-syntax mi))) 1408 (and (char-equal fi (char-after mbox-beg))
1395 (goto-char (match-beginning 3)) 1409 (char-equal li (char-after (1- mbox-end)))
1396 (insert (upcase mi) ". "))))) 1410 (setq mi (char-after (1+ mbox-beg))))))
1397 1411 (cond ((and mi
1398 ;; Nuke name if it is the same as mailbox name. 1412 ;; TODO: use better table than syntax table
1399 (let ((buffer-length (- (point-max) (point-min))) 1413 (eq ?w (char-syntax mi)))
1400 (i 0) 1414 (goto-char (match-beginning 3))
1401 (names-match-flag t)) 1415 (insert (upcase mi) ". ")))))
1402 (cond ((and (> buffer-length 0) 1416
1403 (eq buffer-length (- mbox-end mbox-beg))) 1417 ;; Nuke name if it is the same as mailbox name.
1404 (goto-char (point-max)) 1418 (let ((buffer-length (- (point-max) (point-min)))
1405 (insert-buffer-substring canonicalization-buffer 1419 (i 0)
1406 mbox-beg mbox-end) 1420 (names-match-flag t))
1407 (while (and names-match-flag 1421 (cond ((and (> buffer-length 0)
1408 (< i buffer-length)) 1422 (eq buffer-length (- mbox-end mbox-beg)))
1409 (or (eq (downcase (char-after (+ i (point-min)))) 1423 (goto-char (point-max))
1410 (downcase 1424 (insert-buffer-substring canonicalization-buffer
1411 (char-after (+ i buffer-length (point-min))))) 1425 mbox-beg mbox-end)
1412 (setq names-match-flag nil)) 1426 (while (and names-match-flag
1413 (setq i (1+ i))) 1427 (< i buffer-length))
1414 (delete-region (+ (point-min) buffer-length) (point-max)) 1428 (or (eq (downcase (char-after (+ i (point-min))))
1415 (if names-match-flag 1429 (downcase
1416 (narrow-to-region (point) (point)))))) 1430 (char-after (+ i buffer-length (point-min)))))
1417 1431 (setq names-match-flag nil))
1418 ;; Nuke name if it's just one word. 1432 (setq i (1+ i)))
1419 (goto-char (point-min)) 1433 (delete-region (+ (point-min) buffer-length) (point-max))
1420 (and mail-extr-ignore-single-names 1434 (if names-match-flag
1421 (not (re-search-forward "[- ]" nil t)) 1435 (narrow-to-region (point) (point))))))
1422 (narrow-to-region (point) (point))) 1436
1423 1437 ;; Nuke name if it's just one word.
1424 ;; Result 1438 (goto-char (point-min))
1425 (list (if (not (= (point-min) (point-max))) 1439 (and mail-extr-ignore-single-names
1426 (buffer-string)) 1440 (not (re-search-forward "[- ]" nil t))
1427 (progn 1441 (narrow-to-region (point) (point)))
1428 (set-buffer canonicalization-buffer) 1442
1429 (if (not (= (point-min) (point-max))) 1443 ;; Record the result
1430 (buffer-string)))) 1444 (setq value-list
1431 ))) 1445 (cons (list (if (not (= (point-min) (point-max)))
1446 (buffer-string))
1447 (save-excursion
1448 (set-buffer canonicalization-buffer)
1449 (if (not (= (point-min) (point-max)))
1450 (buffer-string))))
1451 value-list))
1452
1453 ;; Unless one address is all we wanted,
1454 ;; delete this one from extraction-buffer
1455 ;; and get ready to extract the next address.
1456 (when all
1457 (if end-of-address
1458 (narrow-to-region 1 end-of-address)
1459 (widen))
1460 (delete-region (point-min) (point-max))
1461 (widen))
1462 )))
1463 (if all (nreverse value-list) (car value-list))
1464 ))
1432 1465
1433(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) 1466(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
1434 (let ((word-count 0) 1467 (let ((word-count 0)