diff options
| author | Kenichi Handa | 2004-08-05 00:15:15 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2004-08-05 00:15:15 +0000 |
| commit | 2d8a544976354b3787e4f28f2d97b3ab96f4a052 (patch) | |
| tree | 8c9c6dce6aa62893ce22f243f525e9eb7bc90798 | |
| parent | af1781acbf8fe9e866ee36313d137a7ffb78383b (diff) | |
| download | emacs-2d8a544976354b3787e4f28f2d97b3ab96f4a052.tar.gz emacs-2d8a544976354b3787e4f28f2d97b3ab96f4a052.zip | |
(mail-extr-disable-voodoo): New variable.
(mail-extr-voodoo): Check mail-extr-disable-voodoo.
| -rw-r--r-- | lisp/mail/mail-extr.el | 724 |
1 files changed, 369 insertions, 355 deletions
diff --git a/lisp/mail/mail-extr.el b/lisp/mail/mail-extr.el index faa7ca1bb74..7f2e6fef6b6 100644 --- a/lisp/mail/mail-extr.el +++ b/lisp/mail/mail-extr.el | |||
| @@ -1434,374 +1434,388 @@ consing a string.)" | |||
| 1434 | (if all (nreverse value-list) (car value-list)) | 1434 | (if all (nreverse value-list) (car value-list)) |
| 1435 | )) | 1435 | )) |
| 1436 | 1436 | ||
| 1437 | (defcustom mail-extr-disable-voodoo "\\cj" | ||
| 1438 | "*If it is a regexp, names matching it will never be modified. | ||
| 1439 | If it is neither nil nor a string, modifying of names will never take | ||
| 1440 | place. It affects how `mail-extract-address-components' works." | ||
| 1441 | :type '(choice (regexp :size 0) | ||
| 1442 | (const :tag "Always enabled" nil) | ||
| 1443 | (const :tag "Always disabled" t)) | ||
| 1444 | :group 'mail-extr) | ||
| 1445 | |||
| 1437 | (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) | 1446 | (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) |
| 1438 | (let ((word-count 0) | 1447 | (unless (and mail-extr-disable-voodoo |
| 1439 | (case-fold-search nil) | 1448 | (or (not (stringp mail-extr-disable-voodoo)) |
| 1440 | mixed-case-flag lower-case-flag ;;upper-case-flag | 1449 | (progn |
| 1441 | suffix-flag last-name-comma-flag | 1450 | (goto-char (point-min)) |
| 1442 | ;;cbeg cend | 1451 | (re-search-forward mail-extr-disable-voodoo nil t)))) |
| 1443 | initial | 1452 | (let ((word-count 0) |
| 1444 | begin-again-flag | 1453 | (case-fold-search nil) |
| 1445 | drop-this-word-if-trailing-flag | 1454 | mixed-case-flag lower-case-flag ;;upper-case-flag |
| 1446 | drop-last-word-if-trailing-flag | 1455 | suffix-flag last-name-comma-flag |
| 1447 | word-found-flag | 1456 | ;;cbeg cend |
| 1448 | this-word-beg last-word-beg | 1457 | initial |
| 1449 | name-beg name-end | 1458 | begin-again-flag |
| 1450 | name-done-flag | 1459 | drop-this-word-if-trailing-flag |
| 1451 | ) | 1460 | drop-last-word-if-trailing-flag |
| 1452 | (save-excursion | 1461 | word-found-flag |
| 1453 | (set-syntax-table mail-extr-address-text-syntax-table) | 1462 | this-word-beg last-word-beg |
| 1454 | 1463 | name-beg name-end | |
| 1455 | ;; Get rid of comments. | 1464 | name-done-flag |
| 1456 | (goto-char (point-min)) | 1465 | ) |
| 1457 | (while (not (eobp)) | 1466 | (save-excursion |
| 1458 | ;; Initialize for this iteration of the loop. | 1467 | (set-syntax-table mail-extr-address-text-syntax-table) |
| 1459 | (skip-chars-forward "^({[\"'`") | 1468 | |
| 1460 | (let ((cbeg (point))) | 1469 | ;; Get rid of comments. |
| 1461 | (set-syntax-table mail-extr-address-text-comment-syntax-table) | ||
| 1462 | (if (memq (following-char) '(?\' ?\`)) | ||
| 1463 | (search-forward "'" nil 'move | ||
| 1464 | (if (eq ?\' (following-char)) 2 1)) | ||
| 1465 | (or (mail-extr-safe-move-sexp 1) | ||
| 1466 | (goto-char (point-max)))) | ||
| 1467 | (set-syntax-table mail-extr-address-text-syntax-table) | ||
| 1468 | (when (eq (char-after cbeg) ?\() | ||
| 1469 | ;; Delete the comment itself. | ||
| 1470 | (delete-region cbeg (point)) | ||
| 1471 | ;; Canonicalize whitespace where the comment was. | ||
| 1472 | (skip-chars-backward " \t") | ||
| 1473 | (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)") | ||
| 1474 | (replace-match "") | ||
| 1475 | (setq cbeg (point)) | ||
| 1476 | (skip-chars-forward " \t") | ||
| 1477 | (if (bobp) | ||
| 1478 | (delete-region (point) cbeg) | ||
| 1479 | (just-one-space)))))) | ||
| 1480 | |||
| 1481 | ;; This was moved above. | ||
| 1482 | ;; Fix . used as space | ||
| 1483 | ;; But it belongs here because it occurs not only as | ||
| 1484 | ;; rypens@reks.uia.ac.be (Piet.Rypens) | ||
| 1485 | ;; but also as | ||
| 1486 | ;; "Piet.Rypens" <rypens@reks.uia.ac.be> | ||
| 1487 | ;;(goto-char (point-min)) | ||
| 1488 | ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t) | ||
| 1489 | ;; (replace-match "\\1 \\2" t)) | ||
| 1490 | |||
| 1491 | (unless (search-forward " " nil t) | ||
| 1492 | (goto-char (point-min)) | 1470 | (goto-char (point-min)) |
| 1493 | (cond ((search-forward "_" nil t) | 1471 | (while (not (eobp)) |
| 1494 | ;; Handle the *idiotic* use of underlines as spaces. | 1472 | ;; Initialize for this iteration of the loop. |
| 1495 | ;; Example: fml@foo.bar.dom (First_M._Last) | 1473 | (skip-chars-forward "^({[\"'`") |
| 1496 | (goto-char (point-min)) | 1474 | (let ((cbeg (point))) |
| 1497 | (while (search-forward "_" nil t) | 1475 | (set-syntax-table mail-extr-address-text-comment-syntax-table) |
| 1498 | (replace-match " " t))) | 1476 | (if (memq (following-char) '(?\' ?\`)) |
| 1499 | ((search-forward "." nil t) | 1477 | (search-forward "'" nil 'move |
| 1500 | ;; Fix . used as space | 1478 | (if (eq ?\' (following-char)) 2 1)) |
| 1501 | ;; Example: danj1@cb.att.com (daniel.jacobson) | 1479 | (or (mail-extr-safe-move-sexp 1) |
| 1502 | (goto-char (point-min)) | 1480 | (goto-char (point-max)))) |
| 1503 | (while (re-search-forward mail-extr-bad-dot-pattern nil t) | 1481 | (set-syntax-table mail-extr-address-text-syntax-table) |
| 1504 | (replace-match "\\1 \\2" t))))) | 1482 | (when (eq (char-after cbeg) ?\() |
| 1505 | 1483 | ;; Delete the comment itself. | |
| 1506 | ;; Loop over the words (and other junk) in the name. | 1484 | (delete-region cbeg (point)) |
| 1507 | (goto-char (point-min)) | 1485 | ;; Canonicalize whitespace where the comment was. |
| 1508 | (while (not name-done-flag) | 1486 | (skip-chars-backward " \t") |
| 1509 | 1487 | (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)") | |
| 1510 | (when word-found-flag | 1488 | (replace-match "") |
| 1511 | ;; Last time through this loop we skipped over a word. | 1489 | (setq cbeg (point)) |
| 1512 | (setq last-word-beg this-word-beg) | 1490 | (skip-chars-forward " \t") |
| 1513 | (setq drop-last-word-if-trailing-flag | 1491 | (if (bobp) |
| 1514 | drop-this-word-if-trailing-flag) | 1492 | (delete-region (point) cbeg) |
| 1515 | (setq word-found-flag nil)) | 1493 | (just-one-space)))))) |
| 1516 | 1494 | ||
| 1517 | (when begin-again-flag | 1495 | ;; This was moved above. |
| 1518 | ;; Last time through the loop we found something that | 1496 | ;; Fix . used as space |
| 1519 | ;; indicates we should pretend we are beginning again from | 1497 | ;; But it belongs here because it occurs not only as |
| 1520 | ;; the start. | 1498 | ;; rypens@reks.uia.ac.be (Piet.Rypens) |
| 1521 | (setq word-count 0) | 1499 | ;; but also as |
| 1522 | (setq last-word-beg nil) | 1500 | ;; "Piet.Rypens" <rypens@reks.uia.ac.be> |
| 1523 | (setq drop-last-word-if-trailing-flag nil) | 1501 | ;;(goto-char (point-min)) |
| 1524 | (setq mixed-case-flag nil) | 1502 | ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t) |
| 1525 | (setq lower-case-flag nil) | 1503 | ;; (replace-match "\\1 \\2" t)) |
| 1526 | ;; (setq upper-case-flag nil) | 1504 | |
| 1527 | (setq begin-again-flag nil)) | 1505 | (unless (search-forward " " nil t) |
| 1528 | 1506 | (goto-char (point-min)) | |
| 1529 | ;; Initialize for this iteration of the loop. | 1507 | (cond ((search-forward "_" nil t) |
| 1530 | (mail-extr-skip-whitespace-forward) | 1508 | ;; Handle the *idiotic* use of underlines as spaces. |
| 1531 | (if (eq word-count 0) (narrow-to-region (point) (point-max))) | 1509 | ;; Example: fml@foo.bar.dom (First_M._Last) |
| 1532 | (setq this-word-beg (point)) | 1510 | (goto-char (point-min)) |
| 1533 | (setq drop-this-word-if-trailing-flag nil) | 1511 | (while (search-forward "_" nil t) |
| 1534 | 1512 | (replace-match " " t))) | |
| 1535 | ;; Decide what to do based on what we are looking at. | 1513 | ((search-forward "." nil t) |
| 1536 | (cond | 1514 | ;; Fix . used as space |
| 1537 | 1515 | ;; Example: danj1@cb.att.com (daniel.jacobson) | |
| 1538 | ;; Delete title | 1516 | (goto-char (point-min)) |
| 1539 | ((and (eq word-count 0) | 1517 | (while (re-search-forward mail-extr-bad-dot-pattern nil t) |
| 1540 | (looking-at mail-extr-full-name-prefixes)) | 1518 | (replace-match "\\1 \\2" t))))) |
| 1541 | (goto-char (match-end 0)) | ||
| 1542 | (narrow-to-region (point) (point-max))) | ||
| 1543 | 1519 | ||
| 1544 | ;; Stop after name suffix | 1520 | ;; Loop over the words (and other junk) in the name. |
| 1545 | ((and (>= word-count 2) | 1521 | (goto-char (point-min)) |
| 1546 | (looking-at mail-extr-full-name-suffix-pattern)) | 1522 | (while (not name-done-flag) |
| 1547 | (mail-extr-skip-whitespace-backward) | 1523 | |
| 1548 | (setq suffix-flag (point)) | 1524 | (when word-found-flag |
| 1549 | (if (eq ?, (following-char)) | 1525 | ;; Last time through this loop we skipped over a word. |
| 1550 | (forward-char 1) | 1526 | (setq last-word-beg this-word-beg) |
| 1551 | (insert ?,)) | 1527 | (setq drop-last-word-if-trailing-flag |
| 1552 | ;; Enforce at least one space after comma | 1528 | drop-this-word-if-trailing-flag) |
| 1553 | (or (eq ?\ (following-char)) | 1529 | (setq word-found-flag nil)) |
| 1554 | (insert ?\ )) | 1530 | |
| 1531 | (when begin-again-flag | ||
| 1532 | ;; Last time through the loop we found something that | ||
| 1533 | ;; indicates we should pretend we are beginning again from | ||
| 1534 | ;; the start. | ||
| 1535 | (setq word-count 0) | ||
| 1536 | (setq last-word-beg nil) | ||
| 1537 | (setq drop-last-word-if-trailing-flag nil) | ||
| 1538 | (setq mixed-case-flag nil) | ||
| 1539 | (setq lower-case-flag nil) | ||
| 1540 | ;; (setq upper-case-flag nil) | ||
| 1541 | (setq begin-again-flag nil)) | ||
| 1542 | |||
| 1543 | ;; Initialize for this iteration of the loop. | ||
| 1555 | (mail-extr-skip-whitespace-forward) | 1544 | (mail-extr-skip-whitespace-forward) |
| 1556 | (cond ((memq (following-char) '(?j ?J ?s ?S)) | 1545 | (if (eq word-count 0) (narrow-to-region (point) (point-max))) |
| 1557 | (capitalize-word 1) | 1546 | (setq this-word-beg (point)) |
| 1558 | (if (eq (following-char) ?.) | 1547 | (setq drop-this-word-if-trailing-flag nil) |
| 1559 | (forward-char 1) | 1548 | |
| 1560 | (insert ?.))) | 1549 | ;; Decide what to do based on what we are looking at. |
| 1561 | (t | ||
| 1562 | (upcase-word 1))) | ||
| 1563 | (setq word-found-flag t) | ||
| 1564 | (setq name-done-flag t)) | ||
| 1565 | |||
| 1566 | ;; Handle SCA names | ||
| 1567 | ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" | ||
| 1568 | (goto-char (match-beginning 1)) | ||
| 1569 | (narrow-to-region (point) (point-max)) | ||
| 1570 | (setq begin-again-flag t)) | ||
| 1571 | |||
| 1572 | ;; Check for initial last name followed by comma | ||
| 1573 | ((and (eq ?, (following-char)) | ||
| 1574 | (eq word-count 1)) | ||
| 1575 | (forward-char 1) | ||
| 1576 | (setq last-name-comma-flag t) | ||
| 1577 | (or (eq ?\ (following-char)) | ||
| 1578 | (insert ?\ ))) | ||
| 1579 | |||
| 1580 | ;; Stop before trailing comma-separated comment | ||
| 1581 | ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | ||
| 1582 | ;; *** This case is redundant??? | ||
| 1583 | ;;((eq ?, (following-char)) | ||
| 1584 | ;; (setq name-done-flag t)) | ||
| 1585 | |||
| 1586 | ;; Delete parenthesized/quoted comment/nickname | ||
| 1587 | ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) | ||
| 1588 | (setq cbeg (point)) | ||
| 1589 | (set-syntax-table mail-extr-address-text-comment-syntax-table) | ||
| 1590 | (cond ((memq (following-char) '(?\' ?\`)) | ||
| 1591 | (or (search-forward "'" nil t | ||
| 1592 | (if (eq ?\' (following-char)) 2 1)) | ||
| 1593 | (delete-char 1))) | ||
| 1594 | (t | ||
| 1595 | (or (mail-extr-safe-move-sexp 1) | ||
| 1596 | (goto-char (point-max))))) | ||
| 1597 | (set-syntax-table mail-extr-address-text-syntax-table) | ||
| 1598 | (setq cend (point)) | ||
| 1599 | (cond | 1550 | (cond |
| 1600 | ;; Handle case of entire name being quoted | 1551 | |
| 1552 | ;; Delete title | ||
| 1601 | ((and (eq word-count 0) | 1553 | ((and (eq word-count 0) |
| 1602 | (looking-at " *\\'") | 1554 | (looking-at mail-extr-full-name-prefixes)) |
| 1603 | (>= (- cend cbeg) 2)) | 1555 | (goto-char (match-end 0)) |
| 1604 | (narrow-to-region (1+ cbeg) (1- cend)) | 1556 | (narrow-to-region (point) (point-max))) |
| 1605 | (goto-char (point-min))) | ||
| 1606 | (t | ||
| 1607 | ;; Handle case of quoted initial | ||
| 1608 | (if (and (or (= 3 (- cend cbeg)) | ||
| 1609 | (and (= 4 (- cend cbeg)) | ||
| 1610 | (eq ?. (char-after (+ 2 cbeg))))) | ||
| 1611 | (not (looking-at " *\\'"))) | ||
| 1612 | (setq initial (char-after (1+ cbeg))) | ||
| 1613 | (setq initial nil)) | ||
| 1614 | (delete-region cbeg cend) | ||
| 1615 | (if initial | ||
| 1616 | (insert initial ". "))))) | ||
| 1617 | |||
| 1618 | ;; Handle *Stupid* VMS date stamps | ||
| 1619 | ((looking-at mail-extr-stupid-vms-date-stamp-pattern) | ||
| 1620 | (replace-match "" t)) | ||
| 1621 | |||
| 1622 | ;; Handle Chinese characters. | ||
| 1623 | ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) | ||
| 1624 | (goto-char (match-end 0)) | ||
| 1625 | (setq word-found-flag t)) | ||
| 1626 | |||
| 1627 | ;; Skip initial garbage characters. | ||
| 1628 | ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | ||
| 1629 | ((and (eq word-count 0) | ||
| 1630 | (looking-at mail-extr-leading-garbage)) | ||
| 1631 | (goto-char (match-end 0)) | ||
| 1632 | ;; *** Skip backward over these??? | ||
| 1633 | ;; (skip-chars-backward "& \"") | ||
| 1634 | (narrow-to-region (point) (point-max))) | ||
| 1635 | 1557 | ||
| 1636 | ;; Various stopping points | 1558 | ;; Stop after name suffix |
| 1637 | ((or | 1559 | ((and (>= word-count 2) |
| 1638 | 1560 | (looking-at mail-extr-full-name-suffix-pattern)) | |
| 1639 | ;; Stop before ALL CAPS acronyms, if preceded by mixed-case | 1561 | (mail-extr-skip-whitespace-backward) |
| 1640 | ;; words. Example: XT-DEM. | 1562 | (setq suffix-flag (point)) |
| 1641 | (and (>= word-count 2) | 1563 | (if (eq ?, (following-char)) |
| 1642 | mixed-case-flag | 1564 | (forward-char 1) |
| 1643 | (looking-at mail-extr-weird-acronym-pattern) | 1565 | (insert ?,)) |
| 1644 | (not (looking-at mail-extr-roman-numeral-pattern))) | 1566 | ;; Enforce at least one space after comma |
| 1645 | 1567 | (or (eq ?\ (following-char)) | |
| 1646 | ;; Stop before trailing alternative address | 1568 | (insert ?\ )) |
| 1647 | (looking-at mail-extr-alternative-address-pattern) | 1569 | (mail-extr-skip-whitespace-forward) |
| 1648 | 1570 | (cond ((memq (following-char) '(?j ?J ?s ?S)) | |
| 1649 | ;; Stop before trailing comment not introduced by comma | 1571 | (capitalize-word 1) |
| 1650 | ;; THIS CASE MUST BE AFTER AN EARLIER CASE. | 1572 | (if (eq (following-char) ?.) |
| 1651 | (looking-at mail-extr-trailing-comment-start-pattern) | 1573 | (forward-char 1) |
| 1652 | 1574 | (insert ?.))) | |
| 1653 | ;; Stop before telephone numbers | 1575 | (t |
| 1654 | (and (>= word-count 1) | 1576 | (upcase-word 1))) |
| 1655 | (looking-at mail-extr-telephone-extension-pattern))) | 1577 | (setq word-found-flag t) |
| 1656 | (setq name-done-flag t)) | 1578 | (setq name-done-flag t)) |
| 1657 | 1579 | ||
| 1658 | ;; Delete ham radio call signs | 1580 | ;; Handle SCA names |
| 1659 | ((looking-at mail-extr-ham-call-sign-pattern) | 1581 | ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" |
| 1660 | (delete-region (match-beginning 0) (match-end 0))) | 1582 | (goto-char (match-beginning 1)) |
| 1661 | 1583 | (narrow-to-region (point) (point-max)) | |
| 1662 | ;; Fixup initials | 1584 | (setq begin-again-flag t)) |
| 1663 | ((looking-at mail-extr-initial-pattern) | 1585 | |
| 1664 | (or (eq (following-char) (upcase (following-char))) | 1586 | ;; Check for initial last name followed by comma |
| 1665 | (setq lower-case-flag t)) | 1587 | ((and (eq ?, (following-char)) |
| 1666 | (forward-char 1) | 1588 | (eq word-count 1)) |
| 1667 | (if (eq ?. (following-char)) | 1589 | (forward-char 1) |
| 1668 | (forward-char 1) | 1590 | (setq last-name-comma-flag t) |
| 1669 | (insert ?.)) | 1591 | (or (eq ?\ (following-char)) |
| 1670 | (or (eq ?\ (following-char)) | 1592 | (insert ?\ ))) |
| 1671 | (insert ?\ )) | 1593 | |
| 1672 | (setq word-found-flag t)) | 1594 | ;; Stop before trailing comma-separated comment |
| 1673 | 1595 | ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
| 1674 | ;; Handle BITNET LISTSERV list names. | 1596 | ;; *** This case is redundant??? |
| 1675 | ((and (eq word-count 0) | 1597 | ;;((eq ?, (following-char)) |
| 1676 | (looking-at mail-extr-listserv-list-name-pattern)) | 1598 | ;; (setq name-done-flag t)) |
| 1677 | (narrow-to-region (match-beginning 1) (match-end 1)) | 1599 | |
| 1678 | (setq word-found-flag t) | 1600 | ;; Delete parenthesized/quoted comment/nickname |
| 1679 | (setq name-done-flag t)) | 1601 | ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) |
| 1680 | 1602 | (setq cbeg (point)) | |
| 1681 | ;; Handle & substitution, when & is last and is not first. | 1603 | (set-syntax-table mail-extr-address-text-comment-syntax-table) |
| 1682 | ((and (> word-count 0) | 1604 | (cond ((memq (following-char) '(?\' ?\`)) |
| 1683 | (eq ?\ (preceding-char)) | 1605 | (or (search-forward "'" nil t |
| 1684 | (eq (following-char) ?&) | 1606 | (if (eq ?\' (following-char)) 2 1)) |
| 1685 | (eq (1+ (point)) (point-max))) | 1607 | (delete-char 1))) |
| 1686 | (delete-char 1) | 1608 | (t |
| 1687 | (capitalize-region | 1609 | (or (mail-extr-safe-move-sexp 1) |
| 1688 | (point) | 1610 | (goto-char (point-max))))) |
| 1689 | (progn | 1611 | (set-syntax-table mail-extr-address-text-syntax-table) |
| 1690 | (insert-buffer-substring canonicalization-buffer | 1612 | (setq cend (point)) |
| 1691 | mbox-beg mbox-end) | 1613 | (cond |
| 1692 | (point))) | 1614 | ;; Handle case of entire name being quoted |
| 1693 | (setq disable-initial-guessing-flag t) | 1615 | ((and (eq word-count 0) |
| 1694 | (setq word-found-flag t)) | 1616 | (looking-at " *\\'") |
| 1695 | 1617 | (>= (- cend cbeg) 2)) | |
| 1696 | ;; Handle & between names, as in "Bob & Susie". | 1618 | (narrow-to-region (1+ cbeg) (1- cend)) |
| 1697 | ((and (> word-count 0) (eq (following-char) ?\&)) | 1619 | (goto-char (point-min))) |
| 1698 | (setq name-beg (point)) | 1620 | (t |
| 1699 | (setq name-end (1+ name-beg)) | 1621 | ;; Handle case of quoted initial |
| 1700 | (setq word-found-flag t) | 1622 | (if (and (or (= 3 (- cend cbeg)) |
| 1701 | (goto-char name-end)) | 1623 | (and (= 4 (- cend cbeg)) |
| 1702 | 1624 | (eq ?. (char-after (+ 2 cbeg))))) | |
| 1703 | ;; Regular name words | 1625 | (not (looking-at " *\\'"))) |
| 1704 | ((looking-at mail-extr-name-pattern) | 1626 | (setq initial (char-after (1+ cbeg))) |
| 1705 | (setq name-beg (point)) | 1627 | (setq initial nil)) |
| 1706 | (setq name-end (match-end 0)) | 1628 | (delete-region cbeg cend) |
| 1707 | 1629 | (if initial | |
| 1708 | ;; Certain words will be dropped if they are at the end. | 1630 | (insert initial ". "))))) |
| 1709 | (and (>= word-count 2) | 1631 | |
| 1710 | (not lower-case-flag) | 1632 | ;; Handle *Stupid* VMS date stamps |
| 1711 | (or | 1633 | ((looking-at mail-extr-stupid-vms-date-stamp-pattern) |
| 1712 | ;; Trailing 4-or-more letter lowercase words preceded by | 1634 | (replace-match "" t)) |
| 1713 | ;; mixed case or uppercase words will be dropped. | 1635 | |
| 1714 | (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'") | 1636 | ;; Handle Chinese characters. |
| 1715 | ;; Drop a trailing word which is terminated with a period. | 1637 | ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) |
| 1716 | (eq ?. (char-after (1- name-end)))) | 1638 | (goto-char (match-end 0)) |
| 1717 | (setq drop-this-word-if-trailing-flag t)) | 1639 | (setq word-found-flag t)) |
| 1718 | 1640 | ||
| 1719 | ;; Set the flags that indicate whether we have seen a lowercase | 1641 | ;; Skip initial garbage characters. |
| 1720 | ;; word, a mixed case word, and an uppercase word. | 1642 | ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. |
| 1721 | (if (re-search-forward "[[:lower:]]" name-end t) | 1643 | ((and (eq word-count 0) |
| 1722 | (if (progn | 1644 | (looking-at mail-extr-leading-garbage)) |
| 1723 | (goto-char name-beg) | 1645 | (goto-char (match-end 0)) |
| 1724 | (re-search-forward "[[:upper:]]" name-end t)) | 1646 | ;; *** Skip backward over these??? |
| 1725 | (setq mixed-case-flag t) | 1647 | ;; (skip-chars-backward "& \"") |
| 1648 | (narrow-to-region (point) (point-max))) | ||
| 1649 | |||
| 1650 | ;; Various stopping points | ||
| 1651 | ((or | ||
| 1652 | |||
| 1653 | ;; Stop before ALL CAPS acronyms, if preceded by mixed-case | ||
| 1654 | ;; words. Example: XT-DEM. | ||
| 1655 | (and (>= word-count 2) | ||
| 1656 | mixed-case-flag | ||
| 1657 | (looking-at mail-extr-weird-acronym-pattern) | ||
| 1658 | (not (looking-at mail-extr-roman-numeral-pattern))) | ||
| 1659 | |||
| 1660 | ;; Stop before trailing alternative address | ||
| 1661 | (looking-at mail-extr-alternative-address-pattern) | ||
| 1662 | |||
| 1663 | ;; Stop before trailing comment not introduced by comma | ||
| 1664 | ;; THIS CASE MUST BE AFTER AN EARLIER CASE. | ||
| 1665 | (looking-at mail-extr-trailing-comment-start-pattern) | ||
| 1666 | |||
| 1667 | ;; Stop before telephone numbers | ||
| 1668 | (and (>= word-count 1) | ||
| 1669 | (looking-at mail-extr-telephone-extension-pattern))) | ||
| 1670 | (setq name-done-flag t)) | ||
| 1671 | |||
| 1672 | ;; Delete ham radio call signs | ||
| 1673 | ((looking-at mail-extr-ham-call-sign-pattern) | ||
| 1674 | (delete-region (match-beginning 0) (match-end 0))) | ||
| 1675 | |||
| 1676 | ;; Fixup initials | ||
| 1677 | ((looking-at mail-extr-initial-pattern) | ||
| 1678 | (or (eq (following-char) (upcase (following-char))) | ||
| 1726 | (setq lower-case-flag t)) | 1679 | (setq lower-case-flag t)) |
| 1727 | ;; (setq upper-case-flag t) | 1680 | (forward-char 1) |
| 1728 | ) | 1681 | (if (eq ?. (following-char)) |
| 1682 | (forward-char 1) | ||
| 1683 | (insert ?.)) | ||
| 1684 | (or (eq ?\ (following-char)) | ||
| 1685 | (insert ?\ )) | ||
| 1686 | (setq word-found-flag t)) | ||
| 1687 | |||
| 1688 | ;; Handle BITNET LISTSERV list names. | ||
| 1689 | ((and (eq word-count 0) | ||
| 1690 | (looking-at mail-extr-listserv-list-name-pattern)) | ||
| 1691 | (narrow-to-region (match-beginning 1) (match-end 1)) | ||
| 1692 | (setq word-found-flag t) | ||
| 1693 | (setq name-done-flag t)) | ||
| 1694 | |||
| 1695 | ;; Handle & substitution, when & is last and is not first. | ||
| 1696 | ((and (> word-count 0) | ||
| 1697 | (eq ?\ (preceding-char)) | ||
| 1698 | (eq (following-char) ?&) | ||
| 1699 | (eq (1+ (point)) (point-max))) | ||
| 1700 | (delete-char 1) | ||
| 1701 | (capitalize-region | ||
| 1702 | (point) | ||
| 1703 | (progn | ||
| 1704 | (insert-buffer-substring canonicalization-buffer | ||
| 1705 | mbox-beg mbox-end) | ||
| 1706 | (point))) | ||
| 1707 | (setq disable-initial-guessing-flag t) | ||
| 1708 | (setq word-found-flag t)) | ||
| 1709 | |||
| 1710 | ;; Handle & between names, as in "Bob & Susie". | ||
| 1711 | ((and (> word-count 0) (eq (following-char) ?\&)) | ||
| 1712 | (setq name-beg (point)) | ||
| 1713 | (setq name-end (1+ name-beg)) | ||
| 1714 | (setq word-found-flag t) | ||
| 1715 | (goto-char name-end)) | ||
| 1716 | |||
| 1717 | ;; Regular name words | ||
| 1718 | ((looking-at mail-extr-name-pattern) | ||
| 1719 | (setq name-beg (point)) | ||
| 1720 | (setq name-end (match-end 0)) | ||
| 1721 | |||
| 1722 | ;; Certain words will be dropped if they are at the end. | ||
| 1723 | (and (>= word-count 2) | ||
| 1724 | (not lower-case-flag) | ||
| 1725 | (or | ||
| 1726 | ;; Trailing 4-or-more letter lowercase words preceded by | ||
| 1727 | ;; mixed case or uppercase words will be dropped. | ||
| 1728 | (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'") | ||
| 1729 | ;; Drop a trailing word which is terminated with a period. | ||
| 1730 | (eq ?. (char-after (1- name-end)))) | ||
| 1731 | (setq drop-this-word-if-trailing-flag t)) | ||
| 1732 | |||
| 1733 | ;; Set the flags that indicate whether we have seen a lowercase | ||
| 1734 | ;; word, a mixed case word, and an uppercase word. | ||
| 1735 | (if (re-search-forward "[[:lower:]]" name-end t) | ||
| 1736 | (if (progn | ||
| 1737 | (goto-char name-beg) | ||
| 1738 | (re-search-forward "[[:upper:]]" name-end t)) | ||
| 1739 | (setq mixed-case-flag t) | ||
| 1740 | (setq lower-case-flag t)) | ||
| 1741 | ;; (setq upper-case-flag t) | ||
| 1742 | ) | ||
| 1729 | 1743 | ||
| 1730 | (goto-char name-end) | 1744 | (goto-char name-end) |
| 1731 | (setq word-found-flag t)) | 1745 | (setq word-found-flag t)) |
| 1732 | 1746 | ||
| 1733 | ;; Allow a number as a word, if it doesn't mean anything else. | 1747 | ;; Allow a number as a word, if it doesn't mean anything else. |
| 1734 | ((looking-at "[0-9]+\\>") | 1748 | ((looking-at "[0-9]+\\>") |
| 1735 | (setq name-beg (point)) | 1749 | (setq name-beg (point)) |
| 1736 | (setq name-end (match-end 0)) | 1750 | (setq name-end (match-end 0)) |
| 1751 | (goto-char name-end) | ||
| 1752 | (setq word-found-flag t)) | ||
| 1753 | |||
| 1754 | (t | ||
| 1755 | (setq name-done-flag t) | ||
| 1756 | )) | ||
| 1757 | |||
| 1758 | ;; Count any word that we skipped over. | ||
| 1759 | (if word-found-flag | ||
| 1760 | (setq word-count (1+ word-count)))) | ||
| 1761 | |||
| 1762 | ;; If the last thing in the name is 2 or more periods, or one or more | ||
| 1763 | ;; other sentence terminators (but not a single period) then keep them | ||
| 1764 | ;; and the preceding word. This is for the benefit of whole sentences | ||
| 1765 | ;; in the name field: it's better behavior than dropping the last word | ||
| 1766 | ;; of the sentence... | ||
| 1767 | (if (and (not suffix-flag) | ||
| 1768 | (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'")) | ||
| 1769 | (goto-char (setq suffix-flag (point-max)))) | ||
| 1770 | |||
| 1771 | ;; Drop everything after point and certain trailing words. | ||
| 1772 | (narrow-to-region (point-min) | ||
| 1773 | (or (and drop-last-word-if-trailing-flag | ||
| 1774 | last-word-beg) | ||
| 1775 | (point))) | ||
| 1776 | |||
| 1777 | ;; Xerox's mailers SUCK!!!!!! | ||
| 1778 | ;; We simply refuse to believe that any last name is PARC or ADOC. | ||
| 1779 | ;; If it looks like that is the last name, that there is no meaningful | ||
| 1780 | ;; here at all. Actually I guess it would be best to map patterns | ||
| 1781 | ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't | ||
| 1782 | ;; actually know that that is what's going on. | ||
| 1783 | (unless suffix-flag | ||
| 1784 | (goto-char (point-min)) | ||
| 1785 | (let ((case-fold-search t)) | ||
| 1786 | (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'") | ||
| 1787 | (erase-buffer)))) | ||
| 1788 | |||
| 1789 | ;; If last name first put it at end (but before suffix) | ||
| 1790 | (when last-name-comma-flag | ||
| 1791 | (goto-char (point-min)) | ||
| 1792 | (search-forward ",") | ||
| 1793 | (setq name-end (1- (point))) | ||
| 1794 | (goto-char (or suffix-flag (point-max))) | ||
| 1795 | (or (eq ?\ (preceding-char)) | ||
| 1796 | (insert ?\ )) | ||
| 1797 | (insert-buffer-substring (current-buffer) (point-min) name-end) | ||
| 1737 | (goto-char name-end) | 1798 | (goto-char name-end) |
| 1738 | (setq word-found-flag t)) | 1799 | (skip-chars-forward "\t ,") |
| 1739 | 1800 | (narrow-to-region (point) (point-max))) | |
| 1740 | (t | ||
| 1741 | (setq name-done-flag t) | ||
| 1742 | )) | ||
| 1743 | |||
| 1744 | ;; Count any word that we skipped over. | ||
| 1745 | (if word-found-flag | ||
| 1746 | (setq word-count (1+ word-count)))) | ||
| 1747 | |||
| 1748 | ;; If the last thing in the name is 2 or more periods, or one or more | ||
| 1749 | ;; other sentence terminators (but not a single period) then keep them | ||
| 1750 | ;; and the preceding word. This is for the benefit of whole sentences | ||
| 1751 | ;; in the name field: it's better behavior than dropping the last word | ||
| 1752 | ;; of the sentence... | ||
| 1753 | (if (and (not suffix-flag) | ||
| 1754 | (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'")) | ||
| 1755 | (goto-char (setq suffix-flag (point-max)))) | ||
| 1756 | |||
| 1757 | ;; Drop everything after point and certain trailing words. | ||
| 1758 | (narrow-to-region (point-min) | ||
| 1759 | (or (and drop-last-word-if-trailing-flag | ||
| 1760 | last-word-beg) | ||
| 1761 | (point))) | ||
| 1762 | |||
| 1763 | ;; Xerox's mailers SUCK!!!!!! | ||
| 1764 | ;; We simply refuse to believe that any last name is PARC or ADOC. | ||
| 1765 | ;; If it looks like that is the last name, that there is no meaningful | ||
| 1766 | ;; here at all. Actually I guess it would be best to map patterns | ||
| 1767 | ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't | ||
| 1768 | ;; actually know that that is what's going on. | ||
| 1769 | (unless suffix-flag | ||
| 1770 | (goto-char (point-min)) | ||
| 1771 | (let ((case-fold-search t)) | ||
| 1772 | (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'") | ||
| 1773 | (erase-buffer)))) | ||
| 1774 | 1801 | ||
| 1775 | ;; If last name first put it at end (but before suffix) | 1802 | ;; Delete leading and trailing junk characters. |
| 1776 | (when last-name-comma-flag | 1803 | ;; *** This is probably completely unneeded now. |
| 1804 | ;;(goto-char (point-max)) | ||
| 1805 | ;;(skip-chars-backward mail-extr-non-end-name-chars) | ||
| 1806 | ;;(if (eq ?. (following-char)) | ||
| 1807 | ;; (forward-char 1)) | ||
| 1808 | ;;(narrow-to-region (point) | ||
| 1809 | ;; (progn | ||
| 1810 | ;; (goto-char (point-min)) | ||
| 1811 | ;; (skip-chars-forward mail-extr-non-begin-name-chars) | ||
| 1812 | ;; (point))) | ||
| 1813 | |||
| 1814 | ;; Compress whitespace | ||
| 1777 | (goto-char (point-min)) | 1815 | (goto-char (point-min)) |
| 1778 | (search-forward ",") | 1816 | (while (re-search-forward "[ \t\n]+" nil t) |
| 1779 | (setq name-end (1- (point))) | 1817 | (replace-match (if (eobp) "" " ") t)) |
| 1780 | (goto-char (or suffix-flag (point-max))) | 1818 | )))) |
| 1781 | (or (eq ?\ (preceding-char)) | ||
| 1782 | (insert ?\ )) | ||
| 1783 | (insert-buffer-substring (current-buffer) (point-min) name-end) | ||
| 1784 | (goto-char name-end) | ||
| 1785 | (skip-chars-forward "\t ,") | ||
| 1786 | (narrow-to-region (point) (point-max))) | ||
| 1787 | |||
| 1788 | ;; Delete leading and trailing junk characters. | ||
| 1789 | ;; *** This is probably completely unneeded now. | ||
| 1790 | ;;(goto-char (point-max)) | ||
| 1791 | ;;(skip-chars-backward mail-extr-non-end-name-chars) | ||
| 1792 | ;;(if (eq ?. (following-char)) | ||
| 1793 | ;; (forward-char 1)) | ||
| 1794 | ;;(narrow-to-region (point) | ||
| 1795 | ;; (progn | ||
| 1796 | ;; (goto-char (point-min)) | ||
| 1797 | ;; (skip-chars-forward mail-extr-non-begin-name-chars) | ||
| 1798 | ;; (point))) | ||
| 1799 | |||
| 1800 | ;; Compress whitespace | ||
| 1801 | (goto-char (point-min)) | ||
| 1802 | (while (re-search-forward "[ \t\n]+" nil t) | ||
| 1803 | (replace-match (if (eobp) "" " ") t)) | ||
| 1804 | ))) | ||
| 1805 | 1819 | ||
| 1806 | 1820 | ||
| 1807 | 1821 | ||