diff options
| author | Chong Yidong | 2013-02-04 20:02:25 +0800 |
|---|---|---|
| committer | Chong Yidong | 2013-02-04 20:02:25 +0800 |
| commit | 6e5c1569e941d385d28466a337ece0322bfa93e7 (patch) | |
| tree | 4a2cef76a54c9ca882c33f797d9c91980a3554b1 | |
| parent | 84a06b500fd1cb500e89b93d3f5746b60c6ffdd4 (diff) | |
| download | emacs-6e5c1569e941d385d28466a337ece0322bfa93e7.tar.gz emacs-6e5c1569e941d385d28466a337ece0322bfa93e7.zip | |
Merge FFAP's URI-detection code into thingatpt.el.
* lisp/ffap.el: Require thingatpt.
(ffap-url-at-point): Delegate URI detection to thing-at-point.
All URI-valid characters are now recognized.
(ffap-string-at-point): Use use-region-p.
(ffap-url-regexp): Extra character is handled by thing-at-point.
(ffap-string-at-point-mode-alist): Allow parentheses.
(ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p):
Convert to aliases; code moved to thingatpt.el.
(ffap-gnus-hook): Use setq-local.
* lisp/thingatpt.el: Rewrite the URL detection routines, absorbing some
code from ffap.el.
(thing-at-point-beginning-of-url-regexp): New var.
(thing-at-point-uri-schemes): Update list of URI schemes.
(thing-at-point-url-regexp): Variable deleted.
(thing-at-point-markedup-url-regexp): Disallow newlines.
(thing-at-point-newsgroup-regexp)
(thing-at-point-newsgroup-heads)
(thing-at-point-default-mail-uri-scheme): New variables.
(thing-at-point-bounds-of-url-at-point): Rewrite. Use ffap's
method to find the possible bounds of the URI at point. New
optional argument to find ill-formed URIs.
(thing-at-point-url-at-point): Rewrite. New arguments for finding
ill-formed URIs. Use thing-at-point-bounds-of-url-at-point, and
the scheme-adding heuristics from ffap-url-at-point.
(thing-at-point--bounds-of-well-formed-url): New function. Do
parens matching to decide whether to include parens in the URI
* test/automated/thingatpt.el: New file.
Fixes: debbugs:5673
| -rw-r--r-- | lisp/ChangeLog | 31 | ||||
| -rw-r--r-- | lisp/ffap.el | 115 | ||||
| -rw-r--r-- | lisp/thingatpt.el | 295 | ||||
| -rw-r--r-- | test/ChangeLog | 4 | ||||
| -rw-r--r-- | test/automated/thingatpt.el | 88 |
5 files changed, 362 insertions, 171 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index b43b7bc61d1..9a7a48b186a 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,34 @@ | |||
| 1 | 2013-02-04 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * thingatpt.el: Rewrite the URL detection routines, absorbing some | ||
| 4 | code from ffap.el. | ||
| 5 | (thing-at-point-beginning-of-url-regexp): New var. | ||
| 6 | (thing-at-point-uri-schemes): Update list of URI schemes. | ||
| 7 | (thing-at-point-url-regexp): Variable deleted. | ||
| 8 | (thing-at-point-markedup-url-regexp): Disallow newlines. | ||
| 9 | (thing-at-point-newsgroup-regexp) | ||
| 10 | (thing-at-point-newsgroup-heads) | ||
| 11 | (thing-at-point-default-mail-uri-scheme): New variables. | ||
| 12 | (thing-at-point-bounds-of-url-at-point): Rewrite. Use ffap's | ||
| 13 | method to find the possible bounds of the URI at point. New | ||
| 14 | optional argument to find ill-formed URIs. | ||
| 15 | (thing-at-point-url-at-point): Rewrite. New arguments for finding | ||
| 16 | ill-formed URIs. Use thing-at-point-bounds-of-url-at-point, and | ||
| 17 | the scheme-adding heuristics from ffap-url-at-point. | ||
| 18 | (thing-at-point--bounds-of-well-formed-url): New function. Do | ||
| 19 | parens matching to decide whether to include parens in the URI | ||
| 20 | (Bug#9153). | ||
| 21 | |||
| 22 | * ffap.el: Require thingatpt. | ||
| 23 | (ffap-url-at-point): Delegate URI detection to thing-at-point. | ||
| 24 | All URI-valid characters are now recognized (Bug#5673). | ||
| 25 | (ffap-string-at-point): Use use-region-p. | ||
| 26 | (ffap-url-regexp): Extra character is handled by thing-at-point. | ||
| 27 | (ffap-string-at-point-mode-alist): Allow parentheses. | ||
| 28 | (ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p): | ||
| 29 | Convert to aliases; code moved to thingatpt.el. | ||
| 30 | (ffap-gnus-hook): Use setq-local. | ||
| 31 | |||
| 1 | 2013-02-04 Glenn Morris <rgm@gnu.org> | 32 | 2013-02-04 Glenn Morris <rgm@gnu.org> |
| 2 | 33 | ||
| 3 | * emacs-lisp/ert.el (ert--explain-format-atom): | 34 | * emacs-lisp/ert.el (ert--explain-format-atom): |
diff --git a/lisp/ffap.el b/lisp/ffap.el index c5b0784e5a2..0769469cbf2 100644 --- a/lisp/ffap.el +++ b/lisp/ffap.el | |||
| @@ -106,6 +106,7 @@ | |||
| 106 | ;;; Code: | 106 | ;;; Code: |
| 107 | 107 | ||
| 108 | (require 'url-parse) | 108 | (require 'url-parse) |
| 109 | (require 'thingatpt) | ||
| 109 | 110 | ||
| 110 | (define-obsolete-variable-alias 'ffap-version 'emacs-version "23.2") | 111 | (define-obsolete-variable-alias 'ffap-version 'emacs-version "23.2") |
| 111 | 112 | ||
| @@ -178,16 +179,14 @@ Note this name may be omitted if it equals the default | |||
| 178 | :group 'ffap) | 179 | :group 'ffap) |
| 179 | 180 | ||
| 180 | (defvar ffap-url-regexp | 181 | (defvar ffap-url-regexp |
| 181 | ;; Could just use `url-nonrelative-link' of w3, if loaded. | ||
| 182 | ;; This regexp is not exhaustive, it just matches common cases. | ||
| 183 | (concat | 182 | (concat |
| 184 | "\\(" | 183 | "\\(" |
| 185 | "news\\(post\\)?:\\|mailto:\\|file:" ; no host ok | 184 | "news\\(post\\)?:\\|mailto:\\|file:" ; no host ok |
| 186 | "\\|" | 185 | "\\|" |
| 187 | "\\(ftp\\|https?\\|telnet\\|gopher\\|www\\|wais\\)://" ; needs host | 186 | "\\(ftp\\|https?\\|telnet\\|gopher\\|www\\|wais\\)://" ; needs host |
| 188 | "\\)." ; require one more character | 187 | "\\)") |
| 189 | ) | 188 | "Regexp matching the beginning of a URI, for FFAP. |
| 190 | "Regexp matching URLs. Use nil to disable URL features in ffap.") | 189 | If the value is nil, disable URL-matching features in ffap.") |
| 191 | 190 | ||
| 192 | (defcustom ffap-foo-at-bar-prefix "mailto" | 191 | (defcustom ffap-foo-at-bar-prefix "mailto" |
| 193 | "Presumed URL prefix type of strings like \"<foo.9z@bar>\". | 192 | "Presumed URL prefix type of strings like \"<foo.9z@bar>\". |
| @@ -571,38 +570,9 @@ Looks at `ffap-ftp-default-user', returns \"\" for \"localhost\"." | |||
| 571 | (ffap-ftp-regexp (ffap-host-to-filename mach)) | 570 | (ffap-ftp-regexp (ffap-host-to-filename mach)) |
| 572 | )) | 571 | )) |
| 573 | 572 | ||
| 574 | (defvar ffap-newsgroup-regexp "^[[:lower:]]+\\.[-+[:lower:]_0-9.]+$" | 573 | (defvaralias 'ffap-newsgroup-regexp 'thing-at-point-newsgroup-regexp) |
| 575 | "Strings not matching this fail `ffap-newsgroup-p'.") | 574 | (defvaralias 'ffap-newsgroup-heads 'thing-at-point-newsgroup-heads) |
| 576 | (defvar ffap-newsgroup-heads ; entirely inadequate | 575 | (defalias 'ffap-newsgroup-p 'thing-at-point-newsgroup-p) |
| 577 | '("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk") | ||
| 578 | "Used by `ffap-newsgroup-p' if gnus is not running.") | ||
| 579 | |||
| 580 | (defun ffap-newsgroup-p (string) | ||
| 581 | "Return STRING if it looks like a newsgroup name, else nil." | ||
| 582 | (and | ||
| 583 | (string-match ffap-newsgroup-regexp string) | ||
| 584 | (let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb)) | ||
| 585 | (heads ffap-newsgroup-heads) | ||
| 586 | htb ret) | ||
| 587 | (while htbs | ||
| 588 | (setq htb (car htbs) htbs (cdr htbs)) | ||
| 589 | (condition-case nil | ||
| 590 | (progn | ||
| 591 | ;; errs: htb symbol may be unbound, or not a hash-table. | ||
| 592 | ;; gnus-gethash is just a macro for intern-soft. | ||
| 593 | (and (symbol-value htb) | ||
| 594 | (intern-soft string (symbol-value htb)) | ||
| 595 | (setq ret string htbs nil)) | ||
| 596 | ;; If we made it this far, gnus is running, so ignore "heads": | ||
| 597 | (setq heads nil)) | ||
| 598 | (error nil))) | ||
| 599 | (or ret (not heads) | ||
| 600 | (let ((head (string-match "\\`\\([[:lower:]]+\\)\\." string))) | ||
| 601 | (and head (setq head (substring string 0 (match-end 1))) | ||
| 602 | (member head heads) | ||
| 603 | (setq ret string)))) | ||
| 604 | ;; Is there ever a need to modify string as a newsgroup name? | ||
| 605 | ret))) | ||
| 606 | 576 | ||
| 607 | (defsubst ffap-url-p (string) | 577 | (defsubst ffap-url-p (string) |
| 608 | "If STRING looks like an URL, return it (maybe improved), else nil." | 578 | "If STRING looks like an URL, return it (maybe improved), else nil." |
| @@ -1017,7 +987,7 @@ If a given RFC isn't in these then `ffap-rfc-path' is offered." | |||
| 1017 | ;; * no commas (good for latex) | 987 | ;; * no commas (good for latex) |
| 1018 | (file "--:\\\\$+<>@-Z_[:alpha:]~*?" "<@" "@>;.,!:") | 988 | (file "--:\\\\$+<>@-Z_[:alpha:]~*?" "<@" "@>;.,!:") |
| 1019 | ;; An url, or maybe a email/news message-id: | 989 | ;; An url, or maybe a email/news message-id: |
| 1020 | (url "--:=&?$+@-Z_[:alpha:]~#,%;*" "^[:alnum:]" ":;.,!?") | 990 | (url "--:=&?$+@-Z_[:alpha:]~#,%;*()!'" "^[0-9a-zA-Z]" ":;.,!?") |
| 1021 | ;; Find a string that does *not* contain a colon: | 991 | ;; Find a string that does *not* contain a colon: |
| 1022 | (nocolon "--9$+<>@-Z_[:alpha:]~" "<@" "@>;.,!?") | 992 | (nocolon "--9$+<>@-Z_[:alpha:]~" "<@" "@>;.,!?") |
| 1023 | ;; A machine: | 993 | ;; A machine: |
| @@ -1031,7 +1001,7 @@ possibly a major-mode name, or one of the symbol | |||
| 1031 | Function `ffap-string-at-point' uses the data fields as follows: | 1001 | Function `ffap-string-at-point' uses the data fields as follows: |
| 1032 | 1. find a maximal string of CHARS around point, | 1002 | 1. find a maximal string of CHARS around point, |
| 1033 | 2. strip BEG chars before point from the beginning, | 1003 | 2. strip BEG chars before point from the beginning, |
| 1034 | 3. Strip END chars after point from the end.") | 1004 | 3. strip END chars after point from the end.") |
| 1035 | 1005 | ||
| 1036 | (defvar ffap-string-at-point nil | 1006 | (defvar ffap-string-at-point nil |
| 1037 | ;; Added at suggestion of RHOGEE (for ff-paths), 7/24/95. | 1007 | ;; Added at suggestion of RHOGEE (for ff-paths), 7/24/95. |
| @@ -1050,22 +1020,22 @@ Sets the variable `ffap-string-at-point' and the variable | |||
| 1050 | (or (assq (or mode major-mode) ffap-string-at-point-mode-alist) | 1020 | (or (assq (or mode major-mode) ffap-string-at-point-mode-alist) |
| 1051 | (assq 'file ffap-string-at-point-mode-alist)))) | 1021 | (assq 'file ffap-string-at-point-mode-alist)))) |
| 1052 | (pt (point)) | 1022 | (pt (point)) |
| 1053 | (str | 1023 | (beg (if (use-region-p) |
| 1054 | (if (and transient-mark-mode mark-active) | 1024 | (region-beginning) |
| 1055 | (buffer-substring | 1025 | (save-excursion |
| 1056 | (setcar ffap-string-at-point-region (region-beginning)) | 1026 | (skip-chars-backward (car args)) |
| 1057 | (setcar (cdr ffap-string-at-point-region) (region-end))) | 1027 | (skip-chars-forward (nth 1 args) pt) |
| 1058 | (buffer-substring | 1028 | (point)))) |
| 1059 | (save-excursion | 1029 | (end (if (use-region-p) |
| 1060 | (skip-chars-backward (car args)) | 1030 | (region-end) |
| 1061 | (skip-chars-forward (nth 1 args) pt) | 1031 | (save-excursion |
| 1062 | (setcar ffap-string-at-point-region (point))) | 1032 | (skip-chars-forward (car args)) |
| 1063 | (save-excursion | 1033 | (skip-chars-backward (nth 2 args) pt) |
| 1064 | (skip-chars-forward (car args)) | 1034 | (point))))) |
| 1065 | (skip-chars-backward (nth 2 args) pt) | 1035 | (setq ffap-string-at-point |
| 1066 | (setcar (cdr ffap-string-at-point-region) (point))))))) | 1036 | (buffer-substring-no-properties |
| 1067 | (set-text-properties 0 (length str) nil str) | 1037 | (setcar ffap-string-at-point-region beg) |
| 1068 | (setq ffap-string-at-point str))) | 1038 | (setcar (cdr ffap-string-at-point-region) end))))) |
| 1069 | 1039 | ||
| 1070 | (defun ffap-string-around () | 1040 | (defun ffap-string-around () |
| 1071 | ;; Sometimes useful to decide how to treat a string. | 1041 | ;; Sometimes useful to decide how to treat a string. |
| @@ -1098,35 +1068,15 @@ Assumes the buffer has not changed." | |||
| 1098 | 1068 | ||
| 1099 | (defun ffap-url-at-point () | 1069 | (defun ffap-url-at-point () |
| 1100 | "Return URL from around point if it exists, or nil." | 1070 | "Return URL from around point if it exists, or nil." |
| 1101 | ;; Could use w3's url-get-url-at-point instead. Both handle "URL:", | ||
| 1102 | ;; ignore non-relative links, trim punctuation. The other will | ||
| 1103 | ;; actually look back if point is in whitespace, but I would rather | ||
| 1104 | ;; ffap be less aggressive in such situations. | ||
| 1105 | (when ffap-url-regexp | 1071 | (when ffap-url-regexp |
| 1106 | (or (and (eq major-mode 'w3-mode) ; In a w3 buffer button? | 1072 | (or (and (eq major-mode 'w3-mode) ; In a w3 buffer button? |
| 1107 | (w3-view-this-url t)) | 1073 | (w3-view-this-url t)) |
| 1108 | ;; Is there a reason not to strip trailing colon? | 1074 | (let ((thing-at-point-beginning-of-url-regexp ffap-url-regexp) |
| 1109 | (let ((name (ffap-string-at-point 'url))) | 1075 | (thing-at-point-default-mail-scheme ffap-foo-at-bar-prefix)) |
| 1110 | (cond | 1076 | (thing-at-point-url-at-point t |
| 1111 | ((string-match "^url:" name) (setq name (substring name 4))) | 1077 | (if (use-region-p) |
| 1112 | ((and (string-match "\\`[^:</>@]+@[^:</>@]+[[:alnum:]]\\'" name) | 1078 | (cons (region-beginning) |
| 1113 | ;; "foo@bar": could be "mailto" or "news" (a Message-ID). | 1079 | (region-end)))))))) |
| 1114 | ;; Without "<>" it must be "mailto". Otherwise could be | ||
| 1115 | ;; either, so consult `ffap-foo-at-bar-prefix'. | ||
| 1116 | (let ((prefix (if (and (equal (ffap-string-around) "<>") | ||
| 1117 | ;; Expect some odd characters: | ||
| 1118 | (string-match "[$.0-9].*[$.0-9].*@" name)) | ||
| 1119 | ;; Could be news: | ||
| 1120 | ffap-foo-at-bar-prefix | ||
| 1121 | "mailto"))) | ||
| 1122 | (and prefix (setq name (concat prefix ":" name)))))) | ||
| 1123 | ((ffap-newsgroup-p name) (setq name (concat "news:" name))) | ||
| 1124 | ((and (string-match "\\`[[:alnum:]]+\\'" name) ; <mic> <root> <nobody> | ||
| 1125 | (equal (ffap-string-around) "<>") | ||
| 1126 | ;; (ffap-user-p name): | ||
| 1127 | (not (string-match "~" (expand-file-name (concat "~" name))))) | ||
| 1128 | (setq name (concat "mailto:" name))) | ||
| 1129 | ((ffap-url-p name))))))) | ||
| 1130 | 1080 | ||
| 1131 | (defvar ffap-gopher-regexp | 1081 | (defvar ffap-gopher-regexp |
| 1132 | "^.*\\<\\(Type\\|Name\\|Path\\|Host\\|Port\\) *= *\\(.*\\) *$" | 1082 | "^.*\\<\\(Type\\|Name\\|Path\\|Host\\|Port\\) *= *\\(.*\\) *$" |
| @@ -1763,7 +1713,8 @@ Only intended for interactive use." | |||
| 1763 | 1713 | ||
| 1764 | (defun ffap-gnus-hook () | 1714 | (defun ffap-gnus-hook () |
| 1765 | "Bind `ffap-gnus-next' and `ffap-gnus-menu' to M-l and M-m, resp." | 1715 | "Bind `ffap-gnus-next' and `ffap-gnus-menu' to M-l and M-m, resp." |
| 1766 | (set (make-local-variable 'ffap-foo-at-bar-prefix) "news") ; message-id's | 1716 | ;; message-id's |
| 1717 | (setq-local thing-at-point-default-mail-uri-scheme "news") | ||
| 1767 | ;; Note "l", "L", "m", "M" are taken: | 1718 | ;; Note "l", "L", "m", "M" are taken: |
| 1768 | (local-set-key "\M-l" 'ffap-gnus-next) | 1719 | (local-set-key "\M-l" 'ffap-gnus-next) |
| 1769 | (local-set-key "\M-m" 'ffap-gnus-menu)) | 1720 | (local-set-key "\M-m" 'ffap-gnus-menu)) |
diff --git a/lisp/thingatpt.el b/lisp/thingatpt.el index e1e3e8e1e46..9526cb76e74 100644 --- a/lisp/thingatpt.el +++ b/lisp/thingatpt.el | |||
| @@ -232,7 +232,7 @@ The bounds of THING are determined by `bounds-of-thing-at-point'." | |||
| 232 | (put 'defun 'end-op 'end-of-defun) | 232 | (put 'defun 'end-op 'end-of-defun) |
| 233 | (put 'defun 'forward-op 'end-of-defun) | 233 | (put 'defun 'forward-op 'end-of-defun) |
| 234 | 234 | ||
| 235 | ;; Filenames and URLs www.com/foo%32bar | 235 | ;; Filenames |
| 236 | 236 | ||
| 237 | (defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:" | 237 | (defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:" |
| 238 | "Characters allowable in filenames.") | 238 | "Characters allowable in filenames.") |
| @@ -248,94 +248,224 @@ The bounds of THING are determined by `bounds-of-thing-at-point'." | |||
| 248 | (forward-char) | 248 | (forward-char) |
| 249 | (goto-char (point-min))))) | 249 | (goto-char (point-min))))) |
| 250 | 250 | ||
| 251 | ;; URIs | ||
| 252 | |||
| 253 | (defvar thing-at-point-beginning-of-url-regexp nil | ||
| 254 | "Regexp matching the beginning of a well-formed URI. | ||
| 255 | If nil, construct the regexp from `thing-at-point-uri-schemes'.") | ||
| 256 | |||
| 251 | (defvar thing-at-point-url-path-regexp | 257 | (defvar thing-at-point-url-path-regexp |
| 252 | "[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+" | 258 | "[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+" |
| 253 | "A regular expression probably matching the host and filename or e-mail part of a URL.") | 259 | "Regexp matching the host and filename or e-mail part of a URL.") |
| 254 | 260 | ||
| 255 | (defvar thing-at-point-short-url-regexp | 261 | (defvar thing-at-point-short-url-regexp |
| 256 | (concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp) | 262 | (concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp) |
| 257 | "A regular expression probably matching a URL without an access scheme. | 263 | "Regexp matching a URI without a scheme component.") |
| 258 | Hostname matching is stricter in this case than for | ||
| 259 | ``thing-at-point-url-regexp''.") | ||
| 260 | 264 | ||
| 261 | (defvar thing-at-point-uri-schemes | 265 | (defvar thing-at-point-uri-schemes |
| 262 | ;; Officials from http://www.iana.org/assignments/uri-schemes.html | 266 | ;; Officials from http://www.iana.org/assignments/uri-schemes.html |
| 263 | '("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:" | 267 | '("aaa://" "about:" "acap://" "apt:" "bzr://" "bzr+ssh://" |
| 264 | "telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:" | 268 | "attachment:/" "chrome://" "cid:" "content://" "crid://" "cvs://" |
| 265 | "cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:" | 269 | "data:" "dav:" "dict://" "doi:" "dns:" "dtn:" "feed:" "file:/" |
| 266 | "tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:" | 270 | "finger://" "fish://" "ftp://" "geo:" "git://" "go:" "gopher://" |
| 267 | "modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:" | 271 | "h323:" "http://" "https://" "im:" "imap://" "info:" "ipp:" |
| 268 | "afs:" "tn3270:" "mailserver:" | 272 | "irc://" "irc6://" "ircs://" "iris.beep:" "jar:" "ldap://" |
| 269 | "crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:" | 273 | "ldaps://" "mailto:" "mid:" "mtqp://" "mupdate://" "news:" |
| 270 | "iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:" | 274 | "nfs://" "nntp://" "opaquelocktoken:" "pop://" "pres:" |
| 271 | "tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:" | 275 | "resource://" "rmi://" "rsync://" "rtsp://" "rtspu://" "service:" |
| 272 | ;; Compatibility | 276 | "sftp://" "sip:" "sips:" "smb://" "sms:" "snmp://" "soap.beep://" |
| 273 | "snews:" "irc:" "mms://" "mmsh://") | 277 | "soap.beeps://" "ssh://" "svn://" "svn+ssh://" "tag:" "tel:" |
| 274 | "Uniform Resource Identifier (URI) Schemes.") | 278 | "telnet://" "tftp://" "tip://" "tn3270://" "udp://" "urn:" |
| 275 | 279 | "uuid:" "vemmi://" "webcal://" "xri://" "xmlrpc.beep://" | |
| 276 | (defvar thing-at-point-url-regexp | 280 | "xmlrpc.beeps://" "z39.50r://" "z39.50s://" "xmpp:" |
| 277 | (concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)" | 281 | ;; Compatibility |
| 278 | thing-at-point-url-path-regexp) | 282 | "fax:" "mms://" "mmsh://" "modem:" "prospero:" "snews:" |
| 279 | "A regular expression probably matching a complete URL.") | 283 | "wais://") |
| 280 | 284 | "List of URI schemes recognized by `thing-at-point-url-at-point'. | |
| 281 | (defvar thing-at-point-markedup-url-regexp | 285 | Each string in this list should correspond to the start of a |
| 282 | "<URL:[^>]+>" | 286 | URI's scheme component, up to and including the trailing // if |
| 283 | "A regular expression matching a URL marked up per RFC1738. | 287 | the scheme calls for that to be present.") |
| 284 | This may contain whitespace (including newlines) .") | 288 | |
| 289 | (defvar thing-at-point-markedup-url-regexp "<URL:\\([^<>\n]+\\)>" | ||
| 290 | "Regexp matching a URL marked up per RFC1738. | ||
| 291 | This kind of markup was formerly recommended as a way to indicate | ||
| 292 | URIs, but as of RFC 3986 it is no longer recommended. | ||
| 293 | Subexpression 1 should contain the delimited URL.") | ||
| 294 | |||
| 295 | (defvar thing-at-point-newsgroup-regexp | ||
| 296 | "\\`[[:lower:]]+\\.[-+[:lower:]_0-9.]+\\'" | ||
| 297 | "Regexp matching a newsgroup name.") | ||
| 298 | |||
| 299 | (defvar thing-at-point-newsgroup-heads | ||
| 300 | '("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk") | ||
| 301 | "Used by `thing-at-point-newsgroup-p' if gnus is not running.") | ||
| 302 | |||
| 303 | (defvar thing-at-point-default-mail-uri-scheme "mailto" | ||
| 304 | "Default scheme for ill-formed URIs that look like <foo@example.com>. | ||
| 305 | If nil, do not give such URIs a scheme.") | ||
| 285 | 306 | ||
| 286 | (put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point) | 307 | (put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point) |
| 287 | (defun thing-at-point-bounds-of-url-at-point () | 308 | |
| 288 | (let ((strip (thing-at-point-looking-at | 309 | (defun thing-at-point-bounds-of-url-at-point (&optional lax) |
| 289 | thing-at-point-markedup-url-regexp))) ;; (url "") short | 310 | "Return a cons cell containing the start and end of the URI at point. |
| 290 | (if (or strip | 311 | Try to find a URI using `thing-at-point-markedup-url-regexp'. |
| 291 | (thing-at-point-looking-at thing-at-point-url-regexp) | 312 | If that fails, try with `thing-at-point-beginning-of-url-regexp'. |
| 292 | ;; Access scheme omitted? | 313 | If that also fails, and optional argument LAX is non-nil, return |
| 293 | ;; (setq short (thing-at-point-looking-at | 314 | the bounds of a possible ill-formed URI (one lacking a scheme)." |
| 294 | ;; thing-at-point-short-url-regexp)) | 315 | ;; Look for the old <URL:foo> markup. If found, use it. |
| 295 | ) | 316 | (or (thing-at-point--bounds-of-markedup-url) |
| 296 | (let ((beginning (match-beginning 0)) | 317 | ;; Otherwise, find the bounds within which a URI may exist. The |
| 297 | (end (match-end 0))) | 318 | ;; method is similar to `ffap-string-at-point'. Note that URIs |
| 298 | (when strip | 319 | ;; may contain parentheses but may not contain spaces (RFC3986). |
| 299 | (setq beginning (+ beginning 5)) | 320 | (let* ((allowed-chars "--:=&?$+@-Z_[:alpha:]~#,%;*()!'") |
| 300 | (setq end (- end 1))) | 321 | (skip-before "^[0-9a-zA-Z]") |
| 301 | (cons beginning end))))) | 322 | (skip-after ":;.,!?") |
| 323 | (pt (point)) | ||
| 324 | (beg (save-excursion | ||
| 325 | (skip-chars-backward allowed-chars) | ||
| 326 | (skip-chars-forward skip-before pt) | ||
| 327 | (point))) | ||
| 328 | (end (save-excursion | ||
| 329 | (skip-chars-forward allowed-chars) | ||
| 330 | (skip-chars-backward skip-after pt) | ||
| 331 | (point)))) | ||
| 332 | (or (thing-at-point--bounds-of-well-formed-url beg end pt) | ||
| 333 | (if lax (cons beg end)))))) | ||
| 334 | |||
| 335 | (defun thing-at-point--bounds-of-markedup-url () | ||
| 336 | (when thing-at-point-markedup-url-regexp | ||
| 337 | (let ((case-fold-search t) | ||
| 338 | (pt (point)) | ||
| 339 | (beg (line-beginning-position)) | ||
| 340 | (end (line-end-position)) | ||
| 341 | found) | ||
| 342 | (save-excursion | ||
| 343 | (goto-char beg) | ||
| 344 | (while (and (not found) | ||
| 345 | (<= (point) pt) | ||
| 346 | (< (point) end)) | ||
| 347 | (and (re-search-forward thing-at-point-markedup-url-regexp | ||
| 348 | end 1) | ||
| 349 | (> (point) pt) | ||
| 350 | (setq found t)))) | ||
| 351 | (if found | ||
| 352 | (cons (match-beginning 1) (match-end 1)))))) | ||
| 353 | |||
| 354 | (defun thing-at-point--bounds-of-well-formed-url (beg end pt) | ||
| 355 | (save-excursion | ||
| 356 | (goto-char beg) | ||
| 357 | (let (url-beg paren-end regexp) | ||
| 358 | (save-restriction | ||
| 359 | (narrow-to-region beg end) | ||
| 360 | ;; The scheme component must either match at BEG, or have no | ||
| 361 | ;; other alphanumerical ASCII characters before it. | ||
| 362 | (setq regexp (concat "\\(?:\\`\\|[^a-zA-Z0-9]\\)\\(" | ||
| 363 | (or thing-at-point-beginning-of-url-regexp | ||
| 364 | (regexp-opt thing-at-point-uri-schemes)) | ||
| 365 | "\\)")) | ||
| 366 | (and (re-search-forward regexp end t) | ||
| 367 | ;; URI must have non-empty contents. | ||
| 368 | (< (point) end) | ||
| 369 | (setq url-beg (match-beginning 1)))) | ||
| 370 | (when url-beg | ||
| 371 | ;; If there is an open paren before the URI, truncate to the | ||
| 372 | ;; matching close paren. | ||
| 373 | (and (> url-beg (point-min)) | ||
| 374 | (eq (car-safe (syntax-after (1- url-beg))) 4) | ||
| 375 | (save-restriction | ||
| 376 | (narrow-to-region (1- url-beg) (min end (point-max))) | ||
| 377 | (setq paren-end (ignore-errors | ||
| 378 | (scan-lists (1- url-beg) 1 0)))) | ||
| 379 | (not (blink-matching-check-mismatch (1- url-beg) paren-end)) | ||
| 380 | (setq end (1- paren-end))) | ||
| 381 | (cons url-beg end))))) | ||
| 302 | 382 | ||
| 303 | (put 'url 'thing-at-point 'thing-at-point-url-at-point) | 383 | (put 'url 'thing-at-point 'thing-at-point-url-at-point) |
| 304 | (defun thing-at-point-url-at-point () | ||
| 305 | "Return the URL around or before point. | ||
| 306 | 384 | ||
| 307 | Search backwards for the start of a URL ending at or after point. If | 385 | (defun thing-at-point-url-at-point (&optional lax bounds) |
| 308 | no URL found, return nil. The access scheme will be prepended if | 386 | "Return the URL around or before point. |
| 309 | absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it | 387 | If no URL is found, return nil. |
| 310 | starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default." | 388 | |
| 311 | 389 | If optional argument LAX is non-nil, look for URLs that are not | |
| 312 | (let ((url "") short strip) | 390 | well-formed, such as foo@bar or <nobody>. |
| 313 | (if (or (setq strip (thing-at-point-looking-at | 391 | |
| 314 | thing-at-point-markedup-url-regexp)) | 392 | If optional arguments BOUNDS are non-nil, it should be a cons |
| 315 | (thing-at-point-looking-at thing-at-point-url-regexp) | 393 | cell of the form (START . END), containing the beginning and end |
| 316 | ;; Access scheme omitted? | 394 | positions of the URI. Otherwise, these positions are detected |
| 317 | (setq short (thing-at-point-looking-at | 395 | automatically from the text around point. |
| 318 | thing-at-point-short-url-regexp))) | 396 | |
| 319 | (progn | 397 | If the scheme component is absent, either because a URI delimited |
| 320 | (setq url (buffer-substring-no-properties (match-beginning 0) | 398 | with <url:...> lacks one, or because an ill-formed URI was found |
| 321 | (match-end 0))) | 399 | with LAX or BEG and END, try to add a scheme in the returned URI. |
| 322 | (and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">" | 400 | The scheme is chosen heuristically: \"mailto:\" if the address |
| 323 | ;; strip whitespace | 401 | looks like an email address, \"ftp://\" if it starts with |
| 324 | (while (string-match "[ \t\n\r]+" url) | 402 | \"ftp\", etc." |
| 325 | (setq url (replace-match "" t t url))) | 403 | (unless bounds |
| 326 | (and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url) | 404 | (setq bounds (thing-at-point-bounds-of-url-at-point lax))) |
| 327 | ;; already has a URL scheme. | 405 | (when (and bounds (< (car bounds) (cdr bounds))) |
| 328 | "") | 406 | (let ((str (buffer-substring-no-properties (car bounds) (cdr bounds)))) |
| 329 | ((string-match "@" url) | 407 | ;; If there is no scheme component, try to add one. |
| 330 | "mailto:") | 408 | (unless (string-match "\\`[a-zA-Z][-a-zA-Z0-9+.]*:" str) |
| 331 | ;; e.g. ftp.swiss... or ftp-swiss... | 409 | (or |
| 332 | ((string-match "^ftp" url) | 410 | ;; If the URI has the form <foo@bar>, treat it according to |
| 333 | "ftp://") | 411 | ;; `thing-at-point-default-mail-uri-scheme'. If there are |
| 334 | (t "http://")) | 412 | ;; no angle brackets, it must be mailto. |
| 335 | url))) | 413 | (when (string-match "\\`[^:</>@]+@[-.0-9=&?$+A-Z_a-z~#,%;*]" str) |
| 336 | (if (string-equal "" url) | 414 | (let ((scheme (if (and (eq (char-before (car bounds)) ?<) |
| 337 | nil | 415 | (eq (char-after (cdr bounds)) ?>)) |
| 338 | url))))) | 416 | thing-at-point-default-mail-uri-scheme |
| 417 | "mailto"))) | ||
| 418 | (if scheme | ||
| 419 | (setq str (concat scheme ":" str))))) | ||
| 420 | ;; If the string is like <FOO>, where FOO is an existing user | ||
| 421 | ;; name on the system, treat that as an email address. | ||
| 422 | (and (string-match "\\`[[:alnum:]]+\\'" str) | ||
| 423 | (eq (char-before (car bounds)) ?<) | ||
| 424 | (eq (char-after (cdr bounds)) ?>) | ||
| 425 | (not (string-match "~" (expand-file-name (concat "~" str)))) | ||
| 426 | (setq str (concat "mailto:" str))) | ||
| 427 | ;; If it looks like news.example.com, treat it as news. | ||
| 428 | (if (thing-at-point-newsgroup-p str) | ||
| 429 | (setq str (concat "news:" str))) | ||
| 430 | ;; If it looks like ftp.example.com. treat it as ftp. | ||
| 431 | (if (string-match "\\`ftp\\." str) | ||
| 432 | (setq str (concat "ftp://" str))) | ||
| 433 | ;; If it looks like www.example.com. treat it as http. | ||
| 434 | (if (string-match "\\`www\\." str) | ||
| 435 | (setq str (concat "http://" str))) | ||
| 436 | ;; Otherwise, it just isn't a URI. | ||
| 437 | (setq str nil))) | ||
| 438 | str))) | ||
| 439 | |||
| 440 | (defun thing-at-point-newsgroup-p (string) | ||
| 441 | "Return STRING if it looks like a newsgroup name, else nil." | ||
| 442 | (and | ||
| 443 | (string-match thing-at-point-newsgroup-regexp string) | ||
| 444 | (let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb)) | ||
| 445 | (heads thing-at-point-newsgroup-heads) | ||
| 446 | htb ret) | ||
| 447 | (while htbs | ||
| 448 | (setq htb (car htbs) htbs (cdr htbs)) | ||
| 449 | (condition-case nil | ||
| 450 | (progn | ||
| 451 | ;; errs: htb symbol may be unbound, or not a hash-table. | ||
| 452 | ;; gnus-gethash is just a macro for intern-soft. | ||
| 453 | (and (symbol-value htb) | ||
| 454 | (intern-soft string (symbol-value htb)) | ||
| 455 | (setq ret string htbs nil)) | ||
| 456 | ;; If we made it this far, gnus is running, so ignore "heads": | ||
| 457 | (setq heads nil)) | ||
| 458 | (error nil))) | ||
| 459 | (or ret (not heads) | ||
| 460 | (let ((head (string-match "\\`\\([[:lower:]]+\\)\\." string))) | ||
| 461 | (and head (setq head (substring string 0 (match-end 1))) | ||
| 462 | (member head heads) | ||
| 463 | (setq ret string)))) | ||
| 464 | ret))) | ||
| 465 | |||
| 466 | (put 'url 'end-op (lambda () (end-of-thing 'url))) | ||
| 467 | |||
| 468 | (put 'url 'beginning-op (lambda () (end-of-thing 'url))) | ||
| 339 | 469 | ||
| 340 | ;; The normal thingatpt mechanism doesn't work for complex regexps. | 470 | ;; The normal thingatpt mechanism doesn't work for complex regexps. |
| 341 | ;; This should work for almost any regexp wherever we are in the | 471 | ;; This should work for almost any regexp wherever we are in the |
| @@ -372,19 +502,6 @@ point." | |||
| 372 | (goto-char match) | 502 | (goto-char match) |
| 373 | (looking-at regexp))))) | 503 | (looking-at regexp))))) |
| 374 | 504 | ||
| 375 | (put 'url 'end-op | ||
| 376 | (lambda () | ||
| 377 | (let ((bounds (thing-at-point-bounds-of-url-at-point))) | ||
| 378 | (if bounds | ||
| 379 | (goto-char (cdr bounds)) | ||
| 380 | (error "No URL here"))))) | ||
| 381 | (put 'url 'beginning-op | ||
| 382 | (lambda () | ||
| 383 | (let ((bounds (thing-at-point-bounds-of-url-at-point))) | ||
| 384 | (if bounds | ||
| 385 | (goto-char (car bounds)) | ||
| 386 | (error "No URL here"))))) | ||
| 387 | |||
| 388 | ;; Email addresses | 505 | ;; Email addresses |
| 389 | (defvar thing-at-point-email-regexp | 506 | (defvar thing-at-point-email-regexp |
| 390 | "<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?" | 507 | "<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?" |
diff --git a/test/ChangeLog b/test/ChangeLog index 651453566f2..41bb1be190e 100644 --- a/test/ChangeLog +++ b/test/ChangeLog | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | 2013-02-04 Chong Yidong <cyd@gnu.org> | ||
| 2 | |||
| 3 | * automated/thingatpt.el: New file. | ||
| 4 | |||
| 1 | 2013-02-03 Chong Yidong <cyd@gnu.org> | 5 | 2013-02-03 Chong Yidong <cyd@gnu.org> |
| 2 | 6 | ||
| 3 | * automated/files.el (file-test--do-local-variables-test): Avoid | 7 | * automated/files.el (file-test--do-local-variables-test): Avoid |
diff --git a/test/automated/thingatpt.el b/test/automated/thingatpt.el new file mode 100644 index 00000000000..f33a8f4b0e6 --- /dev/null +++ b/test/automated/thingatpt.el | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | ;;; thingatpt.el --- tests for thing-at-point. | ||
| 2 | |||
| 3 | ;; Copyright (C) 2013 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; This file is part of GNU Emacs. | ||
| 6 | |||
| 7 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 8 | ;; it under the terms of the GNU General Public License as published by | ||
| 9 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 10 | ;; (at your option) any later version. | ||
| 11 | |||
| 12 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 13 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | ;; GNU General Public License for more details. | ||
| 16 | |||
| 17 | ;; You should have received a copy of the GNU General Public License | ||
| 18 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | ||
| 19 | |||
| 20 | ;;; Code: | ||
| 21 | |||
| 22 | (require 'ert) | ||
| 23 | |||
| 24 | (defvar thing-at-point-test-data | ||
| 25 | '(("http://1.gnu.org" 1 url "http://1.gnu.org") | ||
| 26 | ("http://2.gnu.org" 6 url "http://2.gnu.org") | ||
| 27 | ("http://3.gnu.org" 19 url "http://3.gnu.org") | ||
| 28 | ("https://4.gnu.org" 1 url "https://4.gnu.org") | ||
| 29 | ("bzr://savannah.gnu.org" 1 url "bzr://savannah.gnu.org") | ||
| 30 | ("A geo URI (geo:3.14159,-2.71828)." 12 url "geo:3.14159,-2.71828") | ||
| 31 | ("Visit http://5.gnu.org now." 5 url nil) | ||
| 32 | ("Visit http://6.gnu.org now." 7 url "http://6.gnu.org") | ||
| 33 | ("Visit http://7.gnu.org now." 22 url "http://7.gnu.org") | ||
| 34 | ("Visit http://8.gnu.org now." 22 url "http://8.gnu.org") | ||
| 35 | ("Visit http://9.gnu.org now." 24 url nil) | ||
| 36 | ;; Invalid URIs | ||
| 37 | ("<<<<" 2 url nil) | ||
| 38 | ("<>" 1 url nil) | ||
| 39 | ("<url:>" 1 url nil) | ||
| 40 | ("http://" 1 url nil) | ||
| 41 | ;; Invalid schema | ||
| 42 | ("foo://www.gnu.org" 1 url nil) | ||
| 43 | ("foohttp://www.gnu.org" 1 url nil) | ||
| 44 | ;; Non alphanumeric characters can be found in URIs | ||
| 45 | ("ftp://example.net/~foo!;#bar=baz&goo=bob" 3 url "ftp://example.net/~foo!;#bar=baz&goo=bob") | ||
| 46 | ("bzr+ssh://user@example.net:5/a%20d,5" 34 url "bzr+ssh://user@example.net:5/a%20d,5") | ||
| 47 | ;; <url:...> markup | ||
| 48 | ("Url: <url:foo://1.example.com>..." 8 url "foo://1.example.com") | ||
| 49 | ("Url: <url:foo://2.example.com>..." 30 url "foo://2.example.com") | ||
| 50 | ("Url: <url:foo://www.gnu.org/a bc>..." 20 url "foo://www.gnu.org/a bc") | ||
| 51 | ;; Hack used by thing-at-point: drop punctuation at end of URI. | ||
| 52 | ("Go to http://www.gnu.org, for details" 7 url "http://www.gnu.org") | ||
| 53 | ("Go to http://www.gnu.org." 24 url "http://www.gnu.org") | ||
| 54 | ;; Standard URI delimiters | ||
| 55 | ("Go to \"http://10.gnu.org\"." 8 url "http://10.gnu.org") | ||
| 56 | ("Go to \"http://11.gnu.org/\"." 26 url "http://11.gnu.org/") | ||
| 57 | ("Go to <http://12.gnu.org> now." 8 url "http://12.gnu.org") | ||
| 58 | ("Go to <http://13.gnu.org> now." 24 url "http://13.gnu.org") | ||
| 59 | ;; Parenthesis handling (non-standard) | ||
| 60 | ("http://example.com/a(b)c" 21 url "http://example.com/a(b)c") | ||
| 61 | ("http://example.com/a(b)" 21 url "http://example.com/a(b)") | ||
| 62 | ("(http://example.com/abc)" 2 url "http://example.com/abc") | ||
| 63 | ("This (http://example.com/a(b))" 7 url "http://example.com/a(b)") | ||
| 64 | ("This (http://example.com/a(b))" 30 url "http://example.com/a(b)") | ||
| 65 | ("This (http://example.com/a(b))" 5 url nil) | ||
| 66 | ("http://example.com/ab)c" 4 url "http://example.com/ab)c") | ||
| 67 | ;; URL markup, lacking schema | ||
| 68 | ("<url:foo@example.com>" 1 url "mailto:foo@example.com") | ||
| 69 | ("<url:ftp.example.net/abc/>" 1 url "ftp://ftp.example.net/abc/")) | ||
| 70 | "List of thing-at-point tests. | ||
| 71 | Each list element should have the form | ||
| 72 | |||
| 73 | (STRING POS THING RESULT) | ||
| 74 | |||
| 75 | where STRING is a string of buffer contents, POS is the value of | ||
| 76 | point, THING is a symbol argument for `thing-at-point', and | ||
| 77 | RESULT should be the result of calling `thing-at-point' from that | ||
| 78 | position to retrieve THING.") | ||
| 79 | |||
| 80 | (ert-deftest thing-at-point-tests () | ||
| 81 | "Test the file-local variables implementation." | ||
| 82 | (dolist (test thing-at-point-test-data) | ||
| 83 | (with-temp-buffer | ||
| 84 | (insert (nth 0 test)) | ||
| 85 | (goto-char (nth 1 test)) | ||
| 86 | (should (equal (thing-at-point (nth 2 test)) (nth 3 test)))))) | ||
| 87 | |||
| 88 | ;;; thingatpt.el ends here | ||