aboutsummaryrefslogtreecommitdiffstats
path: root/lisp/gnus/nnweb.el
diff options
context:
space:
mode:
authorShengHuo ZHU2001-10-31 04:16:51 +0000
committerShengHuo ZHU2001-10-31 04:16:51 +0000
commit95fa1ff74aa9ae40d5ef4b680ea606287c40327f (patch)
tree900b4c445ed113bf645086ede4de094dd33c2230 /lisp/gnus/nnweb.el
parentbf9bb76fe5da844622da05f1fd9aa140d8030381 (diff)
downloademacs-95fa1ff74aa9ae40d5ef4b680ea606287c40327f.tar.gz
emacs-95fa1ff74aa9ae40d5ef4b680ea606287c40327f.zip
* mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with
the Gnus CVS. * mm-util.el (mm-mime-mule-charset-alist): Move down and call mm-coding-system-p. Don't correct it only in XEmacs. (mm-charset-to-coding-system): Use mm-coding-system-p and mm-get-coding-system-list. (mm-emacs-mule, mm-mule4-p): New. (mm-enable-multibyte, mm-disable-multibyte, mm-enable-multibyte-mule4, mm-disable-multibyte-mule4, mm-with-unibyte-current-buffer, mm-with-unibyte-current-buffer-mule4): Use them. (mm-find-mime-charset-region): Treat iso-2022-jp. From Dave Love <fx@gnu.org>: * mm-util.el (mm-mime-mule-charset-alist): Make it correct by construction. (mm-charset-synonym-alist): Remove windows-125[02]. Make other entries conditional on not having a coding system defined for them. (mm-mule-charset-to-mime-charset): Use find-coding-systems-for-charsets if defined. (mm-charset-to-coding-system): Don't use mm-get-coding-system-list. Look in mm-charset-synonym-alist later. Add last resort search of coding systems. (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4) (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like Mule 4. (mm-find-mime-charset-region): Re-write. (mm-with-unibyte-current-buffer): Restore buffer as well as multibyteness.
Diffstat (limited to 'lisp/gnus/nnweb.el')
-rw-r--r--lisp/gnus/nnweb.el236
1 files changed, 203 insertions, 33 deletions
diff --git a/lisp/gnus/nnweb.el b/lisp/gnus/nnweb.el
index c4ff7248e6b..740b182639f 100644
--- a/lisp/gnus/nnweb.el
+++ b/lisp/gnus/nnweb.el
@@ -1,5 +1,5 @@
1;;; nnweb.el --- retrieving articles via web search engines 1;;; nnweb.el --- retrieving articles via web search engines
2;; Copyright (C) 1996, 1997, 1998, 1999, 2000 2;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
3;; Free Software Foundation, Inc. 3;; Free Software Foundation, Inc.
4 4
5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
@@ -55,25 +55,48 @@
55(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") 55(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
56 "Where nnweb will save its files.") 56 "Where nnweb will save its files.")
57 57
58(defvoo nnweb-type 'dejanews 58(defvoo nnweb-type 'google
59 "What search engine type is being used. 59 "What search engine type is being used.
60Valid types include `dejanews', `dejanewsold', `reference', 60Valid types include `google', `dejanews', `dejanewsold', `reference',
61and `altavista'.") 61and `altavista'.")
62 62
63(defvar nnweb-type-definition 63(defvar nnweb-type-definition
64 '((dejanews 64 '(
65 (google
66 ;;(article . nnweb-google-wash-article)
67 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
65 (article . ignore) 68 (article . ignore)
66 (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") 69 (id . "http://groups.google.com/groups?selm=%s&output=gplain")
67 (map . nnweb-dejanews-create-mapping) 70 ;;(reference . nnweb-google-reference)
68 (search . nnweb-dejanews-search) 71 (reference . identity)
69 (address . "http://www.deja.com/=dnc/qs.xp") 72 (map . nnweb-google-create-mapping)
70 (identifier . nnweb-dejanews-identity)) 73 (search . nnweb-google-search)
71 (dejanewsold 74 (address . "http://groups.google.com/groups")
75 (identifier . nnweb-google-identity))
76 (dejanews ;; alias of google
77 ;;(article . nnweb-google-wash-article)
78 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
72 (article . ignore) 79 (article . ignore)
73 (map . nnweb-dejanews-create-mapping) 80 (id . "http://groups.google.com/groups?selm=%s&output=gplain")
74 (search . nnweb-dejanewsold-search) 81 ;;(reference . nnweb-google-reference)
75 (address . "http://www.deja.com/dnquery.xp") 82 (reference . identity)
76 (identifier . nnweb-dejanews-identity)) 83 (map . nnweb-google-create-mapping)
84 (search . nnweb-google-search)
85 (address . "http://groups.google.com/groups")
86 (identifier . nnweb-google-identity))
87;;; (dejanews
88;;; (article . ignore)
89;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
90;;; (map . nnweb-dejanews-create-mapping)
91;;; (search . nnweb-dejanews-search)
92;;; (address . "http://www.deja.com/=dnc/qs.xp")
93;;; (identifier . nnweb-dejanews-identity))
94;;; (dejanewsold
95;;; (article . ignore)
96;;; (map . nnweb-dejanews-create-mapping)
97;;; (search . nnweb-dejanewsold-search)
98;;; (address . "http://www.deja.com/dnquery.xp")
99;;; (identifier . nnweb-dejanews-identity))
77 (reference 100 (reference
78 (article . nnweb-reference-wash-article) 101 (article . nnweb-reference-wash-article)
79 (map . nnweb-reference-create-mapping) 102 (map . nnweb-reference-create-mapping)
@@ -124,6 +147,8 @@ and `altavista'.")
124 147
125(deffoo nnweb-request-scan (&optional group server) 148(deffoo nnweb-request-scan (&optional group server)
126 (nnweb-possibly-change-server group server) 149 (nnweb-possibly-change-server group server)
150 (if nnweb-ephemeral-p
151 (setq nnweb-hashtb (gnus-make-hashtable 4095)))
127 (funcall (nnweb-definition 'map)) 152 (funcall (nnweb-definition 'map))
128 (unless nnweb-ephemeral-p 153 (unless nnweb-ephemeral-p
129 (nnweb-write-active) 154 (nnweb-write-active)
@@ -134,9 +159,10 @@ and `altavista'.")
134 (when (and group 159 (when (and group
135 (not (equal group nnweb-group)) 160 (not (equal group nnweb-group))
136 (not nnweb-ephemeral-p)) 161 (not nnweb-ephemeral-p))
162 (setq nnweb-group group
163 nnweb-articles nil)
137 (let ((info (assoc group nnweb-group-alist))) 164 (let ((info (assoc group nnweb-group-alist)))
138 (when info 165 (when info
139 (setq nnweb-group group)
140 (setq nnweb-type (nth 2 info)) 166 (setq nnweb-type (nth 2 info))
141 (setq nnweb-search (nth 3 info)) 167 (setq nnweb-search (nth 3 info))
142 (unless dont-check 168 (unless dont-check
@@ -175,17 +201,19 @@ and `altavista'.")
175 (and (stringp article) 201 (and (stringp article)
176 (nnweb-definition 'id t) 202 (nnweb-definition 'id t)
177 (let ((fetch (nnweb-definition 'id)) 203 (let ((fetch (nnweb-definition 'id))
178 art) 204 art active)
179 (when (string-match "^<\\(.*\\)>$" article) 205 (when (string-match "^<\\(.*\\)>$" article)
180 (setq art (match-string 1 article))) 206 (setq art (match-string 1 article)))
181 (and fetch 207 (when (and fetch art)
182 art 208 (setq url (format fetch art))
183 (mm-with-unibyte-current-buffer 209 (mm-with-unibyte-current-buffer
184 (nnweb-fetch-url 210 (nnweb-fetch-url url))
185 (format fetch article))))))) 211 (if (nnweb-definition 'reference t)
212 (setq article
213 (funcall (nnweb-definition
214 'reference) article)))))))
186 (unless nnheader-callback-function 215 (unless nnheader-callback-function
187 (funcall (nnweb-definition 'article)) 216 (funcall (nnweb-definition 'article)))
188 (nnweb-decode-entities))
189 (nnheader-report 'nnweb "Fetched article %s" article) 217 (nnheader-report 'nnweb "Fetched article %s" article)
190 (cons group (and (numberp article) article)))))) 218 (cons group (and (numberp article) article))))))
191 219
@@ -290,10 +318,11 @@ and `altavista'.")
290 (nnweb-open-server server))) 318 (nnweb-open-server server)))
291 (unless nnweb-group-alist 319 (unless nnweb-group-alist
292 (nnweb-read-active)) 320 (nnweb-read-active))
321 (unless nnweb-hashtb
322 (setq nnweb-hashtb (gnus-make-hashtable 4095)))
293 (when group 323 (when group
294 (when (and (not nnweb-ephemeral-p) 324 (when (and (not nnweb-ephemeral-p)
295 (not (equal group nnweb-group))) 325 (equal group nnweb-group))
296 (setq nnweb-hashtb (gnus-make-hashtable 4095))
297 (nnweb-request-group group nil t)))) 326 (nnweb-request-group group nil t))))
298 327
299(defun nnweb-init (server) 328(defun nnweb-init (server)
@@ -393,7 +422,7 @@ and `altavista'.")
393 (car (rassq (string-to-number 422 (car (rassq (string-to-number
394 (match-string 2 date)) 423 (match-string 2 date))
395 parse-time-months)) 424 parse-time-months))
396 (match-string 3 date) 425 (match-string 3 date)
397 (match-string 1 date))) 426 (match-string 1 date)))
398 (setq date "Jan 1 00:00:00 0000")) 427 (setq date "Jan 1 00:00:00 0000"))
399 (incf i) 428 (incf i)
@@ -559,6 +588,7 @@ and `altavista'.")
559 (while (search-forward "," nil t) 588 (while (search-forward "," nil t)
560 (replace-match " " t t))) 589 (replace-match " " t t)))
561 (widen) 590 (widen)
591 (nnweb-decode-entities)
562 (set-marker body nil)))) 592 (set-marker body nil))))
563 593
564(defun nnweb-reference-search (search) 594(defun nnweb-reference-search (search)
@@ -663,7 +693,8 @@ and `altavista'.")
663 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t) 693 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t)
664 (replace-match "&lt;\\1&gt; " t))) 694 (replace-match "&lt;\\1&gt; " t)))
665 (widen) 695 (widen)
666 (nnweb-remove-markup))) 696 (nnweb-remove-markup)
697 (nnweb-decode-entities)))
667 698
668(defun nnweb-altavista-search (search &optional part) 699(defun nnweb-altavista-search (search &optional part)
669 (url-insert-file-contents 700 (url-insert-file-contents
@@ -683,13 +714,147 @@ and `altavista'.")
683 t) 714 t)
684 715
685;;; 716;;;
717;;; Deja bought by google.com
718;;;
719
720(defun nnweb-google-wash-article ()
721 (let ((case-fold-search t) url)
722 (goto-char (point-min))
723 (re-search-forward "^<pre>" nil t)
724 (narrow-to-region (point-min) (point))
725 (search-backward "<table " nil t 2)
726 (delete-region (point-min) (point))
727 (if (re-search-forward "Search Result [0-9]+" nil t)
728 (replace-match ""))
729 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
730 (replace-match ""))
731 (goto-char (point-min))
732 (while (search-forward "<br>" nil t)
733 (replace-match "\n"))
734 (nnweb-remove-markup)
735 (goto-char (point-min))
736 (while (re-search-forward "^[ \t]*\n" nil t)
737 (replace-match ""))
738 (goto-char (point-max))
739 (insert "\n")
740 (widen)
741 (narrow-to-region (point) (point-max))
742 (search-forward "</pre>" nil t)
743 (delete-region (point) (point-max))
744 (nnweb-remove-markup)
745 (widen)))
746
747(defun nnweb-google-parse-1 (&optional Message-ID)
748 (let ((i 0)
749 (case-fold-search t)
750 (active (cadr (assoc nnweb-group nnweb-group-alist)))
751 Subject Score Date Newsgroups From
752 map url mid)
753 (unless active
754 (push (list nnweb-group (setq active (cons 1 0))
755 nnweb-type nnweb-search)
756 nnweb-group-alist))
757 ;; Go through all the article hits on this page.
758 (goto-char (point-min))
759 (while (re-search-forward
760 "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
761 (setq mid (match-string 2)
762 url (format
763 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
764 (narrow-to-region (search-forward ">" nil t)
765 (search-forward "</a>" nil t))
766 (nnweb-remove-markup)
767 (nnweb-decode-entities)
768 (setq Subject (buffer-string))
769 (goto-char (point-max))
770 (widen)
771 (forward-line 1)
772 (when (looking-at "<br><font[^>]+>")
773 (goto-char (match-end 0)))
774 (if (not (looking-at "<a[^>]+>"))
775 (skip-chars-forward " \t")
776 (narrow-to-region (point)
777 (search-forward "</a>" nil t))
778 (nnweb-remove-markup)
779 (nnweb-decode-entities)
780 (setq Newsgroups (buffer-string))
781 (goto-char (point-max))
782 (widen)
783 (skip-chars-forward "- \t"))
784 (when (looking-at
785 "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
786 (setq From (match-string 2)
787 Date (match-string 1)))
788 (forward-line 1)
789 (incf i)
790 (unless (nnweb-get-hashtb url)
791 (push
792 (list
793 (incf (cdr active))
794 (make-full-mail-header
795 (cdr active) (if Newsgroups
796 (concat "(" Newsgroups ") " Subject)
797 Subject)
798 From Date (or Message-ID mid)
799 nil 0 0 url))
800 map)
801 (nnweb-set-hashtb (cadar map) (car map))))
802 map))
803
804(defun nnweb-google-reference (id)
805 (let ((map (nnweb-google-parse-1 id)) header)
806 (setq nnweb-articles
807 (nconc nnweb-articles map))
808 (when (setq header (cadar map))
809 (mm-with-unibyte-current-buffer
810 (nnweb-fetch-url (mail-header-xref header)))
811 (caar map))))
812
813(defun nnweb-google-create-mapping ()
814 "Perform the search and create an number-to-url alist."
815 (save-excursion
816 (set-buffer nnweb-buffer)
817 (erase-buffer)
818 (when (funcall (nnweb-definition 'search) nnweb-search)
819 (let ((more t))
820 (while more
821 (setq nnweb-articles
822 (nconc nnweb-articles (nnweb-google-parse-1)))
823 ;; FIXME: There is more.
824 (setq more nil))
825 ;; Return the articles in the right order.
826 (setq nnweb-articles
827 (sort nnweb-articles 'car-less-than-car))))))
828
829(defun nnweb-google-search (search)
830 (nnweb-insert
831 (concat
832 (nnweb-definition 'address)
833 "?"
834 (nnweb-encode-www-form-urlencoded
835 `(("q" . ,search)
836 ("num". "100")
837 ("hq" . "")
838 ("hl" . "")
839 ("lr" . "")
840 ("safe" . "off")
841 ("sites" . "groups")))))
842 t)
843
844(defun nnweb-google-identity (url)
845 "Return an unique identifier based on URL."
846 (if (string-match "selm=\\([^ &>]+\\)" url)
847 (match-string 1 url)
848 url))
849
850;;;
686;;; General web/w3 interface utility functions 851;;; General web/w3 interface utility functions
687;;; 852;;;
688 853
689(defun nnweb-insert-html (parse) 854(defun nnweb-insert-html (parse)
690 "Insert HTML based on a w3 parse tree." 855 "Insert HTML based on a w3 parse tree."
691 (if (stringp parse) 856 (if (stringp parse)
692 (insert parse) 857 (insert (nnheader-string-as-multibyte parse))
693 (insert "<" (symbol-name (car parse)) " ") 858 (insert "<" (symbol-name (car parse)) " ")
694 (insert (mapconcat 859 (insert (mapconcat
695 (lambda (param) 860 (lambda (param)
@@ -729,7 +894,7 @@ and `altavista'.")
729 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) 894 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
730 (let ((elem (if (eq (aref (match-string 1) 0) ?\#) 895 (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
731 (let ((c 896 (let ((c
732 (string-to-number (substring 897 (string-to-number (substring
733 (match-string 1) 1)))) 898 (match-string 1) 1))))
734 (if (mm-char-or-char-int-p c) c 32)) 899 (if (mm-char-or-char-int-p c) c 32))
735 (or (cdr (assq (intern (match-string 1)) 900 (or (cdr (assq (intern (match-string 1))
@@ -739,9 +904,9 @@ and `altavista'.")
739 (setq elem (char-to-string elem))) 904 (setq elem (char-to-string elem)))
740 (replace-match elem t t)))) 905 (replace-match elem t t))))
741 906
742(defun nnweb-decode-entities-string (str) 907(defun nnweb-decode-entities-string (string)
743 (with-temp-buffer 908 (with-temp-buffer
744 (insert str) 909 (insert string)
745 (nnweb-decode-entities) 910 (nnweb-decode-entities)
746 (buffer-substring (point-min) (point-max)))) 911 (buffer-substring (point-min) (point-max))))
747 912
@@ -760,12 +925,12 @@ and `altavista'.")
760 "Insert the contents from an URL in the current buffer. 925 "Insert the contents from an URL in the current buffer.
761If FOLLOW-REFRESH is non-nil, redirect refresh url in META." 926If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
762 (let ((name buffer-file-name)) 927 (let ((name buffer-file-name))
763 (if follow-refresh 928 (if follow-refresh
764 (save-restriction 929 (save-restriction
765 (narrow-to-region (point) (point)) 930 (narrow-to-region (point) (point))
766 (url-insert-file-contents url) 931 (url-insert-file-contents url)
767 (goto-char (point-min)) 932 (goto-char (point-min))
768 (when (re-search-forward 933 (when (re-search-forward
769 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) 934 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
770 (let ((url (match-string 1))) 935 (let ((url (match-string 1)))
771 (delete-region (point-min) (point-max)) 936 (delete-region (point-min) (point-max))
@@ -822,6 +987,11 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
822 (listp (cdr element))) 987 (listp (cdr element)))
823 (nnweb-text-1 element))))) 988 (nnweb-text-1 element)))))
824 989
990(defun nnweb-replace-in-string (string match newtext)
991 (while (string-match match string)
992 (setq string (replace-match newtext t t string)))
993 string)
994
825(provide 'nnweb) 995(provide 'nnweb)
826 996
827;;; nnweb.el ends here 997;;; nnweb.el ends here