aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lisp/gnus/ChangeLog35
-rw-r--r--lisp/gnus/mm-util.el491
-rw-r--r--lisp/gnus/nnslashdot.el284
-rw-r--r--lisp/gnus/nnultimate.el88
-rw-r--r--lisp/gnus/nnweb.el236
5 files changed, 715 insertions, 419 deletions
diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog
index de61ebb79c0..14138f18820 100644
--- a/lisp/gnus/ChangeLog
+++ b/lisp/gnus/ChangeLog
@@ -1,3 +1,38 @@
12001-10-30 ShengHuo ZHU <zsh@cs.rochester.edu>
2
3 * mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with
4 the Gnus CVS.
5
6 * mm-util.el (mm-mime-mule-charset-alist): Move down and call
7 mm-coding-system-p. Don't correct it only in XEmacs.
8 (mm-charset-to-coding-system): Use mm-coding-system-p and
9 mm-get-coding-system-list.
10 (mm-emacs-mule, mm-mule4-p): New.
11 (mm-enable-multibyte, mm-disable-multibyte,
12 mm-enable-multibyte-mule4, mm-disable-multibyte-mule4,
13 mm-with-unibyte-current-buffer,
14 mm-with-unibyte-current-buffer-mule4): Use them.
15 (mm-find-mime-charset-region): Treat iso-2022-jp.
16
17 From Dave Love <fx@gnu.org>:
18
19 * mm-util.el (mm-mime-mule-charset-alist): Make it correct by
20 construction.
21 (mm-charset-synonym-alist): Remove windows-125[02]. Make other
22 entries conditional on not having a coding system defined for
23 them.
24 (mm-mule-charset-to-mime-charset): Use
25 find-coding-systems-for-charsets if defined.
26 (mm-charset-to-coding-system): Don't use
27 mm-get-coding-system-list. Look in mm-charset-synonym-alist
28 later. Add last resort search of coding systems.
29 (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4)
30 (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like
31 Mule 4.
32 (mm-find-mime-charset-region): Re-write.
33 (mm-with-unibyte-current-buffer): Restore buffer as well as
34 multibyteness.
35
12001-10-30 Simon Josefsson <jas@extundo.com> 362001-10-30 Simon Josefsson <jas@extundo.com>
2 37
3 * nnimap.el (nnimap-date-days-ago): Defeat locale. 38 * nnimap.el (nnimap-date-days-ago): Defeat locale.
diff --git a/lisp/gnus/mm-util.el b/lisp/gnus/mm-util.el
index 95ab4f6291f..69823c43d1c 100644
--- a/lisp/gnus/mm-util.el
+++ b/lisp/gnus/mm-util.el
@@ -1,4 +1,4 @@
1;;; mm-util.el --- utility functions for MIME things 1;;; mm-util.el --- Utility functions for Mule and low level things
2;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. 2;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 3
4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
@@ -27,63 +27,6 @@
27(eval-when-compile (require 'cl)) 27(eval-when-compile (require 'cl))
28(require 'mail-prsvr) 28(require 'mail-prsvr)
29 29
30(defun mm-coding-system-p (sym)
31 "Return non-nil if SYM is a coding system."
32 (or (and (fboundp 'coding-system-p) (coding-system-p sym))
33 (memq sym (mm-get-coding-system-list))))
34
35(defvar mm-mime-mule-charset-alist
36 `((us-ascii ascii)
37 (iso-8859-1 latin-iso8859-1)
38 (iso-8859-2 latin-iso8859-2)
39 (iso-8859-3 latin-iso8859-3)
40 (iso-8859-4 latin-iso8859-4)
41 (iso-8859-5 cyrillic-iso8859-5)
42 ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
43 ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
44 ;; charset is koi8-r, not iso-8859-5.
45 (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
46 (iso-8859-6 arabic-iso8859-6)
47 (iso-8859-7 greek-iso8859-7)
48 (iso-8859-8 hebrew-iso8859-8)
49 (iso-8859-9 latin-iso8859-9)
50 (iso-8859-14 latin-iso8859-14)
51 (iso-8859-15 latin-iso8859-15)
52 (viscii vietnamese-viscii-lower)
53 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
54 (euc-kr korean-ksc5601)
55 (gb2312 chinese-gb2312)
56 (big5 chinese-big5-1 chinese-big5-2)
57 (tibetan tibetan)
58 (thai-tis620 thai-tis620)
59 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
60 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
61 latin-jisx0201 japanese-jisx0208-1978
62 chinese-gb2312 japanese-jisx0208
63 korean-ksc5601 japanese-jisx0212
64 katakana-jisx0201)
65 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
66 latin-jisx0201 japanese-jisx0208-1978
67 chinese-gb2312 japanese-jisx0208
68 korean-ksc5601 japanese-jisx0212
69 chinese-cns11643-1 chinese-cns11643-2)
70 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
71 cyrillic-iso8859-5 greek-iso8859-7
72 latin-jisx0201 japanese-jisx0208-1978
73 chinese-gb2312 japanese-jisx0208
74 korean-ksc5601 japanese-jisx0212
75 chinese-cns11643-1 chinese-cns11643-2
76 chinese-cns11643-3 chinese-cns11643-4
77 chinese-cns11643-5 chinese-cns11643-6
78 chinese-cns11643-7)
79 ;; utf-8 comes either from Mule-UCS or Mule 5+.
80 ,@(if (mm-coding-system-p 'utf-8)
81 (list (cons 'utf-8 (delete 'ascii
82 (coding-system-get
83 'mule-utf-8
84 'safe-charsets))))))
85 "Alist of MIME-charset/MULE-charsets.")
86
87(eval-and-compile 30(eval-and-compile
88 (mapcar 31 (mapcar
89 (lambda (elem) 32 (lambda (elem)
@@ -104,12 +47,6 @@
104 (make-char 47 (make-char
105 . (lambda (charset int) 48 . (lambda (charset int)
106 (int-to-char int))) 49 (int-to-char int)))
107 (read-coding-system
108 . (lambda (prompt)
109 "Prompt the user for a coding system."
110 (completing-read
111 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
112 mm-mime-mule-charset-alist))))
113 (read-charset 50 (read-charset
114 . (lambda (prompt) 51 . (lambda (prompt)
115 "Return a charset." 52 "Return a charset."
@@ -119,40 +56,85 @@
119 (mapcar (lambda (e) (list (symbol-name (car e)))) 56 (mapcar (lambda (e) (list (symbol-name (car e))))
120 mm-mime-mule-charset-alist) 57 mm-mime-mule-charset-alist)
121 nil t)))) 58 nil t))))
59 (subst-char-in-string
60 . (lambda (from to string) ;; stolen (and renamed) from nnheader.el
61 "Replace characters in STRING from FROM to TO."
62 (let ((string (substring string 0)) ;Copy string.
63 (len (length string))
64 (idx 0))
65 ;; Replace all occurrences of FROM with TO.
66 (while (< idx len)
67 (when (= (aref string idx) from)
68 (aset string idx to))
69 (setq idx (1+ idx)))
70 string)))
122 (string-as-unibyte . identity) 71 (string-as-unibyte . identity)
123 (multibyte-string-p . ignore) 72 (string-as-multibyte . identity)
124 ))) 73 (multibyte-string-p . ignore))))
125 74
126(eval-and-compile 75(eval-and-compile
127 (defalias 'mm-char-or-char-int-p 76 (defalias 'mm-char-or-char-int-p
128 (cond 77 (cond
129 ((fboundp 'char-or-char-int-p) 'char-or-char-int-p) 78 ((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
130 ((fboundp 'char-valid-p) 'char-valid-p) 79 ((fboundp 'char-valid-p) 'char-valid-p)
131 (t 'identity)))) 80 (t 'identity))))
132 81
82(eval-and-compile
83 (defalias 'mm-read-coding-system
84 (cond
85 ((fboundp 'read-coding-system)
86 (if (and (featurep 'xemacs)
87 (<= (string-to-number emacs-version) 21.1))
88 (lambda (prompt &optional default-coding-system)
89 (read-coding-system prompt))
90 'read-coding-system))
91 (t (lambda (prompt &optional default-coding-system)
92 "Prompt the user for a coding system."
93 (completing-read
94 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
95 mm-mime-mule-charset-alist)))))))
96
133(defvar mm-coding-system-list nil) 97(defvar mm-coding-system-list nil)
134(defun mm-get-coding-system-list () 98(defun mm-get-coding-system-list ()
135 "Get the coding system list." 99 "Get the coding system list."
136 (or mm-coding-system-list 100 (or mm-coding-system-list
137 (setq mm-coding-system-list (mm-coding-system-list)))) 101 (setq mm-coding-system-list (mm-coding-system-list))))
138 102
103(defun mm-coding-system-p (sym)
104 "Return non-nil if SYM is a coding system."
105 (or (and (fboundp 'coding-system-p) (coding-system-p sym))
106 (memq sym (mm-get-coding-system-list))))
107
139(defvar mm-charset-synonym-alist 108(defvar mm-charset-synonym-alist
140 `((big5 . cn-big5) 109 `(
141 (gb2312 . cn-gb-2312) 110 ;; Perfectly fine? A valid MIME name, anyhow.
111 ,(unless (mm-coding-system-p 'big5)
112 '(big5 . cn-big5))
113 ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
114 ,(unless (mm-coding-system-p 'x-ctext)
115 '(x-ctext . ctext))
116 ;; Apparently not defined in Emacs 20, but is a valid MIME name.
117 ,(unless (mm-coding-system-p 'gb2312)
118 '(gb2312 . cn-gb-2312))
142 ;; Windows-1252 is actually a superset of Latin-1. See also 119 ;; Windows-1252 is actually a superset of Latin-1. See also
143 ;; `gnus-article-dumbquotes-map'. 120 ;; `gnus-article-dumbquotes-map'.
144 ,(unless (mm-coding-system-p 'windows-1252) ; should be defined eventually 121 ;;,(unless (mm-coding-system-p 'windows-1252)
145 '(windows-1252 . iso-8859-1)) 122 ; should be defined eventually
123 ;; '(windows-1252 . iso-8859-1))
124 ;; ISO-8859-15 is very similar to ISO-8859-1.
125 ;;,(unless (mm-coding-system-p 'iso-8859-15) ; Emacs 21 defines it.
126 ;; '(iso-8859-15 . iso-8859-1))
146 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft 127 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
147 ;; Outlook users in Czech republic. Use this to allow reading of their 128 ;; Outlook users in Czech republic. Use this to allow reading of their
148 ;; e-mails. cp1250 should be defined by M-x codepage-setup. 129 ;; e-mails. cp1250 should be defined by M-x codepage-setup.
149 ,(unless (mm-coding-system-p 'windows-1250) ; should be defined eventually 130 ;;,(unless (mm-coding-system-p 'windows-1250)
150 '(windows-1250 . cp1250)) 131 ; should be defined eventually
151 (x-ctext . ctext)) 132 ;; '(windows-1250 . cp1250))
133 )
152 "A mapping from invalid charset names to the real charset names.") 134 "A mapping from invalid charset names to the real charset names.")
153 135
154(defvar mm-binary-coding-system 136(defvar mm-binary-coding-system
155 (cond 137 (cond
156 ((mm-coding-system-p 'binary) 'binary) 138 ((mm-coding-system-p 'binary) 'binary)
157 ((mm-coding-system-p 'no-conversion) 'no-conversion) 139 ((mm-coding-system-p 'no-conversion) 'no-conversion)
158 (t nil)) 140 (t nil))
@@ -169,30 +151,113 @@
169 "Text coding system for write.") 151 "Text coding system for write.")
170 152
171(defvar mm-auto-save-coding-system 153(defvar mm-auto-save-coding-system
172 (cond 154 (cond
173 ((mm-coding-system-p 'emacs-mule) 155 ((mm-coding-system-p 'emacs-mule)
174 (if (memq system-type '(windows-nt ms-dos ms-windows)) 156 (if (memq system-type '(windows-nt ms-dos ms-windows))
175 (if (mm-coding-system-p 'emacs-mule-dos) 157 (if (mm-coding-system-p 'emacs-mule-dos)
176 'emacs-mule-dos mm-binary-coding-system) 158 'emacs-mule-dos mm-binary-coding-system)
177 'emacs-mule)) 159 'emacs-mule))
178 ((mm-coding-system-p 'escape-quoted) 'escape-quoted) 160 ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
179 (t mm-binary-coding-system)) 161 (t mm-binary-coding-system))
180 "Coding system of auto save file.") 162 "Coding system of auto save file.")
181 163
164(defvar mm-universal-coding-system mm-auto-save-coding-system
165 "The universal Coding system.")
166
167;; Fixme: some of the cars here aren't valid MIME charsets. That
168;; should only matter with XEmacs, though.
169(defvar mm-mime-mule-charset-alist
170 `((us-ascii ascii)
171 (iso-8859-1 latin-iso8859-1)
172 (iso-8859-2 latin-iso8859-2)
173 (iso-8859-3 latin-iso8859-3)
174 (iso-8859-4 latin-iso8859-4)
175 (iso-8859-5 cyrillic-iso8859-5)
176 ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
177 ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
178 ;; charset is koi8-r, not iso-8859-5.
179 (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
180 (iso-8859-6 arabic-iso8859-6)
181 (iso-8859-7 greek-iso8859-7)
182 (iso-8859-8 hebrew-iso8859-8)
183 (iso-8859-9 latin-iso8859-9)
184 (iso-8859-14 latin-iso8859-14)
185 (iso-8859-15 latin-iso8859-15)
186 (viscii vietnamese-viscii-lower)
187 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
188 (euc-kr korean-ksc5601)
189 (gb2312 chinese-gb2312)
190 (big5 chinese-big5-1 chinese-big5-2)
191 (tibetan tibetan)
192 (thai-tis620 thai-tis620)
193 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
194 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
195 latin-jisx0201 japanese-jisx0208-1978
196 chinese-gb2312 japanese-jisx0208
197 korean-ksc5601 japanese-jisx0212
198 katakana-jisx0201)
199 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
200 latin-jisx0201 japanese-jisx0208-1978
201 chinese-gb2312 japanese-jisx0208
202 korean-ksc5601 japanese-jisx0212
203 chinese-cns11643-1 chinese-cns11643-2)
204 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
205 cyrillic-iso8859-5 greek-iso8859-7
206 latin-jisx0201 japanese-jisx0208-1978
207 chinese-gb2312 japanese-jisx0208
208 korean-ksc5601 japanese-jisx0212
209 chinese-cns11643-1 chinese-cns11643-2
210 chinese-cns11643-3 chinese-cns11643-4
211 chinese-cns11643-5 chinese-cns11643-6
212 chinese-cns11643-7)
213 ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case
214 (charsetp 'unicode-a)
215 (not (mm-coding-system-p 'mule-utf-8)))
216 '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)
217 ;; If we have utf-8 we're in Mule 5+.
218 (append '(utf-8)
219 (delete 'ascii
220 (coding-system-get 'mule-utf-8 'safe-charsets)))))
221 "Alist of MIME-charset/MULE-charsets.")
222
223;; Correct by construction, but should be unnecessary:
224;; XEmacs hates it.
225(when (and (not (featurep 'xemacs))
226 (fboundp 'coding-system-list)
227 (fboundp 'sort-coding-systems))
228 (setq mm-mime-mule-charset-alist
229 (apply
230 'nconc
231 (mapcar
232 (lambda (cs)
233 (when (and (coding-system-get cs 'mime-charset)
234 (not (eq t (coding-system-get cs 'safe-charsets))))
235 (list (cons (coding-system-get cs 'mime-charset)
236 (delq 'ascii
237 (coding-system-get cs 'safe-charsets))))))
238 (sort-coding-systems (coding-system-list 'base-only))))))
239
182;;; Internal variables: 240;;; Internal variables:
183 241
184;;; Functions: 242;;; Functions:
185 243
186(defun mm-mule-charset-to-mime-charset (charset) 244(defun mm-mule-charset-to-mime-charset (charset)
187 "Return the MIME charset corresponding to the given Mule CHARSET." 245 "Return the MIME charset corresponding to the given Mule CHARSET."
188 (let ((alist mm-mime-mule-charset-alist) 246 (if (fboundp 'find-coding-systems-for-charsets)
189 out) 247 (let (mime)
190 (while alist 248 (dolist (cs (find-coding-systems-for-charsets (list charset)))
191 (when (memq charset (cdar alist)) 249 (unless mime
192 (setq out (caar alist) 250 (when cs
193 alist nil)) 251 (setq mime (coding-system-get cs 'mime-charset)))))
194 (pop alist)) 252 mime)
195 out)) 253 (let ((alist mm-mime-mule-charset-alist)
254 out)
255 (while alist
256 (when (memq charset (cdar alist))
257 (setq out (caar alist)
258 alist nil))
259 (pop alist))
260 out)))
196 261
197(defun mm-charset-to-coding-system (charset &optional lbt) 262(defun mm-charset-to-coding-system (charset &optional lbt)
198 "Return coding-system corresponding to CHARSET. 263 "Return coding-system corresponding to CHARSET.
@@ -201,9 +266,6 @@ If optional argument LBT (`unix', `dos' or `mac') is specified, it is
201used as the line break code type of the coding system." 266used as the line break code type of the coding system."
202 (when (stringp charset) 267 (when (stringp charset)
203 (setq charset (intern (downcase charset)))) 268 (setq charset (intern (downcase charset))))
204 (setq charset
205 (or (cdr (assq charset mm-charset-synonym-alist))
206 charset))
207 (when lbt 269 (when lbt
208 (setq charset (intern (format "%s-%s" charset lbt)))) 270 (setq charset (intern (format "%s-%s" charset lbt))))
209 (cond 271 (cond
@@ -215,58 +277,73 @@ used as the line break code type of the coding system."
215 'ascii) 277 'ascii)
216 ;; Check to see whether we can handle this charset. (This depends 278 ;; Check to see whether we can handle this charset. (This depends
217 ;; on there being some coding system matching each `mime-charset' 279 ;; on there being some coding system matching each `mime-charset'
218 ;; coding sysytem property defined, as there should be.) 280 ;; property defined, as there should be.)
219 ((memq charset (mm-get-coding-system-list)) 281 ((and (mm-coding-system-p charset)
282;;; Doing this would potentially weed out incorrect charsets.
283;;; charset
284;;; (eq charset (coding-system-get charset 'mime-charset))
285 )
220 charset) 286 charset)
221 ;; Nope. 287 ;; Translate invalid charsets.
222 (t 288 ((mm-coding-system-p (setq charset
223 nil))) 289 (cdr (assq charset
224 290 mm-charset-synonym-alist))))
225(if (fboundp 'subst-char-in-string) 291 charset)
226 (defsubst mm-replace-chars-in-string (string from to) 292 ;; Last resort: search the coding system list for entries which
227 (subst-char-in-string from to string)) 293 ;; have the right mime-charset in case the canonical name isn't
228 (defun mm-replace-chars-in-string (string from to) 294 ;; defined (though it should be).
229 "Replace characters in STRING from FROM to TO." 295 ((let (cs)
230 (let ((string (substring string 0)) ;Copy string. 296 ;; mm-get-coding-system-list returns a list of cs without lbt.
231 (len (length string)) 297 ;; Do we need -lbt?
232 (idx 0)) 298 (dolist (c (mm-get-coding-system-list))
233 ;; Replace all occurrences of FROM with TO. 299 (if (and (null cs)
234 (while (< idx len) 300 (eq charset (coding-system-get c 'mime-charset)))
235 (when (= (aref string idx) from) 301 (setq cs c)))
236 (aset string idx to)) 302 cs))))
237 (setq idx (1+ idx))) 303
238 string))) 304(defsubst mm-replace-chars-in-string (string from to)
239 305 (mm-subst-char-in-string from to string))
240(defsubst mm-enable-multibyte () 306
241 "Set the multibyte flag of the current buffer. 307(eval-and-compile
308 (defvar mm-emacs-mule (and (not (featurep 'xemacs))
309 (boundp 'default-enable-multibyte-characters)
310 default-enable-multibyte-characters
311 (fboundp 'set-buffer-multibyte))
312 "Emacs mule.")
313
314 (defvar mm-mule4-p (and mm-emacs-mule
315 (fboundp 'charsetp)
316 (not (charsetp 'eight-bit-control)))
317 "Mule version 4.")
318
319 (if mm-emacs-mule
320 (defun mm-enable-multibyte ()
321 "Set the multibyte flag of the current buffer.
242Only do this if the default value of `enable-multibyte-characters' is 322Only do this if the default value of `enable-multibyte-characters' is
243non-nil. This is a no-op in XEmacs." 323non-nil. This is a no-op in XEmacs."
244 (when (and (fboundp 'set-buffer-multibyte) 324 (set-buffer-multibyte t))
245 (boundp 'enable-multibyte-characters) 325 (defalias 'mm-enable-multibyte 'ignore))
246 (default-value 'enable-multibyte-characters))
247 (set-buffer-multibyte t)))
248 326
249(defsubst mm-disable-multibyte () 327 (if mm-emacs-mule
250 "Unset the multibyte flag of in the current buffer. 328 (defun mm-disable-multibyte ()
329 "Unset the multibyte flag of in the current buffer.
251This is a no-op in XEmacs." 330This is a no-op in XEmacs."
252 (when (fboundp 'set-buffer-multibyte) 331 (set-buffer-multibyte nil))
253 (set-buffer-multibyte nil))) 332 (defalias 'mm-disable-multibyte 'ignore))
254 333
255(defsubst mm-enable-multibyte-mule4 () 334 (if mm-mule4-p
256 "Enable multibyte in the current buffer. 335 (defun mm-enable-multibyte-mule4 ()
336 "Enable multibyte in the current buffer.
257Only used in Emacs Mule 4." 337Only used in Emacs Mule 4."
258 (when (and (fboundp 'set-buffer-multibyte) 338 (set-buffer-multibyte t))
259 (boundp 'enable-multibyte-characters) 339 (defalias 'mm-enable-multibyte-mule4 'ignore))
260 (default-value 'enable-multibyte-characters) 340
261 (not (charsetp 'eight-bit-control))) 341 (if mm-mule4-p
262 (set-buffer-multibyte t))) 342 (defun mm-disable-multibyte-mule4 ()
263 343 "Disable multibyte in the current buffer.
264(defsubst mm-disable-multibyte-mule4 ()
265 "Disable multibyte in the current buffer.
266Only used in Emacs Mule 4." 344Only used in Emacs Mule 4."
267 (when (and (fboundp 'set-buffer-multibyte) 345 (set-buffer-multibyte nil))
268 (not (charsetp 'eight-bit-control))) 346 (defalias 'mm-disable-multibyte-mule4 'ignore)))
269 (set-buffer-multibyte nil)))
270 347
271(defun mm-preferred-coding-system (charset) 348(defun mm-preferred-coding-system (charset)
272 ;; A typo in some Emacs versions. 349 ;; A typo in some Emacs versions.
@@ -294,10 +371,10 @@ If the charset is `composition', return the actual one."
294 (progn 371 (progn
295 (setq mail-parse-mule-charset 372 (setq mail-parse-mule-charset
296 (and (boundp 'current-language-environment) 373 (and (boundp 'current-language-environment)
297 (car (last 374 (car (last
298 (assq 'charset 375 (assq 'charset
299 (assoc current-language-environment 376 (assoc current-language-environment
300 language-info-alist)))))) 377 language-info-alist))))))
301 (if (or (not mail-parse-mule-charset) 378 (if (or (not mail-parse-mule-charset)
302 (eq mail-parse-mule-charset 'ascii)) 379 (eq mail-parse-mule-charset 'ascii))
303 (setq mail-parse-mule-charset 380 (setq mail-parse-mule-charset
@@ -309,6 +386,8 @@ If the charset is `composition', return the actual one."
309 386
310(defun mm-mime-charset (charset) 387(defun mm-mime-charset (charset)
311 "Return the MIME charset corresponding to the given Mule CHARSET." 388 "Return the MIME charset corresponding to the given Mule CHARSET."
389 (if (eq charset 'unknown)
390 (error "The message contains non-printable characters, please use attachment"))
312 (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property)) 391 (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
313 ;; This exists in Emacs 20. 392 ;; This exists in Emacs 20.
314 (or 393 (or
@@ -317,6 +396,7 @@ If the charset is `composition', return the actual one."
317 (mm-preferred-coding-system charset) 'mime-charset)) 396 (mm-preferred-coding-system charset) 'mime-charset))
318 (and (eq charset 'ascii) 397 (and (eq charset 'ascii)
319 'us-ascii) 398 'us-ascii)
399 (mm-preferred-coding-system charset)
320 (mm-mule-charset-to-mime-charset charset)) 400 (mm-mule-charset-to-mime-charset charset))
321 ;; This is for XEmacs. 401 ;; This is for XEmacs.
322 (mm-mule-charset-to-mime-charset charset))) 402 (mm-mule-charset-to-mime-charset charset)))
@@ -330,21 +410,8 @@ If the charset is `composition', return the actual one."
330 (setq result (cons head result))) 410 (setq result (cons head result)))
331 (nreverse result))) 411 (nreverse result)))
332 412
333(defun mm-find-mime-charset-region (b e) 413;; It's not clear whether this is supposed to mean the global or local
334 "Return the MIME charsets needed to encode the region between B and E." 414;; setting. I think it's used inconsistently. -- fx
335 (let ((charsets (mapcar 'mm-mime-charset
336 (delq 'ascii
337 (mm-find-charset-region b e)))))
338 (when (memq 'iso-2022-jp-2 charsets)
339 (setq charsets (delq 'iso-2022-jp charsets)))
340 (setq charsets (mm-delete-duplicates charsets))
341 (if (and (> (length charsets) 1)
342 (fboundp 'find-coding-systems-region)
343 (let ((cs (find-coding-systems-region b e)))
344 (or (memq 'utf-8 cs) (memq 'mule-utf-8 cs))))
345 '(utf-8)
346 charsets)))
347
348(defsubst mm-multibyte-p () 415(defsubst mm-multibyte-p ()
349 "Say whether multibyte is enabled." 416 "Say whether multibyte is enabled."
350 (if (and (not (featurep 'xemacs)) 417 (if (and (not (featurep 'xemacs))
@@ -352,6 +419,39 @@ If the charset is `composition', return the actual one."
352 enable-multibyte-characters 419 enable-multibyte-characters
353 (featurep 'mule))) 420 (featurep 'mule)))
354 421
422(defun mm-find-mime-charset-region (b e)
423 "Return the MIME charsets needed to encode the region between B and E.
424Nil means ASCII, a single-element list represents an appropriate MIME
425charset, and a longer list means no appropriate charset."
426 ;; The return possibilities of this function are a mess...
427 (or (and
428 (mm-multibyte-p)
429 (fboundp 'find-coding-systems-region)
430 ;; Find the mime-charset of the most preferred coding
431 ;; system that has one.
432 (let ((systems (find-coding-systems-region b e))
433 result)
434 ;; Fixme: The `mime-charset' (`x-ctext') of `compound-text'
435 ;; is not in the IANA list.
436 (setq systems (delq 'compound-text systems))
437 (unless (equal systems '(undecided))
438 (while systems
439 (let ((cs (coding-system-get (pop systems) 'mime-charset)))
440 (if cs
441 (setq systems nil
442 result (list cs))))))
443 result))
444 ;; Otherwise we're not multibyte, XEmacs or a single coding
445 ;; system won't cover it.
446 (let ((charsets
447 (mm-delete-duplicates
448 (mapcar 'mm-mime-charset
449 (delq 'ascii
450 (mm-find-charset-region b e))))))
451 (if (memq 'iso-2022-jp-2 charsets)
452 (delq 'iso-2022-jp charsets)
453 charsets))))
454
355(defmacro mm-with-unibyte-buffer (&rest forms) 455(defmacro mm-with-unibyte-buffer (&rest forms)
356 "Create a temporary buffer, and evaluate FORMS there like `progn'. 456 "Create a temporary buffer, and evaluate FORMS there like `progn'.
357Use unibyte mode for this." 457Use unibyte mode for this."
@@ -364,15 +464,18 @@ Use unibyte mode for this."
364 "Evaluate FORMS with current current buffer temporarily made unibyte. 464 "Evaluate FORMS with current current buffer temporarily made unibyte.
365Also bind `default-enable-multibyte-characters' to nil. 465Also bind `default-enable-multibyte-characters' to nil.
366Equivalent to `progn' in XEmacs" 466Equivalent to `progn' in XEmacs"
367 (let ((multibyte (make-symbol "multibyte"))) 467 (let ((multibyte (make-symbol "multibyte"))
368 `(if (fboundp 'set-buffer-multibyte) 468 (buffer (make-symbol "buffer")))
369 (let ((,multibyte enable-multibyte-characters)) 469 `(if mm-emacs-mule
470 (let ((,multibyte enable-multibyte-characters)
471 (,buffer (current-buffer)))
370 (unwind-protect 472 (unwind-protect
371 (let (default-enable-multibyte-characters) 473 (let (default-enable-multibyte-characters)
372 (set-buffer-multibyte nil) 474 (set-buffer-multibyte nil)
373 ,@forms) 475 ,@forms)
476 (set-buffer ,buffer)
374 (set-buffer-multibyte ,multibyte))) 477 (set-buffer-multibyte ,multibyte)))
375 (progn 478 (let (default-enable-multibyte-characters)
376 ,@forms)))) 479 ,@forms))))
377(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) 480(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
378(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) 481(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
@@ -380,22 +483,19 @@ Equivalent to `progn' in XEmacs"
380(defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms) 483(defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms)
381 "Evaluate FORMS there like `progn' in current buffer. 484 "Evaluate FORMS there like `progn' in current buffer.
382Mule4 only." 485Mule4 only."
383 (let ((multibyte (make-symbol "multibyte"))) 486 (let ((multibyte (make-symbol "multibyte"))
384 `(if (or (featurep 'xemacs) 487 (buffer (make-symbol "buffer")))
385 (not (fboundp 'set-buffer-multibyte)) 488 `(if mm-mule4-p
386 (charsetp 'eight-bit-control)) ;; For Emacs Mule 4 only. 489 (let ((,multibyte enable-multibyte-characters)
387 (progn 490 (,buffer (current-buffer)))
388 ,@forms) 491 (unwind-protect
389 (let ((,multibyte (default-value 'enable-multibyte-characters))) 492 (let (default-enable-multibyte-characters)
390 (unwind-protect 493 (set-buffer-multibyte nil)
391 (let ((buffer-file-coding-system mm-binary-coding-system) 494 ,@forms)
392 (coding-system-for-read mm-binary-coding-system) 495 (set-buffer ,buffer)
393 (coding-system-for-write mm-binary-coding-system)) 496 (set-buffer-multibyte ,multibyte)))
394 (set-buffer-multibyte nil) 497 (let (default-enable-multibyte-characters)
395 (setq-default enable-multibyte-characters nil) 498 ,@forms))))
396 ,@forms)
397 (setq-default enable-multibyte-characters ,multibyte)
398 (set-buffer-multibyte ,multibyte))))))
399(put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0) 499(put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0)
400(put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body)) 500(put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body))
401 501
@@ -410,9 +510,14 @@ Mule4 only."
410 "Return a list of Emacs charsets in the region B to E." 510 "Return a list of Emacs charsets in the region B to E."
411 (cond 511 (cond
412 ((and (mm-multibyte-p) 512 ((and (mm-multibyte-p)
413 (fboundp 'find-charset-region)) 513 (fboundp 'find-charset-region))
414 ;; Remove composition since the base charsets have been included. 514 ;; Remove composition since the base charsets have been included.
415 (delq 'composition (find-charset-region b e))) 515 ;; Remove eight-bit-*, treat them as ascii.
516 (let ((css (find-charset-region b e)))
517 (mapcar (lambda (cs) (setq css (delq cs css)))
518 '(composition eight-bit-control eight-bit-graphic
519 control-1))
520 css))
416 (t 521 (t
417 ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit. 522 ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
418 (save-excursion 523 (save-excursion
@@ -425,8 +530,8 @@ Mule4 only."
425 (let (charset) 530 (let (charset)
426 (setq charset 531 (setq charset
427 (and (boundp 'current-language-environment) 532 (and (boundp 'current-language-environment)
428 (car (last (assq 'charset 533 (car (last (assq 'charset
429 (assoc current-language-environment 534 (assoc current-language-environment
430 language-info-alist)))))) 535 language-info-alist))))))
431 (if (eq charset 'ascii) (setq charset nil)) 536 (if (eq charset 'ascii) (setq charset nil))
432 (or charset 537 (or charset
@@ -476,15 +581,15 @@ If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers.
476 (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) 581 (auto-mode-alist (if inhibit nil (mm-auto-mode-alist)))
477 (default-major-mode 'fundamental-mode) 582 (default-major-mode 'fundamental-mode)
478 (enable-local-variables nil) 583 (enable-local-variables nil)
479 (after-insert-file-functions nil) 584 (after-insert-file-functions nil)
480 (enable-local-eval nil) 585 (enable-local-eval nil)
481 (find-file-hooks nil) 586 (find-file-hooks nil)
482 (inhibit-file-name-operation (if inhibit 587 (inhibit-file-name-operation (if inhibit
483 'insert-file-contents 588 'insert-file-contents
484 inhibit-file-name-operation)) 589 inhibit-file-name-operation))
485 (inhibit-file-name-handlers 590 (inhibit-file-name-handlers
486 (if inhibit 591 (if inhibit
487 (append mm-inhibit-file-name-handlers 592 (append mm-inhibit-file-name-handlers
488 inhibit-file-name-handlers) 593 inhibit-file-name-handlers)
489 inhibit-file-name-handlers))) 594 inhibit-file-name-handlers)))
490 (insert-file-contents filename visit beg end replace))) 595 (insert-file-contents filename visit beg end replace)))
@@ -497,37 +602,47 @@ saying what text to write.
497Optional fourth argument specifies the coding system to use when 602Optional fourth argument specifies the coding system to use when
498encoding the file. 603encoding the file.
499If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers." 604If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers."
500 (let ((coding-system-for-write 605 (let ((coding-system-for-write
501 (or codesys mm-text-coding-system-for-write 606 (or codesys mm-text-coding-system-for-write
502 mm-text-coding-system)) 607 mm-text-coding-system))
503 (inhibit-file-name-operation (if inhibit 608 (inhibit-file-name-operation (if inhibit
504 'append-to-file 609 'append-to-file
505 inhibit-file-name-operation)) 610 inhibit-file-name-operation))
506 (inhibit-file-name-handlers 611 (inhibit-file-name-handlers
507 (if inhibit 612 (if inhibit
508 (append mm-inhibit-file-name-handlers 613 (append mm-inhibit-file-name-handlers
509 inhibit-file-name-handlers) 614 inhibit-file-name-handlers)
510 inhibit-file-name-handlers))) 615 inhibit-file-name-handlers)))
511 (append-to-file start end filename))) 616 (append-to-file start end filename)))
512 617
513(defun mm-write-region (start end filename &optional append visit lockname 618(defun mm-write-region (start end filename &optional append visit lockname
514 coding-system inhibit) 619 coding-system inhibit)
515 620
516 "Like `write-region'. 621 "Like `write-region'.
517If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers." 622If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers."
518 (let ((coding-system-for-write 623 (let ((coding-system-for-write
519 (or coding-system mm-text-coding-system-for-write 624 (or coding-system mm-text-coding-system-for-write
520 mm-text-coding-system)) 625 mm-text-coding-system))
521 (inhibit-file-name-operation (if inhibit 626 (inhibit-file-name-operation (if inhibit
522 'write-region 627 'write-region
523 inhibit-file-name-operation)) 628 inhibit-file-name-operation))
524 (inhibit-file-name-handlers 629 (inhibit-file-name-handlers
525 (if inhibit 630 (if inhibit
526 (append mm-inhibit-file-name-handlers 631 (append mm-inhibit-file-name-handlers
527 inhibit-file-name-handlers) 632 inhibit-file-name-handlers)
528 inhibit-file-name-handlers))) 633 inhibit-file-name-handlers)))
529 (write-region start end filename append visit lockname))) 634 (write-region start end filename append visit lockname)))
530 635
636(defun mm-image-load-path (&optional package)
637 (let (dir result)
638 (dolist (path load-path (nreverse result))
639 (if (file-directory-p
640 (setq dir (concat (file-name-directory
641 (directory-file-name path))
642 "etc/" (or package "gnus/"))))
643 (push dir result))
644 (push path result))))
645
531(provide 'mm-util) 646(provide 'mm-util)
532 647
533;;; mm-util.el ends here 648;;; mm-util.el ends here
diff --git a/lisp/gnus/nnslashdot.el b/lisp/gnus/nnslashdot.el
index 246a3613a81..8290b2c73b1 100644
--- a/lisp/gnus/nnslashdot.el
+++ b/lisp/gnus/nnslashdot.el
@@ -1,5 +1,5 @@
1;;; nnslashdot.el --- interfacing with Slashdot 1;;; nnslashdot.el --- interfacing with Slashdot
2;; Copyright (C) 1999, 2000 Free Software Foundation, Inc. 2;; Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 3
4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5;; Keywords: news 5;; Keywords: news
@@ -57,6 +57,9 @@
57 "http://slashdot.org/article.pl?sid=%s&mode=nocomment" 57 "http://slashdot.org/article.pl?sid=%s&mode=nocomment"
58 "Where nnslashdot will fetch the article from.") 58 "Where nnslashdot will fetch the article from.")
59 59
60(defvoo nnslashdot-backslash-url "http://slashdot.org/slashdot.xml"
61 "Where nnslashdot will fetch the stories from.")
62
60(defvoo nnslashdot-threshold -1 63(defvoo nnslashdot-threshold -1
61 "The article threshold.") 64 "The article threshold.")
62 65
@@ -86,19 +89,17 @@
86 (nnslashdot-possibly-change-server group server) 89 (nnslashdot-possibly-change-server group server)
87 (condition-case why 90 (condition-case why
88 (unless gnus-nov-is-evil 91 (unless gnus-nov-is-evil
89 (if nnslashdot-threaded 92 (nnslashdot-retrieve-headers-1 articles group))
90 (nnslashdot-threaded-retrieve-headers articles group)
91 (nnslashdot-sane-retrieve-headers articles group)))
92 (search-failed (nnslashdot-lose why)))) 93 (search-failed (nnslashdot-lose why))))
93 94
94(deffoo nnslashdot-threaded-retrieve-headers (articles group) 95(deffoo nnslashdot-retrieve-headers-1 (articles group)
95 (let ((last (car (last articles))) 96 (let* ((last (car (last articles)))
96 (did nil) 97 (start (if nnslashdot-threaded 1 (pop articles)))
97 (start 1) 98 (entry (assoc group nnslashdot-groups))
98 (sid (caddr (assoc group nnslashdot-groups))) 99 (sid (nth 2 entry))
99 (first-comments t) 100 (first-comments t)
100 (startats '(1)) 101 headers article subject score from date lines parent point cid
101 headers article subject score from date lines parent point s) 102 s startats changed)
102 (save-excursion 103 (save-excursion
103 (set-buffer nnslashdot-buffer) 104 (set-buffer nnslashdot-buffer)
104 (let ((case-fold-search t)) 105 (let ((case-fold-search t))
@@ -107,10 +108,10 @@
107 (nnweb-insert (format nnslashdot-article-url 108 (nnweb-insert (format nnslashdot-article-url
108 (nnslashdot-sid-strip sid)) t) 109 (nnslashdot-sid-strip sid)) t)
109 (goto-char (point-min)) 110 (goto-char (point-min))
110 (search-forward "Posted by ") 111 (re-search-forward "Posted by[ \t\r\n]+")
111 (when (looking-at "<a[^>]+>\\([^<]+\\)") 112 (when (looking-at "\\(<a[^>]+>\\)?[ \t\r\n]*\\([^<\r\n]+\\)")
112 (setq from (nnweb-decode-entities-string (match-string 1)))) 113 (setq from (nnweb-decode-entities-string (match-string 2))))
113 (search-forward " on ") 114 (search-forward "on ")
114 (setq date (nnslashdot-date-to-date 115 (setq date (nnslashdot-date-to-date
115 (buffer-substring (point) (1- (search-forward "<"))))) 116 (buffer-substring (point) (1- (search-forward "<")))))
116 (setq lines (/ (- (point) 117 (setq lines (/ (- (point)
@@ -123,16 +124,16 @@
123 1 group from date 124 1 group from date
124 (concat "<" (nnslashdot-sid-strip sid) "%1@slashdot>") 125 (concat "<" (nnslashdot-sid-strip sid) "%1@slashdot>")
125 "" 0 lines nil nil)) 126 "" 0 lines nil nil))
126 headers)) 127 headers)
127 (while (and (setq start (pop startats)) 128 (setq start (if nnslashdot-threaded 2 (pop articles))))
128 (< start last)) 129 (while (and start (<= start last))
129 (setq point (goto-char (point-max))) 130 (setq point (goto-char (point-max)))
130 (nnweb-insert 131 (nnweb-insert
131 (format nnslashdot-comments-url 132 (format nnslashdot-comments-url
132 (nnslashdot-sid-strip sid) 133 (nnslashdot-sid-strip sid)
133 nnslashdot-threshold 0 start) 134 nnslashdot-threshold 0 (- start 2))
134 t) 135 t)
135 (when first-comments 136 (when (and nnslashdot-threaded first-comments)
136 (setq first-comments nil) 137 (setq first-comments nil)
137 (goto-char (point-max)) 138 (goto-char (point-max))
138 (while (re-search-backward "startat=\\([0-9]+\\)" nil t) 139 (while (re-search-backward "startat=\\([0-9]+\\)" nil t)
@@ -140,58 +141,68 @@
140 (unless (memq s startats) 141 (unless (memq s startats)
141 (push s startats))) 142 (push s startats)))
142 (setq startats (sort startats '<))) 143 (setq startats (sort startats '<)))
144 (setq article (if (and article (< start article)) article start))
143 (goto-char point) 145 (goto-char point)
144 (while (re-search-forward 146 (while (re-search-forward
145 "<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))" 147 "<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))"
146 nil t) 148 nil t)
147 (setq article (string-to-number (match-string 1)) 149 (setq cid (match-string 1)
148 subject (match-string 3) 150 subject (match-string 3)
149 score (match-string 5)) 151 score (match-string 5))
152 (unless (assq article (nth 4 entry))
153 (setcar (nthcdr 4 entry) (cons (cons article cid) (nth 4 entry)))
154 (setq changed t))
150 (when (string-match "^Re: *" subject) 155 (when (string-match "^Re: *" subject)
151 (setq subject (concat "Re: " (substring subject (match-end 0))))) 156 (setq subject (concat "Re: " (substring subject (match-end 0)))))
152 (setq subject (nnweb-decode-entities-string subject)) 157 (setq subject (nnweb-decode-entities-string subject))
153 (forward-line 1) 158 (search-forward "<BR>")
154 (if (looking-at 159 (if (looking-at
155 "by <a[^>]+>\\([^<]+\\)</a>[ \t\n]*.*(\\([^)]+\\))") 160 "by[ \t\n]+<a[^>]+>\\([^<]+\\)</a>[ \t\n]*(\\(<[^>]+>\\)*\\([^<>)]+\\))")
156 (progn 161 (progn
157 (goto-char (- (match-end 0) 5)) 162 (goto-char (- (match-end 0) 5))
158 (setq from (concat 163 (setq from (concat
159 (nnweb-decode-entities-string (match-string 1)) 164 (nnweb-decode-entities-string (match-string 1))
160 " <" (match-string 2) ">"))) 165 " <" (match-string 3) ">")))
161 (setq from "") 166 (setq from "")
162 (when (looking-at "by \\(.+\\) on ") 167 (when (looking-at "by \\([^<>]*\\) on ")
163 (goto-char (- (match-end 0) 5)) 168 (goto-char (- (match-end 0) 5))
164 (setq from (nnweb-decode-entities-string (match-string 1))))) 169 (setq from (nnweb-decode-entities-string (match-string 1)))))
165 (search-forward " on ") 170 (search-forward " on ")
166 (setq date 171 (setq date
167 (nnslashdot-date-to-date 172 (nnslashdot-date-to-date
168 (buffer-substring (point) (progn (end-of-line) (point))))) 173 (buffer-substring (point) (progn (skip-chars-forward "^()<>\n\r") (point)))))
169 (setq lines (/ (abs (- (search-forward "<td ") 174 (setq lines (/ (abs (- (search-forward "<td")
170 (search-forward "</td>"))) 175 (search-forward "</td>")))
171 70)) 176 70))
172 (forward-line 4) 177 (if (not
173 (setq parent 178 (re-search-forward ".*cid=\\([0-9]+\\)\">Parent</A>" nil t))
174 (if (looking-at ".*cid=\\([0-9]+\\)") 179 (setq parent nil)
175 (match-string 1) 180 (setq parent (match-string 1))
176 nil)) 181 (when (string= parent "0")
177 (setq did t) 182 (setq parent nil)))
178 (push 183 (push
179 (cons 184 (cons
180 (1+ article) 185 article
181 (make-full-mail-header 186 (make-full-mail-header
182 (1+ article) 187 article
183 (concat subject " (" score ")") 188 (concat subject " (" score ")")
184 from date 189 from date
185 (concat "<" (nnslashdot-sid-strip sid) "%" 190 (concat "<" (nnslashdot-sid-strip sid) "%" cid "@slashdot>")
186 (number-to-string (1+ article))
187 "@slashdot>")
188 (if parent 191 (if parent
189 (concat "<" (nnslashdot-sid-strip sid) "%" 192 (concat "<" (nnslashdot-sid-strip sid) "%"
190 (number-to-string (1+ (string-to-number parent))) 193 parent "@slashdot>")
191 "@slashdot>")
192 "") 194 "")
193 0 lines nil nil)) 195 0 lines nil nil))
194 headers))))) 196 headers)
197 (while (and articles (<= (car articles) article))
198 (pop articles))
199 (setq article (1+ article)))
200 (if nnslashdot-threaded
201 (progn
202 (setq start (pop startats))
203 (if start (setq start (+ start 2))))
204 (setq start (pop articles))))))
205 (if changed (nnslashdot-write-groups))
195 (setq nnslashdot-headers (sort headers 'car-less-than-car)) 206 (setq nnslashdot-headers (sort headers 'car-less-than-car))
196 (save-excursion 207 (save-excursion
197 (set-buffer nntp-server-buffer) 208 (set-buffer nntp-server-buffer)
@@ -201,108 +212,6 @@
201 (nnheader-insert-nov (cdr header))))) 212 (nnheader-insert-nov (cdr header)))))
202 'nov)) 213 'nov))
203 214
204(deffoo nnslashdot-sane-retrieve-headers (articles group)
205 (let ((last (car (last articles)))
206 (did nil)
207 (start (max (1- (car articles)) 1))
208 (sid (caddr (assoc group nnslashdot-groups)))
209 headers article subject score from date lines parent point)
210 (save-excursion
211 (set-buffer nnslashdot-buffer)
212 (erase-buffer)
213 (when (= start 1)
214 (nnweb-insert (format nnslashdot-article-url
215 (nnslashdot-sid-strip sid)) t)
216 (goto-char (point-min))
217 (search-forward "Posted by ")
218 (when (looking-at "<a[^>]+>\\([^<]+\\)")
219 (setq from (nnweb-decode-entities-string (match-string 1))))
220 (search-forward " on ")
221 (setq date (nnslashdot-date-to-date
222 (buffer-substring (point) (1- (search-forward "<")))))
223 (forward-line 2)
224 (setq lines (count-lines (point)
225 (re-search-forward
226 "A href=\"\\(http://slashdot.org\\)?/article")))
227 (push
228 (cons
229 1
230 (make-full-mail-header
231 1 group from date (concat "<" (nnslashdot-sid-strip sid)
232 "%1@slashdot>")
233 "" 0 lines nil nil))
234 headers))
235 (while (or (not article)
236 (and did
237 (< article last)))
238 (when article
239 (setq start (1+ article)))
240 (setq point (goto-char (point-max)))
241 (nnweb-insert
242 (format nnslashdot-comments-url (nnslashdot-sid-strip sid)
243 nnslashdot-threshold 4 start)
244 t)
245 (goto-char point)
246 (while (re-search-forward
247 "<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))"
248 nil t)
249 (setq article (string-to-number (match-string 1))
250 subject (match-string 3)
251 score (match-string 5))
252 (when (string-match "^Re: *" subject)
253 (setq subject (concat "Re: " (substring subject (match-end 0)))))
254 (setq subject (nnweb-decode-entities-string subject))
255 (forward-line 1)
256 (if (looking-at
257 "by <a[^>]+>\\([^<]+\\)</a>[ \t\n]*.*(\\([^)]+\\))")
258 (progn
259 (goto-char (- (match-end 0) 5))
260 (setq from (concat
261 (nnweb-decode-entities-string (match-string 1))
262 " <" (match-string 2) ">")))
263 (setq from "")
264 (when (looking-at "by \\(.+\\) on ")
265 (goto-char (- (match-end 0) 5))
266 (setq from (nnweb-decode-entities-string (match-string 1)))))
267 (search-forward " on ")
268 (setq date
269 (nnslashdot-date-to-date
270 (buffer-substring (point) (progn (end-of-line) (point)))))
271 (setq lines (/ (abs (- (search-forward "<td ")
272 (search-forward "</td>")))
273 70))
274 (forward-line 2)
275 (setq parent
276 (if (looking-at ".*cid=\\([0-9]+\\)")
277 (match-string 1)
278 nil))
279 (setq did t)
280 (push
281 (cons
282 (1+ article)
283 (make-full-mail-header
284 (1+ article) (concat subject " (" score ")")
285 from date
286 (concat "<" (nnslashdot-sid-strip sid) "%"
287 (number-to-string (1+ article))
288 "@slashdot>")
289 (if parent
290 (concat "<" (nnslashdot-sid-strip sid) "%"
291 (number-to-string (1+ (string-to-number parent)))
292 "@slashdot>")
293 "")
294 0 lines nil nil))
295 headers))))
296 (setq nnslashdot-headers
297 (sort headers (lambda (s1 s2) (< (car s1) (car s2)))))
298 (save-excursion
299 (set-buffer nntp-server-buffer)
300 (erase-buffer)
301 (mm-with-unibyte-current-buffer
302 (dolist (header nnslashdot-headers)
303 (nnheader-insert-nov (cdr header)))))
304 'nov))
305
306(deffoo nnslashdot-request-group (group &optional server dont-check) 215(deffoo nnslashdot-request-group (group &optional server dont-check)
307 (nnslashdot-possibly-change-server nil server) 216 (nnslashdot-possibly-change-server nil server)
308 (let ((elem (assoc group nnslashdot-groups))) 217 (let ((elem (assoc group nnslashdot-groups)))
@@ -325,7 +234,7 @@
325 234
326(deffoo nnslashdot-request-article (article &optional group server buffer) 235(deffoo nnslashdot-request-article (article &optional group server buffer)
327 (nnslashdot-possibly-change-server group server) 236 (nnslashdot-possibly-change-server group server)
328 (let (contents) 237 (let (contents cid)
329 (condition-case why 238 (condition-case why
330 (save-excursion 239 (save-excursion
331 (set-buffer nnslashdot-buffer) 240 (set-buffer nnslashdot-buffer)
@@ -333,23 +242,32 @@
333 (goto-char (point-min)) 242 (goto-char (point-min))
334 (when (and (stringp article) 243 (when (and (stringp article)
335 (string-match "%\\([0-9]+\\)@" article)) 244 (string-match "%\\([0-9]+\\)@" article))
336 (setq article (string-to-number (match-string 1 article)))) 245 (setq cid (match-string 1 article))
246 (let ((map (nth 4 (assoc group nnslashdot-groups))))
247 (while map
248 (if (equal (cdar map) cid)
249 (setq article (caar map)
250 map nil)
251 (setq map (cdr map))))))
337 (when (numberp article) 252 (when (numberp article)
338 (if (= article 1) 253 (if (= article 1)
339 (progn 254 (progn
340 (re-search-forward "Posted by *<[^>]+>[^>]*<[^>]+> *on ") 255 (re-search-forward
256 "Posted by")
341 (search-forward "<BR>") 257 (search-forward "<BR>")
342 (setq contents 258 (setq contents
343 (buffer-substring 259 (buffer-substring
344 (point) 260 (point)
345 (progn 261 (progn
346 (re-search-forward 262 (re-search-forward
347 "<p>.*A href=\"\\(http://slashdot.org\\)?/article") 263 "&lt;&nbsp;[ \t\r\n]*<A HREF=\"\\(\\(http:\\)?//slashdot\\.org\\)?/article")
348 (match-beginning 0))))) 264 (match-beginning 0)))))
349 (search-forward (format "<a name=\"%d\">" (1- article))) 265 (setq cid (cdr (assq article
266 (nth 4 (assoc group nnslashdot-groups)))))
267 (search-forward (format "<a name=\"%s\">" cid))
350 (setq contents 268 (setq contents
351 (buffer-substring 269 (buffer-substring
352 (re-search-forward "<td[^>]+>") 270 (re-search-forward "<td[^>]*>")
353 (search-forward "</td>"))))))) 271 (search-forward "</td>")))))))
354 (search-failed (nnslashdot-lose why))) 272 (search-failed (nnslashdot-lose why)))
355 273
@@ -384,10 +302,10 @@
384 (let ((number 0) 302 (let ((number 0)
385 sid elem description articles gname) 303 sid elem description articles gname)
386 (condition-case why 304 (condition-case why
387 ;; First we do the Ultramode to get info on all the latest groups. 305 ;; First we do the Ultramode to get info on all the latest groups.
388 (progn 306 (progn
389 (mm-with-unibyte-buffer 307 (mm-with-unibyte-buffer
390 (nnweb-insert "http://slashdot.org/slashdot.xml" t) 308 (nnweb-insert nnslashdot-backslash-url t)
391 (goto-char (point-min)) 309 (goto-char (point-min))
392 (while (search-forward "<story>" nil t) 310 (while (search-forward "<story>" nil t)
393 (narrow-to-region (point) (search-forward "</story>")) 311 (narrow-to-region (point) (search-forward "</story>"))
@@ -404,7 +322,8 @@
404 (setq gname (concat description " (" sid ")")) 322 (setq gname (concat description " (" sid ")"))
405 (if (setq elem (assoc gname nnslashdot-groups)) 323 (if (setq elem (assoc gname nnslashdot-groups))
406 (setcar (cdr elem) articles) 324 (setcar (cdr elem) articles)
407 (push (list gname articles sid) nnslashdot-groups)) 325 (push (list gname articles sid (current-time) nil)
326 nnslashdot-groups))
408 (goto-char (point-max)) 327 (goto-char (point-max))
409 (widen))) 328 (widen)))
410 ;; Then do the older groups. 329 ;; Then do the older groups.
@@ -425,13 +344,14 @@
425 (setq gname (concat description " (" sid ")")) 344 (setq gname (concat description " (" sid ")"))
426 (if (setq elem (assoc gname nnslashdot-groups)) 345 (if (setq elem (assoc gname nnslashdot-groups))
427 (setcar (cdr elem) articles) 346 (setcar (cdr elem) articles)
428 (push (list gname articles sid) nnslashdot-groups))))) 347 (push (list gname articles sid (current-time) nil)
348 nnslashdot-groups)))))
429 (incf number 30))) 349 (incf number 30)))
430 (search-failed (nnslashdot-lose why))) 350 (search-failed (nnslashdot-lose why)))
431 (nnslashdot-write-groups) 351 (nnslashdot-write-groups)
432 (nnslashdot-generate-active) 352 (nnslashdot-generate-active)
433 t)) 353 t))
434 354
435(deffoo nnslashdot-request-newgroups (date &optional server) 355(deffoo nnslashdot-request-newgroups (date &optional server)
436 (nnslashdot-possibly-change-server nil server) 356 (nnslashdot-possibly-change-server nil server)
437 (nnslashdot-generate-active) 357 (nnslashdot-generate-active)
@@ -496,6 +416,24 @@
496 (setq nnslashdot-headers nil 416 (setq nnslashdot-headers nil
497 nnslashdot-groups nil)) 417 nnslashdot-groups nil))
498 418
419(deffoo nnslashdot-request-expire-articles
420 (articles group &optional server force)
421 (nnslashdot-possibly-change-server group server)
422 (let ((item (assoc group nnslashdot-groups)))
423 (when item
424 (if (fourth item)
425 (when (and (>= (length articles) (cadr item)) ;; All are expirable.
426 (nnmail-expired-article-p
427 group
428 (fourth item)
429 force))
430 (setq nnslashdot-groups (delq item nnslashdot-groups))
431 (nnslashdot-write-groups)
432 (setq articles nil)) ;; all expired.
433 (setcdr (cddr item) (list (current-time)))
434 (nnslashdot-write-groups))))
435 articles)
436
499(nnoo-define-skeleton nnslashdot) 437(nnoo-define-skeleton nnslashdot)
500 438
501;;; Internal functions 439;;; Internal functions
@@ -508,18 +446,32 @@
508 (unless nnslashdot-groups 446 (unless nnslashdot-groups
509 (nnslashdot-read-groups))) 447 (nnslashdot-read-groups)))
510 448
449(defun nnslashdot-make-tuple (tuple n)
450 (prog1
451 tuple
452 (while (> n 1)
453 (unless (cdr tuple)
454 (setcdr tuple (list nil)))
455 (setq tuple (cdr tuple)
456 n (1- n)))))
457
511(defun nnslashdot-read-groups () 458(defun nnslashdot-read-groups ()
512 (let ((file (expand-file-name "groups" nnslashdot-directory))) 459 (let ((file (expand-file-name "groups" nnslashdot-directory)))
513 (when (file-exists-p file) 460 (when (file-exists-p file)
514 (mm-with-unibyte-buffer 461 (mm-with-unibyte-buffer
515 (insert-file-contents file) 462 (insert-file-contents file)
516 (goto-char (point-min)) 463 (goto-char (point-min))
517 (setq nnslashdot-groups (read (current-buffer))))))) 464 (setq nnslashdot-groups (read (current-buffer))))
465 (if (and nnslashdot-groups (< (length (car nnslashdot-groups)) 5))
466 (let ((groups nnslashdot-groups))
467 (while groups
468 (nnslashdot-make-tuple (car groups) 5)
469 (setq groups (cdr groups))))))))
518 470
519(defun nnslashdot-write-groups () 471(defun nnslashdot-write-groups ()
520 (with-temp-file (expand-file-name "groups" nnslashdot-directory) 472 (with-temp-file (expand-file-name "groups" nnslashdot-directory)
521 (prin1 nnslashdot-groups (current-buffer)))) 473 (gnus-prin1 nnslashdot-groups)))
522 474
523(defun nnslashdot-init (server) 475(defun nnslashdot-init (server)
524 "Initialize buffers and such." 476 "Initialize buffers and such."
525 (unless (file-exists-p nnslashdot-directory) 477 (unless (file-exists-p nnslashdot-directory)
@@ -528,7 +480,8 @@
528 (setq nnslashdot-buffer 480 (setq nnslashdot-buffer
529 (save-excursion 481 (save-excursion
530 (nnheader-set-temp-buffer 482 (nnheader-set-temp-buffer
531 (format " *nnslashdot %s*" server)))))) 483 (format " *nnslashdot %s*" server))))
484 (push nnslashdot-buffer gnus-buffers)))
532 485
533(defun nnslashdot-date-to-date (sdate) 486(defun nnslashdot-date-to-date (sdate)
534 (condition-case err 487 (condition-case err
@@ -552,11 +505,6 @@
552(defun nnslashdot-lose (why) 505(defun nnslashdot-lose (why)
553 (error "Slashdot HTML has changed; please get a new version of nnslashdot")) 506 (error "Slashdot HTML has changed; please get a new version of nnslashdot"))
554 507
555;(defun nnslashdot-sid-strip (sid)
556; (if (string-match "^00/" sid)
557; (substring sid (match-end 0))
558; sid))
559
560(defalias 'nnslashdot-sid-strip 'identity) 508(defalias 'nnslashdot-sid-strip 'identity)
561 509
562(provide 'nnslashdot) 510(provide 'nnslashdot)
diff --git a/lisp/gnus/nnultimate.el b/lisp/gnus/nnultimate.el
index 6ccb0a2aec8..5ce8446da11 100644
--- a/lisp/gnus/nnultimate.el
+++ b/lisp/gnus/nnultimate.el
@@ -56,6 +56,8 @@
56(defvoo nnultimate-groups nil) 56(defvoo nnultimate-groups nil)
57(defvoo nnultimate-headers nil) 57(defvoo nnultimate-headers nil)
58(defvoo nnultimate-articles nil) 58(defvoo nnultimate-articles nil)
59(defvar nnultimate-table-regexp
60 "postings.*editpost\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio")
59 61
60;;; Interface functions 62;;; Interface functions
61 63
@@ -74,13 +76,17 @@
74 (old-total (or (nth 6 entry) 1)) 76 (old-total (or (nth 6 entry) 1))
75 (furl "forumdisplay.cgi?action=topics&number=%d&DaysPrune=1000") 77 (furl "forumdisplay.cgi?action=topics&number=%d&DaysPrune=1000")
76 (furls (list (concat nnultimate-address (format furl sid)))) 78 (furls (list (concat nnultimate-address (format furl sid))))
79 (nnultimate-table-regexp
80 "postings.*editpost\\|forumdisplay\\|getbio")
77 headers article subject score from date lines parent point 81 headers article subject score from date lines parent point
78 contents tinfo fetchers map elem a href garticles topic old-max 82 contents tinfo fetchers map elem a href garticles topic old-max
79 inc datel table string current-page total-contents pages 83 inc datel table current-page total-contents pages
80 farticles forum-contents parse furl-fetched mmap farticle) 84 farticles forum-contents parse furl-fetched mmap farticle)
81 (setq map mapping) 85 (setq map mapping)
82 (while (and (setq article (car articles)) 86 (while (and (setq article (car articles))
83 map) 87 map)
88 ;; Skip past the articles in the map until we reach the
89 ;; article we're looking for.
84 (while (and map 90 (while (and map
85 (or (> article (caar map)) 91 (or (> article (caar map))
86 (< (cadar map) (caar map)))) 92 (< (cadar map) (caar map))))
@@ -101,7 +107,7 @@
101 fetchers)) 107 fetchers))
102 (pop articles) 108 (pop articles)
103 (setq article (car articles))))) 109 (setq article (car articles)))))
104 ;; Now we have the mapping from/to Gnus/nnultimate article numbers, 110 ;; Now we have the mapping from/to Gnus/nnultimate article numbers,
105 ;; so we start fetching the topics that we need to satisfy the 111 ;; so we start fetching the topics that we need to satisfy the
106 ;; request. 112 ;; request.
107 (if (not fetchers) 113 (if (not fetchers)
@@ -128,22 +134,27 @@
128 (setq contents 134 (setq contents
129 (ignore-errors (w3-parse-buffer (current-buffer)))) 135 (ignore-errors (w3-parse-buffer (current-buffer))))
130 (setq table (nnultimate-find-forum-table contents)) 136 (setq table (nnultimate-find-forum-table contents))
131 (setq string (mapconcat 'identity (nnweb-text table) "")) 137 (goto-char (point-min))
132 (when (string-match "topic is \\([0-9]\\) pages" string) 138 (when (re-search-forward "topic is \\([0-9]+\\) pages" nil t)
133 (setq pages (string-to-number (match-string 1 string))) 139 (setq pages (string-to-number (match-string 1))))
134 (setcdr table nil)
135 (setq table (nnultimate-find-forum-table contents)))
136 (setq contents (cdr (nth 2 (car (nth 2 table))))) 140 (setq contents (cdr (nth 2 (car (nth 2 table)))))
137 (setq total-contents (nconc total-contents contents)) 141 (setq total-contents (nconc total-contents contents))
138 (incf current-page)) 142 (incf current-page))
139 ;;(setq total-contents (nreverse total-contents)) 143 (when t
140 (dolist (art (cdr elem)) 144 (let ((i 0))
141 (if (not (nth (1- (cdr art)) total-contents)) 145 (dolist (co total-contents)
142 () ;(debug) 146 (push (list (or (nnultimate-topic-article-to-article
143 (push (list (car art) 147 group (car elem) (incf i))
144 (nth (1- (cdr art)) total-contents) 148 1)
145 subject) 149 co subject)
146 nnultimate-articles))))) 150 nnultimate-articles))))
151 (when nil
152 (dolist (art (cdr elem))
153 (when (nth (1- (cdr art)) total-contents)
154 (push (list (car art)
155 (nth (1- (cdr art)) total-contents)
156 subject)
157 nnultimate-articles))))))
147 (setq nnultimate-articles 158 (setq nnultimate-articles
148 (sort nnultimate-articles 'car-less-than-car)) 159 (sort nnultimate-articles 'car-less-than-car))
149 ;; Now we have all the articles, conveniently in an alist 160 ;; Now we have all the articles, conveniently in an alist
@@ -161,17 +172,26 @@
161 (setq date (substring (car datel) (match-end 0)) 172 (setq date (substring (car datel) (match-end 0))
162 datel nil)) 173 datel nil))
163 (pop datel)) 174 (pop datel))
164 (setq date (delete "" (split-string date "[- \n\t\r    ]"))) 175 (when date
165 (if (or (member "AM" date) 176 (setq date (delete "" (split-string
166 (member "PM" date)) 177 date "[-, \n\t\r    ]")))
178 (if (or (member "AM" date)
179 (member "PM" date))
180 (setq date (format
181 "%s %s %s %s"
182 (nth 1 date)
183 (if (and (>= (length (nth 0 date)) 3)
184 (assoc (downcase
185 (substring (nth 0 date) 0 3))
186 parse-time-months))
187 (substring (nth 0 date) 0 3)
188 (car (rassq (string-to-number (nth 0 date))
189 parse-time-months)))
190 (nth 2 date) (nth 3 date)))
167 (setq date (format "%s %s %s %s" 191 (setq date (format "%s %s %s %s"
168 (car (rassq (string-to-number (nth 0 date)) 192 (car (rassq (string-to-number (nth 1 date))
169 parse-time-months)) 193 parse-time-months))
170 (nth 1 date) (nth 2 date) (nth 3 date))) 194 (nth 0 date) (nth 2 date) (nth 3 date)))))
171 (setq date (format "%s %s %s %s"
172 (car (rassq (string-to-number (nth 1 date))
173 parse-time-months))
174 (nth 0 date) (nth 2 date) (nth 3 date))))
175 (push 195 (push
176 (cons 196 (cons
177 article 197 article
@@ -180,7 +200,7 @@
180 from (or date "") 200 from (or date "")
181 (concat "<" (number-to-string sid) "%" 201 (concat "<" (number-to-string sid) "%"
182 (number-to-string article) 202 (number-to-string article)
183 "@ultimate>") 203 "@ultimate." server ">")
184 "" 0 204 "" 0
185 (/ (length (mapconcat 205 (/ (length (mapconcat
186 'identity 206 'identity
@@ -199,6 +219,16 @@
199 (nnheader-insert-nov (cdr header)))))) 219 (nnheader-insert-nov (cdr header))))))
200 'nov))) 220 'nov)))
201 221
222(defun nnultimate-topic-article-to-article (group topic article)
223 (catch 'found
224 (dolist (elem (nth 5 (assoc group nnultimate-groups)))
225 (when (and (= topic (nth 2 elem))
226 (>= article (nth 3 elem))
227 (< article (+ (- (nth 1 elem) (nth 0 elem)) 1
228 (nth 3 elem))))
229 (throw 'found
230 (+ (nth 0 elem) (- article (nth 3 elem))))))))
231
202(deffoo nnultimate-request-group (group &optional server dont-check) 232(deffoo nnultimate-request-group (group &optional server dont-check)
203 (nnultimate-possibly-change-server nil server) 233 (nnultimate-possibly-change-server nil server)
204 (when (not nnultimate-groups) 234 (when (not nnultimate-groups)
@@ -330,7 +360,7 @@
330 ;; the group is entered, there's 2 new articles in topic one 360 ;; the group is entered, there's 2 new articles in topic one
331 ;; and 1 in topic three. Then Gnus article number 8-9 be 5-6 361 ;; and 1 in topic three. Then Gnus article number 8-9 be 5-6
332 ;; in topic one and 10 will be the 2 in topic three. 362 ;; in topic one and 10 will be the 2 in topic three.
333 (dolist (row (reverse forum-contents)) 363 (dolist (row (nreverse forum-contents))
334 (setq row (nth 2 row)) 364 (setq row (nth 2 row))
335 (when (setq a (nnweb-parse-find 'a row)) 365 (when (setq a (nnweb-parse-find 'a row))
336 (setq subject (car (last (nnweb-text a))) 366 (setq subject (car (last (nnweb-text a)))
@@ -403,7 +433,7 @@
403 nnultimate-groups-alist) 433 nnultimate-groups-alist)
404 (with-temp-file (expand-file-name "groups" nnultimate-directory) 434 (with-temp-file (expand-file-name "groups" nnultimate-directory)
405 (prin1 nnultimate-groups-alist (current-buffer)))) 435 (prin1 nnultimate-groups-alist (current-buffer))))
406 436
407(defun nnultimate-init (server) 437(defun nnultimate-init (server)
408 "Initialize buffers and such." 438 "Initialize buffers and such."
409 (unless (file-exists-p nnultimate-directory) 439 (unless (file-exists-p nnultimate-directory)
@@ -438,9 +468,7 @@
438 (nth 2 parse)))) 468 (nth 2 parse))))
439 (let ((href (cdr (assq 'href (nth 1 (nnweb-parse-find 'a parse 20))))) 469 (let ((href (cdr (assq 'href (nth 1 (nnweb-parse-find 'a parse 20)))))
440 case-fold-search) 470 case-fold-search)
441 (when (and href (string-match 471 (when (and href (string-match nnultimate-table-regexp href))
442 "postings\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio"
443 href))
444 t)))) 472 t))))
445 473
446(provide 'nnultimate) 474(provide 'nnultimate)
diff --git a/lisp/gnus/nnweb.el b/lisp/gnus/nnweb.el
index c4ff7248e6b..740b182639f 100644
--- a/lisp/gnus/nnweb.el
+++ b/lisp/gnus/nnweb.el
@@ -1,5 +1,5 @@
1;;; nnweb.el --- retrieving articles via web search engines 1;;; nnweb.el --- retrieving articles via web search engines
2;; Copyright (C) 1996, 1997, 1998, 1999, 2000 2;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
3;; Free Software Foundation, Inc. 3;; Free Software Foundation, Inc.
4 4
5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
@@ -55,25 +55,48 @@
55(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") 55(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
56 "Where nnweb will save its files.") 56 "Where nnweb will save its files.")
57 57
58(defvoo nnweb-type 'dejanews 58(defvoo nnweb-type 'google
59 "What search engine type is being used. 59 "What search engine type is being used.
60Valid types include `dejanews', `dejanewsold', `reference', 60Valid types include `google', `dejanews', `dejanewsold', `reference',
61and `altavista'.") 61and `altavista'.")
62 62
63(defvar nnweb-type-definition 63(defvar nnweb-type-definition
64 '((dejanews 64 '(
65 (google
66 ;;(article . nnweb-google-wash-article)
67 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
65 (article . ignore) 68 (article . ignore)
66 (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") 69 (id . "http://groups.google.com/groups?selm=%s&output=gplain")
67 (map . nnweb-dejanews-create-mapping) 70 ;;(reference . nnweb-google-reference)
68 (search . nnweb-dejanews-search) 71 (reference . identity)
69 (address . "http://www.deja.com/=dnc/qs.xp") 72 (map . nnweb-google-create-mapping)
70 (identifier . nnweb-dejanews-identity)) 73 (search . nnweb-google-search)
71 (dejanewsold 74 (address . "http://groups.google.com/groups")
75 (identifier . nnweb-google-identity))
76 (dejanews ;; alias of google
77 ;;(article . nnweb-google-wash-article)
78 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
72 (article . ignore) 79 (article . ignore)
73 (map . nnweb-dejanews-create-mapping) 80 (id . "http://groups.google.com/groups?selm=%s&output=gplain")
74 (search . nnweb-dejanewsold-search) 81 ;;(reference . nnweb-google-reference)
75 (address . "http://www.deja.com/dnquery.xp") 82 (reference . identity)
76 (identifier . nnweb-dejanews-identity)) 83 (map . nnweb-google-create-mapping)
84 (search . nnweb-google-search)
85 (address . "http://groups.google.com/groups")
86 (identifier . nnweb-google-identity))
87;;; (dejanews
88;;; (article . ignore)
89;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
90;;; (map . nnweb-dejanews-create-mapping)
91;;; (search . nnweb-dejanews-search)
92;;; (address . "http://www.deja.com/=dnc/qs.xp")
93;;; (identifier . nnweb-dejanews-identity))
94;;; (dejanewsold
95;;; (article . ignore)
96;;; (map . nnweb-dejanews-create-mapping)
97;;; (search . nnweb-dejanewsold-search)
98;;; (address . "http://www.deja.com/dnquery.xp")
99;;; (identifier . nnweb-dejanews-identity))
77 (reference 100 (reference
78 (article . nnweb-reference-wash-article) 101 (article . nnweb-reference-wash-article)
79 (map . nnweb-reference-create-mapping) 102 (map . nnweb-reference-create-mapping)
@@ -124,6 +147,8 @@ and `altavista'.")
124 147
125(deffoo nnweb-request-scan (&optional group server) 148(deffoo nnweb-request-scan (&optional group server)
126 (nnweb-possibly-change-server group server) 149 (nnweb-possibly-change-server group server)
150 (if nnweb-ephemeral-p
151 (setq nnweb-hashtb (gnus-make-hashtable 4095)))
127 (funcall (nnweb-definition 'map)) 152 (funcall (nnweb-definition 'map))
128 (unless nnweb-ephemeral-p 153 (unless nnweb-ephemeral-p
129 (nnweb-write-active) 154 (nnweb-write-active)
@@ -134,9 +159,10 @@ and `altavista'.")
134 (when (and group 159 (when (and group
135 (not (equal group nnweb-group)) 160 (not (equal group nnweb-group))
136 (not nnweb-ephemeral-p)) 161 (not nnweb-ephemeral-p))
162 (setq nnweb-group group
163 nnweb-articles nil)
137 (let ((info (assoc group nnweb-group-alist))) 164 (let ((info (assoc group nnweb-group-alist)))
138 (when info 165 (when info
139 (setq nnweb-group group)
140 (setq nnweb-type (nth 2 info)) 166 (setq nnweb-type (nth 2 info))
141 (setq nnweb-search (nth 3 info)) 167 (setq nnweb-search (nth 3 info))
142 (unless dont-check 168 (unless dont-check
@@ -175,17 +201,19 @@ and `altavista'.")
175 (and (stringp article) 201 (and (stringp article)
176 (nnweb-definition 'id t) 202 (nnweb-definition 'id t)
177 (let ((fetch (nnweb-definition 'id)) 203 (let ((fetch (nnweb-definition 'id))
178 art) 204 art active)
179 (when (string-match "^<\\(.*\\)>$" article) 205 (when (string-match "^<\\(.*\\)>$" article)
180 (setq art (match-string 1 article))) 206 (setq art (match-string 1 article)))
181 (and fetch 207 (when (and fetch art)
182 art 208 (setq url (format fetch art))
183 (mm-with-unibyte-current-buffer 209 (mm-with-unibyte-current-buffer
184 (nnweb-fetch-url 210 (nnweb-fetch-url url))
185 (format fetch article))))))) 211 (if (nnweb-definition 'reference t)
212 (setq article
213 (funcall (nnweb-definition
214 'reference) article)))))))
186 (unless nnheader-callback-function 215 (unless nnheader-callback-function
187 (funcall (nnweb-definition 'article)) 216 (funcall (nnweb-definition 'article)))
188 (nnweb-decode-entities))
189 (nnheader-report 'nnweb "Fetched article %s" article) 217 (nnheader-report 'nnweb "Fetched article %s" article)
190 (cons group (and (numberp article) article)))))) 218 (cons group (and (numberp article) article))))))
191 219
@@ -290,10 +318,11 @@ and `altavista'.")
290 (nnweb-open-server server))) 318 (nnweb-open-server server)))
291 (unless nnweb-group-alist 319 (unless nnweb-group-alist
292 (nnweb-read-active)) 320 (nnweb-read-active))
321 (unless nnweb-hashtb
322 (setq nnweb-hashtb (gnus-make-hashtable 4095)))
293 (when group 323 (when group
294 (when (and (not nnweb-ephemeral-p) 324 (when (and (not nnweb-ephemeral-p)
295 (not (equal group nnweb-group))) 325 (equal group nnweb-group))
296 (setq nnweb-hashtb (gnus-make-hashtable 4095))
297 (nnweb-request-group group nil t)))) 326 (nnweb-request-group group nil t))))
298 327
299(defun nnweb-init (server) 328(defun nnweb-init (server)
@@ -393,7 +422,7 @@ and `altavista'.")
393 (car (rassq (string-to-number 422 (car (rassq (string-to-number
394 (match-string 2 date)) 423 (match-string 2 date))
395 parse-time-months)) 424 parse-time-months))
396 (match-string 3 date) 425 (match-string 3 date)
397 (match-string 1 date))) 426 (match-string 1 date)))
398 (setq date "Jan 1 00:00:00 0000")) 427 (setq date "Jan 1 00:00:00 0000"))
399 (incf i) 428 (incf i)
@@ -559,6 +588,7 @@ and `altavista'.")
559 (while (search-forward "," nil t) 588 (while (search-forward "," nil t)
560 (replace-match " " t t))) 589 (replace-match " " t t)))
561 (widen) 590 (widen)
591 (nnweb-decode-entities)
562 (set-marker body nil)))) 592 (set-marker body nil))))
563 593
564(defun nnweb-reference-search (search) 594(defun nnweb-reference-search (search)
@@ -663,7 +693,8 @@ and `altavista'.")
663 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t) 693 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t)
664 (replace-match "&lt;\\1&gt; " t))) 694 (replace-match "&lt;\\1&gt; " t)))
665 (widen) 695 (widen)
666 (nnweb-remove-markup))) 696 (nnweb-remove-markup)
697 (nnweb-decode-entities)))
667 698
668(defun nnweb-altavista-search (search &optional part) 699(defun nnweb-altavista-search (search &optional part)
669 (url-insert-file-contents 700 (url-insert-file-contents
@@ -683,13 +714,147 @@ and `altavista'.")
683 t) 714 t)
684 715
685;;; 716;;;
717;;; Deja bought by google.com
718;;;
719
720(defun nnweb-google-wash-article ()
721 (let ((case-fold-search t) url)
722 (goto-char (point-min))
723 (re-search-forward "^<pre>" nil t)
724 (narrow-to-region (point-min) (point))
725 (search-backward "<table " nil t 2)
726 (delete-region (point-min) (point))
727 (if (re-search-forward "Search Result [0-9]+" nil t)
728 (replace-match ""))
729 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
730 (replace-match ""))
731 (goto-char (point-min))
732 (while (search-forward "<br>" nil t)
733 (replace-match "\n"))
734 (nnweb-remove-markup)
735 (goto-char (point-min))
736 (while (re-search-forward "^[ \t]*\n" nil t)
737 (replace-match ""))
738 (goto-char (point-max))
739 (insert "\n")
740 (widen)
741 (narrow-to-region (point) (point-max))
742 (search-forward "</pre>" nil t)
743 (delete-region (point) (point-max))
744 (nnweb-remove-markup)
745 (widen)))
746
747(defun nnweb-google-parse-1 (&optional Message-ID)
748 (let ((i 0)
749 (case-fold-search t)
750 (active (cadr (assoc nnweb-group nnweb-group-alist)))
751 Subject Score Date Newsgroups From
752 map url mid)
753 (unless active
754 (push (list nnweb-group (setq active (cons 1 0))
755 nnweb-type nnweb-search)
756 nnweb-group-alist))
757 ;; Go through all the article hits on this page.
758 (goto-char (point-min))
759 (while (re-search-forward
760 "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
761 (setq mid (match-string 2)
762 url (format
763 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
764 (narrow-to-region (search-forward ">" nil t)
765 (search-forward "</a>" nil t))
766 (nnweb-remove-markup)
767 (nnweb-decode-entities)
768 (setq Subject (buffer-string))
769 (goto-char (point-max))
770 (widen)
771 (forward-line 1)
772 (when (looking-at "<br><font[^>]+>")
773 (goto-char (match-end 0)))
774 (if (not (looking-at "<a[^>]+>"))
775 (skip-chars-forward " \t")
776 (narrow-to-region (point)
777 (search-forward "</a>" nil t))
778 (nnweb-remove-markup)
779 (nnweb-decode-entities)
780 (setq Newsgroups (buffer-string))
781 (goto-char (point-max))
782 (widen)
783 (skip-chars-forward "- \t"))
784 (when (looking-at
785 "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
786 (setq From (match-string 2)
787 Date (match-string 1)))
788 (forward-line 1)
789 (incf i)
790 (unless (nnweb-get-hashtb url)
791 (push
792 (list
793 (incf (cdr active))
794 (make-full-mail-header
795 (cdr active) (if Newsgroups
796 (concat "(" Newsgroups ") " Subject)
797 Subject)
798 From Date (or Message-ID mid)
799 nil 0 0 url))
800 map)
801 (nnweb-set-hashtb (cadar map) (car map))))
802 map))
803
804(defun nnweb-google-reference (id)
805 (let ((map (nnweb-google-parse-1 id)) header)
806 (setq nnweb-articles
807 (nconc nnweb-articles map))
808 (when (setq header (cadar map))
809 (mm-with-unibyte-current-buffer
810 (nnweb-fetch-url (mail-header-xref header)))
811 (caar map))))
812
813(defun nnweb-google-create-mapping ()
814 "Perform the search and create an number-to-url alist."
815 (save-excursion
816 (set-buffer nnweb-buffer)
817 (erase-buffer)
818 (when (funcall (nnweb-definition 'search) nnweb-search)
819 (let ((more t))
820 (while more
821 (setq nnweb-articles
822 (nconc nnweb-articles (nnweb-google-parse-1)))
823 ;; FIXME: There is more.
824 (setq more nil))
825 ;; Return the articles in the right order.
826 (setq nnweb-articles
827 (sort nnweb-articles 'car-less-than-car))))))
828
829(defun nnweb-google-search (search)
830 (nnweb-insert
831 (concat
832 (nnweb-definition 'address)
833 "?"
834 (nnweb-encode-www-form-urlencoded
835 `(("q" . ,search)
836 ("num". "100")
837 ("hq" . "")
838 ("hl" . "")
839 ("lr" . "")
840 ("safe" . "off")
841 ("sites" . "groups")))))
842 t)
843
844(defun nnweb-google-identity (url)
845 "Return an unique identifier based on URL."
846 (if (string-match "selm=\\([^ &>]+\\)" url)
847 (match-string 1 url)
848 url))
849
850;;;
686;;; General web/w3 interface utility functions 851;;; General web/w3 interface utility functions
687;;; 852;;;
688 853
689(defun nnweb-insert-html (parse) 854(defun nnweb-insert-html (parse)
690 "Insert HTML based on a w3 parse tree." 855 "Insert HTML based on a w3 parse tree."
691 (if (stringp parse) 856 (if (stringp parse)
692 (insert parse) 857 (insert (nnheader-string-as-multibyte parse))
693 (insert "<" (symbol-name (car parse)) " ") 858 (insert "<" (symbol-name (car parse)) " ")
694 (insert (mapconcat 859 (insert (mapconcat
695 (lambda (param) 860 (lambda (param)
@@ -729,7 +894,7 @@ and `altavista'.")
729 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) 894 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
730 (let ((elem (if (eq (aref (match-string 1) 0) ?\#) 895 (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
731 (let ((c 896 (let ((c
732 (string-to-number (substring 897 (string-to-number (substring
733 (match-string 1) 1)))) 898 (match-string 1) 1))))
734 (if (mm-char-or-char-int-p c) c 32)) 899 (if (mm-char-or-char-int-p c) c 32))
735 (or (cdr (assq (intern (match-string 1)) 900 (or (cdr (assq (intern (match-string 1))
@@ -739,9 +904,9 @@ and `altavista'.")
739 (setq elem (char-to-string elem))) 904 (setq elem (char-to-string elem)))
740 (replace-match elem t t)))) 905 (replace-match elem t t))))
741 906
742(defun nnweb-decode-entities-string (str) 907(defun nnweb-decode-entities-string (string)
743 (with-temp-buffer 908 (with-temp-buffer
744 (insert str) 909 (insert string)
745 (nnweb-decode-entities) 910 (nnweb-decode-entities)
746 (buffer-substring (point-min) (point-max)))) 911 (buffer-substring (point-min) (point-max))))
747 912
@@ -760,12 +925,12 @@ and `altavista'.")
760 "Insert the contents from an URL in the current buffer. 925 "Insert the contents from an URL in the current buffer.
761If FOLLOW-REFRESH is non-nil, redirect refresh url in META." 926If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
762 (let ((name buffer-file-name)) 927 (let ((name buffer-file-name))
763 (if follow-refresh 928 (if follow-refresh
764 (save-restriction 929 (save-restriction
765 (narrow-to-region (point) (point)) 930 (narrow-to-region (point) (point))
766 (url-insert-file-contents url) 931 (url-insert-file-contents url)
767 (goto-char (point-min)) 932 (goto-char (point-min))
768 (when (re-search-forward 933 (when (re-search-forward
769 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) 934 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
770 (let ((url (match-string 1))) 935 (let ((url (match-string 1)))
771 (delete-region (point-min) (point-max)) 936 (delete-region (point-min) (point-max))
@@ -822,6 +987,11 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
822 (listp (cdr element))) 987 (listp (cdr element)))
823 (nnweb-text-1 element))))) 988 (nnweb-text-1 element)))))
824 989
990(defun nnweb-replace-in-string (string match newtext)
991 (while (string-match match string)
992 (setq string (replace-match newtext t t string)))
993 string)
994
825(provide 'nnweb) 995(provide 'nnweb)
826 996
827;;; nnweb.el ends here 997;;; nnweb.el ends here