aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog4
-rw-r--r--lisp/net/eww.el56
-rw-r--r--lisp/net/shr.el20
3 files changed, 80 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index eb374375198..b6e32f285ce 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,5 +1,9 @@
12014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org> 12014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org>
2 2
3 * net/eww.el (eww-readable): New command and keystroke.
4
5 * net/shr.el (shr-retransform-dom): New function.
6
3 * net/eww.el (eww-display-html): Set `eww-current-source' in the 7 * net/eww.el (eww-display-html): Set `eww-current-source' in the
4 correct buffer. 8 correct buffer.
5 (eww-view-source): Use it. 9 (eww-view-source): Use it.
diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index e4acd69ef4d..579f0878bbd 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -402,6 +402,7 @@ word(s) will be searched for via `eww-search-prefix'."
402 (setq-local eww-contents-url nil)) 402 (setq-local eww-contents-url nil))
403 403
404(defun eww-view-source () 404(defun eww-view-source ()
405 "View the HTML source code of the current page."
405 (interactive) 406 (interactive)
406 (let ((buf (get-buffer-create "*eww-source*")) 407 (let ((buf (get-buffer-create "*eww-source*"))
407 (source eww-current-source)) 408 (source eww-current-source))
@@ -413,6 +414,60 @@ word(s) will be searched for via `eww-search-prefix'."
413 (html-mode))) 414 (html-mode)))
414 (view-buffer buf))) 415 (view-buffer buf)))
415 416
417(defun eww-readable ()
418 "View the main \"readable\" parts of the current web page.
419This command uses heuristics to find the parts of the web page that
420contains the main textual portion, leaving out navigation menus and
421the like."
422 (interactive)
423 (let* ((source eww-current-source)
424 (dom (shr-transform-dom
425 (with-temp-buffer
426 (insert source)
427 (libxml-parse-html-region (point-min) (point-max))))))
428 (eww-score-readability dom)
429 (eww-display-html 'utf-8 nil (shr-retransform-dom
430 (eww-highest-readability dom)))
431 (setq eww-current-source source)))
432
433(defun eww-score-readability (node)
434 (let ((score -1))
435 (cond
436 ((memq (car node) '(script head))
437 (setq score -2))
438 ((eq (car node) 'meta)
439 (setq score -1))
440 ((eq (car node) 'a)
441 (setq score (- (length (split-string
442 (or (cdr (assoc 'text (cdr node))) ""))))))
443 (t
444 (dolist (elem (cdr node))
445 (cond
446 ((eq (car elem) 'text)
447 (setq score (+ score (length (split-string (cdr elem))))))
448 ((consp (cdr elem))
449 (setq score (+ score
450 (or (cdr (assoc :eww-readability-score (cdr elem)))
451 (eww-score-readability elem)))))))))
452 ;; Cache the score of the node to avoid recomputing all the time.
453 (setcdr node (cons (cons :eww-readability-score score) (cdr node)))
454 score))
455
456(defun eww-highest-readability (node)
457 (let ((result node)
458 highest)
459 (dolist (elem (cdr node))
460 (when (and (consp (cdr elem))
461 (> (or (cdr (assoc
462 :eww-readability-score
463 (setq highest
464 (eww-highest-readability elem))))
465 most-negative-fixnum)
466 (or (cdr (assoc :eww-readability-score (cdr result)))
467 most-negative-fixnum)))
468 (setq result highest)))
469 result))
470
416(defvar eww-mode-map 471(defvar eww-mode-map
417 (let ((map (make-sparse-keymap))) 472 (let ((map (make-sparse-keymap)))
418 (suppress-keymap map) 473 (suppress-keymap map)
@@ -435,6 +490,7 @@ word(s) will be searched for via `eww-search-prefix'."
435 (define-key map "w" 'eww-copy-page-url) 490 (define-key map "w" 'eww-copy-page-url)
436 (define-key map "C" 'url-cookie-list) 491 (define-key map "C" 'url-cookie-list)
437 (define-key map "v" 'eww-view-source) 492 (define-key map "v" 'eww-view-source)
493 (define-key map "R" 'eww-readable)
438 (define-key map "H" 'eww-list-histories) 494 (define-key map "H" 'eww-list-histories)
439 495
440 (define-key map "b" 'eww-add-bookmark) 496 (define-key map "b" 'eww-add-bookmark)
diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index 878728c9319..59326de64dd 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -370,6 +370,26 @@ size, and full-buffer size."
370 (push (shr-transform-dom sub) result))) 370 (push (shr-transform-dom sub) result)))
371 (nreverse result))) 371 (nreverse result)))
372 372
373(defun shr-retransform-dom (dom)
374 "Transform the shr DOM back into the libxml DOM."
375 (let ((tag (car dom))
376 (attributes nil)
377 (text nil)
378 (sub-nodes nil))
379 (dolist (elem (cdr dom))
380 (cond
381 ((eq (car elem) 'text)
382 (setq text (cdr elem)))
383 ((not (consp (cdr elem)))
384 (push (cons (intern (substring (symbol-name (car elem)) 1) obarray)
385 (cdr elem))
386 attributes))
387 (t
388 (push (shr-retransform-dom elem) sub-nodes))))
389 (append (list tag (nreverse attributes))
390 (nreverse sub-nodes)
391 (and text (list text)))))
392
373(defsubst shr-generic (cont) 393(defsubst shr-generic (cont)
374 (dolist (sub cont) 394 (dolist (sub cont)
375 (cond 395 (cond