aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorLars Magne Ingebrigtsen2014-11-03 01:01:20 +0100
committerLars Magne Ingebrigtsen2014-11-03 01:01:20 +0100
commit2e8259b044fda2a6424b71eb8368cafa2fa6d86e (patch)
treee24c9c4697446de0489d776de66a660cc01d0680 /lisp
parent816cad6e2414474b06ebb4f691fc1bdb9a8953a4 (diff)
downloademacs-2e8259b044fda2a6424b71eb8368cafa2fa6d86e.tar.gz
emacs-2e8259b044fda2a6424b71eb8368cafa2fa6d86e.zip
Add a new, somewhat experimental "readability" command to eww
* net/eww.el (eww-readable): New command and keystroke. * net/shr.el (shr-retransform-dom): New function.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog4
-rw-r--r--lisp/net/eww.el56
-rw-r--r--lisp/net/shr.el20
3 files changed, 80 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index eb374375198..b6e32f285ce 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,5 +1,9 @@
12014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org> 12014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org>
2 2
3 * net/eww.el (eww-readable): New command and keystroke.
4
5 * net/shr.el (shr-retransform-dom): New function.
6
3 * net/eww.el (eww-display-html): Set `eww-current-source' in the 7 * net/eww.el (eww-display-html): Set `eww-current-source' in the
4 correct buffer. 8 correct buffer.
5 (eww-view-source): Use it. 9 (eww-view-source): Use it.
diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index e4acd69ef4d..579f0878bbd 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -402,6 +402,7 @@ word(s) will be searched for via `eww-search-prefix'."
402 (setq-local eww-contents-url nil)) 402 (setq-local eww-contents-url nil))
403 403
404(defun eww-view-source () 404(defun eww-view-source ()
405 "View the HTML source code of the current page."
405 (interactive) 406 (interactive)
406 (let ((buf (get-buffer-create "*eww-source*")) 407 (let ((buf (get-buffer-create "*eww-source*"))
407 (source eww-current-source)) 408 (source eww-current-source))
@@ -413,6 +414,60 @@ word(s) will be searched for via `eww-search-prefix'."
413 (html-mode))) 414 (html-mode)))
414 (view-buffer buf))) 415 (view-buffer buf)))
415 416
417(defun eww-readable ()
418 "View the main \"readable\" parts of the current web page.
419This command uses heuristics to find the parts of the web page that
420contains the main textual portion, leaving out navigation menus and
421the like."
422 (interactive)
423 (let* ((source eww-current-source)
424 (dom (shr-transform-dom
425 (with-temp-buffer
426 (insert source)
427 (libxml-parse-html-region (point-min) (point-max))))))
428 (eww-score-readability dom)
429 (eww-display-html 'utf-8 nil (shr-retransform-dom
430 (eww-highest-readability dom)))
431 (setq eww-current-source source)))
432
433(defun eww-score-readability (node)
434 (let ((score -1))
435 (cond
436 ((memq (car node) '(script head))
437 (setq score -2))
438 ((eq (car node) 'meta)
439 (setq score -1))
440 ((eq (car node) 'a)
441 (setq score (- (length (split-string
442 (or (cdr (assoc 'text (cdr node))) ""))))))
443 (t
444 (dolist (elem (cdr node))
445 (cond
446 ((eq (car elem) 'text)
447 (setq score (+ score (length (split-string (cdr elem))))))
448 ((consp (cdr elem))
449 (setq score (+ score
450 (or (cdr (assoc :eww-readability-score (cdr elem)))
451 (eww-score-readability elem)))))))))
452 ;; Cache the score of the node to avoid recomputing all the time.
453 (setcdr node (cons (cons :eww-readability-score score) (cdr node)))
454 score))
455
456(defun eww-highest-readability (node)
457 (let ((result node)
458 highest)
459 (dolist (elem (cdr node))
460 (when (and (consp (cdr elem))
461 (> (or (cdr (assoc
462 :eww-readability-score
463 (setq highest
464 (eww-highest-readability elem))))
465 most-negative-fixnum)
466 (or (cdr (assoc :eww-readability-score (cdr result)))
467 most-negative-fixnum)))
468 (setq result highest)))
469 result))
470
416(defvar eww-mode-map 471(defvar eww-mode-map
417 (let ((map (make-sparse-keymap))) 472 (let ((map (make-sparse-keymap)))
418 (suppress-keymap map) 473 (suppress-keymap map)
@@ -435,6 +490,7 @@ word(s) will be searched for via `eww-search-prefix'."
435 (define-key map "w" 'eww-copy-page-url) 490 (define-key map "w" 'eww-copy-page-url)
436 (define-key map "C" 'url-cookie-list) 491 (define-key map "C" 'url-cookie-list)
437 (define-key map "v" 'eww-view-source) 492 (define-key map "v" 'eww-view-source)
493 (define-key map "R" 'eww-readable)
438 (define-key map "H" 'eww-list-histories) 494 (define-key map "H" 'eww-list-histories)
439 495
440 (define-key map "b" 'eww-add-bookmark) 496 (define-key map "b" 'eww-add-bookmark)
diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index 878728c9319..59326de64dd 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -370,6 +370,26 @@ size, and full-buffer size."
370 (push (shr-transform-dom sub) result))) 370 (push (shr-transform-dom sub) result)))
371 (nreverse result))) 371 (nreverse result)))
372 372
373(defun shr-retransform-dom (dom)
374 "Transform the shr DOM back into the libxml DOM."
375 (let ((tag (car dom))
376 (attributes nil)
377 (text nil)
378 (sub-nodes nil))
379 (dolist (elem (cdr dom))
380 (cond
381 ((eq (car elem) 'text)
382 (setq text (cdr elem)))
383 ((not (consp (cdr elem)))
384 (push (cons (intern (substring (symbol-name (car elem)) 1) obarray)
385 (cdr elem))
386 attributes))
387 (t
388 (push (shr-retransform-dom elem) sub-nodes))))
389 (append (list tag (nreverse attributes))
390 (nreverse sub-nodes)
391 (and text (list text)))))
392
373(defsubst shr-generic (cont) 393(defsubst shr-generic (cont)
374 (dolist (sub cont) 394 (dolist (sub cont)
375 (cond 395 (cond