aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKatsumi Yamaoka2016-11-04 10:33:26 +0000
committerKatsumi Yamaoka2016-11-04 10:33:26 +0000
commit6ccb5f19f68689186002b817be7be25d929f5677 (patch)
treefa02cd082fafaf17d4e2370630fff2ce82c6a104
parent0adefe7ef9f4c23a5c8fef1503bc2b02ea6db8f5 (diff)
downloademacs-6ccb5f19f68689186002b817be7be25d929f5677.tar.gz
emacs-6ccb5f19f68689186002b817be7be25d929f5677.zip
* lisp/net/shr.el (shr-collect-extra-strings-in-table) New function
that gathers extra strings in an invalid html. (bug#24831) (shr-tag-table): Use it.
-rw-r--r--lisp/net/shr.el37
1 files changed, 37 insertions, 0 deletions
diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index 7ea33fb2d3e..73886bf1b4e 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -1897,11 +1897,48 @@ The preference is a float determined from `shr-prefer-media-type'."
1897 (when (zerop shr-table-depth) 1897 (when (zerop shr-table-depth)
1898 (save-excursion 1898 (save-excursion
1899 (shr-expand-alignments start (point))) 1899 (shr-expand-alignments start (point)))
1900 ;; Insert also non-td/th strings excluding comments and styles.
1901 (save-restriction
1902 (narrow-to-region (point) (point))
1903 (insert (mapconcat #'identity
1904 (shr-collect-extra-strings-in-table dom)
1905 "\n"))
1906 (shr-fill-lines (point-min) (point-max)))
1900 (dolist (elem (dom-by-tag dom 'object)) 1907 (dolist (elem (dom-by-tag dom 'object))
1901 (shr-tag-object elem)) 1908 (shr-tag-object elem))
1902 (dolist (elem (dom-by-tag dom 'img)) 1909 (dolist (elem (dom-by-tag dom 'img))
1903 (shr-tag-img elem))))) 1910 (shr-tag-img elem)))))
1904 1911
1912(defun shr-collect-extra-strings-in-table (dom &optional flags)
1913 "Return extra strings in DOM of which the root is a table clause.
1914FLAGS is a cons of two flags that control whether to collect strings."
1915 ;; If and only if the cdr is not set, the car will be set to t when
1916 ;; a <td> or a <th> clause is found in the children of DOM, and reset
1917 ;; to nil when a <table> clause is found in the children of DOM.
1918 ;; The cdr will be set to t when a <table> clause is found if the car
1919 ;; is not set then, and will never be reset.
1920 ;; This function collects strings if the car of FLAGS is not set.
1921 (unless flags (setq flags (cons nil nil)))
1922 (cl-loop for child in (dom-children dom)
1923 if (stringp child)
1924 when (and (not (car flags))
1925 (string-match "\\(?:[^\t\n\r ]+[\t\n\r ]+\\)*[^\t\n\r ]+"
1926 child))
1927 collect (match-string 0 child)
1928 end
1929 else
1930 unless (let ((tag (dom-tag child)))
1931 (or (memq tag '(comment style))
1932 (progn
1933 (cond ((memq tag '(td th))
1934 (unless (cdr flags) (setcar flags t)))
1935 ((eq tag 'table)
1936 (if (car flags)
1937 (unless (cdr flags) (setcar flags nil))
1938 (setcdr flags t))))
1939 nil)))
1940 append (shr-collect-extra-strings-in-table child flags)))
1941
1905(defun shr-insert-table (table widths) 1942(defun shr-insert-table (table widths)
1906 (let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet)) 1943 (let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet))
1907 "collapse")) 1944 "collapse"))