diff options
| author | Katsumi Yamaoka | 2016-11-04 10:33:26 +0000 |
|---|---|---|
| committer | Katsumi Yamaoka | 2016-11-04 10:33:26 +0000 |
| commit | 6ccb5f19f68689186002b817be7be25d929f5677 (patch) | |
| tree | fa02cd082fafaf17d4e2370630fff2ce82c6a104 | |
| parent | 0adefe7ef9f4c23a5c8fef1503bc2b02ea6db8f5 (diff) | |
| download | emacs-6ccb5f19f68689186002b817be7be25d929f5677.tar.gz emacs-6ccb5f19f68689186002b817be7be25d929f5677.zip | |
* lisp/net/shr.el (shr-collect-extra-strings-in-table) New function
that gathers extra strings in an invalid html. (bug#24831)
(shr-tag-table): Use it.
| -rw-r--r-- | lisp/net/shr.el | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/lisp/net/shr.el b/lisp/net/shr.el index 7ea33fb2d3e..73886bf1b4e 100644 --- a/lisp/net/shr.el +++ b/lisp/net/shr.el | |||
| @@ -1897,11 +1897,48 @@ The preference is a float determined from `shr-prefer-media-type'." | |||
| 1897 | (when (zerop shr-table-depth) | 1897 | (when (zerop shr-table-depth) |
| 1898 | (save-excursion | 1898 | (save-excursion |
| 1899 | (shr-expand-alignments start (point))) | 1899 | (shr-expand-alignments start (point))) |
| 1900 | ;; Insert also non-td/th strings excluding comments and styles. | ||
| 1901 | (save-restriction | ||
| 1902 | (narrow-to-region (point) (point)) | ||
| 1903 | (insert (mapconcat #'identity | ||
| 1904 | (shr-collect-extra-strings-in-table dom) | ||
| 1905 | "\n")) | ||
| 1906 | (shr-fill-lines (point-min) (point-max))) | ||
| 1900 | (dolist (elem (dom-by-tag dom 'object)) | 1907 | (dolist (elem (dom-by-tag dom 'object)) |
| 1901 | (shr-tag-object elem)) | 1908 | (shr-tag-object elem)) |
| 1902 | (dolist (elem (dom-by-tag dom 'img)) | 1909 | (dolist (elem (dom-by-tag dom 'img)) |
| 1903 | (shr-tag-img elem))))) | 1910 | (shr-tag-img elem))))) |
| 1904 | 1911 | ||
| 1912 | (defun shr-collect-extra-strings-in-table (dom &optional flags) | ||
| 1913 | "Return extra strings in DOM of which the root is a table clause. | ||
| 1914 | FLAGS is a cons of two flags that control whether to collect strings." | ||
| 1915 | ;; If and only if the cdr is not set, the car will be set to t when | ||
| 1916 | ;; a <td> or a <th> clause is found in the children of DOM, and reset | ||
| 1917 | ;; to nil when a <table> clause is found in the children of DOM. | ||
| 1918 | ;; The cdr will be set to t when a <table> clause is found if the car | ||
| 1919 | ;; is not set then, and will never be reset. | ||
| 1920 | ;; This function collects strings if the car of FLAGS is not set. | ||
| 1921 | (unless flags (setq flags (cons nil nil))) | ||
| 1922 | (cl-loop for child in (dom-children dom) | ||
| 1923 | if (stringp child) | ||
| 1924 | when (and (not (car flags)) | ||
| 1925 | (string-match "\\(?:[^\t\n\r ]+[\t\n\r ]+\\)*[^\t\n\r ]+" | ||
| 1926 | child)) | ||
| 1927 | collect (match-string 0 child) | ||
| 1928 | end | ||
| 1929 | else | ||
| 1930 | unless (let ((tag (dom-tag child))) | ||
| 1931 | (or (memq tag '(comment style)) | ||
| 1932 | (progn | ||
| 1933 | (cond ((memq tag '(td th)) | ||
| 1934 | (unless (cdr flags) (setcar flags t))) | ||
| 1935 | ((eq tag 'table) | ||
| 1936 | (if (car flags) | ||
| 1937 | (unless (cdr flags) (setcar flags nil)) | ||
| 1938 | (setcdr flags t)))) | ||
| 1939 | nil))) | ||
| 1940 | append (shr-collect-extra-strings-in-table child flags))) | ||
| 1941 | |||
| 1905 | (defun shr-insert-table (table widths) | 1942 | (defun shr-insert-table (table widths) |
| 1906 | (let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet)) | 1943 | (let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet)) |
| 1907 | "collapse")) | 1944 | "collapse")) |