diff options
| author | Lars Ingebrigtsen | 2016-02-24 13:04:03 +1100 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2016-02-24 13:04:03 +1100 |
| commit | 21fe2ebec8b63d5fd0a570ed0c907802ab83f991 (patch) | |
| tree | f7fe7b6b4b2a21667cb66a1fdf7d470c7ec292a0 /lisp/net/html2text.el | |
| parent | e1d749bd7e0d68ab063eae3927caede6039a33cf (diff) | |
| download | emacs-21fe2ebec8b63d5fd0a570ed0c907802ab83f991.tar.gz emacs-21fe2ebec8b63d5fd0a570ed0c907802ab83f991.zip | |
Move low-level library files from the lisp/gnus directory
The files moved from lisp/gnus are:
auth-source.el -> /
compface.el -> /image
ecomplete.el -> /
flow-fill.el -> /mail
gravatar.el -> /image
gssapi.el -> /net
html2text.el -> /net
ietf-drums.el -> /mail
mail-parse.el -> /mail
mail-prsvr.el -> /mail
mailcap.el -> /net
plstore.el -> /
pop3.el -> /net
qp.el -> /mail
registry.el -> /
rfc1843.el -> /international
rfc2045.el -> /mail
rfc2047.el -> /mail
rfc2231.el -> /mail
rtree.el -> /
sieve-manage.el -> /net
sieve-mode.el -> /net
sieve.el -> /net
starttls.el -> /net
utf7.el -> /international
yenc.el -> /mail
Diffstat (limited to 'lisp/net/html2text.el')
| -rw-r--r-- | lisp/net/html2text.el | 461 |
1 files changed, 461 insertions, 0 deletions
diff --git a/lisp/net/html2text.el b/lisp/net/html2text.el new file mode 100644 index 00000000000..2b1c2057bb4 --- /dev/null +++ b/lisp/net/html2text.el | |||
| @@ -0,0 +1,461 @@ | |||
| 1 | ;;; html2text.el --- a simple html to plain text converter -*- coding: utf-8 -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2002-2016 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; Author: Joakim Hove <hove@phys.ntnu.no> | ||
| 6 | |||
| 7 | ;; This file is part of GNU Emacs. | ||
| 8 | |||
| 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify | ||
| 10 | ;; it under the terms of the GNU General Public License as published by | ||
| 11 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 12 | ;; (at your option) any later version. | ||
| 13 | |||
| 14 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 17 | ;; GNU General Public License for more details. | ||
| 18 | |||
| 19 | ;; You should have received a copy of the GNU General Public License | ||
| 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | ||
| 21 | |||
| 22 | ;;; Commentary: | ||
| 23 | |||
| 24 | ;; These functions provide a simple way to wash/clean html infected | ||
| 25 | ;; mails. Definitely do not work in all cases, but some improvement | ||
| 26 | ;; in readability is generally obtained. Formatting is only done in | ||
| 27 | ;; the buffer, so the next time you enter the article it will be | ||
| 28 | ;; "re-htmlized". | ||
| 29 | ;; | ||
| 30 | ;; The main function is `html2text'. | ||
| 31 | |||
| 32 | ;;; Code: | ||
| 33 | |||
| 34 | ;; | ||
| 35 | ;; <Global variables> | ||
| 36 | ;; | ||
| 37 | |||
| 38 | (eval-when-compile | ||
| 39 | (require 'cl)) | ||
| 40 | |||
| 41 | (defvar html2text-format-single-element-list '(("hr" . html2text-clean-hr))) | ||
| 42 | |||
| 43 | (defvar html2text-replace-list | ||
| 44 | '(("´" . "`") | ||
| 45 | ("&" . "&") | ||
| 46 | ("'" . "'") | ||
| 47 | ("¦" . "|") | ||
| 48 | ("¢" . "c") | ||
| 49 | ("ˆ" . "^") | ||
| 50 | ("©" . "(C)") | ||
| 51 | ("¤" . "(#)") | ||
| 52 | ("°" . "degree") | ||
| 53 | ("÷" . "/") | ||
| 54 | ("€" . "e") | ||
| 55 | ("½" . "1/2") | ||
| 56 | (">" . ">") | ||
| 57 | ("¿" . "?") | ||
| 58 | ("«" . "<<") | ||
| 59 | ("&ldquo" . "\"") | ||
| 60 | ("‹" . "(") | ||
| 61 | ("‘" . "`") | ||
| 62 | ("<" . "<") | ||
| 63 | ("—" . "--") | ||
| 64 | (" " . " ") | ||
| 65 | ("–" . "-") | ||
| 66 | ("‰" . "%%") | ||
| 67 | ("±" . "+-") | ||
| 68 | ("£" . "£") | ||
| 69 | (""" . "\"") | ||
| 70 | ("»" . ">>") | ||
| 71 | ("&rdquo" . "\"") | ||
| 72 | ("®" . "(R)") | ||
| 73 | ("›" . ")") | ||
| 74 | ("’" . "'") | ||
| 75 | ("§" . "§") | ||
| 76 | ("¹" . "^1") | ||
| 77 | ("²" . "^2") | ||
| 78 | ("³" . "^3") | ||
| 79 | ("˜" . "~")) | ||
| 80 | "The map of entity to text. | ||
| 81 | |||
| 82 | This is an alist were each element is a dotted pair consisting of an | ||
| 83 | old string, and a replacement string. This replacement is done by the | ||
| 84 | function `html2text-substitute' which basically performs a | ||
| 85 | `replace-string' operation for every element in the list. This is | ||
| 86 | completely verbatim - without any use of REGEXP.") | ||
| 87 | |||
| 88 | (defvar html2text-remove-tag-list | ||
| 89 | '("html" "body" "p" "img" "dir" "head" "div" "br" "font" "title" "meta") | ||
| 90 | "A list of removable tags. | ||
| 91 | |||
| 92 | This is a list of tags which should be removed, without any | ||
| 93 | formatting. Note that tags in the list are presented *without* | ||
| 94 | any \"<\" or \">\". All occurrences of a tag appearing in this | ||
| 95 | list are removed, irrespective of whether it is a closing or | ||
| 96 | opening tag, or if the tag has additional attributes. The | ||
| 97 | deletion is done by the function `html2text-remove-tags'. | ||
| 98 | |||
| 99 | For instance the text: | ||
| 100 | |||
| 101 | \"Here comes something <font size\"+3\" face=\"Helvetica\"> big </font>.\" | ||
| 102 | |||
| 103 | will be reduced to: | ||
| 104 | |||
| 105 | \"Here comes something big.\" | ||
| 106 | |||
| 107 | If this list contains the element \"font\".") | ||
| 108 | |||
| 109 | (defvar html2text-format-tag-list | ||
| 110 | '(("b" . html2text-clean-bold) | ||
| 111 | ("strong" . html2text-clean-bold) | ||
| 112 | ("u" . html2text-clean-underline) | ||
| 113 | ("i" . html2text-clean-italic) | ||
| 114 | ("em" . html2text-clean-italic) | ||
| 115 | ("blockquote" . html2text-clean-blockquote) | ||
| 116 | ("a" . html2text-clean-anchor) | ||
| 117 | ("ul" . html2text-clean-ul) | ||
| 118 | ("ol" . html2text-clean-ol) | ||
| 119 | ("dl" . html2text-clean-dl) | ||
| 120 | ("center" . html2text-clean-center)) | ||
| 121 | "An alist of tags and processing functions. | ||
| 122 | |||
| 123 | This is an alist where each dotted pair consists of a tag, and then | ||
| 124 | the name of a function to be called when this tag is found. The | ||
| 125 | function is called with the arguments p1, p2, p3 and p4. These are | ||
| 126 | demonstrated below: | ||
| 127 | |||
| 128 | \"<b> This is bold text </b>\" | ||
| 129 | ^ ^ ^ ^ | ||
| 130 | | | | | | ||
| 131 | p1 p2 p3 p4 | ||
| 132 | |||
| 133 | Then the called function will typically format the text somewhat and | ||
| 134 | remove the tags.") | ||
| 135 | |||
| 136 | (defvar html2text-remove-tag-list2 '("li" "dt" "dd" "meta") | ||
| 137 | "Another list of removable tags. | ||
| 138 | |||
| 139 | This is a list of tags which are removed similarly to the list | ||
| 140 | `html2text-remove-tag-list' - but these tags are retained for the | ||
| 141 | formatting, and then moved afterward.") | ||
| 142 | |||
| 143 | ;; | ||
| 144 | ;; </Global variables> | ||
| 145 | ;; | ||
| 146 | |||
| 147 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 148 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 149 | |||
| 150 | ;; | ||
| 151 | ;; <Utility functions> | ||
| 152 | ;; | ||
| 153 | |||
| 154 | |||
| 155 | (defun html2text-replace-string (from-string to-string min max) | ||
| 156 | "Replace FROM-STRING with TO-STRING in region from MIN to MAX." | ||
| 157 | (goto-char min) | ||
| 158 | (let ((delta (- (string-width to-string) (string-width from-string))) | ||
| 159 | (change 0)) | ||
| 160 | (while (search-forward from-string max t) | ||
| 161 | (replace-match to-string) | ||
| 162 | (setq change (+ change delta))) | ||
| 163 | change)) | ||
| 164 | |||
| 165 | ;; | ||
| 166 | ;; </Utility functions> | ||
| 167 | ;; | ||
| 168 | |||
| 169 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 170 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 171 | |||
| 172 | ;; | ||
| 173 | ;; <Functions related to attributes> i.e. <font size=+3> | ||
| 174 | ;; | ||
| 175 | |||
| 176 | (defun html2text-attr-value (list attribute) | ||
| 177 | "Get value of ATTRIBUTE from LIST." | ||
| 178 | (nth 1 (assoc attribute list))) | ||
| 179 | |||
| 180 | (defun html2text-get-attr (p1 p2) | ||
| 181 | (goto-char p1) | ||
| 182 | (re-search-forward "\\s-+" p2 t) | ||
| 183 | (let (attr-list) | ||
| 184 | (while (re-search-forward "[-a-z0-9._]+" p2 t) | ||
| 185 | (setq attr-list | ||
| 186 | (cons | ||
| 187 | (list (match-string 0) | ||
| 188 | (when (looking-at "\\s-*=") | ||
| 189 | (goto-char (match-end 0)) | ||
| 190 | (skip-chars-forward "[:space:]") | ||
| 191 | (when (or (looking-at "\"[^\"]*\"\\|'[^']*'") | ||
| 192 | (looking-at "[-a-z0-9._:]+")) | ||
| 193 | (goto-char (match-end 0)) | ||
| 194 | (match-string 0)))) | ||
| 195 | attr-list))) | ||
| 196 | attr-list)) | ||
| 197 | |||
| 198 | ;; | ||
| 199 | ;; </Functions related to attributes> | ||
| 200 | ;; | ||
| 201 | |||
| 202 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 203 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 204 | |||
| 205 | ;; | ||
| 206 | ;; <Functions to be called to format a tag-pair> | ||
| 207 | ;; | ||
| 208 | (defun html2text-clean-list-items (p1 p2 list-type) | ||
| 209 | (goto-char p1) | ||
| 210 | (let ((item-nr 0) | ||
| 211 | (items 0)) | ||
| 212 | (while (search-forward "<li>" p2 t) | ||
| 213 | (setq items (1+ items))) | ||
| 214 | (goto-char p1) | ||
| 215 | (while (< item-nr items) | ||
| 216 | (setq item-nr (1+ item-nr)) | ||
| 217 | (search-forward "<li>" (point-max) t) | ||
| 218 | (cond | ||
| 219 | ((string= list-type "ul") (insert " o ")) | ||
| 220 | ((string= list-type "ol") (insert (format " %s: " item-nr))) | ||
| 221 | (t (insert " x ")))))) | ||
| 222 | |||
| 223 | (defun html2text-clean-dtdd (p1 p2) | ||
| 224 | (goto-char p1) | ||
| 225 | (let ((items 0) | ||
| 226 | (item-nr 0)) | ||
| 227 | (while (search-forward "<dt>" p2 t) | ||
| 228 | (setq items (1+ items))) | ||
| 229 | (goto-char p1) | ||
| 230 | (while (< item-nr items) | ||
| 231 | (setq item-nr (1+ item-nr)) | ||
| 232 | (re-search-forward "<dt>\\([ ]*\\)" (point-max) t) | ||
| 233 | (when (match-string 1) | ||
| 234 | (delete-region (point) (- (point) (string-width (match-string 1))))) | ||
| 235 | (let ((def-p1 (point)) | ||
| 236 | (def-p2 0)) | ||
| 237 | (re-search-forward "\\([ ]*\\)\\(</dt>\\|<dd>\\)" (point-max) t) | ||
| 238 | (if (match-string 1) | ||
| 239 | (progn | ||
| 240 | (let* ((mw1 (string-width (match-string 1))) | ||
| 241 | (mw2 (string-width (match-string 2))) | ||
| 242 | (mw (+ mw1 mw2))) | ||
| 243 | (goto-char (- (point) mw)) | ||
| 244 | (delete-region (point) (+ (point) mw1)) | ||
| 245 | (setq def-p2 (point)))) | ||
| 246 | (setq def-p2 (- (point) (string-width (match-string 2))))) | ||
| 247 | (put-text-property def-p1 def-p2 'face 'bold))))) | ||
| 248 | |||
| 249 | (defun html2text-delete-tags (p1 p2 p3 p4) | ||
| 250 | (delete-region p1 p2) | ||
| 251 | (delete-region (- p3 (- p2 p1)) (- p4 (- p2 p1)))) | ||
| 252 | |||
| 253 | (defun html2text-delete-single-tag (p1 p2) | ||
| 254 | (delete-region p1 p2)) | ||
| 255 | |||
| 256 | (defun html2text-clean-hr (p1 p2) | ||
| 257 | (html2text-delete-single-tag p1 p2) | ||
| 258 | (goto-char p1) | ||
| 259 | (newline 1) | ||
| 260 | (insert (make-string fill-column ?-))) | ||
| 261 | |||
| 262 | (defun html2text-clean-ul (p1 p2 p3 p4) | ||
| 263 | (html2text-delete-tags p1 p2 p3 p4) | ||
| 264 | (html2text-clean-list-items p1 (- p3 (- p1 p2)) "ul")) | ||
| 265 | |||
| 266 | (defun html2text-clean-ol (p1 p2 p3 p4) | ||
| 267 | (html2text-delete-tags p1 p2 p3 p4) | ||
| 268 | (html2text-clean-list-items p1 (- p3 (- p1 p2)) "ol")) | ||
| 269 | |||
| 270 | (defun html2text-clean-dl (p1 p2 p3 p4) | ||
| 271 | (html2text-delete-tags p1 p2 p3 p4) | ||
| 272 | (html2text-clean-dtdd p1 (- p3 (- p1 p2)))) | ||
| 273 | |||
| 274 | (defun html2text-clean-center (p1 p2 p3 p4) | ||
| 275 | (html2text-delete-tags p1 p2 p3 p4) | ||
| 276 | (center-region p1 (- p3 (- p2 p1)))) | ||
| 277 | |||
| 278 | (defun html2text-clean-bold (p1 p2 p3 p4) | ||
| 279 | (put-text-property p2 p3 'face 'bold) | ||
| 280 | (html2text-delete-tags p1 p2 p3 p4)) | ||
| 281 | |||
| 282 | (defun html2text-clean-title (p1 p2 p3 p4) | ||
| 283 | (put-text-property p2 p3 'face 'bold) | ||
| 284 | (html2text-delete-tags p1 p2 p3 p4)) | ||
| 285 | |||
| 286 | (defun html2text-clean-underline (p1 p2 p3 p4) | ||
| 287 | (put-text-property p2 p3 'face 'underline) | ||
| 288 | (html2text-delete-tags p1 p2 p3 p4)) | ||
| 289 | |||
| 290 | (defun html2text-clean-italic (p1 p2 p3 p4) | ||
| 291 | (put-text-property p2 p3 'face 'italic) | ||
| 292 | (html2text-delete-tags p1 p2 p3 p4)) | ||
| 293 | |||
| 294 | (defun html2text-clean-font (p1 p2 p3 p4) | ||
| 295 | (html2text-delete-tags p1 p2 p3 p4)) | ||
| 296 | |||
| 297 | (defun html2text-clean-blockquote (p1 p2 p3 p4) | ||
| 298 | (html2text-delete-tags p1 p2 p3 p4)) | ||
| 299 | |||
| 300 | (defun html2text-clean-anchor (p1 p2 p3 p4) | ||
| 301 | ;; If someone can explain how to make the URL clickable I will surely | ||
| 302 | ;; improve upon this. | ||
| 303 | ;; Maybe `goto-addr.el' can be used here. | ||
| 304 | (let* ((attr-list (html2text-get-attr p1 p2)) | ||
| 305 | (href (html2text-attr-value attr-list "href"))) | ||
| 306 | (delete-region p1 p4) | ||
| 307 | (when href | ||
| 308 | (goto-char p1) | ||
| 309 | (insert (if (string-match "\\`['\"].*['\"]\\'" href) | ||
| 310 | (substring href 1 -1) href)) | ||
| 311 | (put-text-property p1 (point) 'face 'bold)))) | ||
| 312 | |||
| 313 | ;; | ||
| 314 | ;; </Functions to be called to format a tag-pair> | ||
| 315 | ;; | ||
| 316 | |||
| 317 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 318 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 319 | |||
| 320 | ;; | ||
| 321 | ;; <Functions to be called to fix up paragraphs> | ||
| 322 | ;; | ||
| 323 | |||
| 324 | (defun html2text-fix-paragraph (p1 p2) | ||
| 325 | (goto-char p1) | ||
| 326 | (let ((refill-start) | ||
| 327 | (refill-stop)) | ||
| 328 | (when (re-search-forward "<br>$" p2 t) | ||
| 329 | (goto-char p1) | ||
| 330 | (when (re-search-forward ".+[^<][^b][^r][^>]$" p2 t) | ||
| 331 | (beginning-of-line) | ||
| 332 | (setq refill-start (point)) | ||
| 333 | (goto-char p2) | ||
| 334 | (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t) | ||
| 335 | (forward-line 1) | ||
| 336 | (end-of-line) | ||
| 337 | ;; refill-stop should ideally be adjusted to | ||
| 338 | ;; accommodate the "<br>" strings which are removed | ||
| 339 | ;; between refill-start and refill-stop. Can simply | ||
| 340 | ;; be returned from my-replace-string | ||
| 341 | (setq refill-stop (+ (point) | ||
| 342 | (html2text-replace-string | ||
| 343 | "<br>" "" | ||
| 344 | refill-start (point)))) | ||
| 345 | ;; (message "Point = %s refill-stop = %s" (point) refill-stop) | ||
| 346 | ;; (sleep-for 4) | ||
| 347 | (fill-region refill-start refill-stop)))) | ||
| 348 | (html2text-replace-string "<br>" "" p1 p2)) | ||
| 349 | |||
| 350 | ;; | ||
| 351 | ;; This one is interactive ... | ||
| 352 | ;; | ||
| 353 | (defun html2text-fix-paragraphs () | ||
| 354 | "This _tries_ to fix up the paragraphs - this is done in quite a ad-hook | ||
| 355 | fashion, quite close to pure guess-work. It does work in some cases though." | ||
| 356 | (interactive) | ||
| 357 | (goto-char (point-min)) | ||
| 358 | (while (re-search-forward "^<br>$" nil t) | ||
| 359 | (delete-region (match-beginning 0) (match-end 0))) | ||
| 360 | ;; Removing lonely <br> on a single line, if they are left intact we | ||
| 361 | ;; don't have any paragraphs at all. | ||
| 362 | (goto-char (point-min)) | ||
| 363 | (while (not (eobp)) | ||
| 364 | (let ((p1 (point))) | ||
| 365 | (forward-paragraph 1) | ||
| 366 | ;;(message "Kaller fix med p1=%s p2=%s " p1 (1- (point))) (sleep-for 5) | ||
| 367 | (html2text-fix-paragraph p1 (1- (point))) | ||
| 368 | (goto-char p1) | ||
| 369 | (when (not (eobp)) | ||
| 370 | (forward-paragraph 1))))) | ||
| 371 | |||
| 372 | ;; | ||
| 373 | ;; </Functions to be called to fix up paragraphs> | ||
| 374 | ;; | ||
| 375 | |||
| 376 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 377 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| 378 | |||
| 379 | ;; | ||
| 380 | ;; <Interactive functions> | ||
| 381 | ;; | ||
| 382 | |||
| 383 | (defun html2text-remove-tags (tag-list) | ||
| 384 | "Removes the tags listed in the list `html2text-remove-tag-list'. | ||
| 385 | See the documentation for that variable." | ||
| 386 | (interactive) | ||
| 387 | (dolist (tag tag-list) | ||
| 388 | (goto-char (point-min)) | ||
| 389 | (while (re-search-forward (format "\\(</?%s[^>]*>\\)" tag) (point-max) t) | ||
| 390 | (delete-region (match-beginning 0) (match-end 0))))) | ||
| 391 | |||
| 392 | (defun html2text-format-tags () | ||
| 393 | "See the variable `html2text-format-tag-list' for documentation." | ||
| 394 | (interactive) | ||
| 395 | (dolist (tag-and-function html2text-format-tag-list) | ||
| 396 | (let ((tag (car tag-and-function)) | ||
| 397 | (function (cdr tag-and-function))) | ||
| 398 | (goto-char (point-min)) | ||
| 399 | (while (re-search-forward (format "\\(<%s\\( [^>]*\\)?>\\)" tag) | ||
| 400 | (point-max) t) | ||
| 401 | (let ((p1) | ||
| 402 | (p2 (point)) | ||
| 403 | (p3) (p4)) | ||
| 404 | (search-backward "<" (point-min) t) | ||
| 405 | (setq p1 (point)) | ||
| 406 | (unless (search-forward (format "</%s>" tag) (point-max) t) | ||
| 407 | (goto-char p2) | ||
| 408 | (insert (format "</%s>" tag))) | ||
| 409 | (setq p4 (point)) | ||
| 410 | (search-backward "</" (point-min) t) | ||
| 411 | (setq p3 (point)) | ||
| 412 | (funcall function p1 p2 p3 p4) | ||
| 413 | (goto-char p1)))))) | ||
| 414 | |||
| 415 | (defun html2text-substitute () | ||
| 416 | "See the variable `html2text-replace-list' for documentation." | ||
| 417 | (interactive) | ||
| 418 | (dolist (e html2text-replace-list) | ||
| 419 | (goto-char (point-min)) | ||
| 420 | (let ((old-string (car e)) | ||
| 421 | (new-string (cdr e))) | ||
| 422 | (html2text-replace-string old-string new-string (point-min) (point-max))))) | ||
| 423 | |||
| 424 | (defun html2text-format-single-elements () | ||
| 425 | (interactive) | ||
| 426 | (dolist (tag-and-function html2text-format-single-element-list) | ||
| 427 | (let ((tag (car tag-and-function)) | ||
| 428 | (function (cdr tag-and-function))) | ||
| 429 | (goto-char (point-min)) | ||
| 430 | (while (re-search-forward (format "\\(<%s\\( [^>]*\\)?>\\)" tag) | ||
| 431 | (point-max) t) | ||
| 432 | (let ((p1) | ||
| 433 | (p2 (point))) | ||
| 434 | (search-backward "<" (point-min) t) | ||
| 435 | (setq p1 (point)) | ||
| 436 | (funcall function p1 p2)))))) | ||
| 437 | |||
| 438 | ;; | ||
| 439 | ;; Main function | ||
| 440 | ;; | ||
| 441 | |||
| 442 | ;;;###autoload | ||
| 443 | (defun html2text () | ||
| 444 | "Convert HTML to plain text in the current buffer." | ||
| 445 | (interactive) | ||
| 446 | (save-excursion | ||
| 447 | (let ((case-fold-search t) | ||
| 448 | (buffer-read-only)) | ||
| 449 | (html2text-remove-tags html2text-remove-tag-list) | ||
| 450 | (html2text-format-tags) | ||
| 451 | (html2text-remove-tags html2text-remove-tag-list2) | ||
| 452 | (html2text-substitute) | ||
| 453 | (html2text-format-single-elements) | ||
| 454 | (html2text-fix-paragraphs)))) | ||
| 455 | |||
| 456 | ;; | ||
| 457 | ;; </Interactive functions> | ||
| 458 | ;; | ||
| 459 | (provide 'html2text) | ||
| 460 | |||
| 461 | ;;; html2text.el ends here | ||