diff options
| author | Ulf Jasper | 2014-11-21 16:31:30 +0100 |
|---|---|---|
| committer | Ulf Jasper | 2014-11-21 16:31:30 +0100 |
| commit | c39443c1d651bab2eb023f4c38db418c3dc04160 (patch) | |
| tree | 6266035bf6e6f261440caa4bf5a6d3aafcc43b10 | |
| parent | e14c4354cf29fab12fb414c7ebc94bf1a9920dd0 (diff) | |
| download | emacs-c39443c1d651bab2eb023f4c38db418c3dc04160.tar.gz emacs-c39443c1d651bab2eb023f4c38db418c3dc04160.zip | |
'libxml-parse-(html|xml)-region': new optional param 'discard-comments'.
* doc/lispref/text.texi (Parsing HTML/XML): Document new optional parameter
'discard-comments' of 'libxml-parse-(html|xml)-region'.
* src/xml.c (parse_region): Take care of new optional parameter
'discard-comments' of 'libxml-parse-(html|xml)-region'.
(Flibxml_parse_html_region, Flibxml_parse_xml_region): New
optional parameter 'discard-comments'.
* test/automated/libxml-tests.el
(libxml-tests--data-comments-preserved): Renamed from
'libxml-tests--data'.
(libxml-tests--data-comments-discarded): New.
(libxml-tests): Check whether 'libxml-parse-xml-region' is
discarding comments correctly.
| -rw-r--r-- | doc/lispref/ChangeLog | 5 | ||||
| -rw-r--r-- | doc/lispref/text.texi | 7 | ||||
| -rw-r--r-- | src/ChangeLog | 7 | ||||
| -rw-r--r-- | src/xml.c | 47 | ||||
| -rw-r--r-- | test/ChangeLog | 9 | ||||
| -rw-r--r-- | test/automated/libxml-tests.el | 26 |
6 files changed, 74 insertions, 27 deletions
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index 6706f936c5e..0c8792af81f 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog | |||
| @@ -1,3 +1,8 @@ | |||
| 1 | 2014-11-21 Ulf Jasper <ulf.jasper@web.de> | ||
| 2 | |||
| 3 | * text.texi (Parsing HTML/XML): Document new optional parameter | ||
| 4 | 'discard-comments' of 'libxml-parse(html|xml)-region'. | ||
| 5 | |||
| 1 | 2014-11-18 Leo Liu <sdl.web@gmail.com> | 6 | 2014-11-18 Leo Liu <sdl.web@gmail.com> |
| 2 | 7 | ||
| 3 | * functions.texi (Advising Named Functions): Document | 8 | * functions.texi (Advising Named Functions): Document |
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index d1a1e6fa6b9..7c88a5b25d1 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi | |||
| @@ -4324,7 +4324,7 @@ coding instead. | |||
| 4324 | When Emacs is compiled with libxml2 support, the following functions | 4324 | When Emacs is compiled with libxml2 support, the following functions |
| 4325 | are available to parse HTML or XML text into Lisp object trees. | 4325 | are available to parse HTML or XML text into Lisp object trees. |
| 4326 | 4326 | ||
| 4327 | @defun libxml-parse-html-region start end &optional base-url | 4327 | @defun libxml-parse-html-region start end &optional base-url discard-comments |
| 4328 | This function parses the text between @var{start} and @var{end} as | 4328 | This function parses the text between @var{start} and @var{end} as |
| 4329 | HTML, and returns a list representing the HTML @dfn{parse tree}. It | 4329 | HTML, and returns a list representing the HTML @dfn{parse tree}. It |
| 4330 | attempts to handle ``real world'' HTML by robustly coping with syntax | 4330 | attempts to handle ``real world'' HTML by robustly coping with syntax |
| @@ -4333,6 +4333,9 @@ mistakes. | |||
| 4333 | The optional argument @var{base-url}, if non-@code{nil}, should be a | 4333 | The optional argument @var{base-url}, if non-@code{nil}, should be a |
| 4334 | string specifying the base URL for relative URLs occurring in links. | 4334 | string specifying the base URL for relative URLs occurring in links. |
| 4335 | 4335 | ||
| 4336 | If the optional argument @var{discard-comments} is non-@code{nil}, | ||
| 4337 | then the parse tree is created without any comments. | ||
| 4338 | |||
| 4336 | In the parse tree, each HTML node is represented by a list in which | 4339 | In the parse tree, each HTML node is represented by a list in which |
| 4337 | the first element is a symbol representing the node name, the second | 4340 | the first element is a symbol representing the node name, the second |
| 4338 | element is an alist of node attributes, and the remaining elements are | 4341 | element is an alist of node attributes, and the remaining elements are |
| @@ -4368,7 +4371,7 @@ buffer. The argument @var{dom} should be a list as generated by | |||
| 4368 | @end defun | 4371 | @end defun |
| 4369 | 4372 | ||
| 4370 | @cindex parsing xml | 4373 | @cindex parsing xml |
| 4371 | @defun libxml-parse-xml-region start end &optional base-url | 4374 | @defun libxml-parse-xml-region start end &optional base-url discard-comments |
| 4372 | This function is the same as @code{libxml-parse-html-region}, except | 4375 | This function is the same as @code{libxml-parse-html-region}, except |
| 4373 | that it parses the text as XML rather than HTML (so it is stricter | 4376 | that it parses the text as XML rather than HTML (so it is stricter |
| 4374 | about syntax). | 4377 | about syntax). |
diff --git a/src/ChangeLog b/src/ChangeLog index b169479a274..f8c9c5afd69 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,10 @@ | |||
| 1 | 2014-11-21 Ulf Jasper <ulf.jasper@web.de> | ||
| 2 | |||
| 3 | * xml.c (parse_region): Take care of new optional parameter | ||
| 4 | 'discard-comments' of 'libxml-parse(html|xml)-region'. | ||
| 5 | (Flibxml_parse_html_region, Flibxml_parse_xml_region): New | ||
| 6 | optional parameter 'discard-comments'. | ||
| 7 | |||
| 1 | 2014-11-17 Paul Eggert <eggert@cs.ucla.edu> | 8 | 2014-11-17 Paul Eggert <eggert@cs.ucla.edu> |
| 2 | 9 | ||
| 3 | Improve time stamp handling, and be more consistent about it. | 10 | Improve time stamp handling, and be more consistent about it. |
| @@ -175,7 +175,7 @@ make_dom (xmlNode *node) | |||
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | static Lisp_Object | 177 | static Lisp_Object |
| 178 | parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int htmlp) | 178 | parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments, int htmlp) |
| 179 | { | 179 | { |
| 180 | xmlDoc *doc; | 180 | xmlDoc *doc; |
| 181 | Lisp_Object result = Qnil; | 181 | Lisp_Object result = Qnil; |
| @@ -214,21 +214,24 @@ parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int html | |||
| 214 | 214 | ||
| 215 | if (doc != NULL) | 215 | if (doc != NULL) |
| 216 | { | 216 | { |
| 217 | /* If the document is just comments, then this should get us the | ||
| 218 | nodes anyway. */ | ||
| 219 | xmlNode *n = doc->children; | ||
| 220 | Lisp_Object r = Qnil; | 217 | Lisp_Object r = Qnil; |
| 221 | 218 | if (NILP(discard_comments)) | |
| 222 | while (n) { | 219 | { |
| 223 | if (!NILP (r)) | 220 | /* If the document has toplevel comments, then this should |
| 224 | result = Fcons (r, result); | 221 | get us the nodes and the comments. */ |
| 225 | r = make_dom (n); | 222 | xmlNode *n = doc->children; |
| 226 | n = n->next; | 223 | |
| 227 | } | 224 | while (n) { |
| 225 | if (!NILP (r)) | ||
| 226 | result = Fcons (r, result); | ||
| 227 | r = make_dom (n); | ||
| 228 | n = n->next; | ||
| 229 | } | ||
| 230 | } | ||
| 228 | 231 | ||
| 229 | if (NILP (result)) { | 232 | if (NILP (result)) { |
| 230 | /* The document isn't just comments, so get the tree the | 233 | /* The document doesn't have toplevel comments or we discarded |
| 231 | proper way. */ | 234 | them. Get the tree the proper way. */ |
| 232 | xmlNode *node = fn_xmlDocGetRootElement (doc); | 235 | xmlNode *node = fn_xmlDocGetRootElement (doc); |
| 233 | if (node != NULL) | 236 | if (node != NULL) |
| 234 | result = make_dom (node); | 237 | result = make_dom (node); |
| @@ -251,25 +254,27 @@ xml_cleanup_parser (void) | |||
| 251 | 254 | ||
| 252 | DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region, | 255 | DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region, |
| 253 | Slibxml_parse_html_region, | 256 | Slibxml_parse_html_region, |
| 254 | 2, 3, 0, | 257 | 2, 4, 0, |
| 255 | doc: /* Parse the region as an HTML document and return the parse tree. | 258 | doc: /* Parse the region as an HTML document and return the parse tree. |
| 256 | If BASE-URL is non-nil, it is used to expand relative URLs. */) | 259 | If BASE-URL is non-nil, it is used to expand relative URLs. |
| 257 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) | 260 | If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */) |
| 261 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments) | ||
| 258 | { | 262 | { |
| 259 | if (init_libxml2_functions ()) | 263 | if (init_libxml2_functions ()) |
| 260 | return parse_region (start, end, base_url, 1); | 264 | return parse_region (start, end, base_url, discard_comments, 1); |
| 261 | return Qnil; | 265 | return Qnil; |
| 262 | } | 266 | } |
| 263 | 267 | ||
| 264 | DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region, | 268 | DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region, |
| 265 | Slibxml_parse_xml_region, | 269 | Slibxml_parse_xml_region, |
| 266 | 2, 3, 0, | 270 | 2, 4, 0, |
| 267 | doc: /* Parse the region as an XML document and return the parse tree. | 271 | doc: /* Parse the region as an XML document and return the parse tree. |
| 268 | If BASE-URL is non-nil, it is used to expand relative URLs. */) | 272 | If BASE-URL is non-nil, it is used to expand relative URLs. |
| 269 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) | 273 | If DISCARD-COMMENTS is non-nil, all XML comments are discarded. */) |
| 274 | (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments) | ||
| 270 | { | 275 | { |
| 271 | if (init_libxml2_functions ()) | 276 | if (init_libxml2_functions ()) |
| 272 | return parse_region (start, end, base_url, 0); | 277 | return parse_region (start, end, base_url, discard_comments, 0); |
| 273 | return Qnil; | 278 | return Qnil; |
| 274 | } | 279 | } |
| 275 | 280 | ||
diff --git a/test/ChangeLog b/test/ChangeLog index 475b6a3f473..d0988e42076 100644 --- a/test/ChangeLog +++ b/test/ChangeLog | |||
| @@ -1,3 +1,12 @@ | |||
| 1 | 2014-11-21 Ulf Jasper <ulf.jasper@web.de> | ||
| 2 | |||
| 3 | * automated/libxml-tests.el | ||
| 4 | (libxml-tests--data-comments-preserved): Renamed from | ||
| 5 | 'libxml-tests--data'. | ||
| 6 | (libxml-tests--data-comments-discarded): New. | ||
| 7 | (libxml-tests): Check whether 'libxml-parse-xml-region' is | ||
| 8 | discarding comments correctly. | ||
| 9 | |||
| 1 | 2014-11-17 Michal Nazarewicz <mina86@mina86.com> | 10 | 2014-11-17 Michal Nazarewicz <mina86@mina86.com> |
| 2 | 11 | ||
| 3 | * automated/tildify-tests.el (tildify-test-html, tildify-test-xml): | 12 | * automated/tildify-tests.el (tildify-test-html, tildify-test-xml): |
diff --git a/test/automated/libxml-tests.el b/test/automated/libxml-tests.el index ced0df7b3c4..6b6d0170562 100644 --- a/test/automated/libxml-tests.el +++ b/test/automated/libxml-tests.el | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | (require 'ert) | 28 | (require 'ert) |
| 29 | 29 | ||
| 30 | (defvar libxml-tests--data | 30 | (defvar libxml-tests--data-comments-preserved |
| 31 | `(;; simple case | 31 | `(;; simple case |
| 32 | ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>" | 32 | ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>" |
| 33 | . (foo ((baz . "true")) "bar")) | 33 | . (foo ((baz . "true")) "bar")) |
| @@ -40,17 +40,35 @@ | |||
| 40 | "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->") | 40 | "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->") |
| 41 | . (top nil (comment nil "comment-a") (foo ((a . "b")) (bar nil "blub")) | 41 | . (top nil (comment nil "comment-a") (foo ((a . "b")) (bar nil "blub")) |
| 42 | (comment nil "comment-b") (comment nil "comment-c")))) | 42 | (comment nil "comment-b") (comment nil "comment-c")))) |
| 43 | "Alist of XML strings and their expected parse trees.") | 43 | "Alist of XML strings and their expected parse trees for preserved comments.") |
| 44 | |||
| 45 | (defvar libxml-tests--data-comments-discarded | ||
| 46 | `(;; simple case | ||
| 47 | ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>" | ||
| 48 | . (foo ((baz . "true")) "bar")) | ||
| 49 | ;; toplevel comments -- first document child must not get lost | ||
| 50 | (,(concat "<?xml version=\"1.0\"?><foo>bar</foo><!--comment-1-->" | ||
| 51 | "<!--comment-2-->") | ||
| 52 | . (foo nil "bar")) | ||
| 53 | (,(concat "<?xml version=\"1.0\"?><!--comment-a--><foo a=\"b\">" | ||
| 54 | "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->") | ||
| 55 | . (foo ((a . "b")) (bar nil "blub")))) | ||
| 56 | "Alist of XML strings and their expected parse trees for discarded comments.") | ||
| 44 | 57 | ||
| 45 | 58 | ||
| 46 | (ert-deftest libxml-tests () | 59 | (ert-deftest libxml-tests () |
| 47 | "Test libxml." | 60 | "Test libxml." |
| 48 | (when (fboundp 'libxml-parse-xml-region) | 61 | (when (fboundp 'libxml-parse-xml-region) |
| 49 | (with-temp-buffer | 62 | (with-temp-buffer |
| 50 | (dolist (test libxml-tests--data) | 63 | (dolist (test libxml-tests--data-comments-preserved) |
| 64 | (erase-buffer) | ||
| 65 | (insert (car test)) | ||
| 66 | (should (equal (cdr test) | ||
| 67 | (libxml-parse-xml-region (point-min) (point-max))))) | ||
| 68 | (dolist (test libxml-tests--data-comments-discarded) | ||
| 51 | (erase-buffer) | 69 | (erase-buffer) |
| 52 | (insert (car test)) | 70 | (insert (car test)) |
| 53 | (should (equal (cdr test) | 71 | (should (equal (cdr test) |
| 54 | (libxml-parse-xml-region (point-min) (point-max)))))))) | 72 | (libxml-parse-xml-region (point-min) (point-max) nil t))))))) |
| 55 | 73 | ||
| 56 | ;;; libxml-tests.el ends here | 74 | ;;; libxml-tests.el ends here |