aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ulf Jasper 2014-11-21 16:31:30 +0100
committer Ulf Jasper 2014-11-21 16:31:30 +0100
commit c39443c1d651bab2eb023f4c38db418c3dc04160 (patch)
tree 6266035bf6e6f261440caa4bf5a6d3aafcc43b10
parent e14c4354cf29fab12fb414c7ebc94bf1a9920dd0 (diff)
download emacs-c39443c1d651bab2eb023f4c38db418c3dc04160.tar.gz
emacs-c39443c1d651bab2eb023f4c38db418c3dc04160.zip
'libxml-parse(html|xml)-region': new optional param 'discard-comments'.
* doc/lispref/text.texi (Parsing HTML/XML): Document new optional parameter 'discard-comments' of 'libxml-parse(html|xml)-region'.
* src/xml.c (parse_region): Take care of new optional parameter 'discard-comments' of 'libxml-parse(html|xml)-region'.
(Flibxml_parse_html_region, Flibxml_parse_xml_region): New optional parameter 'discard-comments'.
* test/automated/libxml-tests.el (libxml-tests--data-comments-preserved): Renamed from 'libxml-tests--data'.
(libxml-tests--data-comments-discarded): New.
(libxml-tests): Check whether 'libxml-parse-xml-region' is discarding comments correctly.
-rw-r--r-- doc/lispref/ChangeLog 5
-rw-r--r-- doc/lispref/text.texi 7
-rw-r--r-- src/ChangeLog 7
-rw-r--r-- src/xml.c 47
-rw-r--r-- test/ChangeLog 9
-rw-r--r-- test/automated/libxml-tests.el 26
6 files changed, 74 insertions, 27 deletions
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog
index 6706f936c5e..0c8792af81f 100644
--- a/doc/lispref/ChangeLog
+++ b/doc/lispref/ChangeLog
@@ -1,3 +1,8 @@
12014-11-21 Ulf Jasper <ulf.jasper@web.de>
2
3 * text.texi (Parsing HTML/XML): Document new optional parameter
4 'discard-comments' of 'libxml-parse(html|xml)-region'.
5
12014-11-18 Leo Liu <sdl.web@gmail.com> 62014-11-18 Leo Liu <sdl.web@gmail.com>
2 7
3 * functions.texi (Advising Named Functions): Document 8 * functions.texi (Advising Named Functions): Document
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi
index d1a1e6fa6b9..7c88a5b25d1 100644
--- a/doc/lispref/text.texi
+++ b/doc/lispref/text.texi
@@ -4324,7 +4324,7 @@ coding instead.
4324When Emacs is compiled with libxml2 support, the following functions 4324When Emacs is compiled with libxml2 support, the following functions
4325are available to parse HTML or XML text into Lisp object trees. 4325are available to parse HTML or XML text into Lisp object trees.
4326 4326
4327@defun libxml-parse-html-region start end &optional base-url 4327@defun libxml-parse-html-region start end &optional base-url discard-comments
4328This function parses the text between @var{start} and @var{end} as 4328This function parses the text between @var{start} and @var{end} as
4329HTML, and returns a list representing the HTML @dfn{parse tree}. It 4329HTML, and returns a list representing the HTML @dfn{parse tree}. It
4330attempts to handle ``real world'' HTML by robustly coping with syntax 4330attempts to handle ``real world'' HTML by robustly coping with syntax
@@ -4333,6 +4333,9 @@ mistakes.
4333The optional argument @var{base-url}, if non-@code{nil}, should be a 4333The optional argument @var{base-url}, if non-@code{nil}, should be a
4334string specifying the base URL for relative URLs occurring in links. 4334string specifying the base URL for relative URLs occurring in links.
4335 4335
4336If the optional argument @var{discard-comments} is non-@code{nil},
4337then the parse tree is created without any comments.
4338
4336In the parse tree, each HTML node is represented by a list in which 4339In the parse tree, each HTML node is represented by a list in which
4337the first element is a symbol representing the node name, the second 4340the first element is a symbol representing the node name, the second
4338element is an alist of node attributes, and the remaining elements are 4341element is an alist of node attributes, and the remaining elements are
@@ -4368,7 +4371,7 @@ buffer. The argument @var{dom} should be a list as generated by
4368@end defun 4371@end defun
4369 4372
4370@cindex parsing xml 4373@cindex parsing xml
4371@defun libxml-parse-xml-region start end &optional base-url 4374@defun libxml-parse-xml-region start end &optional base-url discard-comments
4372This function is the same as @code{libxml-parse-html-region}, except 4375This function is the same as @code{libxml-parse-html-region}, except
4373that it parses the text as XML rather than HTML (so it is stricter 4376that it parses the text as XML rather than HTML (so it is stricter
4374about syntax). 4377about syntax).
diff --git a/src/ChangeLog b/src/ChangeLog
index b169479a274..f8c9c5afd69 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,10 @@
12014-11-21 Ulf Jasper <ulf.jasper@web.de>
2
3 * xml.c (parse_region): Take care of new optional parameter
4 'discard-comments' of 'libxml-parse(html|xml)-region'.
5 (Flibxml_parse_html_region, Flibxml_parse_xml_region): New
6 optional parameter 'discard-comments'.
7
12014-11-17 Paul Eggert <eggert@cs.ucla.edu> 82014-11-17 Paul Eggert <eggert@cs.ucla.edu>
2 9
3 Improve time stamp handling, and be more consistent about it. 10 Improve time stamp handling, and be more consistent about it.
diff --git a/src/xml.c b/src/xml.c
index 7e99beb1d05..d418202182b 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -175,7 +175,7 @@ make_dom (xmlNode *node)
175} 175}
176 176
177static Lisp_Object 177static Lisp_Object
178parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int htmlp) 178parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments, int htmlp)
179{ 179{
180 xmlDoc *doc; 180 xmlDoc *doc;
181 Lisp_Object result = Qnil; 181 Lisp_Object result = Qnil;
@@ -214,21 +214,24 @@ parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int html
214 214
215 if (doc != NULL) 215 if (doc != NULL)
216 { 216 {
217 /* If the document is just comments, then this should get us the
218 nodes anyway. */
219 xmlNode *n = doc->children;
220 Lisp_Object r = Qnil; 217 Lisp_Object r = Qnil;
221 218 if (NILP(discard_comments))
222 while (n) { 219 {
223 if (!NILP (r)) 220 /* If the document has toplevel comments, then this should
224 result = Fcons (r, result); 221 get us the nodes and the comments. */
225 r = make_dom (n); 222 xmlNode *n = doc->children;
226 n = n->next; 223
227 } 224 while (n) {
225 if (!NILP (r))
226 result = Fcons (r, result);
227 r = make_dom (n);
228 n = n->next;
229 }
230 }
228 231
229 if (NILP (result)) { 232 if (NILP (result)) {
230 /* The document isn't just comments, so get the tree the 233 /* The document doesn't have toplevel comments or we discarded
231 proper way. */ 234 them. Get the tree the proper way. */
232 xmlNode *node = fn_xmlDocGetRootElement (doc); 235 xmlNode *node = fn_xmlDocGetRootElement (doc);
233 if (node != NULL) 236 if (node != NULL)
234 result = make_dom (node); 237 result = make_dom (node);
@@ -251,25 +254,27 @@ xml_cleanup_parser (void)
251 254
252DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region, 255DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region,
253 Slibxml_parse_html_region, 256 Slibxml_parse_html_region,
254 2, 3, 0, 257 2, 4, 0,
255 doc: /* Parse the region as an HTML document and return the parse tree. 258 doc: /* Parse the region as an HTML document and return the parse tree.
256If BASE-URL is non-nil, it is used to expand relative URLs. */) 259If BASE-URL is non-nil, it is used to expand relative URLs.
257 (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) 260If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */)
261 (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments)
258{ 262{
259 if (init_libxml2_functions ()) 263 if (init_libxml2_functions ())
260 return parse_region (start, end, base_url, 1); 264 return parse_region (start, end, base_url, discard_comments, 1);
261 return Qnil; 265 return Qnil;
262} 266}
263 267
264DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region, 268DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region,
265 Slibxml_parse_xml_region, 269 Slibxml_parse_xml_region,
266 2, 3, 0, 270 2, 4, 0,
267 doc: /* Parse the region as an XML document and return the parse tree. 271 doc: /* Parse the region as an XML document and return the parse tree.
268If BASE-URL is non-nil, it is used to expand relative URLs. */) 272If BASE-URL is non-nil, it is used to expand relative URLs.
269 (Lisp_Object start, Lisp_Object end, Lisp_Object base_url) 273If DISCARD-COMMENTS is non-nil, all XML comments are discarded. */)
274 (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments)
270{ 275{
271 if (init_libxml2_functions ()) 276 if (init_libxml2_functions ())
272 return parse_region (start, end, base_url, 0); 277 return parse_region (start, end, base_url, discard_comments, 0);
273 return Qnil; 278 return Qnil;
274} 279}
275 280
diff --git a/test/ChangeLog b/test/ChangeLog
index 475b6a3f473..d0988e42076 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,12 @@
12014-11-21 Ulf Jasper <ulf.jasper@web.de>
2
3 * automated/libxml-tests.el
4 (libxml-tests--data-comments-preserved): Renamed from
5 'libxml-tests--data'.
6 (libxml-tests--data-comments-discarded): New.
7 (libxml-tests): Check whether 'libxml-parse-xml-region' is
8 discarding comments correctly.
9
12014-11-17 Michal Nazarewicz <mina86@mina86.com> 102014-11-17 Michal Nazarewicz <mina86@mina86.com>
2 11
3 * automated/tildify-tests.el (tildify-test-html, tildify-test-xml): 12 * automated/tildify-tests.el (tildify-test-html, tildify-test-xml):
diff --git a/test/automated/libxml-tests.el b/test/automated/libxml-tests.el
index ced0df7b3c4..6b6d0170562 100644
--- a/test/automated/libxml-tests.el
+++ b/test/automated/libxml-tests.el
@@ -27,7 +27,7 @@
27 27
28(require 'ert) 28(require 'ert)
29 29
30(defvar libxml-tests--data 30(defvar libxml-tests--data-comments-preserved
31 `(;; simple case 31 `(;; simple case
32 ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>" 32 ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>"
33 . (foo ((baz . "true")) "bar")) 33 . (foo ((baz . "true")) "bar"))
@@ -40,17 +40,35 @@
40 "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->") 40 "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->")
41 . (top nil (comment nil "comment-a") (foo ((a . "b")) (bar nil "blub")) 41 . (top nil (comment nil "comment-a") (foo ((a . "b")) (bar nil "blub"))
42 (comment nil "comment-b") (comment nil "comment-c")))) 42 (comment nil "comment-b") (comment nil "comment-c"))))
43 "Alist of XML strings and their expected parse trees.") 43 "Alist of XML strings and their expected parse trees for preserved comments.")
44
45(defvar libxml-tests--data-comments-discarded
46 `(;; simple case
47 ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>"
48 . (foo ((baz . "true")) "bar"))
49 ;; toplevel comments -- first document child must not get lost
50 (,(concat "<?xml version=\"1.0\"?><foo>bar</foo><!--comment-1-->"
51 "<!--comment-2-->")
52 . (foo nil "bar"))
53 (,(concat "<?xml version=\"1.0\"?><!--comment-a--><foo a=\"b\">"
54 "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->")
55 . (foo ((a . "b")) (bar nil "blub"))))
56 "Alist of XML strings and their expected parse trees for discarded comments.")
44 57
45 58
46(ert-deftest libxml-tests () 59(ert-deftest libxml-tests ()
47 "Test libxml." 60 "Test libxml."
48 (when (fboundp 'libxml-parse-xml-region) 61 (when (fboundp 'libxml-parse-xml-region)
49 (with-temp-buffer 62 (with-temp-buffer
50 (dolist (test libxml-tests--data) 63 (dolist (test libxml-tests--data-comments-preserved)
64 (erase-buffer)
65 (insert (car test))
66 (should (equal (cdr test)
67 (libxml-parse-xml-region (point-min) (point-max)))))
68 (dolist (test libxml-tests--data-comments-discarded)
51 (erase-buffer) 69 (erase-buffer)
52 (insert (car test)) 70 (insert (car test))
53 (should (equal (cdr test) 71 (should (equal (cdr test)
54 (libxml-parse-xml-region (point-min) (point-max)))))))) 72 (libxml-parse-xml-region (point-min) (point-max) nil t)))))))
55 73
56;;; libxml-tests.el ends here 74;;; libxml-tests.el ends here