diff options
Diffstat (limited to 'src/xml.c')
| -rw-r--r-- | src/xml.c | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 00000000000..c1098b15a20 --- /dev/null +++ b/src/xml.c | |||
| @@ -0,0 +1,141 @@ | |||
| 1 | /* Interface to libxml2. | ||
| 2 | Copyright (C) 2010 Free Software Foundation, Inc. | ||
| 3 | |||
| 4 | This file is part of GNU Emacs. | ||
| 5 | |||
| 6 | GNU Emacs is free software: you can redistribute it and/or modify | ||
| 7 | it under the terms of the GNU General Public License as published by | ||
| 8 | the Free Software Foundation, either version 3 of the License, or | ||
| 9 | (at your option) any later version. | ||
| 10 | |||
| 11 | GNU Emacs is distributed in the hope that it will be useful, | ||
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | GNU General Public License for more details. | ||
| 15 | |||
| 16 | You should have received a copy of the GNU General Public License | ||
| 17 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | ||
| 18 | |||
| 19 | #include <config.h> | ||
| 20 | |||
| 21 | #ifdef HAVE_LIBXML2 | ||
| 22 | |||
| 23 | #include <setjmp.h> | ||
| 24 | #include <libxml/tree.h> | ||
| 25 | #include <libxml/parser.h> | ||
| 26 | #include <libxml/HTMLparser.h> | ||
| 27 | |||
| 28 | #include "lisp.h" | ||
| 29 | #include "buffer.h" | ||
| 30 | |||
| 31 | Lisp_Object make_dom (xmlNode *node) | ||
| 32 | { | ||
| 33 | if (node->type == XML_ELEMENT_NODE) { | ||
| 34 | Lisp_Object result = Fcons (intern (node->name), Qnil); | ||
| 35 | xmlNode *child; | ||
| 36 | xmlAttr *property; | ||
| 37 | |||
| 38 | /* First add the attributes. */ | ||
| 39 | property = node->properties; | ||
| 40 | while (property != NULL) { | ||
| 41 | if (property->children && | ||
| 42 | property->children->content) { | ||
| 43 | char *pname = xmalloc (strlen (property->name) + 2); | ||
| 44 | *pname = ':'; | ||
| 45 | strcpy(pname + 1, property->name); | ||
| 46 | result = Fcons (Fcons (intern (pname), | ||
| 47 | build_string(property->children->content)), | ||
| 48 | result); | ||
| 49 | xfree (pname); | ||
| 50 | } | ||
| 51 | property = property->next; | ||
| 52 | } | ||
| 53 | /* Then add the children of the node. */ | ||
| 54 | child = node->children; | ||
| 55 | while (child != NULL) { | ||
| 56 | result = Fcons (make_dom (child), result); | ||
| 57 | child = child->next; | ||
| 58 | } | ||
| 59 | return Fnreverse (result); | ||
| 60 | } else if (node->type == XML_TEXT_NODE) { | ||
| 61 | Lisp_Object content = Qnil; | ||
| 62 | |||
| 63 | if (node->content) | ||
| 64 | content = build_string (node->content); | ||
| 65 | |||
| 66 | return Fcons (intern (node->name), content); | ||
| 67 | } else | ||
| 68 | return Qnil; | ||
| 69 | } | ||
| 70 | |||
| 71 | static Lisp_Object | ||
| 72 | parse_buffer (Lisp_Object string, Lisp_Object base_url, int htmlp) | ||
| 73 | { | ||
| 74 | xmlDoc *doc; | ||
| 75 | xmlNode *node; | ||
| 76 | Lisp_Object result; | ||
| 77 | int ibeg, iend; | ||
| 78 | char *burl = ""; | ||
| 79 | |||
| 80 | LIBXML_TEST_VERSION; | ||
| 81 | |||
| 82 | CHECK_STRING (string); | ||
| 83 | |||
| 84 | if (! NILP (base_url)) { | ||
| 85 | CHECK_STRING (base_url); | ||
| 86 | burl = SDATA (base_url); | ||
| 87 | } | ||
| 88 | |||
| 89 | if (htmlp) | ||
| 90 | doc = htmlReadMemory (SDATA (string), SBYTES (string), burl, "utf-8", | ||
| 91 | HTML_PARSE_RECOVER|HTML_PARSE_NONET| | ||
| 92 | HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR); | ||
| 93 | else | ||
| 94 | doc = xmlReadMemory (SDATA (string), SBYTES (string), burl, "utf-8", | ||
| 95 | XML_PARSE_NONET|XML_PARSE_NOWARNING| | ||
| 96 | XML_PARSE_NOERROR); | ||
| 97 | |||
| 98 | if (doc != NULL) { | ||
| 99 | node = xmlDocGetRootElement (doc); | ||
| 100 | if (node != NULL) | ||
| 101 | result = make_dom (node); | ||
| 102 | |||
| 103 | xmlFreeDoc (doc); | ||
| 104 | xmlCleanupParser (); | ||
| 105 | } | ||
| 106 | |||
| 107 | return result; | ||
| 108 | } | ||
| 109 | |||
| 110 | DEFUN ("html-parse-string", Fhtml_parse_string, Shtml_parse_string, | ||
| 111 | 0, 2, 0, | ||
| 112 | doc: /* Parse the string as an HTML document and return the parse tree. | ||
| 113 | If BASE-URL is non-nil, it will be used to expand relative URLs in | ||
| 114 | the HTML document.*/) | ||
| 115 | (Lisp_Object string, Lisp_Object base_url) | ||
| 116 | { | ||
| 117 | return parse_buffer (string, base_url, 1); | ||
| 118 | } | ||
| 119 | |||
| 120 | DEFUN ("xml-parse-string", Fxml_parse_string, Sxml_parse_string, | ||
| 121 | 0, 2, 0, | ||
| 122 | doc: /* Parse the string as an XML document and return the parse tree. | ||
| 123 | If BASE-URL is non-nil, it will be used to expand relative URLs in | ||
| 124 | the XML document.*/) | ||
| 125 | (Lisp_Object string, Lisp_Object base_url) | ||
| 126 | { | ||
| 127 | return parse_buffer (string, base_url, 0); | ||
| 128 | } | ||
| 129 | |||
| 130 | |||
| 131 | /*********************************************************************** | ||
| 132 | Initialization | ||
| 133 | ***********************************************************************/ | ||
| 134 | void | ||
| 135 | syms_of_xml (void) | ||
| 136 | { | ||
| 137 | defsubr (&Shtml_parse_string); | ||
| 138 | defsubr (&Sxml_parse_string); | ||
| 139 | } | ||
| 140 | |||
| 141 | #endif /* HAVE_LIBXML2 */ | ||