aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Magne Ingebrigtsen2010-09-10 18:44:35 +0200
committerLars Magne Ingebrigtsen2010-09-10 18:44:35 +0200
commit381408e2192b8fd606babaa8c9a103186589d708 (patch)
tree488a49b786d5cffcd0b068a527ec1ebe8339114a
parent36f7d3666905e1447a2e80957735a1ade23c894c (diff)
downloademacs-381408e2192b8fd606babaa8c9a103186589d708.tar.gz
emacs-381408e2192b8fd606babaa8c9a103186589d708.zip
Add support for the libxml2 library.
This adds the html-parse-string and xml-parse-string functions in the new file src/xml.c, as well as autoconf detection of the library.
-rw-r--r--ChangeLog4
-rwxr-xr-xconfigure118
-rw-r--r--configure.in12
-rw-r--r--doc/lispref/text.texi44
-rw-r--r--src/ChangeLog10
-rw-r--r--src/Makefile.in10
-rw-r--r--src/config.in3
-rw-r--r--src/emacs.c4
-rw-r--r--src/lisp.h5
-rw-r--r--src/xml.c137
10 files changed, 344 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index c0cd9fccee6..66fa0f859aa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
12010-09-10 Lars Magne Ingebrigtsen <larsi@gnus.org>
2
3 * configure.in: Check for libxml2.
4
12010-09-09 Glenn Morris <rgm@gnu.org> 52010-09-09 Glenn Morris <rgm@gnu.org>
2 6
3 * make-dist: No more TODO files under lisp/. 7 * make-dist: No more TODO files under lisp/.
diff --git a/configure b/configure
index 527c53690bd..4d501a994e5 100755
--- a/configure
+++ b/configure
@@ -660,6 +660,8 @@ BLESSMAIL_TARGET
660LIBS_MAIL 660LIBS_MAIL
661liblockfile 661liblockfile
662ALLOCA 662ALLOCA
663LIBXML2_LIBS
664LIBXML2_CFLAGS
663LIBXSM 665LIBXSM
664LIBGPM 666LIBGPM
665LIBGIF 667LIBGIF
@@ -807,6 +809,7 @@ with_tiff
807with_gif 809with_gif
808with_png 810with_png
809with_rsvg 811with_rsvg
812with_xml2
810with_imagemagick 813with_imagemagick
811with_xft 814with_xft
812with_libotf 815with_libotf
@@ -1514,6 +1517,7 @@ Optional Packages:
1514 --without-gif don't compile with GIF image support 1517 --without-gif don't compile with GIF image support
1515 --without-png don't compile with PNG image support 1518 --without-png don't compile with PNG image support
1516 --without-rsvg don't compile with SVG image support 1519 --without-rsvg don't compile with SVG image support
1520 --without-xml2 don't compile with XML parsing support
1517 --with-imagemagick compile with ImageMagick image support 1521 --with-imagemagick compile with ImageMagick image support
1518 --without-xft don't use XFT for anti aliased fonts 1522 --without-xft don't use XFT for anti aliased fonts
1519 --without-libotf don't use libotf for OpenType font support 1523 --without-libotf don't use libotf for OpenType font support
@@ -2732,6 +2736,14 @@ else
2732fi 2736fi
2733 2737
2734 2738
2739# Check whether --with-xml2 was given.
2740if test "${with_xml2+set}" = set; then :
2741 withval=$with_xml2;
2742else
2743 with_xml2=yes
2744fi
2745
2746
2735# Check whether --with-imagemagick was given. 2747# Check whether --with-imagemagick was given.
2736if test "${with_imagemagick+set}" = set; then : 2748if test "${with_imagemagick+set}" = set; then :
2737 withval=$with_imagemagick; 2749 withval=$with_imagemagick;
@@ -11070,6 +11082,112 @@ $as_echo "#define HAVE_X_SM 1" >>confdefs.h
11070fi 11082fi
11071 11083
11072 11084
11085### Use libxml (-lxml2) if available
11086if test "${with_xml2}" != "no"; then
11087 ### I'm not sure what the version number should be, so I just guessed.
11088
11089 succeeded=no
11090
11091 # Extract the first word of "pkg-config", so it can be a program name with args.
11092set dummy pkg-config; ac_word=$2
11093{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
11094$as_echo_n "checking for $ac_word... " >&6; }
11095if test "${ac_cv_path_PKG_CONFIG+set}" = set; then :
11096 $as_echo_n "(cached) " >&6
11097else
11098 case $PKG_CONFIG in
11099 [\\/]* | ?:[\\/]*)
11100 ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
11101 ;;
11102 *)
11103 as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
11104for as_dir in $PATH
11105do
11106 IFS=$as_save_IFS
11107 test -z "$as_dir" && as_dir=.
11108 for ac_exec_ext in '' $ac_executable_extensions; do
11109 if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
11110 ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
11111 $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
11112 break 2
11113 fi
11114done
11115 done
11116IFS=$as_save_IFS
11117
11118 test -z "$ac_cv_path_PKG_CONFIG" && ac_cv_path_PKG_CONFIG="no"
11119 ;;
11120esac
11121fi
11122PKG_CONFIG=$ac_cv_path_PKG_CONFIG
11123if test -n "$PKG_CONFIG"; then
11124 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
11125$as_echo "$PKG_CONFIG" >&6; }
11126else
11127 { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
11128$as_echo "no" >&6; }
11129fi
11130
11131
11132
11133 if test "$PKG_CONFIG" = "no" ; then
11134 HAVE_LIBXML2=no
11135 else
11136 PKG_CONFIG_MIN_VERSION=0.9.0
11137 if $PKG_CONFIG --atleast-pkgconfig-version $PKG_CONFIG_MIN_VERSION; then
11138 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libxml-2.0 > 2.2.0" >&5
11139$as_echo_n "checking for libxml-2.0 > 2.2.0... " >&6; }
11140
11141 if $PKG_CONFIG --exists "libxml-2.0 > 2.2.0" 2>&5; then
11142 { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
11143$as_echo "yes" >&6; }
11144 succeeded=yes
11145
11146 { $as_echo "$as_me:${as_lineno-$LINENO}: checking LIBXML2_CFLAGS" >&5
11147$as_echo_n "checking LIBXML2_CFLAGS... " >&6; }
11148 LIBXML2_CFLAGS=`$PKG_CONFIG --cflags "libxml-2.0 > 2.2.0"|sed -e 's,///*,/,g'`
11149 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIBXML2_CFLAGS" >&5
11150$as_echo "$LIBXML2_CFLAGS" >&6; }
11151
11152 { $as_echo "$as_me:${as_lineno-$LINENO}: checking LIBXML2_LIBS" >&5
11153$as_echo_n "checking LIBXML2_LIBS... " >&6; }
11154 LIBXML2_LIBS=`$PKG_CONFIG --libs "libxml-2.0 > 2.2.0"|sed -e 's,///*,/,g'`
11155 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIBXML2_LIBS" >&5
11156$as_echo "$LIBXML2_LIBS" >&6; }
11157 else
11158 { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
11159$as_echo "no" >&6; }
11160 LIBXML2_CFLAGS=""
11161 LIBXML2_LIBS=""
11162 ## If we have a custom action on failure, don't print errors, but
11163 ## do set a variable so people can do so.
11164 LIBXML2_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "libxml-2.0 > 2.2.0"`
11165
11166 fi
11167
11168
11169
11170 else
11171 echo "*** Your version of pkg-config is too old. You need version $PKG_CONFIG_MIN_VERSION or newer."
11172 echo "*** See http://www.freedesktop.org/software/pkgconfig"
11173 fi
11174 fi
11175
11176 if test $succeeded = yes; then
11177 HAVE_LIBXML2=yes
11178 else
11179 HAVE_LIBXML2=no
11180 fi
11181
11182 if test "${HAVE_LIBXML2}" = "yes"; then
11183
11184$as_echo "#define HAVE_LIBXML2 1" >>confdefs.h
11185
11186 fi
11187fi
11188
11189
11190
11073# If netdb.h doesn't declare h_errno, we must declare it by hand. 11191# If netdb.h doesn't declare h_errno, we must declare it by hand.
11074{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether netdb declares h_errno" >&5 11192{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether netdb declares h_errno" >&5
11075$as_echo_n "checking whether netdb declares h_errno... " >&6; } 11193$as_echo_n "checking whether netdb declares h_errno... " >&6; }
diff --git a/configure.in b/configure.in
index e69ce064c0c..03e4e1a2a00 100644
--- a/configure.in
+++ b/configure.in
@@ -155,6 +155,7 @@ OPTION_DEFAULT_ON([tiff],[don't compile with TIFF image support])
155OPTION_DEFAULT_ON([gif],[don't compile with GIF image support]) 155OPTION_DEFAULT_ON([gif],[don't compile with GIF image support])
156OPTION_DEFAULT_ON([png],[don't compile with PNG image support]) 156OPTION_DEFAULT_ON([png],[don't compile with PNG image support])
157OPTION_DEFAULT_ON([rsvg],[don't compile with SVG image support]) 157OPTION_DEFAULT_ON([rsvg],[don't compile with SVG image support])
158OPTION_DEFAULT_ON([xml2],[don't compile with XML parsing support])
158OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support]) 159OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support])
159 160
160OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts]) 161OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts])
@@ -2535,6 +2536,17 @@ if test "${HAVE_X11}" = "yes"; then
2535fi 2536fi
2536AC_SUBST(LIBXSM) 2537AC_SUBST(LIBXSM)
2537 2538
2539### Use libxml (-lxml2) if available
2540if test "${with_xml2}" != "no"; then
2541 ### I'm not sure what the version number should be, so I just guessed.
2542 PKG_CHECK_MODULES(LIBXML2, libxml-2.0 > 2.2.0, HAVE_LIBXML2=yes, HAVE_LIBXML2=no)
2543 if test "${HAVE_LIBXML2}" = "yes"; then
2544 AC_DEFINE(HAVE_LIBXML2, 1, [Define to 1 if you have the libxml library (-lxml2).])
2545 fi
2546fi
2547AC_SUBST(LIBXML2_LIBS)
2548AC_SUBST(LIBXML2_CFLAGS)
2549
2538# If netdb.h doesn't declare h_errno, we must declare it by hand. 2550# If netdb.h doesn't declare h_errno, we must declare it by hand.
2539AC_CACHE_CHECK(whether netdb declares h_errno, 2551AC_CACHE_CHECK(whether netdb declares h_errno,
2540 emacs_cv_netdb_declares_h_errno, 2552 emacs_cv_netdb_declares_h_errno,
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi
index 142a071f494..ff4e65d299f 100644
--- a/doc/lispref/text.texi
+++ b/doc/lispref/text.texi
@@ -59,6 +59,7 @@ the character after point.
59 position stored in a register. 59 position stored in a register.
60* Base 64:: Conversion to or from base 64 encoding. 60* Base 64:: Conversion to or from base 64 encoding.
61* MD5 Checksum:: Compute the MD5 "message digest"/"checksum". 61* MD5 Checksum:: Compute the MD5 "message digest"/"checksum".
62* Parsing HTML:: Parsing HTML and XML.
62* Atomic Changes:: Installing several buffer changes "atomically". 63* Atomic Changes:: Installing several buffer changes "atomically".
63* Change Hooks:: Supplying functions to be run when text is changed. 64* Change Hooks:: Supplying functions to be run when text is changed.
64@end menu 65@end menu
@@ -4106,6 +4107,49 @@ using the specified or chosen coding system. However, if
4106coding instead. 4107coding instead.
4107@end defun 4108@end defun
4108 4109
4110@node Parsing HTML
4111@section Parsing HTML
4112@cindex parsing html
4113@cindex parsing xml
4114
4115Emacs provides an interface to the @code{libxml2} library via two
4116functions: @code{html-parse-buffer} and @code{xml-parse-buffer}. The
4117HTML function will parse ``real world'' HTML and try to return a
4118sensible parse tree, while the XML function is somewhat stricter about
4119syntax.
4120
4121They both take two optional parameters. The first is a buffer, and
4122the second is a base URL to be used to expand relative URLs in the
4123document, if any.
4124
4125Here's an example demonstrating the structure of the parsed data you
4126get out. Given this HTML document:
4127
4128@example
4129<html><hEad></head><body width=101><div class=thing>Foo<div>Yes
4130@end example
4131
4132You get this parse tree:
4133
4134@example
4135(html
4136 (head)
4137 (body
4138 (:width . "101")
4139 (div
4140 (:class . "thing")
4141 (text . "Foo")
4142 (div
4143 (text . "Yes\n")))))
4144@end example
4145
4146It's a simple tree structure, where the @code{car} for each node is
4147the name of the node, and the @code{cdr} is the value, or the list of
4148values.
4149
4150Attributes are coded the same way as child nodes, but with @samp{:} as
4151the first character.
4152
4109@node Atomic Changes 4153@node Atomic Changes
4110@section Atomic Change Groups 4154@section Atomic Change Groups
4111@cindex atomic changes 4155@cindex atomic changes
diff --git a/src/ChangeLog b/src/ChangeLog
index 4b4f82aa4c8..9578130afd5 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,13 @@
12010-09-09 Lars Magne Ingebrigtsen <larsi@gnus.org>
2
3 * xml.c (Fxml_parse_buffer): New function to parse XML files.
4
52010-09-08 Lars Magne Ingebrigtsen <larsi@gnus.org>
6
7 * xml.c: New file.
8 (Fhtml_parse_buffer): New function to interface to the libxml2
9 html parsing function.
10
12010-09-05 Juanma Barranquero <lekktu@gmail.com> 112010-09-05 Juanma Barranquero <lekktu@gmail.com>
2 12
3 * biditype.h: Regenerate. 13 * biditype.h: Regenerate.
diff --git a/src/Makefile.in b/src/Makefile.in
index 9ee5631ef70..d91b95d86e3 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -226,6 +226,8 @@ RSVG_CFLAGS= @RSVG_CFLAGS@
226IMAGEMAGICK_LIBS= @IMAGEMAGICK_LIBS@ 226IMAGEMAGICK_LIBS= @IMAGEMAGICK_LIBS@
227IMAGEMAGICK_CFLAGS= @IMAGEMAGICK_CFLAGS@ 227IMAGEMAGICK_CFLAGS= @IMAGEMAGICK_CFLAGS@
228 228
229LIBXML2_LIBS = @LIBXML2_LIBS@
230LIBXML2_CFLAGS = @LIBXML2_CFLAGS@
229 231
230## widget.o if USE_X_TOOLKIT, otherwise empty. 232## widget.o if USE_X_TOOLKIT, otherwise empty.
231WIDGET_OBJ=@WIDGET_OBJ@ 233WIDGET_OBJ=@WIDGET_OBJ@
@@ -320,7 +322,8 @@ MKDEPDIR=@MKDEPDIR@
320## FIXME? MYCPPFLAGS only referenced in etc/DEBUG. 322## FIXME? MYCPPFLAGS only referenced in etc/DEBUG.
321ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(MYCPPFLAGS) -I. -I${srcdir} \ 323ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(MYCPPFLAGS) -I. -I${srcdir} \
322 ${C_SWITCH_MACHINE} ${C_SWITCH_SYSTEM} ${C_SWITCH_X_SITE} \ 324 ${C_SWITCH_MACHINE} ${C_SWITCH_SYSTEM} ${C_SWITCH_X_SITE} \
323 ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} ${DBUS_CFLAGS} \ 325 ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} \
326 ${LIBXML2_CFLAGS} ${DBUS_CFLAGS} \
324 ${GCONF_CFLAGS} ${FREETYPE_CFLAGS} ${FONTCONFIG_CFLAGS} \ 327 ${GCONF_CFLAGS} ${FREETYPE_CFLAGS} ${FONTCONFIG_CFLAGS} \
325 ${LIBOTF_CFLAGS} ${M17N_FLT_CFLAGS} ${DEPFLAGS} ${PROFILING_CFLAGS} \ 328 ${LIBOTF_CFLAGS} ${M17N_FLT_CFLAGS} ${DEPFLAGS} ${PROFILING_CFLAGS} \
326 ${C_WARNINGS_SWITCH} ${CFLAGS} 329 ${C_WARNINGS_SWITCH} ${CFLAGS}
@@ -349,7 +352,7 @@ obj= dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \
349 syntax.o $(UNEXEC_OBJ) bytecode.o \ 352 syntax.o $(UNEXEC_OBJ) bytecode.o \
350 process.o callproc.o \ 353 process.o callproc.o \
351 region-cache.o sound.o atimer.o \ 354 region-cache.o sound.o atimer.o \
352 doprnt.o strftime.o intervals.o textprop.o composite.o md5.o \ 355 doprnt.o strftime.o intervals.o textprop.o composite.o md5.o xml.o \
353 $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) 356 $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ)
354 357
355## Object files used on some machine or other. 358## Object files used on some machine or other.
@@ -595,7 +598,8 @@ SOME_MACHINE_LISP = ../lisp/mouse.elc \
595## duplicated symbols. If the standard libraries were compiled 598## duplicated symbols. If the standard libraries were compiled
596## with GCC, we might need LIB_GCC again after them. 599## with GCC, we might need LIB_GCC again after them.
597LIBES = $(LIBS) $(LIBX_BASE) $(LIBX_OTHER) $(LIBSOUND) \ 600LIBES = $(LIBS) $(LIBX_BASE) $(LIBX_OTHER) $(LIBSOUND) \
598 $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \ 601 $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) \
602 ${LIBXML2_LIBS} $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \
599 $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) ${GCONF_LIBS} ${LIBSELINUX_LIBS} \ 603 $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) ${GCONF_LIBS} ${LIBSELINUX_LIBS} \
600 $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \ 604 $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \
601 $(LIB_GCC) $(LIB_MATH) $(LIB_STANDARD) $(LIB_GCC) 605 $(LIB_GCC) $(LIB_MATH) $(LIB_STANDARD) $(LIB_GCC)
diff --git a/src/config.in b/src/config.in
index 604a737a8b0..199afbd78ba 100644
--- a/src/config.in
+++ b/src/config.in
@@ -813,6 +813,9 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
813/* Define to 1 if you have the SM library (-lSM). */ 813/* Define to 1 if you have the SM library (-lSM). */
814#undef HAVE_X_SM 814#undef HAVE_X_SM
815 815
816/* Define to 1 if you have the libxml2 library (-lxml2). */
817#undef HAVE_LIBXML2
818
816/* Define to 1 if you want to use the X window system. */ 819/* Define to 1 if you want to use the X window system. */
817#undef HAVE_X_WINDOWS 820#undef HAVE_X_WINDOWS
818 821
diff --git a/src/emacs.c b/src/emacs.c
index 397d3d9ad27..33e0d60630b 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -1544,6 +1544,10 @@ main (int argc, char **argv)
1544#endif 1544#endif
1545#endif /* HAVE_X_WINDOWS */ 1545#endif /* HAVE_X_WINDOWS */
1546 1546
1547#ifdef HAVE_LIBXML2
1548 syms_of_xml ();
1549#endif
1550
1547 syms_of_menu (); 1551 syms_of_menu ();
1548 1552
1549#ifdef HAVE_NTGUI 1553#ifdef HAVE_NTGUI
diff --git a/src/lisp.h b/src/lisp.h
index 89514bf9ecb..fc9198a5ff7 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -3577,6 +3577,11 @@ extern char *x_get_keysym_name (int);
3577EXFUN (Fmsdos_downcase_filename, 1); 3577EXFUN (Fmsdos_downcase_filename, 1);
3578#endif 3578#endif
3579 3579
3580#ifdef HAVE_LIBXML2
3581/* Defined in xml.c */
3582extern void syms_of_xml (void);
3583#endif
3584
3580#ifdef HAVE_MENUS 3585#ifdef HAVE_MENUS
3581/* Defined in (x|w32)fns.c, nsfns.m... */ 3586/* Defined in (x|w32)fns.c, nsfns.m... */
3582extern int have_menus_p (void); 3587extern int have_menus_p (void);
diff --git a/src/xml.c b/src/xml.c
new file mode 100644
index 00000000000..92066067d73
--- /dev/null
+++ b/src/xml.c
@@ -0,0 +1,137 @@
1/* Interface to libxml2.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software: you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation, either version 3 of the License, or
9(at your option) any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
18
19#include <config.h>
20
21#ifdef HAVE_LIBXML2
22
23#include <setjmp.h>
24#include <libxml/tree.h>
25#include <libxml/parser.h>
26#include <libxml/HTMLparser.h>
27
28#include "lisp.h"
29#include "buffer.h"
30
31Lisp_Object make_dom (xmlNode *node)
32{
33 if (node->type == XML_ELEMENT_NODE) {
34 Lisp_Object result = Fcons (intern (node->name), Qnil);
35 xmlNode *child;
36 xmlAttr *property;
37
38 /* First add the attributes. */
39 property = node->properties;
40 while (property != NULL) {
41 if (property->children &&
42 property->children->content) {
43 char *pname = xmalloc (strlen (property->name) + 2);
44 *pname = ':';
45 strcpy(pname + 1, property->name);
46 result = Fcons (Fcons (intern (pname),
47 build_string(property->children->content)),
48 result);
49 xfree (pname);
50 }
51 property = property->next;
52 }
53 /* Then add the children of the node. */
54 child = node->children;
55 while (child != NULL) {
56 result = Fcons (make_dom (child), result);
57 child = child->next;
58 }
59 return Fnreverse (result);
60 } else if (node->type == XML_TEXT_NODE) {
61 Lisp_Object content = Qnil;
62
63 if (node->content)
64 content = build_string (node->content);
65
66 return Fcons (intern (node->name), content);
67 } else
68 return Qnil;
69}
70
71static Lisp_Object
72parse_buffer (Lisp_Object string, Lisp_Object base_url, int htmlp)
73{
74 xmlDoc *doc;
75 xmlNode *node;
76 Lisp_Object result;
77 int ibeg, iend;
78 char *burl = "";
79
80 LIBXML_TEST_VERSION;
81
82 CHECK_STRING (string);
83
84 if (! NILP (base_url)) {
85 CHECK_STRING (base_url);
86 burl = SDATA (base_url);
87 }
88
89 if (htmlp)
90 doc = htmlReadMemory (SDATA (string), SBYTES (string), burl, "utf-8",
91 HTML_PARSE_RECOVER|HTML_PARSE_NONET|
92 HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR);
93 else
94 doc = xmlReadMemory (SDATA (string), SBYTES (string), burl, "utf-8",
95 XML_PARSE_NONET|XML_PARSE_NOWARNING|
96 XML_PARSE_NOERROR);
97
98 if (doc != NULL) {
99 node = xmlDocGetRootElement (doc);
100 if (node != NULL)
101 result = make_dom (node);
102
103 xmlFreeDoc (doc);
104 xmlCleanupParser ();
105 }
106
107 return result;
108}
109
110DEFUN ("html-parse-string", Fhtml_parse_string, Shtml_parse_string,
111 0, 2, 0,
112 doc: /* Parse the string as an HTML document and return the parse tree.*/)
113 (Lisp_Object string, Lisp_Object base_url)
114{
115 return parse_buffer (string, base_url, 1);
116}
117
118DEFUN ("xml-parse-string", Fxml_parse_string, Sxml_parse_string,
119 0, 2, 0,
120 doc: /* Parse the string as an XML document and return the parse tree.*/)
121 (Lisp_Object string, Lisp_Object base_url)
122{
123 return parse_buffer (string, base_url, 0);
124}
125
126
127/***********************************************************************
128 Initialization
129 ***********************************************************************/
130void
131syms_of_xml (void)
132{
133 defsubr (&Shtml_parse_string);
134 defsubr (&Sxml_parse_string);
135}
136
137#endif /* HAVE_LIBXML2 */