Skip to content

Instantly share code, notes, and snippets.

@seancribbs
Created February 27, 2012 20:18
Show Gist options
  • Save seancribbs/1926748 to your computer and use it in GitHub Desktop.
Save seancribbs/1926748 to your computer and use it in GitHub Desktop.
Module to simplify reading and manipulating XML in Erlang
%% @doc Uses SAX to convert an XML document into a simple nested-tuple
%% structure. Ignores namespaces.
-module(xmlsimple).
-export([file/1,
string/1,
emit/1,
emit_file/2]).
-include_lib("xmerl/include/xmerl.hrl").
%% Option list that string/1 hands to xmerl_sax_parser:stream/2: SAX events
%% are delivered to event/3 starting from an empty list as the event state,
%% and continuation/1 is installed as the continuation callback.
-define(SAX, [{continuation_fun, fun continuation/1},
{event_fun, fun event/3},
{event_state, []},
{file_type, normal},
{encoding, utf8},
skip_external_dtd]).
%% A parsed element: its tag name, its attributes as a property list, and its
%% children (nested elements or text strings).
-type tag() :: {TagName::atom(), Attributes::[proplists:property()], Children::[tag() | string()]}.
%% A one-argument boolean test, in the shape lists:splitwith/2 expects.
-type predicate() :: fun((term()) -> boolean()).
%% The attribute must be spelled `-export_type'; any other spelling is stored
%% as a user-defined attribute and leaves tag/0 unexported.
-export_type([tag/0]).
%% @doc Reads the file at `Filename' and parses its contents as XML with
%% {@link string/1}. If the read does not succeed, the result of
%% file:read_file/1 is returned unchanged.
-spec file(file:name()) -> {ok, tag() | [tag()]} | {error, term()}.
file(Filename) ->
    parse_read_result(file:read_file(Filename)).

%% @private Parses the contents of a successful read; anything else is
%% handed back to the caller untouched.
parse_read_result({ok, Contents}) ->
    string(Contents);
parse_read_result(Failure) ->
    Failure.
%% @doc Parses an XML document held in a list or a binary.
%% When the parser finishes with exactly one item in its event state, that
%% item is returned as `{ok, Item}'; otherwise the whole event state is
%% returned as `{ok, State}'. Any other parser result is passed through
%% unchanged.
-spec string(iodata()) -> {ok, tag() | [tag()]} | {error, term()}.
string(Xml) when is_binary(Xml); is_list(Xml) ->
    simplify_parse_result(xmerl_sax_parser:stream(Xml, ?SAX)).

%% @private Unwraps a single-item event state and drops the unparsed rest.
simplify_parse_result({ok, [Root], _Rest}) ->
    {ok, Root};
simplify_parse_result({ok, Stack, _Rest}) ->
    {ok, Stack};
simplify_parse_result(Other) ->
    Other.
%% @doc Serialises `Doc' (a {@link tag/0} or a list of them) with
%% {@link emit/1} and writes the resulting XML to `Filename'.
%% Returns the result of file:write_file/2.
-spec emit_file(file:name(), tag() | [tag()]) -> ok | {error, term()}.
emit_file(Filename, Doc) ->
    %% Serialise the caller's document: the variable Doc, not the atom doc.
    file:write_file(Filename, emit(Doc)).
%% @doc Serialises a {@link tag/0}, or a list of them, into an XML document
%% using xmerl:export_simple/2 with the `xmerl_xml' callback module.
%% A non-list argument is treated as a one-element list.
-spec emit(tag() | [tag()]) -> iodata().
emit(Docs) when is_list(Docs) ->
    xmerl:export_simple(Docs, xmerl_xml);
emit(Single) ->
    emit([Single]).
%% @private Continuation callback for the SAX parser: supplies an empty
%% binary as the next input and returns the continuation state unchanged.
-spec continuation(term()) -> {binary(), term()}.
continuation(ContState) ->
    NoMoreInput = <<>>,
    {NoMoreInput, ContState}.
%% @private
%% SAX event callback. `State' is a stack (most recent item first) holding
%% open elements `{Tag, Attrs, undefined}', completed elements
%% `{Tag, Attrs, Children}', text strings, and the atom `cdata' marking the
%% start of a CDATA section. Returns the updated stack.
%%
%% NOTE(review): element names are turned into atoms with list_to_atom/1, so
%% parsing untrusted documents can grow the atom table without bound.
-spec event(xmerl_sax_parser:event(), tuple(), term()) -> term().
%% Opening tag: push a placeholder whose children are not yet known.
event({startElement, _Uri, LocalName, _QName, Attrs}, _Location, State) ->
    Tag = list_to_atom(LocalName),
    AttrPList = lists:map(fun attr_to_pair/1, Attrs),
    [{Tag, AttrPList, undefined} | State];
%% Closing tag: everything stacked above the matching element becomes its
%% children. The stack is newest-first, so the children are reversed.
event({endElement, _Uri, LocalName, _QName}, _Location, State) ->
    %% The atom already exists because the opening tag created it.
    Tag = list_to_existing_atom(LocalName),
    {Children, [{Tag, Attrs, _} | Stack]} = lists:splitwith(tag_predicate(Tag), State),
    [{Tag, Attrs, lists:reverse(Children)} | Stack];
%% CDATA start: push a marker so the section's text can be collected later.
event(startCDATA, _Location, State) ->
    [cdata | State];
%% CDATA end: fold the text stacked since the marker into one flat string.
event(endCDATA, _Location, State) ->
    {Text, [cdata | Stack]} = lists:splitwith(fun cdata_predicate/1, State),
    [lists:flatten(lists:reverse(Text)) | Stack];
%% Text that directly follows earlier text extends that entry in place, so
%% the same characters never appear on the stack twice.
event({characters, String}, _Location, [Text | Rest]) when is_list(Text) ->
    [Text ++ String | Rest];
%% Text that follows an element or a CDATA marker starts a new entry.
event({characters, String}, _Location, [_ | _] = State) ->
    [String | State];
%% Every other event, and text arriving on an empty stack, is ignored.
event(_Event, _Location, State) ->
    State.
%% @private Converts a four-element attribute tuple into a `{Name, Value}'
%% property. The first two fields are discarded, the third is turned into an
%% atom with list_to_atom/1, and the fourth is kept as the value.
-spec attr_to_pair(tuple()) -> proplists:property().
attr_to_pair({_Uri, _Prefix, LocalName, Value}) ->
    Key = list_to_atom(LocalName),
    {Key, Value}.
%% @private Builds the predicate that event/3 passes to lists:splitwith/2
%% when an element closes. The predicate returns `false' only for the
%% still-open placeholder `{T, _, undefined}' and `true' for every other
%% term, so a completed child that shares the tag name `T' is collected as a
%% child rather than mistaken for the element being closed.
-spec tag_predicate(atom()) -> predicate().
tag_predicate(T) ->
    fun({Tag, _Attrs, undefined}) when Tag =:= T -> false;
       (_Other) -> true
    end.
%% @private Returns `false' for the atom `cdata' and `true' for any other
%% term. Used with lists:splitwith/2 to collect the items stacked after a
%% CDATA start marker.
-spec cdata_predicate(term()) -> boolean().
cdata_predicate(Item) ->
    Item =/= cdata.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment