Skip to content

Instantly share code, notes, and snippets.

@thiagomajesk
Last active July 7, 2020 18:50
Show Gist options
  • Save thiagomajesk/52b70b3a736d1664451130bb228a9a4a to your computer and use it in GitHub Desktop.
Save thiagomajesk/52b70b3a736d1664451130bb228a9a4a to your computer and use it in GitHub Desktop.
XML parsing benchmark
# Dependencies: {:saxy, "~> 1.2"}, {:sax_map, "~> 0.2"}, {:benchee, "~> 1.0"}, {:httpoison, "~> 1.6"}
#
# Samples
#
# Small inline catalog document used as the "SIMPLE XML" benchmark input.
# Besides nested elements with attributes, it contains an XML comment and
# `xml-stylesheet` processing instructions placed inside elements.
simple = """
<?xml version="1.0"?>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<catalog>
<product description="Cardigan Sweater" product_image="cardigan.jpg">
<catalog_item gender="Men's">
<item_number>QWZ5671</item_number>
<price>39.95</price>
<!--Yet another comment-->
<size description="Medium">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
<size description="Large">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
</catalog_item>
<catalog_item gender="Women's">
<item_number>RRX9856</item_number>
<price>42.50</price>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<size description="Small">
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
<size description="Medium">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
<color_swatch image="black_cardigan.jpg">Black</color_swatch>
</size>
<size description="Large">
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="black_cardigan.jpg">Black</color_swatch>
</size>
<size description="Extra Large">
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
<color_swatch image="black_cardigan.jpg">Black</color_swatch>
</size>
</catalog_item>
</product>
</catalog>
"""
# Remote samples are downloaded once, before any benchmark starts.
# Matching on `status_code: 200` stops the script right here (MatchError) when
# a download does not succeed, instead of feeding an error page to the parsers.
%{status_code: 200, body: feed} = HTTPoison.get!("https://www.gamespot.com/feeds/mashup/")
%{status_code: 200, body: complex} = HTTPoison.get!("http://aiweb.cs.washington.edu/research/projects/xmltk/xmldata/data/treebank/treebank_e.xml")
#
# Saxy.SimpleForm based implementation
#
defmodule Parser do
  @moduledoc """
  Converts nodes in the `{tag, attributes, children}` simple-form shape into
  nested maps keyed by tag name.

  Attributes are ignored. Sibling elements that share a tag are collected,
  in document order, into a list under that tag.
  """

  @doc """
  Converts a list of simple-form nodes into a map or a string.

  The result depends on what `nodes` contains:

    * no elements and no non-whitespace text - returns `%{}`
    * only text - returns the text (adjacent text nodes are concatenated)
    * at least one element - returns a map of `tag => parsed children`;
      text nodes sitting next to elements are ignored

  Raises `FunctionClauseError` when a non-text node is not a
  `{tag, attributes, children}` tuple.

  ## Examples

      iex> Parser.parse([{"a", [], ["x"]}])
      %{"a" => "x"}

      iex> Parser.parse([{"a", [], ["1"]}, {"a", [], ["2"]}])
      %{"a" => ["1", "2"]}

  """
  @spec parse(list()) :: map() | String.t()
  def parse(nodes) when is_list(nodes) do
    case Enum.split_with(nodes, &is_binary/1) do
      {texts, []} -> parse_text(texts)
      {_texts, elements} -> parse_elements(elements)
    end
  end

  # Joins the text nodes of a text-only element; whitespace-only (or missing)
  # content is reported as an empty map, like an element without children.
  defp parse_text(texts) do
    text = IO.iodata_to_binary(texts)
    if String.trim(text) == "", do: %{}, else: text
  end

  # Folds from the right so that values of repeated tags can be prepended and
  # still end up in document order.
  defp parse_elements(elements) do
    List.foldr(elements, %{}, fn {tag, _attrs, children}, acc ->
      value = parse(children)

      Map.update(acc, tag, value, fn
        existing when is_list(existing) -> [value | existing]
        existing -> [value, existing]
      end)
    end)
  end
end
#
# Tests
#
# Head-to-head run: both converters receive the same XML strings.
sax_map_job = &SAXMap.from_string/1

simple_form_job = fn xml ->
  {:ok, tree} = Saxy.SimpleForm.parse_string(xml)
  Parser.parse([tree])
end

Benchee.run(
  %{"SAXMap.from_string" => sax_map_job, "Parser.parse" => simple_form_job},
  time: 10,
  memory_time: 2,
  inputs: %{"SIMPLE XML" => simple, "FEED XML" => feed}
)
#
# Tests with complex files
#
# SAXMap.from_string can't process large files, so it is excluded from this run.
# On my current hardware I waited over 15 minutes for the benchmark to complete, without any response.
# Large-document run: only the Saxy.SimpleForm based converter is measured.
treebank_job = fn xml ->
  {:ok, tree} = Saxy.SimpleForm.parse_string(xml)
  Parser.parse([tree])
end

Benchee.run(
  %{"Parser.parse" => treebank_job},
  time: 10,
  memory_time: 2,
  inputs: %{"COMPLEX" => complex}
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment