Last active
December 1, 2018 17:49
-
-
Save blogscot/c2084cfd91ff56e2a87c4575b7579476 to your computer and use it in GitHub Desktop.
A first look at XML parsing using Elixir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule XmlParsing do
  @moduledoc """
  A first look at XML parsing from Elixir using Erlang's `:xmerl` library.

  The `xmlElement` and `xmlText` record definitions are extracted from
  xmerl's header file so that fields of the parsed nodes can be read with
  Elixir's record macros.
  """

  import Record, only: [defrecord: 2, extract: 2]

  defrecord :xmlElement, extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl")
  defrecord :xmlText, extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl")

  @doc """
  Returns the sample document, as a binary, that the other functions query.
  """
  def xml do
    """
    <html>
      <head>
        <title>XML Parsing</title>
      </head>
      <body>
        <p>Some interesting text</p>
        <ul>
          <li>First</li>
          <li>Second</li>
        </ul>
      </body>
    </html>
    """
  end

  @doc """
  Returns the text of `/html/head/title` as a charlist.

  Selects the element itself and then walks into its content, asserting the
  record type at each step. Raises `MatchError` if the query does not yield
  exactly one element holding exactly one text node.
  """
  def get_title_text do
    [title_element] = :xmerl_xpath.string(~c"/html/head/title", parse_sample())

    # confirm we have a valid xmlElement record
    true = Record.is_record(title_element, :xmlElement)

    [content] = xmlElement(title_element, :content)

    # confirm we have a valid xmlText record
    true = Record.is_record(content, :xmlText)

    xmlText(content, :value)
  end

  @doc """
  Returns the text of `/html/body/p` as a charlist.

  Uses the XPath `text()` step to select the text node directly. Raises
  `MatchError` unless exactly one node matches.
  """
  def get_paragraph_text do
    [text] = :xmerl_xpath.string(~c"/html/body/p/text()", parse_sample())
    xmlText(text, :value)
  end

  @doc """
  Returns the text of every `/html/body/ul/li` as a list of charlists,
  in the order the XPath query returns them.
  """
  def get_li_text do
    ~c"/html/body/ul/li/text()"
    |> :xmerl_xpath.string(parse_sample())
    |> Enum.map(fn text -> xmlText(text, :value) end)
  end

  # Scans the sample document with xmerl and returns the root node.
  # xmerl_scan works on lists rather than binaries, hence the conversion;
  # the second tuple element (unparsed remainder) is not needed here.
  defp parse_sample do
    {doc, _rest} = xml() |> :binary.bin_to_list() |> :xmerl_scan.string()
    doc
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule XmlParsingTest do
  use ExUnit.Case

  # xmerl hands text back as charlists, so the expected values are
  # charlists rather than binaries. All assertions use `==` so that a
  # failure reports both sides of the comparison.

  test "get contents of html/head/title tag" do
    assert XmlParsing.get_title_text() == ~c"XML Parsing"
  end

  test "get contents of p tag" do
    assert XmlParsing.get_paragraph_text() == ~c"Some interesting text"
  end

  test "get contents of li tags" do
    assert XmlParsing.get_li_text() == [~c"First", ~c"Second"]
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment