Created
February 24, 2018 22:01
-
-
Save thomcc/2caeb3c56b57367a33be6166222b590a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
// One element of an XML tree: a tag name, free-form body text, an ordered list
// of attributes and an ordered list of owned child elements.
//
// Attributes and children are held through std::unique_ptr, so pointers and
// references handed out by the accessors stay valid while further attributes
// or children are appended to this node.
struct XmlNode {
    // A single name="value" pair; the value starts out empty.
    struct Attribute {
        Attribute(std::string k) : name(std::move(k)) {}
        std::string name, value;
    };

    explicit XmlNode(std::string name) : name(std::move(name)) {}
    XmlNode() {}

    // Appends a new, empty child element called `tagname` and returns a
    // non-owning pointer to it.
    XmlNode *create_child(std::string tagname) {
        // Take ownership before touching the vector so the node cannot leak
        // if growing the vector throws.
        std::unique_ptr<XmlNode> child(new XmlNode(std::move(tagname)));
        children.push_back(std::move(child));
        return children.back().get();
    }

    // Returns the first child whose tag name equals `s`, or nullptr if there
    // is none. `s` must be a valid NUL-terminated string.
    XmlNode const *get_child(char const *s) const {
        for (auto const &child : children) {
            if (child->name == s) return child.get();
        }
        return nullptr;
    }
    XmlNode *get_child(char const *s) {
        // Reuse the const lookup; dropping const is safe because *this is
        // known to be non-const in this overload.
        return const_cast<XmlNode *>(static_cast<XmlNode const &>(*this).get_child(s));
    }

    // Returns the first attribute whose name equals `s`, or nullptr if there
    // is none. `s` must be a valid NUL-terminated string.
    Attribute const *get_attribute(char const *s) const {
        for (auto const &attr : attributes) {
            if (attr->name == s) return attr.get();
        }
        return nullptr;
    }
    Attribute *get_attribute(char const *s) {
        return const_cast<Attribute *>(static_cast<XmlNode const &>(*this).get_attribute(s));
    }

    // Returns the value of attribute `s`, or "" when the attribute is absent.
    // The returned pointer refers to storage owned by the attribute (or to a
    // string literal for the missing case).
    char const *attribute_value(char const *s) const {
        if (Attribute const *a = get_attribute(s)) return a->value.c_str();
        return "";
    }

    // Streams the element at indentation depth 0.
    friend std::ostream &operator<<(std::ostream &o, XmlNode const &n) {
        return n.write(o, 0);
    }

    // Serializes this element and its subtree to `o`, indenting the opening
    // tag by `depth` spaces and each nesting level by two more.
    //
    // Layout rules:
    //   - no children and no body  -> self-closing tag  <name ... />
    //   - no children, body shorter than 60 chars -> <name>body</name> on one line
    //   - otherwise children first, then the body on its own indented line.
    // Attribute values and body text are written verbatim (no escaping).
    std::ostream &write(std::ostream &o, int depth=0) const {
        bool const one_line = children.empty() && body.size() < 60;
        indent(o, depth) << '<' << name;
        for (auto const &attr : attributes) {
            o << ' ' << attr->name << "=\"" << attr->value << '"';
        }
        if (children.empty() && body.empty()) {
            return o << " />\n";
        }
        o << (one_line ? ">" : ">\n");
        for (auto const &child : children) {
            child->write(o, depth+2);
        }
        if (!body.empty()) {
            indent(o, one_line ? 0 : depth+2) << body << (one_line ? "" : "\n");
        }
        return indent(o, one_line ? 0 : depth) << "</" << name << ">\n";
    }

    // Returns a reference to the value of attribute `s`, appending a new
    // attribute with an empty value when none exists yet.
    std::string &get_or_create_attr(char const *s) {
        if (Attribute *a = get_attribute(s)) return a->value;
        std::unique_ptr<Attribute> attr(new Attribute(s));
        attributes.push_back(std::move(attr));
        return attributes.back()->value;
    }

    // Sets attribute `name` to `value`, overwriting any existing value.
    void add_attribute(char const *name, std::string value) {
        get_or_create_attr(name) = std::move(value);
    }

    std::string name, body;
    std::vector<std::unique_ptr<Attribute>> attributes;
    std::vector<std::unique_ptr<XmlNode>> children;

private:
    // Writes `n` spaces to `o` (nothing when n <= 0) and returns the stream.
    static std::ostream &indent(std::ostream &o, int n) {
        for (int i = 0; i < n; ++i) {
            o << ' ';
        }
        return o;
    }
};
// Exception thrown for malformed XML input. The text returned by what() is
// the caller-supplied detail prefixed with "XML parsing error: "; the same
// text is exposed through the public `message` member.
class XmlError : public std::runtime_error {
public:
    std::string message;

    XmlError(std::string const &m)
        : std::runtime_error("XmlError"),
          message(std::string("XML parsing error: ").append(m)) {}

    // Reports the full prefixed message rather than the base-class string.
    char const *what() const noexcept override {
        return message.c_str();
    }
};
class XmlParser { | |
char const *s, *t; | |
std::string token; | |
XmlParser(char const *text) : s(text), t(text), token("") { assert(text && text[0]); } | |
static bool char_in_string(char c, char const *s) { | |
while (*s) if (c == *s++) return true; | |
return false; | |
} | |
static char const *skip_while(char const *s, char const *which) { | |
while (*s && char_in_string(*s, which)) ++s; | |
return s; | |
} | |
static char const *skip_until(char const *s, char const *which) { | |
while (*s && !char_in_string(*s, which)) ++s; | |
return s; | |
} | |
std::string &next() { | |
t = s = skip_while(s, " \t\r\n"); | |
if (!s[0]) { | |
return token = ""; | |
} else if (*t == '"') { | |
s = skip_until(++t, "\""); | |
token = std::string(t, *s ? s++ : s); | |
return token; | |
} else if (char_in_string(*t, "<>!?=/")) { | |
return token = std::string(t, ++s); | |
} else { | |
s = skip_until(s, "<>!?=/ \r\t\n"); | |
token = std::string(t, s); | |
s = skip_while(s, " \t\n\r"); | |
return token; | |
} | |
} | |
std::unique_ptr<XmlNode> parse() { | |
while (token != "<" || char_in_string(*s, "!?")) { | |
next(); | |
if (s[0] == '\0') throw XmlError("Unexpected EOF"); | |
} | |
std::unique_ptr<XmlNode> node{new XmlNode(next())}; | |
while (*s && (next() != ">") && (token != "/")) { | |
std::string &attrval = node->get_or_create_attr(token.c_str()); | |
if (next() != "=") throw XmlError("Missing '=' after attribute \""+token+"\". Got \""+token+"\""); | |
attrval = next(); | |
} | |
if (token == "/") { | |
if (next() != ">") throw XmlError("Missing '>' after '/' for element \""+node->name+"\". Got \""+token+"\""); | |
next(); | |
return node; | |
} | |
if (token != ">") throw XmlError("Missing '>' for element \""+node->name+"\". Got \""+token+"\"."); | |
for (next(); token != "<" || *s != '/';) { | |
if (token == "<") node->children.push_back(parse()); | |
else { node->body += std::string(t, s = skip_until(s, "<")); next(); } | |
} | |
if (*t != '<' || *s != '/') throw XmlError("Missing ending tag for element \""+node->name+"\"."); | |
next(); | |
if (next() != node->name.c_str()) throw XmlError("Wrong end tag for element \""+node->name+"\". Got \""+token+"\""); | |
if (next() != ">") throw XmlError("Illegal character in closing tag for element \""+node->name+"\": \""+token+"\""); | |
next(); | |
return node; | |
} | |
public: | |
static std::unique_ptr<XmlNode> parse_data(char const *text) { | |
return XmlParser(text).parse(); | |
} | |
static std::unique_ptr<XmlNode> parse_file(std::string filename) { | |
if (filename.empty()) return nullptr; | |
FILE *fp = fopen(filename.c_str(), "r"); | |
if (!fp) { | |
filename += ".xml"; | |
fp = fopen(filename.c_str(), "r"); | |
} | |
if (!fp) return nullptr; | |
fseek(fp, 0, SEEK_END); | |
ssize_t len = ftell(fp); | |
fseek(fp, 0, SEEK_SET); | |
std::unique_ptr<char[]> buffer{new char[len+1]}; | |
ssize_t rlen = fread(buffer.get(), 1, len, fp); | |
assert(rlen >= 0); | |
buffer[rlen] = '\0'; | |
char garbage[16]; | |
rlen = fread(garbage, 1, sizeof garbage, fp); | |
assert(rlen <= 0); | |
return parse_data(buffer.get()); | |
} | |
}; | |
// Sample document used by main(): nested elements, attributes, self-closing
// tags, empty elements and one element with a text body.
std::string test_xml = R"xml(<root>
 <bar><baz/></bar>
 <point x="30" y="40" z="50"/>
 <frob count="30"></frob>
 <grovel count="50">
 <quux></quux>
 <a />
 <b c="d">e f g</b>
 </grovel>
</root>)xml";

// Minimal document: a root with a single self-closing child.
std::string simpler = R"xml(<root><foo /></root>)xml";
int main() { | |
try { | |
auto parsed = XmlParser::parse_data(test_xml.c_str()); | |
std::cout << *parsed << std::endl; | |
} catch (std::runtime_error const &e) { | |
std::cout << e.what() << std::endl; | |
} | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment