Skip to content

Instantly share code, notes, and snippets.

@thomcc
Created February 24, 2018 22:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomcc/2caeb3c56b57367a33be6166222b590a to your computer and use it in GitHub Desktop.
Save thomcc/2caeb3c56b57367a33be6166222b590a to your computer and use it in GitHub Desktop.
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
/// One element of an XML tree: a tag name, an optional text body, an ordered
/// list of attributes and an ordered list of owned child elements.
struct XmlNode {
    /// A single name/value pair attached to an element.
    struct Attribute {
        Attribute(std::string k) : name(k) {}
        std::string name, value;
    };

    /// Creates an element with the given tag name and nothing else.
    explicit XmlNode(std::string name) : name(name) {}
    /// Creates an element with an empty tag name.
    XmlNode() {}

    /// Appends a new child element named `tagname` and returns a non-owning
    /// pointer to it; the child is owned by this node.
    XmlNode *create_child(std::string tagname) {
        children.emplace_back(new XmlNode(tagname));
        XmlNode *added = children.back().get();
        return added;
    }

    /// Returns the first child whose tag name equals `s`, or nullptr.
    XmlNode *get_child(char const *s) {
        XmlNode const *self = this;
        return const_cast<XmlNode *>(self->get_child(s));
    }

    /// Returns the first attribute whose name equals `s`, or nullptr.
    Attribute *get_attribute(char const *s) {
        XmlNode const *self = this;
        return const_cast<Attribute *>(self->get_attribute(s));
    }

    /// Const lookup of the first attribute named `s`; nullptr when absent.
    Attribute const *get_attribute(char const *s) const {
        for (auto it = attributes.begin(); it != attributes.end(); ++it) {
            if ((*it)->name == s) {
                return it->get();
            }
        }
        return nullptr;
    }

    /// Const lookup of the first child named `s`; nullptr when absent.
    XmlNode const *get_child(char const *s) const {
        for (auto it = children.begin(); it != children.end(); ++it) {
            if ((*it)->name == s) {
                return it->get();
            }
        }
        return nullptr;
    }

    /// Returns the value of attribute `s` as a C string, or "" when this
    /// element has no such attribute. The pointer refers to storage owned by
    /// the attribute.
    char const *attribute_value(char const *s) const {
        Attribute const *found = get_attribute(s);
        return found ? found->value.c_str() : "";
    }

    /// Streams the element (and its subtree) starting at indentation 0.
    friend std::ostream &operator<<(std::ostream &o, XmlNode const &n) {
        return n.write(o, 0);
    }

    /// Serializes this element to `o`, indented by `depth` spaces.
    ///
    /// - no children and no body:            `<name attrs />`
    /// - no children and body under 60 chars: `<name attrs>body</name>` on one line
    /// - otherwise: opening tag, children and body (each indented by two more
    ///   spaces), and closing tag on separate lines.
    ///
    /// Attribute values and body text are written verbatim (no escaping).
    std::ostream &write(std::ostream &o, int depth=0) const {
        bool const childless = children.empty();

        indent(o, depth) << '<' << name;
        for (auto const &a : attributes) {
            o << ' ' << a->name << "=\"" << a->value << '"';
        }

        // Completely empty element: self-closing form.
        if (childless && body.empty()) {
            o << " />\n";
            return o;
        }

        // Short text-only element: keep everything on a single line.
        if (childless && body.size() < 60) {
            o << ">" << body << "</" << name << ">\n";
            return o;
        }

        // General multi-line form.
        o << ">\n";
        for (auto const &kid : children) {
            kid->write(o, depth + 2);
        }
        if (!body.empty()) {
            indent(o, depth + 2) << body << "\n";
        }
        indent(o, depth) << "</" << name << ">\n";
        return o;
    }

    /// Returns a mutable reference to the value of attribute `s`, appending a
    /// new attribute with an empty value first if none exists yet.
    std::string &get_or_create_attr(char const *s) {
        Attribute *slot = get_attribute(s);
        if (slot == nullptr) {
            attributes.emplace_back(new Attribute(s));
            slot = attributes.back().get();
        }
        return slot->value;
    }

    /// Sets attribute `name` to `value`, creating it when missing and
    /// overwriting it otherwise.
    void add_attribute(char const *name, std::string value) {
        std::string &target = get_or_create_attr(name);
        target = value;
    }

    std::string name, body;
    std::vector<std::unique_ptr<Attribute>> attributes;
    std::vector<std::unique_ptr<XmlNode>> children;

private:
    /// Writes `n` spaces to `o` (nothing when `n` <= 0) and returns `o`.
    static std::ostream &indent(std::ostream &o, int n) {
        while (n-- > 0) {
            o << ' ';
        }
        return o;
    }
};
/// Exception thrown by the XML parser. The human-readable text is kept in
/// `message` (prefixed with "XML parsing error: ") and is what `what()`
/// returns.
class XmlError : public std::runtime_error {
public:
    std::string message;

    XmlError(std::string const &m)
        : std::runtime_error("XmlError")
        , message(std::string("XML parsing error: ").append(m))
    {}

    /// Returns the prefixed message rather than the base-class string.
    char const *what() const noexcept override {
        return message.c_str();
    }
};
/// Small recursive-descent reader that turns a NUL-terminated text buffer
/// into an XmlNode tree. Use the static entry points parse_data() and
/// parse_file(); malformed input is reported by throwing XmlError.
class XmlParser {
    char const *s, *t;   // s: read cursor; t: start of the most recent token
    std::string token;   // text of the most recent token ("" at end of input)

    /// Closes a C stdio stream; used as the unique_ptr deleter in parse_file
    /// so the file is released on every exit path.
    struct FileCloser {
        void operator()(std::FILE *f) const { std::fclose(f); }
    };

    /// A null `text` is treated like empty input, so parse() reports it as
    /// "Unexpected EOF" instead of dereferencing a null pointer.
    XmlParser(char const *text) : s(text ? text : ""), t(s), token("") {}

    /// True when `c` occurs in the NUL-terminated set `s`.
    static bool char_in_string(char c, char const *s) {
        while (*s) if (c == *s++) return true;
        return false;
    }

    /// Advances past every leading character of `s` that is in `which`.
    static char const *skip_while(char const *s, char const *which) {
        while (*s && char_in_string(*s, which)) ++s;
        return s;
    }

    /// Advances to the first character of `s` that is in `which`, or to the
    /// terminating NUL.
    static char const *skip_until(char const *s, char const *which) {
        while (*s && !char_in_string(*s, which)) ++s;
        return s;
    }

    /// Reads the next token into `token` and returns it.
    ///
    /// Leading whitespace is skipped, then:
    /// - end of input yields "";
    /// - a double-quoted string yields its contents without the quotes;
    /// - one of `< > ! ? = /` yields that single character;
    /// - anything else yields the run up to the next such character or
    ///   whitespace.
    /// `t` is left at the first character of the token text.
    std::string &next() {
        t = s = skip_while(s, " \t\r\n");
        if (!s[0]) {
            return token = "";
        } else if (*t == '"') {
            s = skip_until(++t, "\"");
            // Step over the closing quote only if there is one.
            token = std::string(t, *s ? s++ : s);
            return token;
        } else if (char_in_string(*t, "<>!?=/")) {
            return token = std::string(t, ++s);
        } else {
            s = skip_until(s, "<>!?=/ \r\t\n");
            token = std::string(t, s);
            s = skip_while(s, " \t\n\r");
            return token;
        }
    }

    /// Parses one element (with attributes, body text and nested children)
    /// starting at the current position and returns it. On return `token`
    /// holds the first token after the element.
    ///
    /// @throws XmlError on premature end of input or malformed markup.
    std::unique_ptr<XmlNode> parse() {
        // Skip forward to a '<' that does not start a "<!" or "<?" construct.
        while (token != "<" || char_in_string(*s, "!?")) {
            next();
            if (s[0] == '\0') throw XmlError("Unexpected EOF");
        }
        std::unique_ptr<XmlNode> node{new XmlNode(next())};

        // Attributes: name '=' value, until '>' or '/'.
        while (*s && (next() != ">") && (token != "/")) {
            // Keep the name: `token` is overwritten by the next() calls below.
            std::string const attrname = token;
            std::string &attrval = node->get_or_create_attr(attrname.c_str());
            if (next() != "=") throw XmlError("Missing '=' after attribute \""+attrname+"\". Got \""+token+"\"");
            attrval = next();
        }

        // Self-closing element.
        if (token == "/") {
            if (next() != ">") throw XmlError("Missing '>' after '/' for element \""+node->name+"\". Got \""+token+"\"");
            next();
            return node;
        }
        if (token != ">") throw XmlError("Missing '>' for element \""+node->name+"\". Got \""+token+"\".");

        // Content: child elements and text, until the "</" of the end tag.
        for (next(); token != "<" || *s != '/';) {
            // End of input before the closing tag: without this check the
            // loop would spin forever, since next() keeps returning "".
            if (*t == '\0') throw XmlError("Missing ending tag for element \""+node->name+"\".");
            if (token == "<") {
                node->children.push_back(parse());
            } else {
                // Take everything from the token start up to the next '<'.
                s = skip_until(s, "<");
                node->body.append(t, s);
                next();
            }
        }

        next();  // consume the '/' of "</"
        if (next() != node->name) throw XmlError("Wrong end tag for element \""+node->name+"\". Got \""+token+"\"");
        if (next() != ">") throw XmlError("Illegal character in closing tag for element \""+node->name+"\": \""+token+"\"");
        next();
        return node;
    }

public:
    /// Parses the first element found in the NUL-terminated buffer `text`.
    ///
    /// @throws XmlError if the text is empty, null or malformed.
    static std::unique_ptr<XmlNode> parse_data(char const *text) {
        return XmlParser(text).parse();
    }

    /// Reads `filename` (retrying with ".xml" appended if it cannot be
    /// opened) and parses its contents.
    ///
    /// @return the parsed root element, or nullptr if `filename` is empty,
    ///         the file cannot be opened, or its size cannot be determined.
    /// @throws XmlError if the file contents are malformed.
    static std::unique_ptr<XmlNode> parse_file(std::string filename) {
        if (filename.empty()) return nullptr;

        std::unique_ptr<std::FILE, FileCloser> fp{std::fopen(filename.c_str(), "r")};
        if (!fp) {
            filename += ".xml";
            fp.reset(std::fopen(filename.c_str(), "r"));
        }
        if (!fp) return nullptr;

        // Determine the file size; every call can fail (e.g. non-seekable
        // streams), and ftell reports failure as -1.
        if (std::fseek(fp.get(), 0, SEEK_END) != 0) return nullptr;
        long const len = std::ftell(fp.get());
        if (len < 0) return nullptr;
        if (std::fseek(fp.get(), 0, SEEK_SET) != 0) return nullptr;

        std::size_t const size = static_cast<std::size_t>(len);
        std::unique_ptr<char[]> buffer{new char[size + 1]};
        std::size_t const rlen = std::fread(buffer.get(), 1, size, fp.get());
        buffer[rlen] = '\0';

        // Debug-only sanity check: nothing should be left to read.
        char garbage[16];
        std::size_t const extra = std::fread(garbage, 1, sizeof garbage, fp.get());
        assert(extra == 0);
        (void)extra;

        return parse_data(buffer.get());
    }
};
// Sample document used by main(): nested elements, attributes, self-closing
// tags, empty elements and an element with text content.
std::string test_xml{
    "<root>\n"
    " <bar><baz/></bar>\n"
    " <point x=\"30\" y=\"40\" z=\"50\"/>\n"
    " <frob count=\"30\"></frob>\n"
    " <grovel count=\"50\">\n"
    " <quux></quux>\n"
    " <a />\n"
    " <b c=\"d\">e f g</b>\n"
    " </grovel>\n"
    "</root>"
};

// Smallest interesting document: a root with one self-closing child.
std::string simpler{"<root><foo /></root>"};
int main() {
try {
auto parsed = XmlParser::parse_data(test_xml.c_str());
std::cout << *parsed << std::endl;
} catch (std::runtime_error const &e) {
std::cout << e.what() << std::endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment