Skip to content

Instantly share code, notes, and snippets.

@telescreen
Last active December 18, 2015 01:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save telescreen/5707634 to your computer and use it in GitHub Desktop.
Save telescreen/5707634 to your computer and use it in GitHub Desktop.
simple xml parser
/*
* Compile: g++ sparse.cpp
* Run: ./a.out   (the XML text is read from standard input)
*
* Sample inputs (feed one line per run):
*
* <tag1><tag11>aa</tag11><tag12>bb</tag12></tag><tag21>cc</tag21>
* <tag><tag1>fdfdf</tag1></tag>
* <tag><tag1>fdfdf<tag3>fdf</tag3></tag1></tag>
*
*/
#include <iostream>
#include <stack>
#include <vector>
#include <cstdio>
// forall(c, it): loop header that walks iterator `it` over container `c`
// from c.begin() up to (not including) c.end().
// `c` is expanded more than once, so pass an expression without side effects.
// __typeof__ is used instead of the bare GNU keyword `typeof`, because the
// bare spelling is disabled when compiling with a strict -std=c++NN option.
#define forall(c, it) \
for(__typeof__((c).begin()) it = (c).begin(); it != (c).end(); ++it)
using namespace std;
// Kind of a parse-tree node. The enumerator order fixes the numeric values
// (ROOT = 0, OTAG = 1, ETAG = 2, TEXT = 3).
enum tag_t {
    ROOT,   // top-level container node
    OTAG,   // opening tag
    ETAG,   // closing tag
    TEXT    // character data
};

// One node of the parse tree: a kind, a string payload and an ordered list
// of child pointers. The class only stores the pointers handed to it; it
// never deletes them.
class Node
{
public:
    // Builds a childless node of kind `tag` carrying payload `val`.
    Node(tag_t tag, std::string val)
        : children_(),
          kind_(tag),
          text_(val)
    {
    }

    // Kind of this node.
    tag_t getTag() const
    {
        return kind_;
    }

    // Copy of the payload string.
    std::string getVal() const
    {
        return text_;
    }

    // Appends `v` as the last child; the pointer is stored as given.
    void addNode(Node *v)
    {
        children_.push_back(v);
    }

    // Children in insertion order.
    const std::vector<Node*>& getNodes() const
    {
        return children_;
    }

private:
    std::vector<Node*> children_;
    tag_t kind_;
    std::string text_;
};
// Tokenizer state shared by token(), match_tag(), match_text() and main().

// Kind of the token most recently produced: match_tag() stores OTAG or ETAG,
// match_text() stores TEXT. main() reads it after each token() call.
tag_t type;
// One-character lookahead: the latest value returned by getchar().
int ahead;
// Not referenced by any other code visible in this file.
stack<int> tag;
// Reads one tag from standard input. token() calls this when `ahead` is '<'.
//
// Sets the global `type` to ETAG when the character after '<' is '/',
// otherwise to OTAG, and returns the characters between that point and the
// closing '>' (the '/' of an end tag is not part of the returned name).
// An empty tag ("<>" or "</>") yields an empty string.
//
// On return `ahead` holds the first character after '>', or EOF when the
// input ended before the tag was closed; in that case the partial name read
// so far is returned.
string match_tag()
{
    string tagname;

    ahead = getchar();
    if (ahead == '/') {
        type = ETAG;
        ahead = getchar();      // step past the '/'
    } else {
        type = OTAG;
    }

    // Stop on EOF as well as on '>': getchar() keeps returning EOF once the
    // input is exhausted, so waiting for '>' alone would never terminate.
    while (ahead != '>' && ahead != EOF) {
        tagname.push_back(static_cast<char>(ahead));
        ahead = getchar();
    }

    if (ahead == '>') {
        ahead = getchar();      // step past the closing '>'
    }
    return tagname;
}
// Collects the run of alphabetic characters that starts at `ahead`.
// Marks the current token as TEXT through the global `type`, and leaves
// `ahead` on the first character for which isalpha() is false.
string match_text()
{
    type = TEXT;

    string run;
    for (; isalpha(ahead); ahead = getchar()) {
        run.push_back(ahead);
    }
    return run;
}
string token()
{
if (ahead == '<') {
return match_tag();
}
if (isalpha(ahead)) {
return match_text();
}
return "";
}
// Prints the subtree rooted at `tree` to standard output in post-order:
// every child subtree is printed first, then the node's own kind (as the
// numeric tag_t value) and its payload string, one per line.
void traverse(Node *tree)
{
    const vector<Node*> &children = tree->getNodes();
    for (vector<Node*>::const_iterator child = children.begin();
         child != children.end(); ++child) {
        traverse(*child);
    }

    printf("type (OTAG, ETAG, TEXT): %d\n", tree->getTag());
    printf("%s\n", tree->getVal().c_str());
}
int main(int argc, char **argv)
{
string t;
Node *tree = new Node(ROOT, "");
stack<Node*> current_root;
current_root.push(tree);
ahead = getchar();
while(1) {
t = token();
if (t.empty()) break;
//printf("type (OTAG, ETAG, TEXT): %d\n", type);
//printf("%s\n", t.c_str());
if (type == OTAG || type == TEXT) {
Node *v = new Node(type, t);
Node *rt = (Node*)current_root.top();
rt->addNode(v);
if (type == OTAG) current_root.push(v);
} else if (type == ETAG) {
current_root.pop();
Node *v = new Node(type, t);
Node *rt = (Node*)current_root.top();
rt->addNode(v);
}
}
traverse(tree);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment