Last active
December 18, 2015 01:59
-
-
Save telescreen/5707634 to your computer and use it in GitHub Desktop.
simple xml parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Compile: g++ sparse.cpp
 * Run: ./a.out   (the XML to parse is read from standard input)
 *
 * Test code (sample inputs):
 *
 * <tag1><tag11>aa</tag11><tag12>bb</tag12></tag><tag21>cc</tag21>
 * <tag><tag1>fdfdf</tag1></tag>
 * <tag><tag1>fdfdf<tag3>fdf</tag3></tag1></tag>
 *
 */
#include <iostream> | |
#include <stack> | |
#include <vector> | |
#include <cstdio> | |
/*
 * forall(c, it): loop header that walks iterator `it` over every element of
 * container `c`, from (c).begin() up to (but excluding) (c).end().
 * The statement following the macro is the loop body.
 *
 * The iterator type is deduced with __typeof__, the spelling of the GNU
 * typeof extension that stays available when compiling with a strict
 * -std=c++NN flag (plain `typeof` is only a keyword in the gnu++ dialects).
 * Note that `c` is evaluated more than once.
 */
#define forall(c, it) \
    for (__typeof__((c).begin()) it = (c).begin(); it != (c).end(); ++it)
using namespace std; | |
// Kind of a lexer token / parse-tree node.
//   ROOT - the top-level container node of the tree
//   OTAG - opening tag, e.g. <name>
//   ETAG - closing tag, e.g. </name>
//   TEXT - run of text found between tags
enum tag_t { ROOT, OTAG, ETAG, TEXT };

// One node of the parse tree: a kind, a string value (tag name or text) and
// an ordered list of child nodes.
//
// Ownership: a Node owns every child passed to addNode() and deletes it in
// its destructor. Children must therefore be allocated with `new` and must
// not be deleted by the caller. Copying is disabled because a member-wise
// copy would make two Nodes delete the same children.
class Node
{
    std::vector<Node*> nodes;   // children, in insertion order (owned)
    tag_t tag_;
    std::string val_;

    // Declared but intentionally not defined: Node is non-copyable.
    Node(const Node&);
    Node& operator=(const Node&);

public:
    // Creates a childless node of kind `tag` carrying the value `val`.
    Node(tag_t tag, const std::string &val)
        : tag_(tag),
          val_(val)
    {
    }

    // Releases the whole subtree rooted at this node.
    ~Node()
    {
        for (std::vector<Node*>::iterator it = nodes.begin(); it != nodes.end(); ++it)
            delete *it;
    }

    // Returns the kind of this node.
    tag_t getTag() const { return tag_; }

    // Returns a copy of this node's value.
    std::string getVal() const { return val_; }

    // Appends `v` as the last child; this node takes ownership of it.
    void addNode(Node *v)
    {
        nodes.push_back(v);
    }

    // Read-only view of the children, in the order they were added.
    const std::vector<Node*>& getNodes() const { return nodes; }
};
tag_t type; | |
int ahead; | |
stack<int> tag; | |
// Reads one tag from standard input. On entry `ahead` holds the '<' that
// starts the tag.
//
// Sets the global `type` to ETAG when the tag starts with "</", otherwise to
// OTAG, and returns the tag name (everything up to, but excluding, '>').
// On return `ahead` holds the first character after the closing '>'.
//
// If the input ends before '>' is seen, the tag is incomplete: an empty
// string is returned and `ahead` is left at EOF. An empty name ("<>" or
// "</>") is likewise returned as an empty string.
string match_tag()
{
    string tagname;

    ahead = getchar();              // first character after '<'
    if (ahead == '/') {
        type = ETAG;
        ahead = getchar();          // skip the '/', it is not part of the name
    } else {
        type = OTAG;
    }

    // Stop on EOF as well as on '>', otherwise truncated input would loop
    // forever appending EOF to the name.
    while (ahead != '>' && ahead != EOF) {
        tagname.push_back(static_cast<char>(ahead));
        ahead = getchar();
    }

    if (ahead == EOF)
        return string();            // unterminated tag

    ahead = getchar();              // consume '>'
    return tagname;
}
// Reads a run of alphabetic characters (as judged by isalpha) beginning at
// the current lookahead and returns it. Marks the global `type` as TEXT and
// leaves `ahead` on the first character that is not alphabetic.
string match_text()
{
    type = TEXT;

    string run;
    for (; isalpha(ahead); ahead = getchar())
        run.push_back(ahead);
    return run;
}
// Returns the next token, chosen by the lookahead character:
//   '<'            -> a tag, read by match_tag()
//   alphabetic     -> a text run, read by match_text()
//   anything else  -> an empty string (nothing is consumed)
string token()
{
    if (ahead != '<')
        return isalpha(ahead) ? match_text() : string();
    return match_tag();
}
// Prints the subtree rooted at `tree` to standard output in post-order:
// every child subtree first (in insertion order), then this node's numeric
// kind followed by its value on the next line.
void traverse(Node *tree)
{
    const vector<Node*> &children = tree->getNodes();
    for (vector<Node*>::size_type i = 0; i < children.size(); ++i)
        traverse(children[i]);

    printf("type (OTAG, ETAG, TEXT): %d\n", tree->getTag());
    printf("%s\n", tree->getVal().c_str());
}
int main(int argc, char **argv) | |
{ | |
string t; | |
Node *tree = new Node(ROOT, ""); | |
stack<Node*> current_root; | |
current_root.push(tree); | |
ahead = getchar(); | |
while(1) { | |
t = token(); | |
if (t.empty()) break; | |
//printf("type (OTAG, ETAG, TEXT): %d\n", type); | |
//printf("%s\n", t.c_str()); | |
if (type == OTAG || type == TEXT) { | |
Node *v = new Node(type, t); | |
Node *rt = (Node*)current_root.top(); | |
rt->addNode(v); | |
if (type == OTAG) current_root.push(v); | |
} else if (type == ETAG) { | |
current_root.pop(); | |
Node *v = new Node(type, t); | |
Node *rt = (Node*)current_root.top(); | |
rt->addNode(v); | |
} | |
} | |
traverse(tree); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment