Skip to content

Instantly share code, notes, and snippets.

@deoxxa
Created December 30, 2010 12:55
Show Gist options
  • Save deoxxa/759766 to your computer and use it in GitHub Desktop.
Save deoxxa/759766 to your computer and use it in GitHub Desktop.
#include <fstream>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <htmlcxx/html/ParserDom.h>
#include <htmlcxx/html/utils.h>
// Reads the HTML file named by argv[1], parses it with htmlcxx, and prints the
// tagName() of every node from each <div> whose class attribute contains
// "entry " up to (not including) the next <div> that lacks that class and
// sits at the same or a shallower depth.
//
// Returns 0 on success, 1 when no file argument is given or the file cannot
// be opened.
int main(int argc, char** argv)
{
    if (argc < 2)
    {
        // argv[1] would be a null pointer here; never hand it to ifstream.
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "prog") << " <file.html>\n";
        return 1;
    }

    std::ifstream file(argv[1]);
    if (!file)
    {
        std::cerr << "cannot open " << argv[1] << '\n';
        return 1;
    }

    // Slurp the whole file. Only gcount() bytes are appended per chunk, so the
    // buffer does not need to be cleared between reads.
    std::string html;
    char buf[1024];
    do
    {
        file.read(buf, sizeof buf);
        html.append(buf, static_cast<std::string::size_type>(file.gcount()));
    } while (file);
    file.close();

    htmlcxx::HTML::ParserDom parser;
    parser.parse(html);
    tree<htmlcxx::HTML::Node> root = parser.getTree();

    bool print = false;   // true while walking inside/after a matching entry div
    int entryDepth = 0;   // tree depth of the entry div that switched printing on

    for (tree<htmlcxx::HTML::Node>::iterator it = root.begin(); it != root.end(); ++it)
    {
        if (it->tagName() == "div")
        {
            // Attributes are only populated after parseAttributes() is called.
            it->parseAttributes();
            const int here = root.depth(it);
            if (it->attribute("class").second.find("entry ") != std::string::npos)
            {
                entryDepth = here;
                print = true;
            }
            else if (print && here <= entryDepth)
            {
                // A non-entry div at the entry's level, or above it (the
                // entry's container has ended), terminates the printed region.
                entryDepth = 0;
                print = false;
            }
        }

        if (print)
        {
            // '\n' instead of std::endl: same output without a flush per line.
            std::cout << it->tagName() << '\n';
        }
    }

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment