Skip to content

Instantly share code, notes, and snippets.

@derofim
Last active July 26, 2016 18:21
Show Gist options
  • Save derofim/517b60c637dc2d8e0f680610ffd8722f to your computer and use it in GitHub Desktop.
Save derofim/517b60c637dc2d8e0f680610ffd8722f to your computer and use it in GitHub Desktop.
Examples of gumbo-query usage
#include <Windows.h>

#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>

#include "Document.h"
#include "Node.h"
int main()
{
SetConsoleOutputCP(65001);
{
std::cout << "Example: \"h1 a\" to find all a tags in h1" << std::endl;
std::string page("<html><div><span>1\n</span>2\n</div><h1><a>some link</a></h1><h1><a>other link</a></h1><a>no link</a></html>");
CDocument doc;
doc.parse(page.c_str());
CSelection c = doc.find("h1 a");
for (int i = 0; i < c.nodeNum(); i++) {
std::cout << c.nodeAt(i).text() << std::endl;
}
}
std::cout << "==========" << std::endl;
{
std::cout << "Example: \"div\" to find all div tags" << std::endl;
std::string page = "<html><div><span>1\n</span>2\n</div><br/></html>";
CDocument doc;
doc.parse(page.c_str());
CSelection c = doc.find("div");
CNode pNode = c.nodeAt(0);
std::string content = page.substr(pNode.startPos(), pNode.endPos() - pNode.startPos());
std::cout << content << std::endl;
// Clears subtags:
std::cout << "--- OR ---" << std::endl;
for (int i = 0; i < c.nodeNum(); i++) {
std::cout << c.nodeAt(i).text() << std::endl;
}
}
// Example: "h1 a.special" to find a tags with class="special"
// Example: "span[id='spid'][class='spcls']" to find span tags with id='spid' and class='spcls'
std::cout << "==========" << std::endl;
{
std::cout << "Example: inner find" << std::endl;
std::string page = "<html><div>outer\n<div>inner\n<h1>title\n</h1></div></div></html>";
CDocument doc;
doc.parse(page.c_str());
CSelection outer = doc.find("div div");
CNode pNode = outer.nodeAt(0);
CSelection c = outer.find("h1");
for (int i = 0; i < c.nodeNum(); i++) {
std::cout << c.nodeAt(i).text() << std::endl;
}
}
std::cout << "==========" << std::endl;
{
std::cout << "Example: node manipulation" << std::endl;
std::string page = "<html><div>outer\n<div class=\"cls\">inner\n<h1>title\n</h1></div></div></html>";
CDocument doc;
doc.parse(page.c_str());
CSelection c = doc.find("div");
for (int i = 0; i < c.nodeNum(); i++)
{
if (!c.nodeAt(i).valid()) continue;
std::cout << "ownText(): " << c.nodeAt(i).ownText() << std::endl; // ignores text in subtags ;)
std::cout << "parent().tag(): " << c.nodeAt(i).parent().tag() << std::endl; // get parent tag name
std::cout << "class: " << c.nodeAt(i).attribute("class") << std::endl; // get class name
}
}
std::getchar();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment