Skip to content

Instantly share code, notes, and snippets.

@tenderlove
Created October 14, 2009 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tenderlove/209686 to your computer and use it in GitHub Desktop.
Save tenderlove/209686 to your computer and use it in GitHub Desktop.
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <libxml/HTMLparser.h>
void find_with_id(char * html)
{
printf("parsing: %s\n\n", html);
htmlDocPtr doc = htmlReadMemory(html, strlen(html), NULL, NULL, 1);
xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
xmlXPathObjectPtr xpath = xmlXPathEvalExpression("id('hello')", ctx);
if(doc->intSubset)
printf("intSubset: %s\n", doc->intSubset->name);
if(doc->extSubset)
printf("extSubset: %s\n", doc->extSubset->name);
printf("found %d\n", xpath->nodesetval->nodeNr);
}
/*
 * Runs find_with_id() on two copies of the same markup, in order: first one
 * that starts with an XHTML 1.0 Transitional DOCTYPE, then one without any
 * DOCTYPE. Command-line arguments are not used. Always returns 0.
 */
int main(int argc, char *argv[])
{
    char *samples[] = {
        /* With DOCTYPE: the original author reports the p tag with the ID is found. */
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html><body><p id='hello'>blah</p></html>",
        /* Without DOCTYPE: the original author reports the p tag with the ID is not found. */
        "<html><body><p id='hello'>blah</p></html>",
    };
    size_t idx;

    for (idx = 0; idx < sizeof samples / sizeof samples[0]; idx++) {
        find_with_id(samples[idx]);
    }

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment