Skip to content

Instantly share code, notes, and snippets.

@minase
Created November 8, 2009 18:37
Show Gist options
  • Save minase/229419 to your computer and use it in GitHub Desktop.
Save minase/229419 to your computer and use it in GitHub Desktop.
XPathEvaluator.m
#import "Foundation/Foundation.h"
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/xpath.h>
/// Evaluates XPath expressions against HTML text using libxml2's HTML parser.
@interface XPathEvaluator : NSObject
/// Parses `html` as UTF-8 HTML and evaluates the XPath expression `query` against it.
///
/// @param query The XPath expression, e.g. `//span[@class='msg']`.
/// @param html  The HTML source text to search.
/// @return An autoreleased array with one NSDictionary per matched node. Each dictionary
///         has three keys: `name` (the node name), `attributes` (a dictionary mapping
///         attribute names to values) and `content` (the serialized child nodes, i.e. the
///         inner HTML, trimmed of leading and trailing whitespace and newlines).
+(NSArray*)arrayWithXPathQuery:(NSString*)query fromHTML:(NSString*)html;
@end
/// Converts a NUL-terminated UTF-8 string owned by libxml2 into an NSString.
/// Returns @"" when the pointer is NULL or the bytes cannot be decoded, so the result is
/// always safe to store in a collection (collections raise on nil).
static NSString *XPathEvaluatorString(const xmlChar *utf8)
{
    NSString *string = nil;
    if (utf8 != NULL)
    {
        string = [NSString stringWithCString:(const char *)utf8 encoding:NSUTF8StringEncoding];
    }
    return (string != nil) ? string : @"";
}

@implementation XPathEvaluator

/// Parses `html` as UTF-8 HTML and evaluates the XPath expression `query` against it.
///
/// This file uses manual retain/release; the returned array is autoreleased.
///
/// @param query The XPath expression. nil or an invalid expression yields an empty array.
/// @param html  The HTML source text. nil or unparseable input yields an empty array.
/// @return An array with one NSDictionary per matched node, with the keys `name`
///         (node name), `attributes` (attribute name -> value; a valueless attribute
///         such as `disabled` maps to @"") and `content` (the serialized child nodes,
///         trimmed of surrounding whitespace and newlines). Never nil.
+(NSArray*)arrayWithXPathQuery:(NSString*)query fromHTML:(NSString*)html
{
    // `results` is created with +new, so it is not owned by the local pool and
    // survives the [pool release] below.
    NSMutableArray *results = [NSMutableArray new];
    id pool = [NSAutoreleasePool new];

    const char *chtml = [html cStringUsingEncoding:NSUTF8StringEncoding];
    const char *cxpath = [query cStringUsingEncoding:NSUTF8StringEncoding];

    htmlDocPtr doc = NULL;
    xmlXPathContextPtr xctx = NULL;
    xmlXPathObjectPtr xobj = NULL;

    // XPath
    if (chtml != NULL)
    {
        doc = htmlParseDoc((xmlChar*)chtml, "UTF-8");
    }
    if (doc == NULL)
    {
        NSLog(@"parse fail");
    }
    else
    {
        xctx = xmlXPathNewContext(doc);
        if (xctx != NULL && cxpath != NULL)
        {
            xobj = xmlXPathEval((xmlChar*)cxpath, xctx);
        }
        if (xobj == NULL) NSLog(@"xpath fail");
    }

    // Only touch the result object after it is known to be valid. A non-node-set result
    // (number, string, boolean) has a NULL node set, which counts as zero nodes.
    xmlNodeSetPtr nodes = (xobj != NULL) ? xobj->nodesetval : NULL;
    int count = xmlXPathNodeSetGetLength(nodes);

    for (int n = 0; n < count; n++)
    {
        xmlNodePtr node = nodes->nodeTab[n];
        // Namespace nodes are xmlNs structs with a different layout than xmlNode;
        // reading children/properties from them would be invalid, so skip them.
        if (node == NULL || node->type == XML_NAMESPACE_DECL) continue;

        xmlOutputBufferPtr outbuf = xmlAllocOutputBuffer(NULL);
        if (outbuf == NULL)
        {
            NSLog(@"outbuf fail");
            continue;
        }

        // inner html: serialize every child of the matched node into the buffer
        for (xmlNodePtr child = node->children; child != NULL; child = child->next)
        {
            htmlNodeDumpFormatOutput(outbuf, doc, child, "UTF-8", 0);
        }
        xmlOutputBufferFlush(outbuf);

        // attributes: the `properties` field only exists on element nodes
        NSMutableDictionary *attributes = [NSMutableDictionary dictionary];
        if (node->type == XML_ELEMENT_NODE)
        {
            for (xmlAttr *attr = node->properties; attr != NULL; attr = attr->next)
            {
                // Valueless HTML attributes (e.g. <input disabled>) have no child text node.
                const xmlChar *value = (attr->children != NULL) ? attr->children->content : NULL;
                [attributes setObject:XPathEvaluatorString(value)
                               forKey:XPathEvaluatorString(attr->name)];
            }
        }

        // libxml2 2.9 made the output buffer's storage opaque; use the accessor there
        // and fall back to direct field access on older releases.
#ifdef LIBXML2_NEW_BUFFER
        const xmlChar *dumped = xmlOutputBufferGetContent(outbuf);
#else
        const xmlChar *dumped = (outbuf->buffer != NULL) ? outbuf->buffer->content : NULL;
#endif

        // Dictionary
        NSString *name = XPathEvaluatorString(node->name);
        NSString *content = [XPathEvaluatorString(dumped)
            stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
        NSDictionary *nodeinfo = [NSDictionary dictionaryWithObjectsAndKeys:
            name, @"name",
            attributes, @"attributes",
            content, @"content",
            nil
        ];
        [results addObject:nodeinfo];

        // The NSString above copied the bytes, so the buffer can be released now.
        xmlOutputBufferClose(outbuf);
    }

    if (xobj != NULL) xmlXPathFreeObject(xobj);
    if (xctx != NULL) xmlXPathFreeContext(xctx);
    if (doc != NULL) xmlFreeDoc(doc);
    [pool release];
    return [results autorelease];
}
@end
/// Command-line driver: evaluates an XPath query against an HTML snippet and logs
/// the matched nodes followed by their count.
///
/// Usage: program [xpath [html]]
///   argv[1]  XPath expression (default: //span[@class='msg'])
///   argv[2]  HTML source text (default: a two-span sample document)
///
/// Returns 0 on success, 1 when an argument is not valid UTF-8.
int main(int argc, char **argv)
{
    id pool = [NSAutoreleasePool new];
    // argv[k] exists only when argc > k.
    const char *cxpath = (argc > 1) ? argv[1] : "//span[@class='msg']";
    const char *chtml = (argc > 2) ? argv[2] : "<span class=\"msg\">oppai</span><span>futomomo</span>";
    NSString *query = [NSString stringWithCString:cxpath encoding:NSUTF8StringEncoding];
    NSString *html = [NSString stringWithCString:chtml encoding:NSUTF8StringEncoding];
    // stringWithCString:encoding: returns nil for bytes that are not valid UTF-8.
    if (query == nil || html == nil)
    {
        NSLog(@"arguments must be valid UTF-8");
        [pool release];
        return 1;
    }
    NSArray *nodes = [XPathEvaluator arrayWithXPathQuery:query fromHTML:html];
    NSLog(@"%@", nodes);
    // -count returns NSUInteger; cast so the format specifier is correct on 32- and 64-bit.
    NSLog(@"%lu", (unsigned long)[nodes count]);
    [pool release];
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment