Skip to content

Instantly share code, notes, and snippets.

@premedios
Created November 30, 2013 22:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save premedios/7725404 to your computer and use it in GitHub Desktop.
Save premedios/7725404 to your computer and use it in GitHub Desktop.
//
// TFHppleElement.h
// Hpple
//
// Created by Geoffrey Grosenbach on 1/31/09.
//
// Copyright (c) 2009 Topfunky Corporation, http://topfunky.com
//
// MIT LICENSE
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#import <Foundation/Foundation.h>
// TFHppleElement wraps one parsed node (an NSDictionary) and exposes its tag
// name, content, attributes and children.
@interface TFHppleElement : NSObject
// Initializes an element backed by theNode.
// isDataXML and theEncoding are stored; they are reused by searchWithXPathQuery:
// and handed on to every child element created from this one.
- (id) initWithNode:(NSDictionary *) theNode isXML:(BOOL)isDataXML withEncoding:(NSString *)theEncoding;
// Convenience constructor: allocates an element and forwards all arguments to
// initWithNode:isXML:withEncoding:.
+ (TFHppleElement *) hppleElementWithNode:(NSDictionary *) theNode isXML:(BOOL)isDataXML withEncoding:(NSString *)theEncoding;
// Returns the value stored under the node's "raw" key
// (presumably the node's raw markup -- confirm against XPathQuery).
@property (nonatomic, copy, readonly) NSString *raw;
// Returns this tag's innerHTML content.
@property (nonatomic, copy, readonly) NSString *content;
// Returns the name of the current tag, such as "h3".
@property (nonatomic, copy, readonly) NSString *tagName;
// Returns tag attributes with name as key and content as value.
// href = 'http://peepcode.com'
// class = 'highlight'
// The dictionary is rebuilt on every access.
@property (nonatomic, strong, readonly) NSDictionary *attributes;
// Returns the children of a given node.
// A new array of new TFHppleElement objects is built on every access;
// each returned child has its parent set to this element.
@property (nonatomic, strong, readonly) NSArray *children;
// Returns the first child of a given node, or nil if there are no children
@property (nonatomic, strong, readonly) TFHppleElement *firstChild;
// The parent of a node.
// Not retained (unsafe_unretained): only valid while the parent element is alive.
// Set on elements vended by the children property; it is not assigned for
// elements returned by searchWithXPathQuery:.
@property (nonatomic, unsafe_unretained, readonly) TFHppleElement *parent;
// Returns YES if the node has any child
// This is more efficient than using the children property since no NSArray is constructed
- (BOOL)hasChildren;
// Returns YES if this is a text node
// (the tag name is "text" and the node has content)
- (BOOL)isTextNode;
// Provides easy access to the content of a specific attribute,
// such as 'href' or 'class'.
// Returns nil if the attribute is not present.
- (NSString *) objectForKey:(NSString *) theKey;
// Returns the children whose tag name equals the given string
// (comparison is performed with NSString's isEqualToString)
// Returns an empty array if no matching child is found
- (NSArray *) childrenWithTagName:(NSString *)tagName;
// Returns the first child node whose tag name equals the given string
// (comparison is performed with NSString's isEqualToString)
// Returns nil if no matching child is found
- (TFHppleElement *) firstChildWithTagName:(NSString *)tagName;
// Returns the children whose class equals the given string
// (comparison is performed with NSString's isEqualToString against the whole
// "class" attribute value, so class="a b" does not match "a")
// Returns an empty array if no matching child is found
- (NSArray *) childrenWithClassName:(NSString *)className;
// Returns the first child whose class equals the given string
// (comparison is performed with NSString's isEqualToString against the whole
// "class" attribute value)
// Returns nil if no matching child is found
- (TFHppleElement *) firstChildWithClassName:(NSString*)className;
// Returns the first text node (see isTextNode) from this element's children.
// If no child qualifies as a text node, falls back to the first child whose
// tag name is "text"; returns nil if there is no such child either.
- (TFHppleElement *) firstTextChild;
// Returns the string contained by the first text node from this element's children
// Convenience method which can be used instead of firstTextChild.content
- (NSString *) text;
// Returns elements searched with xpath.
// The query runs against this element's raw string encoded as UTF-8, through
// the XML or the HTML query function depending on the isXML flag given at init.
// Returns an array of TFHppleElement objects (empty if the query yields nothing).
- (NSArray *) searchWithXPathQuery:(NSString *)xPathOrCSS;
// Enables subscript syntax, e.g. element[@"href"]:
// returns the attribute value stored for key, or nil if there is none.
- (id)objectAtKeyedSubscript:(id <NSCopying>)key;
@end
//
// TFHppleElement.m
// Hpple
//
// Created by Geoffrey Grosenbach on 1/31/09.
//
// Copyright (c) 2009 Topfunky Corporation, http://topfunky.com
//
// MIT LICENSE
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#import "TFHppleElement.h"
#import "XPathQuery.h"
// Keys into the parsed-node dictionaries that back TFHppleElement.
// Value returned by -content; also read from each attribute entry as the attribute's value.
static NSString * const TFHppleNodeContentKey = @"nodeContent";
// Value returned by -tagName.
static NSString * const TFHppleNodeNameKey = @"nodeName";
// Collection of child node dictionaries enumerated by -children.
static NSString * const TFHppleNodeChildrenKey = @"nodeChildArray";
// Collection of attribute entries enumerated by -attributes.
static NSString * const TFHppleNodeAttributeArrayKey = @"nodeAttributeArray";
// Within one attribute entry: the attribute's name.
static NSString * const TFHppleNodeAttributeNameKey = @"attributeName";
// Tag name that -isTextNode and -firstTextChild compare against.
static NSString * const TFHppleTextNodeName = @"text";
// Private class extension: backing storage plus a writable redeclaration of
// the public readonly parent property.
@interface TFHppleElement ()
{
// Parsed node dictionary this element wraps; read through the TFHppleNode* keys.
NSDictionary * node;
// YES selects PerformXMLXPathQueryWithEncoding in searchWithXPathQuery:,
// NO selects PerformHTMLXPathQueryWithEncoding.
BOOL isXML;
// Passed unchanged to the XPath query functions and to every child element.
NSString *encoding;
// Backing ivar for the parent property (bound by name via @synthesize parent).
// Not retained: the pointer dangles if the parent element is deallocated
// while this element is still in use.
__unsafe_unretained TFHppleElement *parent;
}
// Writable inside this file so the children getter can link each child to the
// element that created it.
@property (nonatomic, unsafe_unretained, readwrite) TFHppleElement *parent;
@end
@implementation TFHppleElement
@synthesize parent;
// Initializes an element that wraps theNode.
//
// theNode     - parsed node dictionary; retained as-is (not copied, so the
//               element keeps sharing the dictionary it was given).
// isDataXML   - YES if the node came from XML, NO for HTML; later decides
//               which XPath query function searchWithXPathQuery: calls.
// theEncoding - encoding name forwarded to XPath queries and child elements.
//
// Returns the initialized element, or nil if [super init] fails.
- (id) initWithNode:(NSDictionary *) theNode isXML:(BOOL)isDataXML withEncoding:(NSString *)theEncoding
{
    self = [super init];
    if (self) {
        isXML = isDataXML;
        node = theNode;
        // Copy the string so that a caller passing an NSMutableString cannot
        // change this element's encoding after the fact. For an immutable
        // NSString this is just a retain.
        encoding = [theEncoding copy];
    }
    return self;
}
// Convenience constructor: allocates an instance of the receiving class and
// forwards every argument to initWithNode:isXML:withEncoding:.
+ (TFHppleElement *) hppleElementWithNode:(NSDictionary *) theNode isXML:(BOOL)isDataXML withEncoding:(NSString *)theEncoding
{
    // Inside a class method self already is the class object.
    TFHppleElement *element = [[self alloc] initWithNode:theNode
                                                   isXML:isDataXML
                                            withEncoding:theEncoding];
    return element;
}
#pragma mark -
// Returns the value stored under the "raw" key of the backing node
// dictionary, or nil when that key is absent.
- (NSString *)raw
{
    NSString *rawValue = node[@"raw"];
    return rawValue;
}
// Returns the node's content (the TFHppleNodeContentKey entry of the backing
// dictionary), or nil when the node has none.
- (NSString *) content
{
    NSString *nodeContent = node[TFHppleNodeContentKey];
    return nodeContent;
}
// Returns the node's tag name (the TFHppleNodeNameKey entry of the backing
// dictionary), or nil when the entry is missing.
- (NSString *) tagName
{
    NSString *nodeName = node[TFHppleNodeNameKey];
    return nodeName;
}
// Wraps every child node dictionary in a new TFHppleElement and returns them
// in stored order. Each child inherits this element's isXML flag and encoding
// and gets its parent set to self. Nothing is cached: every call builds a
// fresh array of fresh elements. Returns an empty array when there are no
// child nodes.
- (NSArray *) children
{
    NSArray *childNodes = node[TFHppleNodeChildrenKey];
    NSMutableArray *childElements = [NSMutableArray array];
    for (NSDictionary *childNode in childNodes) {
        TFHppleElement *childElement = [TFHppleElement hppleElementWithNode:childNode
                                                                      isXML:isXML
                                                               withEncoding:encoding];
        childElement.parent = self;
        [childElements addObject:childElement];
    }
    return childElements;
}
// Returns a new element wrapping the first child node, with its parent set to
// self, or nil if this node has no children.
//
// Only the first child is wrapped; building the complete children array just
// to read index 0 would allocate one throw-away element per child.
- (TFHppleElement *) firstChild
{
    // Fast enumeration yields the first stored child (if any) without assuming
    // anything about the collection beyond what the children getter relies on.
    for (NSDictionary *childNode in node[TFHppleNodeChildrenKey]) {
        TFHppleElement *element = [TFHppleElement hppleElementWithNode:childNode
                                                                 isXML:isXML
                                                          withEncoding:encoding];
        element.parent = self;
        return element;
    }
    return nil;
}
// Flattens the node's attribute entries into a name -> value dictionary.
// Entries lacking either a name or a value are skipped; if a name occurs more
// than once the last entry wins. The dictionary is rebuilt on every call.
- (NSDictionary *) attributes
{
    NSMutableDictionary *attributesByName = [NSMutableDictionary dictionary];
    NSArray *attributeEntries = node[TFHppleNodeAttributeArrayKey];
    for (NSDictionary *entry in attributeEntries) {
        id value = entry[TFHppleNodeContentKey];
        if (value == nil) {
            continue;
        }
        id name = entry[TFHppleNodeAttributeNameKey];
        if (name == nil) {
            continue;
        }
        [attributesByName setObject:value forKey:name];
    }
    return attributesByName;
}
// Returns the value of the attribute named theKey, or nil if this element has
// no such attribute. Builds the full attributes dictionary on each call.
- (NSString *) objectForKey:(NSString *) theKey
{
    NSDictionary *allAttributes = self.attributes;
    return allAttributes[theKey];
}
// Debug description: the description of the backing node dictionary.
// Declared as NSString * to match NSObject's -description (the previous `id`
// return type discarded that type information).
- (NSString *) description
{
    return [node description];
}
// Returns YES if the node has at least one child node, NO otherwise.
// Cheaper than self.children.count because no TFHppleElement is created.
- (BOOL)hasChildren
{
    // Check the element count, not merely that the key is present: an empty
    // child collection must report NO so the answer agrees with children.count.
    NSArray *childNodes = node[TFHppleNodeChildrenKey];
    return childNodes.count > 0;
}
// Returns YES if this element is a real text node.
- (BOOL)isTextNode
{
    // A regular element can also be named "text" (<text>), so the tag name
    // alone is not enough: a real text node must additionally have content.
    BOOL isNamedText = [self.tagName isEqualToString:TFHppleTextNodeName];
    return isNamedText && (self.content != nil);
}
// Returns, in order, every child whose tag name equals tagName
// (compared with isEqualToString:). Returns an empty array if none match.
- (NSArray*) childrenWithTagName:(NSString*)tagName
{
    NSMutableArray *matchingChildren = [NSMutableArray array];
    for (TFHppleElement *candidate in self.children) {
        if (![candidate.tagName isEqualToString:tagName]) {
            continue;
        }
        [matchingChildren addObject:candidate];
    }
    return matchingChildren;
}
// Returns the first child whose tag name equals tagName
// (compared with isEqualToString:), or nil if no child matches.
- (TFHppleElement *) firstChildWithTagName:(NSString*)tagName
{
    NSArray *candidates = self.children;
    // indexOfObjectPassingTest: reports the lowest matching index, i.e. the
    // first match in child order.
    NSUInteger matchIndex = [candidates indexOfObjectPassingTest:^BOOL(id candidate, NSUInteger index, BOOL *stop) {
        return [[(TFHppleElement *)candidate tagName] isEqualToString:tagName];
    }];
    if (matchIndex == NSNotFound) {
        return nil;
    }
    return [candidates objectAtIndex:matchIndex];
}
// Returns, in order, every child whose "class" attribute equals className.
// The whole attribute value is compared with isEqualToString:, so a child
// with class="a b" does not match "a". Returns an empty array if none match.
- (NSArray*) childrenWithClassName:(NSString*)className
{
    NSMutableArray *matchingChildren = [NSMutableArray array];
    for (TFHppleElement *candidate in self.children) {
        NSString *candidateClass = [candidate objectForKey:@"class"];
        if (![candidateClass isEqualToString:className]) {
            continue;
        }
        [matchingChildren addObject:candidate];
    }
    return matchingChildren;
}
// Returns the first child whose "class" attribute equals className (whole
// value compared with isEqualToString:), or nil if no child matches.
- (TFHppleElement *) firstChildWithClassName:(NSString*)className
{
    TFHppleElement *match = nil;
    for (TFHppleElement *candidate in self.children) {
        NSString *candidateClass = [candidate objectForKey:@"class"];
        if ([candidateClass isEqualToString:className]) {
            match = candidate;
            break;
        }
    }
    return match;
}
// Returns the first child that is a real text node (see isTextNode).
// When no child qualifies, falls back to the first child whose tag name is
// "text" (which may have no content); returns nil if there is none either.
- (TFHppleElement *) firstTextChild
{
    for (TFHppleElement *candidate in self.children) {
        if (candidate.isTextNode) {
            return candidate;
        }
    }

    // No real text node found: settle for any child named "text".
    TFHppleElement *fallback = [self firstChildWithTagName:TFHppleTextNodeName];
    return fallback;
}
// Returns the content of the element returned by firstTextChild, or nil when
// there is no such child.
- (NSString *) text
{
    TFHppleElement *textChild = [self firstTextChild];
    return [textChild content];
}
// Returns all elements matching xPathOrCSS within this element.
// The query runs against this element's raw string, encoded as UTF-8, through
// the XML or the HTML query function depending on isXML; the stored encoding
// is passed along to that function. Every result node is wrapped in a new
// TFHppleElement (its parent is not set). Returns an empty array when the
// query produces no nodes.
- (NSArray *) searchWithXPathQuery:(NSString *)xPathOrCSS
{
    NSData *rawData = [self.raw dataUsingEncoding:NSUTF8StringEncoding];
    NSArray *resultNodes = isXML
        ? PerformXMLXPathQueryWithEncoding(rawData, xPathOrCSS, encoding)
        : PerformHTMLXPathQueryWithEncoding(rawData, xPathOrCSS, encoding);

    NSMutableArray *foundElements = [NSMutableArray array];
    for (id resultNode in resultNodes) {
        TFHppleElement *element = [TFHppleElement hppleElementWithNode:resultNode
                                                                 isXML:isXML
                                                          withEncoding:encoding];
        [foundElements addObject:element];
    }
    return foundElements;
}
// Subscript support (element[@"href"]): returns the attribute value stored
// for key, or nil if this element has no such attribute.
- (id)objectAtKeyedSubscript:(id <NSCopying>)key
{
    return self.attributes[key];
}
@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment