Last active
December 18, 2015 14:28
-
-
Save mmackh/5797253 to your computer and use it in GitHub Desktop.
Parse terrible news.ycombinator.com HTML in Obj-C with Hpple
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Downloads the Hacker News item page for a story and scrapes its comments
 * out of the HTML with Hpple.
 *
 * Every row of the comment table is re-parsed on its own and queried for the
 * comment body, indentation spacer, author link, timestamp span and reply
 * link. Rows without a comment body or without a reply link are skipped.
 *
 * @param storyID          The Hacker News item ID, appended to the item URL.
 * @param completionBlock  Called with an array of MAMHNComment objects on
 *                         success (possibly empty), or with nil when the
 *                         story ID is empty, the request fails, or the page
 *                         does not contain the expected comment table.
 *                         May be nil.
 */
- (void)loadCommentsOnStoryWithID:(NSString *)storyID result:(void(^)(NSArray *results))completionBlock
{
    // Without an ID the URL would be built as "item?id=(null)"; report "no results" instead.
    if (!storyID.length)
    {
        if (completionBlock) completionBlock(nil);
        return;
    }

    NSString *queryURLString = [NSString stringWithFormat:@"https://news.ycombinator.com/item?id=%@", storyID];
    NSURLRequest *request = [NSURLRequest requestWithURL:[NSURL URLWithString:queryURLString]
                                             cachePolicy:NSURLRequestReturnCacheDataElseLoad
                                         timeoutInterval:10];
    AFHTTPRequestOperation *operation = [[AFHTTPRequestOperation alloc] initWithRequest:request];
    [operation setCompletionBlockWithSuccess:^(AFHTTPRequestOperation *operation, id responseObject)
    {
        TFHpple *doc = [[TFHpple alloc] initWithHTMLData:responseObject];
        // Absolute path to the table that holds the comment rows; this is tied
        // to the page layout and yields nothing if the markup changes.
        NSArray *elements = [doc searchWithXPathQuery:@"//html/body/center/table/tr[3]/td/table[2]"];
        if (!elements.count)
        {
            if (completionBlock) completionBlock(nil);
            return;
        }

        NSMutableArray *comments = [NSMutableArray new];
        NSArray *rawElements = [elements.firstObject children];
        for (TFHppleElement *element in rawElements)
        {
            // Re-parse the row by itself so the relative queries below only match inside it.
            TFHpple *comment = [[TFHpple alloc] initWithHTMLData:[[element raw] dataUsingEncoding:NSUTF8StringEncoding]];

            // A row must have both a comment body and a reply link to be kept.
            NSArray *rawCommentQuery = [comment searchWithXPathQuery:@"//span/font"];
            if (!rawCommentQuery.count) continue;
            NSArray *replyIDQuery = [comment searchWithXPathQuery:@"//font/u/a"];
            if (!replyIDQuery.count) continue;

            NSString *commentHTML = [rawCommentQuery.firstObject raw];
            NSString *replyID = [replyIDQuery.firstObject objectForKey:@"href"];

            // The remaining fields are optional: firstObject returns nil for an
            // empty result and messaging nil yields nil/0, so a missing node no
            // longer raises NSRangeException the way objectAtIndex:0 did.

            // Nesting depth is the spacer image's width attribute divided by 40
            // (presumably 40px of indent per reply level).
            NSArray *indentationLevelQuery = [comment searchWithXPathQuery:@"//td/img"];
            int indentationLevel = [[indentationLevelQuery.firstObject objectForKey:@"width"] intValue] / 40;

            NSArray *usernameQuery = [comment searchWithXPathQuery:@"//div/span/a"];
            NSString *username = [usernameQuery.firstObject text];

            NSArray *timeQuery = [comment searchWithXPathQuery:@"//td/div/span"];
            // Strip the " | " separator that is part of the span's text.
            NSString *time = [[timeQuery.firstObject text] stringByReplacingOccurrencesOfString:@" | " withString:@""];

            MAMHNComment *newComment = [[MAMHNComment alloc] init];
            [newComment setComment:commentHTML];
            [newComment setTime:time];
            [newComment setUsername:username];
            [newComment setReplyID:replyID];
            [newComment setIndentationLevel:indentationLevel];
            [comments addObject:newComment];
        }

        if (completionBlock) completionBlock(comments);
    }
    failure:^(AFHTTPRequestOperation *operation, NSError *error)
    {
        NSLog(@"%@",error.description);
        // Always notify the caller; otherwise a failed load never completes.
        if (completionBlock) completionBlock(nil);
    }];
    [operation start];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment