Last active
December 21, 2015 02:29
-
-
Save etsai/2809adda359d0f5da6b7 to your computer and use it in GitHub Desktop.
Solution for Scraping HN 1: Building Objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Solution for Challenge: Scraping HN 1: Building Objects. Started 2013-08-14T20:47:25+00:00
require 'rubygems'
require 'nokogiri'
require 'open-uri'
# Scrapes one HN item page and exposes the post's metadata together with one
# Comment object per scraped comment.
#
# Every query method returns an Array, because each one maps over all nodes
# matching its CSS selector rather than picking a single node.
class Post
  attr_reader :comments, :comment_objects

  # Fetches and parses the page, then builds the comment objects.
  #
  # @param url [String] URL of the item page (a local file path also works,
  #   since URI.open falls back to Kernel#open for non-URL strings)
  def initialize(url)
    # URI.open replaces the Kernel#open hook that open-uri removed in
    # Ruby 3.0; the block form closes the handle once parsing is done.
    @page = URI.open(url) { |io| Nokogiri::HTML(io) }
    get_comments
    get_comment_id
    get_users
    create_comment_objects
  end

  # @return [Array<String>] text of each first-child link inside `.title`
  def title
    texts('.title > a:first-child')
  end

  # @return [Array<String>] href of each first-child link inside `.title`
  def url
    hrefs('.title > a:first-child')
  end

  # @return [Array<String>] text of each first-child span inside `.subtext`
  def points
    texts('.subtext > span:first-child')
  end

  # @return [Array<String>] href of each third-child link inside `.subtext`
  #   (presumably the item permalink -- confirm against the page markup)
  def item_id
    hrefs('.subtext > a:nth-child(3)')
  end

  # Extracts the comment texts once and caches them in @comments.
  #
  # FIXME (carried over from the original author): a comment's <p>
  # paragraphs should stay together; the current selector breaks the text
  # up at <p>.
  #
  # @return [Array<String>] the cached comment texts
  def get_comments
    @comments ||= texts('.comment > font:first-child')
  end

  # @return [Array<String>] text of each first-child link inside `.comhead`
  def get_users
    @commented_users = texts('.comhead > a:first-child')
  end

  # @return [Array<String>] href of each second-child link inside `.comhead`
  def get_comment_id
    @all_comment_id = hrefs('.comhead > a:nth-child(2)')
  end

  # Builds one Comment per comment text, pairing the i-th text with the i-th
  # user and the i-th link. zip pads the shorter lists with nil, so uneven
  # lists yield Comments with nil fields instead of raising.
  #
  # @return [Array<Comment>]
  def create_comment_objects
    @comment_objects = @comments.zip(@commented_users, @all_comment_id).map do |text, user, id|
      Comment.new(text, user, id)
    end
  end

  # Appends a manually constructed comment to the scraped ones.
  #
  # @return [Array<Comment>] the updated list of comment objects
  def add_comment(comment, user, id)
    @comment_objects << Comment.new(comment, user, id)
  end

  private

  # Inner text of every node matching +selector+.
  def texts(selector)
    @page.search(selector).map { |node| node.inner_text }
  end

  # `href` attribute of every node matching +selector+.
  def hrefs(selector)
    @page.search(selector).map { |node| node['href'] }
  end
end
# Read-only record for a single comment: its text, its author, and its
# identifier. Values are stored exactly as given; Post passes the scraped
# comment text, user name, and comment link href.
class Comment
  attr_reader :comment, :user, :id

  # @param comment [Object] the comment text
  # @param user [Object] the commenting user's name
  # @param id [Object] identifier of the comment (Post passes the link href)
  def initialize(comment, user, id)
    @comment = comment
    @user = user
    @id = id
  end
end
# Command-line entry point: scrape the page whose URL is the last argument
# and print its title, its link, and the user of the first comment.
# Guarded so that requiring this file does not trigger a scrape.
if __FILE__ == $PROGRAM_NAME
  url = ARGV.last
  # Fail with a usage hint instead of an obscure TypeError from Post.new(nil).
  abort "Usage: ruby #{File.basename(__FILE__)} <item-url>" if url.nil?

  post = Post.new(url)
  p post.title
  p post.url
  # p post.add_comment("Hello", "Elaine", "123")
  # p post.points
  # p post.item_id
  # p post.comments
  # p post.add_comment("I like cats.", "Elaine", "124")
  # p post.comments

  # A page without comments has no first comment; print nil rather than
  # raising NoMethodError on nil.
  first_comment = post.comment_objects.first
  p(first_comment && first_comment.user)
end
# p post.comment_objects.last.inspect | |
# page = Nokogiri::HTML(open('post.html')) | |
# puts page.class | |
# def extract_usernames(doc) | |
# doc.search('.comhead > a:first-child').map do |element| | |
# element.inner_text | |
# end | |
# end | |
# def extract_post_points(doc) | |
# doc.search('.subtext > span:first-child').map { |span| span.inner_text } | |
# end | |
# def extract_post_id(doc) | |
# doc.search('.subtext > a:nth-child(3)').map { |link| link['href'] } | |
# end | |
# def extract_title(doc) | |
# doc.search('.title > a:first-child').map { |link| link.inner_text } | |
# end | |
# def extract_title_link(doc) | |
# doc.search('.title > a:first-child').map { |link| link['href'] } | |
# end | |
# def extract_comments(doc) | |
# doc.search('.comment > font:first-child').map { |font| font.inner_text } | |
# end | |
# def extract_comment_direct_link(doc) | |
# doc.search('.comhead > a:nth-child(2)').map { |link| link['href'] } | |
# end | |
# p extract_usernames(page) | |
# p extract_post_points(page) | |
# p extract_post_id(page) | |
# p extract_title(page) | |
# p extract_article_link(page) | |
# p extract_title_comment_font(page) | |
# p extract_comment_direct_link(page) | |
# The returned data structure is an array.
# Yes, you can call Ruby methods on the returned data structure.
# links = page.css("table").css("tr").css("td").css("img") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment