Skip to content

Instantly share code, notes, and snippets.

@etsai
Last active December 21, 2015 02:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save etsai/2809adda359d0f5da6b7 to your computer and use it in GitHub Desktop.
Save etsai/2809adda359d0f5da6b7 to your computer and use it in GitHub Desktop.
Solution for Scraping HN 1: Building Objects
# Solution for Challenge: Scraping HN 1: Building Objects. Started 2013-08-14T20:47:25+00:00
require 'nokogiri'
require 'rubygems'
require 'open-uri'
# Scrapes a single post page (the gist this comes from targets a Hacker News
# item page) and exposes the title, link, points, item id and comments found
# on it.
#
# Every extraction method runs a CSS selector against the parsed document and
# returns an Array with one entry per matching node (possibly empty).
class Post
  # Matches "scheme://..." locations, which are fetched through open-uri.
  # Anything else is treated as a path to a local file.
  REMOTE_LOCATION = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

  attr_reader :comments, :comment_objects

  # Parses the page and eagerly builds the comment objects.
  #
  # @param url [String] a URL (e.g. "https://...") or a path to a local HTML file
  # @raise [Errno::ENOENT] when +url+ is a local path that does not exist
  # @raise [TypeError] when +url+ is nil
  def initialize(url)
    # The block form closes the underlying handle as soon as parsing is done.
    @page = open_source(url) { |io| Nokogiri::HTML(io) }
    get_comments
    get_comment_id
    get_users
    create_comment_objects
  end

  # @return [Array<String>] text of every node matching '.title > a:first-child'
  def title
    texts('.title > a:first-child')
  end

  # @return [Array<String>] href of every node matching '.title > a:first-child'
  def url
    hrefs('.title > a:first-child')
  end

  # @return [Array<String>] text of every node matching '.subtext > span:first-child'
  def points
    texts('.subtext > span:first-child')
  end

  # @return [Array<String>] href of every node matching '.subtext > a:nth-child(3)'
  def item_id
    hrefs('.subtext > a:nth-child(3)')
  end

  # Extracts the comment texts once and memoizes them in @comments.
  #
  # FIXME (noted by the original author): the paragraphs of one comment should
  # be kept together; the current selector reportedly splits a comment at its
  # <p> tags.
  #
  # @return [Array<String>]
  def get_comments
    @comments ||= texts('.comment > font:first-child')
  end

  # Stores the text of every '.comhead > a:first-child' node (the commenters).
  #
  # @return [Array<String>]
  def get_users
    @commented_users = texts('.comhead > a:first-child')
  end

  # Stores the href of every '.comhead > a:nth-child(2)' node.
  #
  # @return [Array<String>]
  def get_comment_id
    @all_comment_id = hrefs('.comhead > a:nth-child(2)')
  end

  # Builds one Comment per scraped comment text, pairing it by position with
  # the scraped user and id. When a list is shorter than the comment list the
  # missing values are nil.
  #
  # @return [Array<Comment>]
  def create_comment_objects
    rows = @comments.zip(@commented_users, @all_comment_id)
    @comment_objects = rows.map { |text, user, id| Comment.new(text, user, id) }
  end

  # Appends a manually supplied comment to the list of comment objects.
  #
  # @return [Array<Comment>] the updated list
  def add_comment(comment, user, id)
    @comment_objects << Comment.new(comment, user, id)
  end

  private

  # Inner text of every node matching +selector+.
  def texts(selector)
    @page.search(selector).map { |node| node.inner_text }
  end

  # 'href' attribute of every node matching +selector+.
  def hrefs(selector)
    @page.search(selector).map { |node| node['href'] }
  end

  # Opens +location+ and yields the IO to the block, closing it afterwards.
  #
  # Kernel#open is deliberately avoided: it would run "| command" strings as
  # a subprocess, and since Ruby 3.0 it no longer fetches URLs through open-uri.
  def open_source(location, &block)
    if location.to_s =~ REMOTE_LOCATION
      uri = URI.parse(location.to_s)
      # Only schemes that open-uri supports respond to #open publicly.
      return uri.open(&block) if uri.respond_to?(:open)
    end
    File.open(location, &block)
  end
end
# Read-only record for one comment: the comment text, the user it is
# attributed to, and an identifier, exactly as passed to the constructor.
class Comment
  attr_reader :comment
  attr_reader :user
  attr_reader :id

  # @param comment [Object] the comment body
  # @param user [Object] the commenter
  # @param id [Object] the identifier stored for this comment
  def initialize(comment, user, id)
    @comment, @user, @id = comment, user, id
  end
end
# Command-line entry point: scrape the page named by the LAST command-line
# argument and print its title(s), its link(s) and the user of the first
# comment. Guarded so that requiring this file does not trigger a scrape.
if __FILE__ == $PROGRAM_NAME
  url = ARGV.pop
  # Without an argument there is nothing to open; fail with a usage hint
  # instead of an opaque TypeError from deep inside the loader.
  abort "Usage: ruby #{File.basename(__FILE__)} <post-url-or-html-file>" unless url

  post = Post.new(url)
  p post.title
  p post.url
  # p post.add_comment("Hello", "Elaine", "123")
  # p post.points
  # p post.item_id
  # p post.comments
  # p post.add_comment("I like cats.")
  # p post.comments

  # A page without comments yields an empty list; print nil rather than
  # raising NoMethodError on a missing first element.
  first_comment = post.comment_objects.first
  p(first_comment && first_comment.user)
end
# p post.comment_objects.last.inspect
# page = Nokogiri::HTML(open('post.html'))
# puts page.class
# def extract_usernames(doc)
# doc.search('.comhead > a:first-child').map do |element|
# element.inner_text
# end
# end
# def extract_post_points(doc)
# doc.search('.subtext > span:first-child').map { |span| span.inner_text }
# end
# def extract_post_id(doc)
# doc.search('.subtext > a:nth-child(3)').map { |link| link['href'] }
# end
# def extract_title(doc)
# doc.search('.title > a:first-child').map { |link| link.inner_text }
# end
# def extract_title_link(doc)
# doc.search('.title > a:first-child').map { |link| link['href'] }
# end
# def extract_comments(doc)
# doc.search('.comment > font:first-child').map { |font| font.inner_text }
# end
# def extract_comment_direct_link(doc)
# doc.search('.comhead > a:nth-child(2)').map { |link| link['href'] }
# end
# p extract_usernames(page)
# p extract_post_points(page)
# p extract_post_id(page)
# p extract_title(page)
# p extract_title_link(page)
# p extract_comments(page)
# p extract_comment_direct_link(page)
# The data structure returned by `search` behaves like an array.
# Yes, you can call Ruby methods (e.g. `map`) on the returned data structure.
# links = page.css("table").css("tr").css("td").css("img")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment