Skip to content

Instantly share code, notes, and snippets.

@ukutaht
Created September 25, 2013 20:47
Show Gist options
  • Save ukutaht/6705808 to your computer and use it in GitHub Desktop.
Save ukutaht/6705808 to your computer and use it in GitHub Desktop.
Command-line tool for scraping Hacker News.
require 'nokogiri'
require_relative 'utils'
require 'open-uri'
# Extracts a post and its comments from a saved or downloaded Hacker News
# item page, using the CSS selectors that page's markup exposes.
class Scraper
  # Matches relative timestamps such as "3 hours ago".
  AGE_PATTERN = /\d+ \w+ ago/

  # Parses the HTML document that the other methods query.
  #
  # path - a filesystem path, an object responding to #to_path (File,
  #        Tempfile), or an in-memory IO-like object (e.g. the StringIO that
  #        open-uri hands back for small responses).
  def initialize(path)
    @doc = if path.respond_to?(:read) && !path.respond_to?(:to_path)
             # In-memory stream: there is nothing on disk to reopen, so read
             # it directly, rewinding first in case it was already consumed.
             path.rewind if path.respond_to?(:rewind)
             Nokogiri::HTML(path)
           else
             # Block form closes the file handle as soon as parsing is done.
             File.open(path) { |file| Nokogiri::HTML(file) }
           end
  end

  # Builds a Post from the page title and the ".subtext" line.
  #
  # Returns a Post. :user is nil when no author link is present, :body is ""
  # when the page has no third table, and :created_at is nil when no
  # "N units ago" text is found.
  def find_post
    body_table = @doc.css('table')[2]

    Post.new(
      title: @doc.css('title').text,
      user: text_of(@doc.css('.subtext a').first),
      points: @doc.css('.subtext span').text,
      body: body_table ? body_table.css('p').text : '',
      created_at: @doc.css('.subtext').text[AGE_PATTERN]
    )
  end

  # Builds one Comment per "td.default" cell, in document order.
  #
  # Returns an Array of Comment. A comment without an author link gets a nil
  # :user instead of raising.
  def find_comments
    @doc.css('td.default').map do |comment|
      Comment.new(
        created_at: comment.css('span.comhead').text[AGE_PATTERN],
        user: text_of(comment.css('span.comhead a').first),
        body: comment.css('span.comment').text
      )
    end
  end

  private

  # Returns the node's text, or nil when the selector matched nothing.
  # Written without `&.` so the file keeps running on older Rubies.
  def text_of(node)
    node && node.text
  end
end
# Command-line entry point: scrape the Hacker News page named by the first
# argument (a local HTML file path or an http/https/ftp URL) and print a
# short summary of the post and its first comment.
source = ARGV[0]
abort "Usage: ruby #{$PROGRAM_NAME} <path-or-url>" if source.nil? || source.empty?

# Kernel#open stopped fetching URLs in Ruby 3.0 and runs a shell command for
# input starting with "|". URLs therefore go through open-uri's URI#open, and
# anything else is handed to Scraper as a plain path for it to open itself.
file = source =~ %r{\A(?:https?|ftp)://}i ? URI.parse(source).open : source

# One Scraper is enough: both finders only read the already-parsed document.
scraper = Scraper.new(file)
post = scraper.find_post
post.comments = scraper.find_comments

puts "USER: #{post.user}"
puts "POST: #{post.body}" unless post.body.to_s.empty?
puts "POINTS: #{post.points}"
puts "POSTED: #{post.created_at}"
puts "-----------------------------------------------------------------------------------------------------------------------"

# A post may have no comments yet; skip the line instead of crashing on nil.
first_comment = post.comments.first
puts "1st COMMENT: #{first_comment.body}" if first_comment
# puts "User: #{post.comments.first.user}"
# puts "Posted: #{post.comments.first.created_at}"
# puts "Comment:
# #{post.comments.first.body}"
# A scraped post: read-only metadata plus a replaceable list of comments.
class Post
  # Keys copied from the params into same-named instance variables, in order.
  FIELDS = [:title, :points, :user, :body, :created_at].freeze
  private_constant :FIELDS

  attr_reader :title, :points, :user, :body, :created_at
  attr_accessor :comments

  # post_params - an object indexed by the symbols in FIELDS (typically a
  #               Hash); a missing key leaves the attribute as whatever
  #               post_params[key] returns (nil for a plain Hash).
  def initialize(post_params)
    FIELDS.each do |field|
      instance_variable_set("@#{field}", post_params[field])
    end
    # Every post starts with no comments; callers assign them afterwards.
    @comments = []
  end
end
# A scraped comment: author, relative timestamp and body text, all read-only.
class Comment
  # Keys copied from the params into same-named instance variables, in order.
  FIELDS = [:user, :created_at, :body].freeze
  private_constant :FIELDS

  attr_reader :body, :created_at, :user

  # comment_params - an object indexed by the symbols in FIELDS (typically a
  #                  Hash); a missing key leaves the attribute as whatever
  #                  comment_params[key] returns (nil for a plain Hash).
  def initialize(comment_params)
    FIELDS.each do |field|
      instance_variable_set("@#{field}", comment_params[field])
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment