Skip to content

Instantly share code, notes, and snippets.

@flakyfilibuster
Created November 7, 2012 18:34
Show Gist options
  • Save flakyfilibuster/4033460 to your computer and use it in GitHub Desktop.
Save flakyfilibuster/4033460 to your computer and use it in GitHub Desktop.
grouphug_nokogiri
require 'rubygems'
require 'nokogiri'
require 'open-uri'
# Scrapes confessions and their ids out of a grouphug.us page.
#
# The page is downloaded and parsed once, in the constructor. The scrape_*
# methods then append what they find to the public +confessions+ and +ids+
# arrays, and +nice_print+ writes the paired results to standard output.
class Grouphug_scraper
  attr_accessor :confessions, :ids, :title

  # Downloads +url+ and parses it into a Nokogiri document.
  #
  # @param url [String] address of the page to scrape
  def initialize(url)
    @confessions = []
    @ids = []
    @url = url
    @title = ""
    # URI.open rather than Kernel#open: open-uri stopped handling URLs through
    # Kernel#open in Ruby 3.0. The block form closes the stream once parsed.
    @doc = URI.open(@url) { |page| Nokogiri::HTML(page) }
  end

  # Stores the text of the page's <title> element in +title+.
  #
  # @return [String] the title text, or "" when the page has no <title>
  def titlizer
    title_node = @doc.at_css("title")
    @title = title_node ? title_node.text : ""
  end

  # Appends the text of every "#confessions p" node, with tab characters
  # removed, to +confessions+. Calling it again appends the same entries again.
  def scrape_confessions
    @doc.css("#confessions p").each do |confession|
      @confessions << confession.text.delete("\t")
    end
  end

  # Appends the text of every ".conf-id a" node to +ids+.
  def scrape_ids
    @doc.css(".conf-id a").each do |id|
      @ids << id.text
    end
  end

  # Prints one "id : confession" line per scraped id, pairing ids and
  # confessions by position.
  def nice_print
    ids.zip(confessions).each do |id, confession|
      puts "#{id} : #{confession}\n"
    end
  end
end
# Driver: scrape the archived grouphug.us front page and print every
# confession next to its id.
archive_url = 'http://web.archive.org/web/20071025014638/http://grouphug.us/'
grouphug = Grouphug_scraper.new(archive_url)
grouphug.scrape_confessions
grouphug.scrape_ids
grouphug.nice_print
require './nokogiri_grouphug'
require 'nokogiri'
require 'open-uri'
require 'fakeweb'
# Canned response for the archive URL so the examples below are served by
# FakeWeb. The body carries a <title> so the titlizer example can check a real
# value.
archive_url = 'http://web.archive.org/web/20071025014638/http://grouphug.us/'
page_title = 'group hug // anonymous online confessions'
FakeWeb.register_uri(:get, archive_url,
                     :body => "<html><head><title>#{page_title}</title></head><body>Hello World!</body></html>")

describe "nokogiri extracts data from grouphug and spits it in an array" do
  let(:scraper) { Grouphug_scraper.new(archive_url) }

  context "nokogiri instanciates correctly" do
    it "initiates with an emtpy confessions array" do
      scraper.confessions.should eql([])
    end

    it "initiates with an empty ids array" do
      scraper.ids.should == []
    end

    it "puts the 'url' in the instance variable @url" do
      # @url has no reader, so inspect the instance variable directly instead of
      # stubbing a method the class does not define.
      scraper.instance_variable_get(:@url).should eql(archive_url)
    end

    it "initializes with an empty title" do
      scraper.title.should be_empty
    end

    it "assigns a nokogiri object to 'doc'" do
      # Nokogiri itself is a module; the parsed page is an HTML document.
      scraper.instance_variable_get(:@doc).should be_a_kind_of(Nokogiri::HTML::Document)
    end
  end

  context "nokogiri titlizer returns correct title" do
    it "assigns to the instance variable 'title' the correct title" do
      # Exercise the real method; stubbing titlizer would only test the stub.
      scraper.titlizer
      scraper.title.should eql(page_title)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment