Skip to content

Instantly share code, notes, and snippets.

@irmiller22
Created February 16, 2014 22:37
Show Gist options
  • Save irmiller22/9041647 to your computer and use it in GitHub Desktop.
Save irmiller22/9041647 to your computer and use it in GitHub Desktop.
Flatiron Scraper
require 'nokogiri'
require 'open-uri'
require 'pry'
# Scrapes a student index page and every student profile page linked from it,
# building one hash per student:
#
#   :href   - link to the profile page as found on the index (nil when the
#             index entry contains no link)
#   :name   - display name taken from the index entry
#   :social - { :twitter, :github, :linkedin } profile links; each value is
#             nil when the profile page has no matching icon. Only present
#             once the student's profile page has been scraped.
#
# Creating a Scraper fetches the index page over the network. HTTP and network
# errors raised while fetching are not rescued here and reach the caller.
class Scraper
  # Index page used when no URL is passed to the constructor.
  DEFAULT_INDEX_URL = "http://students.flatironschool.com/".freeze

  # Students whose name is hard-coded instead of being read from the index
  # markup, keyed by their href.
  NAME_OVERRIDES = {
    "students/scottluptowski.html" => "Scott Luptowski"
  }.freeze

  # @param index_url [String] URL of the index page. Student hrefs are
  #   appended to it verbatim, so it should end with "/" when the hrefs are
  #   relative paths like "students/name.html".
  def initialize(index_url = DEFAULT_INDEX_URL)
    @student_data = []
    @index_url = index_url
    @index = fetch_page(@index_url)
  end

  # Runs the complete scrape: index entries first, then each profile page.
  #
  # @return [Array<Hash>] the collected student hashes
  def call
    scrape_index_data
    scrape_student_data
    all
  end

  # @return [Array<Hash>] the student hashes collected so far
  def all
    @student_data
  end

  # Fills in :href and :name for every "li.home-blog-post" entry of the index
  # page. Entries are stored by position, so running this again updates the
  # existing hashes instead of appending duplicates.
  def scrape_index_data
    @index.css("li.home-blog-post").each_with_index do |element, index|
      student = (@student_data[index] ||= {})
      # An entry without a link yields a nil href instead of raising.
      anchor = element.css("a").first
      href = anchor && anchor.attr("href")
      student[:href] = href
      student[:name] = NAME_OVERRIDES.fetch(href) { element.css("h3 a").text }
    end
  end

  # Fetches the profile page of every student that has an href and records
  # the social links found on it. Students without an href are skipped, since
  # there is no profile page to request for them.
  def scrape_student_data
    @student_data.each do |student_hash|
      href = student_hash[:href]
      next if href.nil? || href.empty?

      student_page = fetch_page("#{@index_url}#{href}")
      data_from_student_page(student_hash, student_page)
    end
  end

  # Stores the twitter, github and linkedin links of one profile page under
  # student_hash[:social].
  #
  # @param student_hash [Hash] the student's hash; modified in place
  # @param student_page [#css] parsed profile page
  def data_from_student_page(student_hash, student_page)
    social = (student_hash[:social] ||= {})
    # Blog extraction is commented out in the original script and stays so:
    # student_hash[:social][:blog] = student_page.css(".icon-rss").first.parent.attr("href")
    social[:twitter] = social_link(student_page, ".icon-twitter")
    social[:github] = social_link(student_page, ".icon-github")
    social[:linkedin] = social_link(student_page, ".icon-linkedin-sign")
  end

  private

  # Downloads and parses the document at +url+.
  #
  # Goes through URI#open (added to URI objects by open-uri) instead of
  # Kernel#open: Ruby 3.0 removed open-uri's Kernel#open override, so
  # open("http://...") no longer performs an HTTP request. The block form also
  # closes the downloaded stream once it has been parsed.
  def fetch_page(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Returns the href of the element wrapping the first node matched by
  # +selector+, or nil when the page has no such node.
  def social_link(student_page, selector)
    icon = student_page.css(selector).first
    icon && icon.parent.attr("href")
  end
end
# Run the scrape and open a Pry session only when this file is executed
# directly. Without the guard, loading the file just to use the Scraper class
# would start network requests and stop in the debugger.
if __FILE__ == $PROGRAM_NAME
  a = Scraper.new
  students = a.call
  # `a` (the scraper) and `students` (the scraped hashes) are available for
  # inspection from the Pry prompt.
  binding.pry
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment