Skip to content

Instantly share code, notes, and snippets.

@msepcot
Last active November 4, 2015 19:43
Show Gist options
  • Save msepcot/e403e0866beff0863f49 to your computer and use it in GitHub Desktop.
Save msepcot/e403e0866beff0863f49 to your computer and use it in GitHub Desktop.
Script to add GreatSchools scores to Craigslist listings.
#!/usr/bin/env ruby
require 'active_support/all'
require 'gmail'
require 'great_schools'
require 'nokogiri'
require 'open-uri'
require 'sqlite3'
def db
@db ||= begin
db = SQLite3::Database.open('craigslist.db')
db.execute_batch <<-SQL.squish
CREATE TABLE IF NOT EXISTS houses(
id INTEGER, title TEXT, date TEXT, href TEXT, price INTEGER, sqft INTEGER, preview TEXT,
location TEXT, latitude REAL, longitude REAL, images INTEGER, checked INTEGER, rank REAL
);
CREATE TABLE IF NOT EXISTS schools(
id INTEGER, name TEXT, href TEXT, type TEXT, grade_range TEXT,
latitude REAL, longitude REAL, district_id INTEGER, score TEXT
);
CREATE TABLE IF NOT EXISTS neighborhood_schools(house_id INTEGER, school_id INTEGER);
SQL
db
end
end
def create_home(id, data = {})
unless row = db.get_first_row('SELECT * FROM houses WHERE id = ?', id)
db.execute(
'INSERT INTO houses(id, title, date, href, price, preview, sqft, location, latitude, longitude, images, checked) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
id, data[:title], data[:date], data[:href], data[:price], data[:preview], data[:sqft], data[:location], data[:latitude], data[:longitude], data[:images], 0
)
end
end
def create_school(id, data = {})
unless row = db.get_first_row('SELECT * FROM schools WHERE id = ?', id)
db.execute(
'INSERT INTO schools(id, name, href, type, grade_range, latitude, longitude, district_id) VALUES(?, ?, ?, ?, ?, ?, ?, ?)',
id, data[:name], data[:href], data[:type], data[:grade_range], data[:latitude], data[:longitude], data[:district_id]
)
end
end
def create_mapping(house_id, school_id)
db.execute('INSERT INTO neighborhood_schools(house_id, school_id) VALUES(?, ?)', house_id, school_id)
end
# = Find Homes For Rent
yesterday = Date.today - 1
debug = []
[ 'http://sfbay.craigslist.org/search/apa?s=000&bedrooms=2&housing_type=4&housing_type=6&housing_type=9&maxAsk=3100&minAsk=2000&minSqft=1000',
'http://sfbay.craigslist.org/search/apa?s=100&bedrooms=2&housing_type=4&housing_type=6&housing_type=9&maxAsk=3100&minAsk=2000&minSqft=1000',
'http://sfbay.craigslist.org/search/apa?s=200&bedrooms=2&housing_type=4&housing_type=6&housing_type=9&maxAsk=3100&minAsk=2000&minSqft=1000'
].each do |uri|
Nokogiri::HTML(open(uri)).css('p.row').each do |node|
# Filter to San Francisco, Peninsula, and the South Bay; Must be able to map the location
href = "http://sfbay.craigslist.org#{node.at_css('.pl a')['href']}"
next unless %w(sfc pen).include?(href[28..30]) # %w(sfc pen sby)
location = Nokogiri::HTML(open(href)).at_css('#map')
next unless location && location['data-latitude'].present?
# only looking for listings going back to yesterday (3 pages max)
date = Date.parse(node.at_css('time')['datetime'])
break if date.eql?(yesterday - 1)
preview = node.at_css('a.i')['data-ids'].try(:split, ',').try(:first).try(:[], 2..-1)
data = {
title: node.at_css('.pl a').text.strip,
date: date.to_s,
href: href,
price: node.at_css('.price').try(:text).try(:[], /\$(\d+)/, 1),
sqft: node.at_css('.l2').text[/- (\d+).+ -/, 1],
location: (node.at_css('.l2 .pnr small').try(:text) || '').strip[1..-2].try(:strip),
latitude: location['data-latitude'],
longitude: location['data-longitude'],
images: (!!(node.at_css('.l2 .pnr .px').text =~ /img|pic/) ? 1 : 0),
preview: (preview ? "http://images.craigslist.org/#{preview}_300x300.jpg" : nil),
}
debug << data
begin
create_home(node['data-pid'].to_i, data)
rescue => e
puts e.inspect, data.inspect
end
end
end
# = Find Schools Around The Homes
GreatSchools::API.key = 'APIKEY'
db.execute('SELECT id, latitude, longitude FROM houses WHERE checked = 0').each do |id, latitude, longitude|
GreatSchools::School.nearby('CA', latitude: latitude, longitude: longitude, limit: 10).each do |school|
data = {
name: school.name,
href: school.overview_link,
type: school.type,
grade_range: school.grade_range,
latitude: school.latitude,
longitude: school.longitude,
district_id: school.district_id
}
create_school(school.id.to_i, data)
create_mapping(id, school.id)
db.execute('UPDATE houses SET checked = 1 WHERE id = ?', id)
end
end
db.execute('SELECT id FROM schools WHERE type = "public" AND score IS NULL').flatten.each do |id|
score = GreatSchools::Score.for_school('CA', id)
db.execute('UPDATE schools SET score = ? WHERE id = ?', score.rank.score.to_i, id)
end
db.execute('SELECT id FROM houses WHERE date >= ?', yesterday.to_s).flatten.each do |id|
scores = db.execute('SELECT schools.score FROM schools INNER JOIN neighborhood_schools n ON n.school_id = schools.id WHERE n.house_id = ? AND schools.type = "public"', id).flatten.map(&:to_i)
db.execute('UPDATE houses SET rank = ? WHERE id = ?', (scores.reduce(:+).to_f / scores.size), id)
end
# = Build HTML Email
homes = db.execute('SELECT * FROM houses WHERE date >= ? ORDER BY houses.rank DESC', yesterday.to_s)
html = "<html>\n<head>\n <title>Craigslist - GreatSchools</title>\n</head>\n<body>\n <h3>#{homes.size} Home#{'s' unless homes.size == 1 } Matching Criteria: < $3100, House/Duplex/Townhouse, 2+ BR, 1000+ sqft in the Peninsula</h3>\n"
homes.each do |house|
html << " <h4>$#{house[4]}"
html << "/#{house[5]}sqft" if house[5]
html << " - <a href=\"#{house[3]}\">#{house[1]}</a> (#{house[7]})</h4>\n"
html << " <img src=\"#{house[6]}\" height=\"200\" alt=\"\" /><br />\n" if house[6]
html << " <ul>\n"
db.execute('SELECT schools.* FROM schools INNER JOIN neighborhood_schools n ON n.school_id = schools.id WHERE n.house_id = ?', house[0]).each do |school|
html << " <li>(#{school[8] || school[3]}) <a href=\"#{school[2]}\">#{school[1]}</a></li>\n"
end
html << " </ul>\n"
end and nil
html << "</body>\n</html>"
# = Send Email
Gmail.new('EMAIL', 'PASSWORD') do |gmail|
gmail.deliver do
to 'EMAIL'
subject "Craigslist Summary for #{yesterday.to_s}"
html_part do
content_type 'text/html; charset=UTF-8'
body(html)
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment