Skip to content

Instantly share code, notes, and snippets.

@sumoward
Last active October 26, 2016 12:30
Show Gist options
  • Save sumoward/469e655cc70dbf82b7ecebafc78a3a22 to your computer and use it in GitHub Desktop.
Save sumoward/469e655cc70dbf82b7ecebafc78a3a22 to your computer and use it in GitHub Desktop.
require 'open-uri'
require 'nokogiri'
require 'mechanize'
require 'csv'
# Scrapes one Aircraft Bluebook value table and appends it to a CSV file.
#
# Logs in through the form on the Calculate.do page, loads the URL held in
# row[2], submits the last form on that page with its second radio button
# selected, prints the resulting `table.idp-table` and appends it to
# "<first title cell>.csv".
#
# @param row [Array<String>] record read from aircraft.csv; row[0] and row[1]
#   are written as a heading line ahead of the table and row[2] is the URL of
#   the page to fetch.
# @return [nil]
# @raise [RuntimeError] when the result page has no `table.idp-table` holding
#   at least a title row and a column-name row.
#   NOTE(review): presumably this means a failed login or a bad URL -- confirm.
def scrape(row)
  puts '_________start_________'
  agent = Mechanize.new
  login_page = agent.get('http://www.aircraftbluebook.com/Tools/HVR/Calculate.do')
  # find login form and add credentials
  login_form = login_page.form
  login_form.field_with(name: 'email').value = 'USERNAME'
  login_form.field_with(name: 'password').value = 'PASSWORD'
  # Only the login itself matters here; the page it returns is not used.
  login_form.submit
  # For the moment, use the constructed link from the CSV record instead of
  # clicking through the 'Historical Value Reference' link.
  report_page = agent.get(row[2])
  options_form = report_page.forms.last
  # check the wholesale radio button, it is the second one
  options_form.radiobuttons[1].checked = true
  # set other radiobutton to false
  options_form.radiobuttons[0].checked = false
  result_page = options_form.submit

  title, column_names, data_rows = extract_value_table(result_page.body)
  puts title
  puts column_names
  puts data_rows
  append_table_to_csv("#{title[0]}.csv", [row[0], row[1]], column_names, data_rows)
  puts '_________end_________'
end

# Extracts [title cells, column names, data rows] from the `table.idp-table`
# found in the given HTML string.
def extract_value_table(html)
  table_rows = Nokogiri::HTML(html).css('table.idp-table').css('tr').to_a
  if table_rows.length < 2
    raise "expected table.idp-table with a title row and a column-name row, found #{table_rows.length} row(s)"
  end

  # The first <tr> carries the table title and the second the column names
  # (both as <th> cells); every remaining <tr> holds <td> values.
  title_row, names_row, *value_rows = table_rows
  [
    title_row.css('th').map(&:text),
    names_row.css('th').map(&:text),
    value_rows.map { |tr| tr.css('td').map(&:text) }
  ]
end

# Appends a heading line, the column names and the data rows to the CSV file
# at path (opened in append mode, so repeated runs accumulate).
def append_table_to_csv(path, heading, column_names, data_rows)
  CSV.open(path, 'a') do |csv|
    csv << heading
    csv << column_names
    data_rows.each { |values| csv << values }
  end
end
# def plane
# product = "HVR"
# type_id = 7
# type = "Business+Jet"
# make_id = 12
# make = "Bombardier"
# model_id = 741
# model = "BOMBARDIER+GLOBAL+6000"
# model_instance_id = 6512
# model_instance = 'Global%206000'
# "http://www.aircraftbluebook.com/Navigation.do?product=#{product}&typeId=#{type_id}&type=#{type}&makeId=#{make_id}&make=#{make}&modelId=#{model_id}&model=#{model}&modelInstanceId=#{model_instance_id}&modelInstance=#{model_instance}"
# 'http://www.aircraftbluebook.com/Navigation.do?product=HVR&typeId=7&type=Business+Jet&makeId=12&make=Bombardier&modelId=741&model=BOMBARDIER+GLOBAL+6000&modelInstanceId=6512&modelInstance=Global%206000'
#
# end
# Walks through aircraft.csv one record at a time: prints the record, hands it
# to scrape, then waits five seconds before moving on to the next one.
# Prints 'ALL DONE' once every record has been processed.
def do_all
  aircraft_records = CSV.foreach('aircraft.csv')
  aircraft_records.each do |aircraft|
    puts aircraft
    scrape(aircraft)
    sleep 5
  end
  puts 'ALL DONE'
end
do_all
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment