Skip to content

Instantly share code, notes, and snippets.

@usmanasif
Created April 9, 2019 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save usmanasif/35f88bc91e6d5bbff8f213834b87f3df to your computer and use it in GitHub Desktop.
Save usmanasif/35f88bc91e6d5bbff8f213834b87f3df to your computer and use it in GitHub Desktop.
Scraper for the Edmunds car-maintenance guide (Rake task)
require "selenium-webdriver"
require 'write_xlsx'
namespace :scraping do
# Scrapes the Edmunds car-maintenance guide into the workbook built by
# `create_worksheet`.
#
# For every year / make / model / trim / engine / transmission combination
# offered by the page's dropdowns, one vehicle row is written to the second
# worksheet. For each value in `mileages` the maintenance form is then
# submitted and every service row of the result page is written to the first
# worksheet, keyed by the vehicle id.
#
# The browser is always quit and the workbook always closed, even when the
# scrape aborts part-way through.
desc "Fetch all the data"
task fetch: :environment do
  guide_url = "https://www.edmunds.com/car-maintenance/guide-page.html"
  years = (1990..2020)
  mileages = [5_000, 10_000, 15_000, 25_000, 50_000, 75_000, 100_000, 200_000, 300_000]
  row_index = 1
  vehicle_id = 0
  workbook = nil
  driver = nil
  begin
    workbook, worksheet, worksheet2 = create_worksheet
    options = Selenium::WebDriver::Chrome::Options.new(args: ['headless', '--blink-settings=imagesEnabled=false'])
    driver = Selenium::WebDriver.for :chrome, options: options
    driver.navigate.to guide_url

    # `Array(...)` guards every level against a dropdown helper returning nil.
    years.each do |year|
      wait_for_element("year", driver)
      reload_opts(driver, year: year)
      wait_for_element("makes", driver)
      sleep 1
      Array(set_make(driver)).each do |make_option|
        by_make = { year: year, make_option: make_option }
        reload_opts(driver, **by_make)
        wait_for_element("models", driver)
        Array(set_model(driver)).each do |models_option|
          by_model = by_make.merge(models_option: models_option)
          reload_opts(driver, **by_model)
          wait_for_element("styles", driver)
          Array(set_trim(driver)).each do |trim_option|
            by_trim = by_model.merge(trim_option: trim_option)
            reload_opts(driver, **by_trim)
            wait_for_element("engines", driver)
            Array(set_engine(driver)).each do |engine_option|
              by_engine = by_trim.merge(engine_option: engine_option)
              reload_opts(driver, **by_engine)
              wait_for_element("transmissions", driver)
              Array(set_transmission(driver)).each do |transmission_option|
                by_transmission = by_engine.merge(transmission_option: transmission_option)
                vehicle_id += 1
                insert_vehicle_info(vehicle_id, year, make_option, models_option,
                                    trim_option, engine_option, transmission_option, worksheet2)
                sleep 2
                # NOTE(review): this re-selection looks redundant with the one at
                # the top of the mileage loop; kept to preserve the page timing.
                reload_opts(driver, **by_transmission)
                mileages.each do |mileage|
                  reload_opts(driver, **by_transmission)
                  driver.find_element(name: 'mileage').send_keys(mileage.to_s)
                  driver.find_element(name: 'zip').send_keys("95131")
                  driver.find_element(name: 'Go').submit
                  sleep 2
                  page = Nokogiri::HTML(driver.page_source)
                  extract_service_rows(page).each do |labor_action, item, labor_cost, part_cost|
                    insert_services_info(vehicle_id, mileage, labor_action, item,
                                         labor_cost, part_cost, row_index, worksheet)
                    row_index += 1
                  end
                  # The result page replaces the form, so go back for the next mileage.
                  driver.navigate.to guide_url
                end
              end
            end
          end
        end
      end
    end
  ensure
    begin
      # Shut the headless browser down so no Chrome process is left behind.
      driver&.quit
    ensure
      # Close the workbook even if quitting the browser itself fails.
      workbook&.close
    end
  end
end

# Returns the <ul> rows of an item list, dropping rows whose class attribute
# is exactly "head" or "last". Rows without a class attribute are kept.
def item_list_rows(list)
  list.css("ul").reject { |ul| %w[head last].include?(ul["class"]) }
end

# Extracts the service rows from a parsed result page.
#
# @param html [Nokogiri::HTML::Document] parsed page source
# @return [Array<Array>] one `[labor_action, item, labor_cost, part_cost]`
#   entry per row of the ".item-list.three-column" list; `part_cost` is nil
#   unless the action mentions "Replace" or "Change" and a row with the same
#   item text exists in the first ".item-list.two-column" list.
def extract_service_rows(html)
  part_list = html.css(".item-list.two-column").first
  part_rows = part_list ? item_list_rows(part_list) : []

  item_list_rows(html.css(".item-list.three-column")).map do |ul|
    labor_action, item, labor_cost = ul.css("li").map { |li| li.text.gsub("?", "").strip }
    part_cost = nil
    if item && %w[Replace Change].any? { |verb| labor_action.to_s.include?(verb) }
      part_row = part_rows.find do |row|
        row.at_css("li")&.text.to_s.gsub("?", "").strip == item
      end
      part_cost = part_row&.css("li.right")&.text
    end
    [labor_action, item, labor_cost, part_cost]
  end
end
# Writes one vehicle record to `worksheet2`.
#
# The vehicle id doubles as the row index; columns 0..6 receive, in order,
# the id, year, make, model, trim, engine and transmission.
# Returns whatever the final `write` call returns.
def insert_vehicle_info(vehicle_id, year, make, models,
                        trim, engine, transmission, worksheet2)
  cells = [vehicle_id, year, make, models, trim, engine, transmission]
  cells.each_with_index.map do |value, column|
    worksheet2.write(vehicle_id, column, value)
  end.last
end
# Writes one service record to `worksheet` at row `row_index`.
#
# Columns 0..5 receive, in order, the vehicle id, mileage, labor action,
# item, labor cost and part cost.
# Returns whatever the final `write` call returns.
def insert_services_info(vehicle_id, mileage, labor_action, item,
                         labor_cost, part_cost, row_index, worksheet)
  cells = [vehicle_id, mileage, labor_action, item, labor_cost, part_cost]
  cells.each_with_index.map do |value, column|
    worksheet.write(row_index, column, value)
  end.last
end
# Creates the './ruby4.xlsx' workbook with two worksheets and writes their
# header rows (row 0): the first sheet holds service rows, the second holds
# vehicle rows.
#
# @return [Array] `[workbook, services_worksheet, vehicles_worksheet]`
def create_worksheet
  book = WriteXLSX.new('./ruby4.xlsx')
  services_sheet = book.add_worksheet
  vehicles_sheet = book.add_worksheet

  headers = [
    [services_sheet, ["Vehicle ID", "Service Interval", "Labor Action", "Item", "Labor Cost", "Part Cost"]],
    [vehicles_sheet, ["Vehicle_Id", "year", "make", "model", "trim", "engine", "transmission"]]
  ]
  headers.each do |sheet, titles|
    titles.each_with_index { |title, column| sheet.write(0, column, title) }
  end

  [book, services_sheet, vehicles_sheet]
end
# Blocks until the form control named `element` reports itself enabled,
# using a Selenium wait with a 30 second timeout.
#
# @param element [String] value of the control's `name` attribute
# @param driver the Selenium driver used for the lookup
def wait_for_element(element, driver)
  Selenium::WebDriver::Wait.new(timeout: 30).until do
    driver.find_element(:name, element).enabled?
  end
end
# Re-applies the given dropdown selections on the current page, in the fixed
# order year, make, model, trim, engine, transmission. Selections left as nil
# are skipped.
#
# The year is selected after a 5 second pause. Every other dropdown is first
# waited for via `wait_for_element`, followed by a 1 second pause, and then
# set through its matching `set_*` helper.
#
# Returns the result of `set_transmission` when a transmission was given,
# otherwise nil.
def reload_opts(driver, transmission_option: nil, engine_option: nil,
                trim_option: nil, models_option: nil,
                make_option: nil, year: nil)
  if year
    sleep 5
    set_year(driver, year)
  end

  # [dropdown name, requested choice, helper that applies it]
  steps = [
    ["makes",         make_option,         :set_make],
    ["models",        models_option,       :set_model],
    ["styles",        trim_option,         :set_trim],
    ["engines",       engine_option,       :set_engine],
    ["transmissions", transmission_option, :set_transmission]
  ]

  outcomes = steps.map do |field_name, choice, setter|
    next unless choice

    wait_for_element(field_name, driver)
    sleep 1
    send(setter, driver, choice)
  end
  outcomes.last
end
# Shared implementation of the `set_*` dropdown helpers below.
#
# Looks up the <select> whose `name` attribute is `field_name`, selects the
# option whose visible text equals `choice` when a non-blank choice is given,
# and returns the visible text of every option except the first.
# NOTE(review): the first option is presumably a placeholder entry — confirm.
#
# Always returns an Array (empty when the select has zero or one option), so
# callers can iterate the result without a nil check.
def choose_and_list_options(driver, field_name, choice)
  dropdown = Selenium::WebDriver::Support::Select.new(driver.find_element(:name, field_name))
  dropdown.select_by(:text, choice) if choice.present?
  # `drop(1)` instead of `[1..-1]`: the latter yields nil for an empty list.
  dropdown.options.map(&:text).drop(1)
end

# Optionally selects `transmission_option` in the "transmissions" dropdown and
# returns the selectable transmission texts.
def set_transmission(driver, transmission_option = nil)
  choose_and_list_options(driver, "transmissions", transmission_option)
end

# Optionally selects `engine_option` in the "engines" dropdown and returns the
# selectable engine texts.
def set_engine(driver, engine_option = nil)
  choose_and_list_options(driver, "engines", engine_option)
end

# Optionally selects `trim_option` in the "styles" dropdown and returns the
# selectable trim texts.
def set_trim(driver, trim_option = nil)
  choose_and_list_options(driver, "styles", trim_option)
end

# Optionally selects `models_option` in the "models" dropdown and returns the
# selectable model texts.
def set_model(driver, models_option = nil)
  choose_and_list_options(driver, "models", models_option)
end

# Optionally selects `make_option` in the "makes" dropdown and returns the
# selectable make texts.
def set_make(driver, make_option = nil)
  choose_and_list_options(driver, "makes", make_option)
end
# Selects `year` in the "year" dropdown by matching the option's visible text
# against `year.to_s`.
def set_year(driver, year)
  dropdown = Selenium::WebDriver::Support::Select.new(driver.find_element(:name, "year"))
  dropdown.select_by(:text, year.to_s)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment