-
-
Save SecureCloud-biz/bed79efd29e296af7345185574a60303 to your computer and use it in GitHub Desktop.
findprivateclinics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scraper setup: load the libraries the script relies on and fetch the
# listings index page that drives the rest of the crawl.
require 'csv'
require 'uri'
require 'nokogiri'
require 'typhoeus'

# Base URL used to resolve the relative links found in the pages.
main_url = "https://www.findprivateclinics.ca/"
# Listings index page; its last <select> is read later to enumerate categories.
url = "https://www.findprivateclinics.ca/listings.html"
t = Typhoeus.get url
page = Nokogiri::HTML t.body
# Accumulator filled by parse_page: name => category => subcategory => [records].
data = {}
# Returns the whitespace-stripped text of the first node under +node+ that
# matches +selector+, or "" when nothing matches.
def listing_text(node, selector)
  match = node.at(selector)
  match ? match.text.strip : ""
end

# Collects every clinic found on a listing page into +data+.
#
# For each ".listing" element the name, address fields and detail-page URL are
# read from the listing itself; the detail page is then fetched to read the
# list of services and the province/state (itemprop="addressRegion").
#
# Records are grouped as data[name][category][subcategory] => [record, ...].
# Existing categories and subcategories of a clinic are preserved when a new
# one is added (only the missing level is created).
#
# @param page [#search] parsed listing page (Nokogiri document)
# @param category [Object] category label used as the second-level hash key
# @param subcategory [Object] subcategory label used as the third-level hash key
# @param data [Hash] accumulator; mutated in place
# @return [Hash] the same +data+ object, for callers that reassign it
def parse_page(page, category, subcategory, data)
  page.search(".listing").each do |el|
    link = el.at(".name").at("a")
    name = link.text.strip
    location_url = URI.join("https://www.findprivateclinics.ca/", link.attr("href")).to_s
    address = listing_text(el, ".address")
    city = listing_text(el, ".city")
    zip = listing_text(el, ".zip")

    # One extra HTTP request per listing: services are only read from the detail page.
    loc_page = Nokogiri::HTML(Typhoeus.get(location_url).body)
    services_node = loc_page.at(".services")
    services = services_node ? services_node.search(".service").map { |serv_el| serv_el.text.strip } : []

    # Prefer the region from the detail page; fall back to the listing's own
    # value instead of discarding it when the detail page has none.
    region = listing_text(loc_page, '[itemprop="addressRegion"]')
    provstate = region.empty? ? listing_text(el, ".provstate") : region

    record = { name: name, address: address, city: city, provstate: provstate, zip: zip, location_url: location_url, services: services.join(",") }

    # Create only the missing nesting level so earlier entries are never overwritten.
    by_category = (data[name] ||= {})
    by_subcategory = (by_category[category] ||= {})
    (by_subcategory[subcategory] ||= []) << record
  end
  data
end
# Walk every category offered by the last <select> on the listings page and
# feed each category (or each of its subcategories) through parse_page.
category_options = page.search("select").last.search("option")
category_options.each do |cat_opt|
  next if cat_opt.text == "All Categories"

  category = cat_opt.text
  category_url = URI.join(main_url, cat_opt.attr("data-url")).to_s
  category_page = Nokogiri::HTML(Typhoeus.get(category_url).body)
  last_select = category_page.search("select").last

  if last_select.attr("id") == "FilterCategoryId[0]"
    # The last <select> is still the category filter, i.e. no further dropdown
    # follows it (presumably: this category has no subcategories); file the
    # listings under the placeholder subcategory "-".
    data = parse_page(category_page, category, "-", data)
  else
    last_select.search("option").each do |sub_opt|
      next if sub_opt.text == "All Subcategories"

      subcategory = sub_opt.text
      subcategory_url = URI.join(main_url, sub_opt.attr("data-url")).to_s
      subcategory_page = Nokogiri::HTML(Typhoeus.get(subcategory_url).body)
      data = parse_page(subcategory_page, category, subcategory, data)
    end
  end
end
# Order the clinics by name (the top-level hash key) before exporting.
data = data.sort_by { |clinic_name, _by_category| clinic_name }.to_h

# Write one semicolon-separated row per record, preceded by a header row.
# The record's :location_url is not exported.
csv_options = {
  write_headers: true,
  headers: ["Name", "Category", "Subcategory", "Civic Address", "City", "Province", "Postal Code", "Services"],
  col_sep: ";"
}
CSV.open('findprivateclinics_ca.csv', 'w', **csv_options) do |csv|
  data.each do |name, categories|
    categories.each do |cat_name, subcategories|
      subcategories.each do |subcat_name, records|
        records.each do |record|
          fields = record.values_at(:address, :city, :provstate, :zip, :services)
          csv << [name, cat_name, subcat_name, *fields]
        end
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment