Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save SecureCloud-biz/bed79efd29e296af7345185574a60303 to your computer and use it in GitHub Desktop.
Save SecureCloud-biz/bed79efd29e296af7345185574a60303 to your computer and use it in GitHub Desktop.
findprivateclinics
require 'csv'
# Site root; relative links scraped from the pages are resolved against it.
main_url = "https://www.findprivateclinics.ca/"
# Listings index page, which carries the category dropdown walked further down.
url = "#{main_url}listings.html"
# NOTE(review): Typhoeus and Nokogiri are used here but only 'csv' is required
# in this file -- presumably they are loaded by the environment; confirm.
t = Typhoeus.get(url)
page = Nokogiri::HTML(t.body)
# Accumulator filled by parse_page.
data = {}
# Returns the stripped text of the first node under +node+ that matches
# +selector+, or "" when nothing matches.
def node_text(node, selector)
  found = node.at(selector)
  found.nil? ? "" : found.text.strip
end

# Scrapes every ".listing" element of a results page and files one record per
# listing into +data+, nested as clinic name => category => subcategory =>
# [records].
#
# For each listing the linked detail page is fetched (one request per listing)
# to read the ".services .service" entries and the province/state. The
# province is taken from the detail page's [itemprop="addressRegion"] element
# and falls back to the listing's own ".provstate" text when the detail page
# has none.
#
# Existing entries in +data+ are merged into, never replaced: a clinic that
# shows up under several categories or subcategories keeps all of them.
#
# @param page [#search] parsed results page (a Nokogiri document in this script)
# @param category [String] category label to file the records under
# @param subcategory [String] subcategory label to file the records under
# @param data [Hash] accumulator; mutated in place
# @return [Hash] the same +data+ object that was passed in
def parse_page(page, category, subcategory, data)
  page.search(".listing").each do |el|
    name_el = el.at(".name")
    link = name_el.nil? ? nil : name_el.at("a")
    if link.nil?
      # Without the name link there is neither a key nor a detail URL; skip the
      # entry instead of aborting the whole crawl.
      warn "parse_page: skipping listing without a name link (#{category} / #{subcategory})"
      next
    end

    name = link.text.strip
    location_url = URI.join("https://www.findprivateclinics.ca/", link.attr("href")).to_s

    loc_page = Nokogiri::HTML(Typhoeus.get(location_url).body)

    services_el = loc_page.at(".services")
    services = services_el.nil? ? [] : services_el.search(".service").map { |serv_el| serv_el.text.strip }

    # Prefer the region from the detail page; use the listing's value only
    # when the detail page does not provide one.
    provstate = node_text(loc_page, '[itemprop="addressRegion"]')
    provstate = node_text(el, ".provstate") if provstate.empty?

    record = {
      name: name,
      address: node_text(el, ".address"),
      city: node_text(el, ".city"),
      provstate: provstate,
      zip: node_text(el, ".zip"),
      location_url: location_url,
      services: services.join(",")
    }

    # Create only the missing levels so earlier categories/subcategories of
    # the same clinic are preserved.
    by_category = (data[name] ||= {})
    by_subcategory = (by_category[category] ||= {})
    (by_subcategory[subcategory] ||= []) << record
  end
  data
end
# Fetches and parses the page that a dropdown option's "data-url" attribute
# points at, resolved against main_url.
load_option_page = lambda do |option|
  target = URI.join(main_url, option.attr("data-url")).to_s
  Nokogiri::HTML(Typhoeus.get(target).body)
end

# Walk every option of the last <select> on the listings page, treating each
# one (except the "All Categories" entry) as a category.
page.search("select").last.search("option").each do |cat_option|
  category = cat_option.text
  next if category == "All Categories"

  category_page = load_option_page.call(cat_option)
  last_select = category_page.search("select").last

  if last_select.attr("id") == "FilterCategoryId[0]"
    # The last dropdown is still the category filter -- presumably meaning
    # this category has no subcategory dropdown; file its listings under "-".
    data = parse_page(category_page, category, "-", data)
  else
    last_select.search("option").each do |sub_option|
      subcategory = sub_option.text
      next if subcategory == "All Subcategories"

      data = parse_page(load_option_page.call(sub_option), category, subcategory, data)
    end
  end
end
# Re-key the accumulator in ascending clinic-name order so the exported rows
# come out sorted by name.
data = data.sort_by(&:first).to_h

csv_headers = ["Name", "Category", "Subcategory", "Civic Address", "City", "Province", "Postal Code", "Services"]

# Write one semicolon-separated row per record, with a header row.
# The record's :location_url is collected but not exported.
CSV.open('findprivateclinics_ca.csv', 'w', write_headers: true, headers: csv_headers, col_sep: ";") do |csv|
  data.each do |name, categories|
    categories.each do |cat_name, subcategories|
      subcategories.each do |subcat_name, records|
        records.each do |record|
          csv << [name, cat_name, subcat_name, *record.values_at(:address, :city, :provstate, :zip, :services)]
        end
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment