Skip to content

Instantly share code, notes, and snippets.

@samclane
Last active June 14, 2021 16:18
Show Gist options
  • Save samclane/f832962066465d91b1688ab77f943639 to your computer and use it in GitHub Desktop.
Save samclane/f832962066465d91b1688ab77f943639 to your computer and use it in GitHub Desktop.
Webscraper for public-market insurance prices from Connect for Health CO's website
import csv
import json
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Upper bound, in seconds, for every explicit WebDriverWait in this script.
timeout = 10

# Build the webdriver: Chrome's driver is pointed at the Brave executable
# through the options' binary location.
options = webdriver.ChromeOptions()
options.binary_location = r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe"
driver = webdriver.Chrome(options=options)
def find_and_send(id_, value):
    """Wait for the element with the given ID, type ``value`` into it, and return it.

    Args:
        id_: HTML ``id`` attribute of the element to locate.
        value: Text passed to the element's ``send_keys``.

    Returns:
        The located element, so the caller can keep using it
        (for example to call ``submit()`` on it).

    Raises:
        NoSuchElementException: If the wait timed out and the final direct
            lookup cannot find the element either.
    """
    locator = (By.ID, id_)
    try:
        # ``until`` returns the value produced by the condition, which for
        # ``presence_of_element_located`` is the element itself, so no
        # second lookup is needed on the success path.
        field = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(locator)
        )
    except TimeoutException:
        print(f"Timed out after {timeout} seconds...")
        # One last direct attempt. ``find_element(By.ID, ...)`` is used
        # because the ``find_element_by_id`` shortcut no longer exists in
        # current Selenium releases.
        field = driver.find_element(*locator)
    field.send_keys(value)
    return field
# Load the run configuration from config.json in the working directory.
# The encoding is given explicitly because JSON text is UTF-8, while the
# platform default (e.g. cp1252 on Windows) would mis-decode non-ASCII
# characters in the stored values.
with open("config.json", "r", encoding="utf-8") as file:
    config = json.load(file)
# Pick the portal address: a non-empty 'URL' entry in the config wins,
# otherwise fall back to the built-in address.
URL = config.get('URL')
if not URL:
    URL = r"https://prd.connectforhealthco.com/ShoppingPortal/"
driver.get(URL)

# Type the credentials into the login form, then submit it from the
# password field.
find_and_send('email', config["username"])
password_input = find_and_send('password', config["password"])
password_input.submit()
# CSV columns filled from a plan's 'float-right' elements, in the order those
# elements are read from the page.
PRICE_FIELDS = [
    "Individual Deductible",
    "Individual Out-of-Pocket Max",
    "Primary Care Coinsurance (after deductible)",
    "Specialist Coinsurance (after deductible)",
    "Urgent Care Coinsurance (after deductible)",
    "Monthly Premium",
]

# Absolute XPath of the pagination link clicked to advance to the next page.
NEXT_PAGE_XPATH = (
    r"/html/body/app-root/div/div[2]/div/app-plan-results/div[1]/div[2]/div/div[13]"
    r"/pagination-controls/pagination-template/ul/li[10]/a"
)


def _wait_for_class(class_name):
    """Wait until an element with ``class_name`` is present; report a timeout."""
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, class_name))
        )
    except TimeoutException:
        # Best effort: the caller carries on and simply finds nothing.
        print(f"Timed out after {timeout} seconds...")


def _plan_name(plan):
    """Return the plan's name text, or None when no div holds a 'col-sm-12' element."""
    # The name comes from the LAST div that contains a 'col-sm-12' element.
    # Walking the divs in reverse and stopping at the first hit yields that
    # same element without querying every remaining div.
    for div in reversed(plan.find_elements(By.CSS_SELECTOR, 'div')):
        try:
            return div.find_element(By.CLASS_NAME, "col-sm-12").text
        except NoSuchElementException:
            continue
    return None


def _scrape_plan(plan):
    """Build one CSV row dict from a plan element, or None if it is incomplete."""
    name = _plan_name(plan)
    prices = [cell.text for cell in plan.find_elements(By.CLASS_NAME, 'float-right')]
    if name is None or len(prices) < len(PRICE_FIELDS):
        # Skip instead of failing with NameError/IndexError or silently
        # reusing the previous plan's name.
        print(f"Skipping incomplete plan (name={name!r}, {len(prices)} price fields)")
        return None
    row = {"Name": name}
    # zip stops after the six known columns; extra elements are ignored.
    row.update(zip(PRICE_FIELDS, prices))
    return row


# Output file is named after the current Unix timestamp.
csv_path = f"{int(time.time())}.csv"
try:
    # newline='' is what the csv module expects for files it writes to;
    # without it every row is followed by a blank line on Windows.
    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=["Name", *PRICE_FIELDS])
        writer.writeheader()
        while True:
            _wait_for_class("plan-results-container")
            _wait_for_class("plan")
            plan_list = []
            for plan in driver.find_elements(By.CLASS_NAME, "plan"):
                row = _scrape_plan(plan)
                if row is not None:
                    plan_list.append(row)
            try:
                writer.writerows(plan_list)
            except IOError:
                print("I/O Error")
            # No next-page link means the last page has been processed.
            try:
                nxt_btn = driver.find_element(By.XPATH, NEXT_PAGE_XPATH)
            except NoSuchElementException:
                break
            # NOTE(review): nothing here waits for the next page's plans to
            # replace the current ones after the click - confirm the waits at
            # the top of the loop are sufficient for this site.
            nxt_btn.click()
finally:
    # Always end the browser session, even if scraping failed part-way.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment