Skip to content

Instantly share code, notes, and snippets.

@pedro2555
Created March 7, 2018 19:49
Show Gist options
  • Save pedro2555/3d9d19cdbff772bc1c1786ca32bb0076 to your computer and use it in GitHub Desktop.
Save pedro2555/3d9d19cdbff772bc1c1786ca32bb0076 to your computer and use it in GitHub Desktop.
from selenium import webdriver
from collections import deque
SCHEDULE_URL = 'http://www.portugal-vacc.org/'

# Path to the scheduled-ATC table, copied straight out of Chrome DevTools.
# It is tied to the page's current layout and will stop matching if the
# site's markup changes.
SCHEDULE_TABLE_SELECTOR = (
    'body > table > tbody > tr:nth-child(3) > td > table:nth-child(1) > '
    'tbody > tr:nth-child(2) > td:nth-child(2) > p:nth-child(5) > table'
)

TRAINING_TAG = ' [Training]'
MENTOR_LABEL = 'Mentor:'


def describe_session(entry):
    """Build the summary line for the first row of one schedule entry.

    ``entry`` is the text of a table row of the form ``'<where> <who>'``.
    For a training session the row carries a second line, i.e.
    ``'<where> <who> [Training]\\nMentor: <mentor>'``.

    Returns ``'<who> training at <where>'`` when a non-empty mentor is
    present, ``'<who> controlling at <where>'`` otherwise, and ``None`` when
    the row contains no space and therefore cannot be split into the two
    parts.
    """
    where, separator, who = entry.partition(' ')
    if not separator:
        # Nothing to split on; skip the row instead of failing to unpack.
        return None

    mentor = ''
    if '\n' in who:
        who, second_line = who.split('\n', 1)
        # Strip the tag/label only when they are really there, so a row
        # with some other second line does not get its name chopped.
        if who.endswith(TRAINING_TAG):
            who = who[:-len(TRAINING_TAG)]
        if second_line.startswith(MENTOR_LABEL):
            mentor = second_line[len(MENTOR_LABEL):].strip()

    if mentor:
        return '%s training at %s' % (who, where)
    return '%s controlling at %s' % (who, where)


def summarize_sessions(row_texts):
    """Yield one summary line per schedule entry found in ``row_texts``.

    ``row_texts`` is an iterable of row strings. Entries are runs of
    non-empty rows separated by empty rows; only the first row of each run
    is summarized (via ``describe_session``), the remaining rows of the run
    are ignored. A run that is not followed by an empty row is still
    reported, and rows that cannot be parsed are skipped.
    """
    first_row = None
    for text in row_texts:
        if text != '':
            if first_row is None:
                first_row = text
            continue
        if first_row is not None:
            summary = describe_session(first_row)
            if summary is not None:
                yield summary
            first_row = None

    # The table may end without a closing empty row; do not lose that entry.
    if first_row is not None:
        summary = describe_session(first_row)
        if summary is not None:
            yield summary


def main():
    """Open the site in Chrome and print one line per scheduled ATC entry.

    The browser is always closed, even when loading or parsing fails.
    """
    # Local import keeps this block self-contained; ``webdriver`` itself
    # comes from the file-level import.
    from selenium.webdriver.common.by import By

    browser = webdriver.Chrome()
    try:
        browser.get(SCHEDULE_URL)
        # TODO: make sure this doesn't break when there's no scheduled ATC;
        # find_element raises NoSuchElementException if the table is absent.
        table = browser.find_element(By.CSS_SELECTOR, SCHEDULE_TABLE_SELECTOR)
        rows = table.find_elements(By.TAG_NAME, 'tr')
        # Generator: each row's text is read just before it is processed.
        for line in summarize_sessions(row.text for row in rows):
            print(line)
    finally:
        browser.quit()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment