Last active
June 22, 2018 15:05
-
-
Save kylebarron/c6dc69b1eee1eee3e2c048243e8c6b8d to your computer and use it in GitHub Desktop.
Scrape Mt. Whitney permits and email me if one is available
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
""" | |
--------------------------------------------------------------------- | |
Program: recreation_gov_scrape.py | |
Author: Kyle Barron <barronk@mit.edu> | |
Created: 6/15/2018, 9:38:50 PM | |
Updated: 6/15/2018, 9:38:50 PM | |
Purpose: Scrape Mt. Whitney permits and email me if one is available | |
""" | |
from time import sleep | |
from textwrap import dedent | |
from gmail_send import send | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
from os.path import expanduser | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import Select | |
def main():
    """Check Mt. Whitney permit availability and email a report if any.

    Scrapes the recreation.gov permit page, parses the result with
    ``parse_page`` and, when that returns a report, emails it to (and from)
    the address stored in ``~/.config/email_address``. When ``parse_page``
    returns ``None`` a short notice is printed instead.
    """
    url = (
        'https://www.recreation.gov/permits/Mt-Whitney/r/'
        'wildernessAreaDetails.do?page=detail&contractCode=NRSO&parkId=72201'
    )
    text = parse_page(scrape_page(url))
    if text is None:
        print('Permits not available')
        return

    with open(expanduser('~/.config/email_address'), 'r') as f:
        # The file normally ends with a newline; strip surrounding whitespace
        # so it does not end up inside the To/From address fields.
        email_address = f.read().strip()

    send(
        to_addr=email_address,
        from_addr=email_address,
        subject='Whitney Permits Now Available',
        msg=text)
def scrape_page(url):
    """Drive a browser through the permit search form and return the result.

    Opens ``url``, selects the Mt. Whitney day-use permit type and entrance,
    picks the date range 27-31 Aug 2018 through the pop-up calendar, enters a
    group size of 1 and submits the search.

    Args:
        url: Address of the recreation.gov permit details page.

    Returns:
        A ``BeautifulSoup`` object (``lxml`` parser) built from the page
        source after the search button has been clicked.
    """
    try:
        driver = webdriver.Chrome(
            executable_path='/home/kyle/local/bin/chromedriver')
    except Exception:
        # Chrome could not be started; fall back to PhantomJS. ``Exception``
        # (rather than a bare ``except``) lets Ctrl-C / SystemExit through.
        driver = webdriver.PhantomJS(
            executable_path='/home/kyle/local/bin/phantomjs')

    try:
        driver.set_window_size(1200, 800)
        driver.get(url)

        permit_type = Select(
            driver.find_element_by_css_selector('#permitTypeId'))
        permit_type.select_by_visible_text('DAY USE of Mt. Whitney Zone')

        trail = Select(driver.find_element_by_css_selector('#entrance'))
        trail.select_by_visible_text(
            'Mt. Whitney Zone- Day Use All Routes JM34.5MWZ')

        # Switch the form to "date range" mode before filling in the dates.
        driver.find_element_by_css_selector('#rangeyes').click()

        _pick_calendar_date(driver, '#entryStartDate', 'Aug 2018', '27')
        # Note, if you just choose the first element named "31", it will
        # choose the previous month's 31st date if listed, so take the last.
        _pick_calendar_date(
            driver, '#entryEndDate', 'Aug 2018', '31', last_match=True)

        driver.find_element_by_css_selector('#groupSize').send_keys('1')

        # Wait a few seconds before clicking again
        sleep(4)
        driver.find_element_by_css_selector(
            '#permitAvailabilitySearchButton').click()

        return BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Always shut the browser down, on success or failure, so repeated
        # runs do not leave driver/browser processes behind.
        driver.quit()


def _pick_calendar_date(driver, field_selector, month_label, day_text,
                        last_match=False):
    """Pick a day in the pop-up calendar attached to a date input.

    Clicks the input matched by ``field_selector`` to open the calendar,
    switches into the ``#popupCalendar`` frame, selects ``month_label`` in the
    ``#month`` drop-down, clicks the link whose text is ``day_text`` and
    switches back to the parent frame.

    Args:
        driver: The Selenium webdriver in use.
        field_selector: CSS selector of the date input that opens the calendar.
        month_label: Visible text of the month option, e.g. ``'Aug 2018'``.
        day_text: Link text of the day to click, e.g. ``'27'``.
        last_match: Click the last link with that text instead of the first.
    """
    # Open calendar
    driver.find_element_by_css_selector(field_selector).click()

    # Switch to pop up calendar
    popup_calendar = driver.find_element_by_css_selector('#popupCalendar')
    driver.switch_to.frame(popup_calendar)

    month = Select(driver.find_element_by_css_selector('#month'))
    month.select_by_visible_text(month_label)

    if last_match:
        day_link = driver.find_elements_by_link_text(day_text)[-1]
    else:
        day_link = driver.find_element_by_link_text(day_text)
    day_link.click()

    # Switch back to main document
    driver.switch_to.parent_frame()
def parse_page(soup):
    """Build a plain-text availability report from the search results page.

    Args:
        soup: Parsed results page; only ``select()`` on the page and
            ``get_text()`` / ``get()`` on the selected tags are used.

    Returns:
        ``None`` when the page shows exactly 14 ``.permitStatus`` entries that
        all read ``'R'``, or when no calendar cell carries the CSS class
        ``a``. Otherwise a string with a timestamped header followed by one
        three-line entry (date, status, number available) per such cell.
    """
    reserved_status = [tag.get_text() for tag in soup.select('.permitStatus')]
    if len(reserved_status) == 14 and all(s == 'R' for s in reserved_status):
        return None

    # The first cell is skipped so that the remaining cells line up with the
    # date headers (presumably it is a row label -- verify against the page).
    cells = soup.select('#calendar tbody td')[1:]

    # (column index, cell text) for every cell marked with class "a".
    # ``get`` is used because a cell without a class attribute would make
    # ``cell['class']`` raise KeyError.
    available = [
        (col, cell.get_text())
        for col, cell in enumerate(cells)
        if 'a' in (cell.get('class') or [])
    ]
    if not available:
        # Nothing to report; returning a header-only message would make the
        # caller send an "available" email with no permits in it.
        return None

    # Header cells alternate: day number, weekday abbreviation.
    dates = soup.select('thead .date')
    numbers = [tag.get_text() for tag in dates[0::2]]
    abbr = [tag.get_text() for tag in dates[1::2]]
    month = soup.select('thead .month span')[0].get_text()

    day_dict = {
        'M': 'Monday',
        'Tu': 'Tuesday',
        'W': 'Wednesday',
        'Th': 'Thursday',
        'F': 'Friday',
        'Sa': 'Saturday',
        'Su': 'Sunday'}
    status_legend = {
        'A': 'Available for online reservation (click to book entry date)',
        'a': 'Available for online reservation with earlier arrival date',
        'L': 'Accepting Lottery Application (click to apply for the lottery)',
        'W': 'Available at the Facility',
        'R': 'Reserved',
        'C': 'Closed',
        'X': 'Not available',
        'N': 'Not available'}

    text = [f'Whitney permit status, as of {datetime.now()}\n\n']
    for col, cell_text in available:
        # Cell text is a one-character status code followed by the count.
        status = cell_text[:1]
        n_avail = cell_text[1:]
        # Unknown codes fall back to the raw code instead of raising KeyError.
        weekday = day_dict.get(abbr[col], abbr[col])
        text.append(
            f'- Date: {weekday}, {numbers[col]} {month}\n'
            f'- Status: {status_legend.get(status, status)}\n'
            f'- # Available: {n_avail}\n'
        )
    return ''.join(text)
# Run the permit check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment