Skip to content

Instantly share code, notes, and snippets.

@nbhasker
Created June 21, 2019 02:18
Show Gist options
  • Save nbhasker/9e0bb85d76b54053bf8c7ef704729cab to your computer and use it in GitHub Desktop.
Save nbhasker/9e0bb85d76b54053bf8c7ef704729cab to your computer and use it in GitHub Desktop.
Simple Python script to download historical temperature data for Portland Airport from Weather Underground
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import io
# Download the monthly history table for Portland Airport (KPDX) from
# Weather Underground. Each month's table is saved as its own HTML file and
# also appended to a combined file for the year.

years = [2019]
months = range(1, 5)  # months 1 through 4
urlBase = "https://www.wunderground.com/history/monthly/us/or/portland/KPDX/date/"
xpath = '//*[@id="inner-content"]/div[2]/div[3]/div/div[1]/div/div/city-history-observation/div/div[2]/table'
# Seconds allowed both for the page load and for the table to appear.
timeout = 30

driver = webdriver.Chrome("chromedriver.exe")
try:
    driver.set_page_load_timeout(timeout)
    for thisYear in years:
        thisYearsFileName = "PDXTemp" + str(thisYear) + ".html"
        # `with` closes the yearly file even if a later month fails.
        with io.open(thisYearsFileName, "w", encoding="utf-8") as thisYearsFile:
            for thisMonth in months:
                url = urlBase + str(thisYear) + "-" + str(thisMonth)
                print(url)
                try:
                    # driver.get() raises TimeoutException itself once the
                    # page-load timeout is exceeded, so it belongs inside
                    # the try together with the explicit wait.
                    driver.get(url)
                    # until() returns the located element, so no second
                    # lookup is needed after the wait succeeds.
                    thisMonthsTable = WebDriverWait(driver, timeout).until(
                        EC.presence_of_element_located((By.XPATH, xpath))
                    )
                except TimeoutException:
                    print("Timed out waiting for page to load")
                    # The table never appeared; skip this month rather than
                    # crash while looking up a missing element.
                    continue
                print("Page loaded")
                thisMonthsData = thisMonthsTable.get_attribute('outerHTML')
                thisMonthsFileName = "PDXTemp" + str(thisYear) + "-" + str(thisMonth) + ".html"
                print(thisMonthsFileName)
                with io.open(thisMonthsFileName, "w", encoding="utf-8") as thisMonthsFile:
                    thisMonthsFile.write(thisMonthsData)
                thisYearsFile.write(thisMonthsData)
finally:
    # Always shut the browser down, even when a download fails part-way.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment