Skip to content

Instantly share code, notes, and snippets.

@aborilov
Created May 11, 2016 16:29
Show Gist options
  • Save aborilov/e30b0e4e6097c69150fcf2d99ba2916d to your computer and use it in GitHub Desktop.
Save aborilov/e30b0e4e6097c69150fcf2d99ba2916d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
import os
import requests
from urlparse import urlparse
from bs4 import BeautifulSoup
# Pages the crawl starts from; the loop at the bottom of the file fetches
# each one in turn and derives the site's base URL from it.
urls = ['https://energyplus.net/weather-region/north_and_central_america_wmo_region_4/USA',
'https://energyplus.net/weather-region/north_and_central_america_wmo_region_4/CAN']
# Directory that downloaded files are written into (joined with each
# file name when the file is opened for writing).
dirname = 'weatherdata'
def parse_files(html):
    """Download the files linked from a page into the ``dirname`` directory.

    Finds the first element with class ``btn-group-vertical`` in *html* and
    walks all of its direct children except the last one.  For every child
    that carries an ``href`` attribute, ``domain + href`` is requested; each
    successful response is written to ``dirname/<last path segment>`` and a
    "<filename> saved" line is printed.

    Reads the module-level globals ``domain`` and ``dirname``.

    :param html: markup of the page to scan, as accepted by BeautifulSoup.
    :returns: None.  Pages without a matching element are skipped silently.
    """
    soup = BeautifulSoup(html, 'html.parser')
    groups = soup.find_all(class_="btn-group-vertical")
    if not groups:
        # No button group on this page: nothing to download.
        return

    # open(..., 'wb') does not create missing directories, so make sure the
    # target directory exists before the first write.
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

    # NOTE(review): the last child is skipped, as before; presumably it is
    # not an individual file link -- confirm against the live page.
    for node in groups[0].contents[:-1]:
        # .contents also yields text nodes (e.g. whitespace between tags),
        # which offer no attribute lookup; only tags can carry an href.
        get_attribute = getattr(node, 'get', None)
        if get_attribute is None:
            continue
        path = get_attribute('href')
        if not path:
            continue

        filename = path.split('/')[-1]
        response = requests.get(domain + path)
        if response.ok:
            with open(os.path.join(dirname, filename), 'wb') as f:
                f.write(response.content)
            # Parenthesised single-argument form prints the same text under
            # both Python 2 and Python 3.
            print("{} saved".format(filename))
def parse_state(html):
    """Follow every link in a page's first button group and download its files.

    Finds the first element with class ``btn-group-vertical`` in *html*,
    requests ``domain + href`` for each child that carries an ``href``
    attribute, and passes the body of every successful response to
    ``parse_files``.

    Reads the module-level global ``domain``.

    :param html: markup of the page to scan, as accepted by BeautifulSoup.
    :returns: None.  Pages without a matching element are skipped silently.
    """
    soup = BeautifulSoup(html, 'html.parser')
    groups = soup.find_all(class_="btn-group-vertical")
    if not groups:
        # No button group on this page: nothing to follow.
        return

    for node in groups[0]:
        # Iterating a tag yields its children, including text nodes (e.g.
        # whitespace between tags) that offer no attribute lookup.
        get_attribute = getattr(node, 'get', None)
        if get_attribute is None:
            continue
        href = get_attribute('href')
        if not href:
            # Concatenating a missing href to the base URL would fail.
            continue

        response = requests.get(domain + href)
        if response.ok:
            parse_files(response.content)
# Entry point of the script: for each start page, follow every link in its
# second "btn-group-vertical" element and hand the fetched page to
# parse_state, which in turn drives parse_files.
for url in urls:
    response = requests.get(url)
    parsed_uri = urlparse(url)
    # Scheme and host with a trailing slash.  This is a module-level global
    # on purpose: parse_state and parse_files prepend it to every href.
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
    if not response.ok:
        continue

    html = response.content
    soup = BeautifulSoup(html, 'html.parser')
    groups = soup.find_all(class_="btn-group-vertical")
    if len(groups) < 2:
        # The links of interest live in the second button group; skip the
        # page instead of failing on the index when it is absent.
        continue

    divs = groups[1]
    for div in divs:
        # Iterating a tag yields its children, including text nodes (e.g.
        # whitespace between tags) that offer no attribute lookup.
        get_attribute = getattr(div, 'get', None)
        if get_attribute is None:
            continue
        href = get_attribute('href')
        if not href:
            continue

        response = requests.get(domain + href)
        if response.ok:
            parse_state(response.content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment