aborilov/download.py

## download.py
#!/usr/bin/env python2
import os
import requests
from urlparse import urlparse
from bs4 import BeautifulSoup

# Region index pages the crawl starts from (one URL ending in USA, one in CAN).
urls = [
    'https://energyplus.net/weather-region/north_and_central_america_wmo_region_4/USA',
    'https://energyplus.net/weather-region/north_and_central_america_wmo_region_4/CAN',
]
# Directory, relative to the working directory, that downloaded files are
# written into by parse_files().
dirname = 'weatherdata'

def parse_files(html):
    """Download every file linked from a page into the ``dirname`` directory.

    The first element with class ``btn-group-vertical`` is treated as the
    list of download links. Its last child is deliberately skipped, exactly
    as before (NOTE(review): presumably a "download all" entry -- confirm
    against the live page).

    Reads the module-level ``domain`` (base URL set by the crawl loop) and
    ``dirname`` (output directory). Files are named after the last path
    segment of each link. Links whose request does not succeed are skipped
    silently.

    :param html: markup of the page to scan, as accepted by BeautifulSoup.
    """
    soup = BeautifulSoup(html, 'html.parser')
    groups = soup.find_all(class_="btn-group-vertical")
    if not groups:
        # No download list on this page; previously this raised IndexError
        # and aborted the whole crawl.
        return

    # Nothing else in the script creates the output directory, so the first
    # open() below failed on a fresh run. Create it on demand.
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

    for node in groups[0].contents[:-1]:
        # .contents also yields bare text nodes (e.g. whitespace between
        # tags), which have no attributes; only elements with an href are
        # download links.
        path = node.get('href') if hasattr(node, 'get') else None
        if not path:
            continue
        filename = path.split('/')[-1]
        response = requests.get(domain + path)
        if response.ok:
            with open(os.path.join(dirname, filename), 'wb') as f:
                f.write(response.content)
                # Single-argument call form prints the same text as the
                # original print statement under Python 2.
                print("{} saved".format(filename))


def parse_state(html):
    """Follow every link in a page's first button group and hand each page to parse_files().

    The crawl loop calls this once per link found on a region page. Each
    child of the first ``btn-group-vertical`` element that carries an
    ``href`` is fetched relative to the module-level ``domain``; successful
    responses are passed to ``parse_files``.

    :param html: markup of the page to scan, as accepted by BeautifulSoup.
    """
    soup = BeautifulSoup(html, 'html.parser')
    groups = soup.find_all(class_="btn-group-vertical")
    if not groups:
        # Page has no link list; previously this raised IndexError.
        return

    for node in groups[0]:
        # Iterating a tag yields its children, including bare text nodes
        # that have no attributes; skip anything without an href instead of
        # failing on it.
        href = node.get('href') if hasattr(node, 'get') else None
        if not href:
            continue
        response = requests.get(domain + href)
        if response.ok:
            parse_files(response.content)


# Crawl each region page: every link in the page's second
# "btn-group-vertical" element is fetched and handed to parse_state().
for url in urls:
    response = requests.get(url)
    parsed_uri = urlparse(url)
    # parse_state() and parse_files() read this module-level name to turn
    # the hrefs they find into full URLs, so it is set before they are called.
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
    if not response.ok:
        continue

    html = response.content
    soup = BeautifulSoup(html, 'html.parser')
    groups = soup.find_all(class_="btn-group-vertical")
    if len(groups) < 2:
        # The link list is expected to be the second group; skip the region
        # instead of raising IndexError when it is absent.
        continue

    for div in groups[1]:
        # Children may include bare text nodes with no attributes; only
        # elements carrying an href are links to follow.
        href = div.get('href') if hasattr(div, 'get') else None
        if not href:
            continue
        # Separate name so the region-level response is not overwritten.
        state_response = requests.get(domain + href)
        if state_response.ok:
            parse_state(state_response.content)
	#!/usr/bin/env python2
	import os
	import requests
	from urlparse import urlparse
	from bs4 import BeautifulSoup

	# NOTE(review): these assignments repeat the `urls` and `dirname` constants
	# defined near the top of this file, as part of a tab-indented second copy
	# of the script -- confirm whether this copy should be removed.
	urls = ['https://energyplus.net/weather-region/north_and_central_america_wmo_region_4/USA',
	'https://energyplus.net/weather-region/north_and_central_america_wmo_region_4/CAN']
	dirname = 'weatherdata'

	# NOTE(review): whitespace-flattened repeat of parse_files() from earlier
	# in this file. The body sits at the same indent as the `def`, so Python
	# cannot parse it as written -- confirm whether this copy should be removed.
	# Per the statements below: take the first "btn-group-vertical" element
	# and, for every child except the last, fetch domain+href and write the
	# response body to dirname/<last path segment of the href>.
	def parse_files(html):
	soup = BeautifulSoup(html, 'html.parser')
	divs = soup.find_all(class_="btn-group-vertical")[0]
	for div in divs.contents[:-1]:
	path = div.get('href')
	filename = path.split('/')[-1]
	response = requests.get(domain+path)
	if response.ok:
	with open(os.path.join(dirname, filename), 'wb') as f:
	f.write(response.content)
	print "{} saved".format(filename)


	# NOTE(review): whitespace-flattened repeat of parse_state() from earlier
	# in this file. The body sits at the same indent as the `def`, so Python
	# cannot parse it as written -- confirm whether this copy should be removed.
	# Per the statements below: fetch domain+href for each child of the first
	# "btn-group-vertical" element and pass successful responses to parse_files().
	def parse_state(html):
	soup = BeautifulSoup(html, 'html.parser')
	divs = soup.find_all(class_="btn-group-vertical")[0]
	for div in divs:
	response = requests.get(domain+div.get('href'))
	if response.ok:
	parse_files(response.content)


	# NOTE(review): whitespace-flattened repeat of the top-level crawl loop
	# from earlier in this file. The loop body sits at the same indent as the
	# `for`, so Python cannot parse it as written -- confirm whether this copy
	# should be removed.
	# Per the statements below: for each entry in urls, derive `domain` from
	# the URL's scheme and host, then fetch every href in the page's second
	# "btn-group-vertical" element and pass successful responses to parse_state().
	for url in urls:
	response = requests.get(url)
	parsed_uri = urlparse(url)
	domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
	if response.ok:
	html = response.content
	soup = BeautifulSoup(html, 'html.parser')
	divs = soup.find_all(class_="btn-group-vertical")[1]
	for div in divs:
	response = requests.get(domain+div.get('href'))
	if response.ok:
	parse_state(response.content)