Created
June 15, 2012 16:31
-
-
Save geoffalday/2937441 to your computer and use it in GitHub Desktop.
Scrape weather data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import calendar
import urllib2

from bs4 import BeautifulSoup
# Scrape one year of daily history pages for airport KBNA from
# wunderground.com and write one CSV row per day to wunder-data-<year>.txt.
# NOTE: this script targets Python 2 (it relies on urllib2). The print calls
# use the single-argument parenthesized form, which prints the same text there.

# What year?
year = 2011

# Create/open the output file. The with-block guarantees the file is closed
# even when a request or a page parse raises partway through the year.
with open('wunder-data-' + str(year) + '.txt', 'w') as f:
    f.write('datestamp,tmean,tmax,tmin,precip,dewpoint\n')

    # Iterate through months and days
    for m in range(1, 13):
        # monthrange() returns (weekday of the 1st, number of days in month),
        # so February 29 is included when `year` is a leap year.
        days_in_month = calendar.monthrange(year, m)[1]

        for d in range(1, days_in_month + 1):
            # Open wunderground.com url
            print('Getting data for ' + '%d-%d-%d' % (year, m, d))
            url = ('http://www.wunderground.com/history/airport/KBNA/'
                   '%d/%d/%d/DailyHistory.html' % (year, m, d))
            page = urllib2.urlopen(url)

            # Get history table
            soup = BeautifulSoup(page)
            historyTable = soup.find('table', id='historyTable')
            spans = historyTable.find_all(attrs={'class': 'nobr'})

            # Pick the readings out by position among the 'nobr' elements.
            # NOTE(review): these indices are tied to the page layout at the
            # time of writing -- confirm against the live page before use.
            tmean = spans[0].span.string
            tmax = spans[2].span.string
            tmin = spans[5].span.string
            dewpoint = spans[8].span.string
            precip = spans[9].span.string

            # Build a zero-padded YYYYMMDD timestamp for the CSV row
            datestamp = '%d%02d%02d' % (year, m, d)

            # Echo the readings, then write the row. Both list the values in
            # the same order (the original echo repeated tmax in place of tmin).
            print(':'.join([tmean, tmax, tmin, precip, dewpoint]))
            f.write(','.join([datestamp, tmean, tmax, tmin, precip, dewpoint]) + '\n')

# Done getting data! The file was closed on leaving the with-block.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment