Skip to content

Instantly share code, notes, and snippets.

@geoffalday
Created June 15, 2012 16:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save geoffalday/2937441 to your computer and use it in GitHub Desktop.
Save geoffalday/2937441 to your computer and use it in GitHub Desktop.
Scrape weather data
import calendar
import urllib2

from bs4 import BeautifulSoup
# Scrape one year of daily weather history for airport KBNA from
# wunderground.com and write it to a CSV file, one row per day.

# Year of daily history to download.
year = 2011

# Create (or overwrite) the per-year output file, e.g. wunder-data-2011.txt.
# The with-statement guarantees the file is flushed and closed even if a
# request or a parse step raises part-way through the year.
with open('wunder-data-' + str(year) + '.txt', 'w') as f:
    f.write('datestamp,tmean,tmax,tmin,precip,dewpoint\n')

    # Iterate through months and days.
    for m in range(1, 13):
        # monthrange() returns (weekday of the 1st, number of days in month).
        # Using it instead of hard-coded month lengths keeps Feb 29 when
        # `year` is changed to a leap year.
        days_in_month = calendar.monthrange(year, m)[1]

        for d in range(1, days_in_month + 1):
            print('Getting data for %d-%d-%d' % (year, m, d))

            # Open the wunderground.com daily-history page for this date.
            url = ('http://www.wunderground.com/history/airport/KBNA/'
                   '%d/%d/%d/DailyHistory.html' % (year, m, d))
            page = urllib2.urlopen(url)
            try:
                soup = BeautifulSoup(page)
            finally:
                # Release the HTTP connection as soon as the page is parsed.
                page.close()

            # Get the history table and every element with class "nobr" in it.
            historyTable = soup.find('table', id='historyTable')
            spans = historyTable.find_all(attrs={'class': 'nobr'})

            # The values are picked by position among the "nobr" elements.
            # NOTE(review): these indices presumably mirror the page layout
            # at the time of writing -- confirm against the live markup.
            tmean = spans[0].span.string
            tmax = spans[2].span.string
            tmin = spans[5].span.string
            dewpoint = spans[8].span.string
            precip = spans[9].span.string

            # Build a zero-padded YYYYMMDD datestamp for the CSV row.
            datestamp = '%d%02d%02d' % (year, m, d)

            # Echo the values, then append the row to the file.
            # (The progress line previously printed tmax twice; the third
            # field is tmin, matching the CSV column order.)
            print(tmean + ':' + tmax + ':' + tmin + ':' + precip + ':' + dewpoint)
            f.write(datestamp + ',' + tmean + ',' + tmax + ',' + tmin + ','
                    + precip + ',' + dewpoint + '\n')

# Done getting data! The with-block above has already closed the file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment