Skip to content

Instantly share code, notes, and snippets.

@i-namekawa
Last active September 29, 2015 09:09
Show Gist options
  • Save i-namekawa/aea89409c8fa89c24705 to your computer and use it in GitHub Desktop.
Save i-namekawa/aea89409c8fa89c24705 to your computer and use it in GitHub Desktop.
Basel weather analysis
import datetime
from bs4 import BeautifulSoup
import pandas as pd
import requests
def getData(year=2014, month=1, proxies=None):
    """Scrape one month of daily weather records from en.tutiempo.net.

    Downloads the monthly climate page for station ``ws-66010`` and parses
    its ``medias mensuales`` table.

    Args:
        year: Four-digit year of the page to fetch.
        month: Month number (1-12) of the page to fetch.
        proxies: Optional proxy mapping handed to ``requests.get``. ``None``
            (the default) passes no explicit proxies.

    Returns:
        A ``pandas.DataFrame`` with one column per parsed table row (keyed by
        the row's position, starting at 0) and one index entry per column
        header, plus ``'WD'``: the weekday of that row's ``'Day'`` as given
        by ``datetime.weekday()`` (Monday is 0). Cells that parse as numbers
        are stored as floats; every other cell keeps its raw text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``medias mensuales`` table, or a
            ``'Day'`` cell does not form a valid date.
    """
    url = 'http://en.tutiempo.net/climate/%02d-%d/ws-66010.html' % (month, year)
    r = requests.get(url, proxies=proxies)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()

    # No parser is named, so bs4 picks the best one that is installed.
    soup = BeautifulSoup(r.text)
    table = soup.find('table', 'medias mensuales')
    if table is None:
        raise ValueError('no "medias mensuales" table found at %s' % url)
    rows = table.findAll('tr')

    # Column labels come from the header row.
    labels = [th.text for th in rows[0].findAll('th')]

    # Parse the body; the first and last rows contain no daily data.
    records = dict()
    for rowind, tr in enumerate(rows[1:-1]):
        row = dict()
        for coln, td in enumerate(tr.findAll('td')):
            name = labels[coln]
            text = td.text
            try:
                row[name] = float(text)
            except ValueError:
                # Non-numeric cell (marker, dash, blank): keep the raw text.
                row[name] = text
            if name == 'Day':
                day = int(text)
                row['WD'] = datetime.datetime(
                    year=year, month=month, day=day).weekday()
        records[rowind] = row
    return pd.DataFrame(records)
def sortByWeekday(data, years, months, label='RA'):
    """Group one measurement by day of the week across the given months.

    Args:
        data: Mapping from ``(year, month)`` to a DataFrame laid out like the
            one ``getData`` returns: one column per day, with a ``'WD'`` row
            (weekday, Monday is 0) and a row for the requested ``label``.
        years: Iterable of years to include.
        months: Iterable of month numbers to include for every year.
        label: ``'RA'`` to collect a 1/0 rain flag per day (1 where the cell
            equals ``'o'``), or ``'PP'`` to collect the float values of that
            row, skipping non-numeric cells.

    Returns:
        A DataFrame with seven rows (0 = Monday ... 6 = Sunday). Row ``n``
        holds every value collected for that weekday; shorter rows are
        padded with NaN by pandas.

    Raises:
        ValueError: If ``label`` is neither ``'RA'`` nor ``'PP'``.
        KeyError: If ``data`` has no entry for a requested ``(year, month)``.
    """
    if label not in ('RA', 'PP'):
        # Previously an unknown label silently produced an empty frame,
        # which then summed to all-zero counts.
        raise ValueError("label must be 'RA' or 'PP', got %r" % (label,))

    week = [[] for _ in range(7)]
    for year in years:
        for month in months:
            # Look the month up once rather than once per weekday.
            frame = data[(year, month)]
            weekdays = frame.loc['WD']
            values = frame.loc[label]
            for n in range(7):
                selected = values[(weekdays == n).values].tolist()
                if label == 'PP':
                    # isinstance also accepts float subclasses such as
                    # numpy.float64, which an exact type() comparison drops.
                    week[n] += [v for v in selected if isinstance(v, float)]
                else:  # 'RA': rain or not
                    week[n] += [1 if v == 'o' else 0 for v in selected]
    return pd.DataFrame(week)
if __name__ == '__main__':
    # Script entry point: load (or re-download) the monthly tables, then plot
    # the number of rainy days per weekday for several periods.
    from dateutil.relativedelta import relativedelta
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    from pylab import *
    import scipy.stats as stats

    # Every (year, month) from Jan 2009 through Aug 2015, both inclusive.
    # Append before stepping: stepping first skipped January 2009.
    YM = []
    dt = datetime.datetime(2009, 1, 1)
    last_month = datetime.datetime(2015, 8, 1)
    while dt <= last_month:
        YM.append((dt.year, dt.month))
        dt += relativedelta(months=1)

    FETCH_ONLINE = False  # set to True to re-download and refresh the pickle
    if FETCH_ONLINE:  # get data online
        data = dict()
        for year, month in YM:
            print('Processing  %s %s' % (year, month))
            data[(year, month)] = getData(month=month, year=year)
        # pickle for later use
        with open('weather_basel.pickle', 'wb') as f:
            pickle.dump(data, f)
    else:  # or load saved data
        # NOTE: pickle.load runs arbitrary code from the file; only load a
        # pickle this script wrote itself. A pickle written by the earlier
        # version of the loop above has no (2009, 1) entry.
        with open('weather_basel.pickle', 'rb') as f:
            data = pickle.load(f)

    def myplot(x, y, ax, _title):
        """Draw an unfilled weekday bar chart on the current axes.

        ``x`` are the bar positions, ``y`` the seven per-weekday totals,
        ``ax`` the axes whose top/right spines are hidden, and ``_title``
        the plot title.
        """
        bar(x, y, facecolor='none')
        xticks(x+0.4, ['Mon', 'Tue', 'Wed', 'Thr', 'Fri', 'Sat', 'Sun'])
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        tick_params(direction='out', right='off', top='off')
        title(_title)

    # Figure 1: rainy days per weekday, one panel per full year.
    figure(facecolor='w')
    x = np.arange(7)+0.4
    months = range(1, 13)
    yearly = {}
    for panel, year in enumerate((2014, 2013, 2012, 2011), 1):
        ax = subplot(2, 2, panel)
        yearly[year] = sortByWeekday(data, years=[year], months=months, label='RA')
        myplot(x, yearly[year].T.sum(), ax, str(year))
    # Keep the per-year names; later code (and the notes below) use them.
    Data2014 = yearly[2014]
    Data2013 = yearly[2013]
    Data2012 = yearly[2012]
    Data2011 = yearly[2011]
    tight_layout()

    # --- Earlier analyses, kept for their recorded results ---------------
    # #[Mon=0 to Sun=6], [# of rainy days in 2014]
    # #0 19
    # #1 18
    # #2 19
    # #3 15
    # #4 15
    # #5 21 (10 more days to be significant at 5%)
    # #6 11
    # chi, p = stats.mstats.chisquare( Data2014.T.sum() )
    # # p = 0.665
    # figure(facecolor='w')
    # ax = subplot(111)
    # DataLast2years = Data2014.T.sum() + Data2013.T.sum()
    # myplot(x, DataLast2years, ax, '2013 and 2014')
    # #[Mon=0 to Sun=6], [# of rainy days in 2013-2014]
    # #0 31
    # #1 31
    # #2 35
    # #3 30
    # #4 33
    # #5 38
    # #6 30
    # chi, p = stats.mstats.chisquare( DataLast2years.T.sum() )
    # # p = 0.948
    # figure(facecolor='w')
    # ax = subplot(111)
    # DataLast6years = sortByWeekday(data, years=range(2009,2015), months=months, label='RA')
    # myplot(x, DataLast6years.T.sum(), ax, '2009-2014')
    # figure(facecolor='w')
    # ax = subplot(111)
    # Data2015 = sortByWeekday(data, years=[2015], months=[1,2,3], label='RA')
    # myplot(x, Data2015.T.sum(), ax, '2015')
    # figure(facecolor='w')
    # ax = subplot(111)
    # RecentDays = Data2015.T.sum() + Data2014.T.sum() + Data2013.T.sum()
    # myplot(x, RecentDays, ax, '2013-2015Mar')
    # #[Mon=0 to Sun=6], [# of rainy days in 2013-2015Mar]
    # #0 33
    # #1 32
    # #2 36
    # #3 34
    # #4 35
    # #5 44
    # #6 34
    # chi, p = stats.mstats.chisquare( RecentDays )
    # # p = 0.8452
    # figure(facecolor='w')
    # ax = subplot(111)
    # RecentDays = Data2015.T.sum() + Data2014.T.sum()
    # myplot(x, RecentDays, ax, '2014-2015Mar')
    # #[Mon=0 to Sun=6], [# of rainy days in 2014-2015Mar]
    # #0 21
    # #1 19
    # #2 20
    # #3 19
    # #4 17
    # #5 27
    # #6 15
    # chi, p = stats.mstats.chisquare( RecentDays )
    # # p = 0.6316

    # Figure 2: 2015 so far (Jan-Aug), and everything since 2013 combined.
    fig = figure(facecolor='w')
    ax = subplot(121)
    months = np.arange(1, 9)
    Data2015 = sortByWeekday(data, years=[2015], months=months, label='RA')
    print('%s %s' % (months, Data2015.T.sum()))
    myplot(x, Data2015.T.sum(), ax, '2015 Jan-Aug')

    ax = subplot(122)
    RecentDays = Data2015.T.sum() + Data2014.T.sum() + Data2013.T.sum()
    myplot(x, RecentDays, ax, '2013-2015Aug')
    show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment