Created
May 28, 2012 03:20
-
-
Save ethanwhite/2817030 to your computer and use it in GitHub Desktop.
A simpler version of Michael Hansen's temperature trend example using Pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Trying Michael Hansen's temperature trend example using Pandas | |
The original example, without Pandas, is available at: | |
http://software-carpentry.org/2012/05/an-exercise-with-matplotlib-and-numpy/ | |
""" | |
import os | |
import pandas | |
import numpy as np | |
import matplotlib.pyplot as pyplot | |
from scipy import stats | |
from datetime import datetime | |
# Thanks to Pandas we don't need to deal with the date conversion manually,
# so there is no event2int() or date2int().
# We also use scipy.stats, so we don't need r_squared() either.
def read_weather(file_name):
    """Load the weather CSV at *file_name* into a Pandas DataFrame.

    The first column becomes the index and Pandas is asked to parse it as
    dates, so no manual date handling is needed here.
    """
    return pandas.read_csv(file_name, parse_dates=True, index_col=0)
def temp_plot(mean_temps, min_temps=None, max_temps=None,
              year=2012, location='Bloomington'):
    """Plot mean temperature against day of year, with a linear trend line.

    Parameters
    ----------
    mean_temps : pandas.Series
        Mean temperatures. Each index entry must support subtraction of a
        ``datetime`` (e.g. the date index produced by ``read_weather``).
    min_temps, max_temps : pandas.Series, optional
        Minimum and maximum temperatures aligned with ``mean_temps``.
        Error bars are drawn only when *both* are supplied.
    year : int, optional
        Calendar year whose 1 January counts as day 1 on the x axis; it is
        also shown in the title. Defaults to 2012.
    location : str, optional
        Place name shown in the title. Defaults to 'Bloomington'.

    Returns
    -------
    The newly created matplotlib figure.
    """
    year_start = datetime(year, 1, 1)
    # 1-based day number relative to 1 January of `year`.
    days = np.array([(d - year_start).days + 1 for d in mean_temps.index])

    fig = pyplot.figure()
    title = 'Temperatures in {0} {1}'.format(location, year)
    pyplot.ylabel('Mean Temperature (F)')
    pyplot.xlabel('Day of Year')

    if max_temps is None or min_temps is None:
        pyplot.plot(days, mean_temps, marker='o')
    else:
        # Asymmetric error bars: first row is the distance from the mean
        # down to the minimum, second row the distance up to the maximum.
        temp_err = np.vstack((mean_temps - min_temps,
                              max_temps - mean_temps))
        pyplot.errorbar(days, mean_temps, marker='o', yerr=temp_err)
        title += ' (max/min)'
    pyplot.title(title)

    # Use Scipy instead of calculating R^2 by hand; the p-value and standard
    # error are returned as well but are not needed for the plot.
    slope, intercept, rval, _pval, _stderr = stats.linregress(days, mean_temps)
    ideal_temps = intercept + (slope * days)
    fit_label = 'Linear fit ({0:.3f})'.format(slope)

    pyplot.plot(days, ideal_temps, color='red', linestyle='--', label=fit_label)
    pyplot.annotate('r^2 = {0:.3f}'.format(rval ** 2), (0.05, 0.9),
                    xycoords='axes fraction')
    pyplot.legend(loc='lower right')
    return fig
#--------------------------------------------------
# Script body: load the weather table, pull out the columns we need, and
# write both plots into the plots/ directory.
data = read_weather('data/weather.csv')

mean_temps = data['Mean TemperatureF']
min_temps = data['Min TemperatureF']
max_temps = data['Max TemperatureF']
events = data[' Events']  # key has a leading space; not used below

if not os.path.exists('plots'):
    os.mkdir('plots')

# In Pandas the temperatures stay associated with their dates, so the Series
# objects can be handed to temp_plot directly. The first entry plots the
# means alone; the second adds min/max error bars.
for _out_path, _series in (
        ('plots/day_vs_temp.png', (mean_temps,)),
        ('plots/day_vs_temp-all.png', (mean_temps, min_temps, max_temps))):
    fig = temp_plot(*_series)
    fig.savefig(_out_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment