Skip to content

Instantly share code, notes, and snippets.

# Read the source data file.
drio = pd.read_csv("RioDJ.csv")
drio  # notebook display of the raw frame; has no effect when run as a script

# Rename the two columns and convert the imported date strings to datetimes.
# Bracket access is used throughout instead of mixing it with attribute access,
# and the former no-op self-assignment of the "Date" column is gone.
drio.columns = ["Date", "Temp"]
drio["Date"] = pd.to_datetime(drio["Date"])

# Use the dates as the index. Passing the Series itself (rather than the
# column label) keeps "Date" available as a regular column as well.
drio = drio.set_index(drio["Date"])

# Calendar helper columns derived from the datetime index.
drio["year"] = drio.index.year
drio["month"] = drio.index.month
drio.info()
# Tabular view of the source data: mean temperature per month (rows) and
# year (columns), with an "Avg" margin; missing cells are filled with "".
drio["month"] = drio.index.month
drio["year"] = drio.index.year
pivot = drio.pivot_table(
    values="Temp",
    index="month",
    columns="year",
    aggfunc="mean",
    margins=True,
    margins_name="Avg",
    fill_value="",
)
pivot.transpose()  # notebook display with years as rows
# Use a pivot-table aggregation to plot the yearly mean temperature together
# with its 10-year rolling average and see whether there is a trend.
year_avg = pd.pivot_table(drio, values="Temp", index="year", aggfunc="mean")
year_avg["10 Years RA"] = year_avg["Temp"].rolling(10).mean()

year_avg[["Temp", "10 Years RA"]].plot(figsize=(20, 6))
plt.title("Average Temperature in Rio de Janeiro")

# Place a tick every 10 years, counting back from the most recent year.
min_Y = drio["year"].min()
max_Y = drio["year"].max()
plt.xticks(list(range(max_Y, min_Y, -10)))

# Render and finish this figure here. Without this call the figure stays
# current, so later pyplot / Series.plot calls would draw onto these axes.
plt.show()
# Create a column with the rolling 12-row (12-month) average temperature.
drio["roll12M"] = drio["Temp"].rolling(12).mean()
# rolling(12) leaves the first 11 rows as NaN; dropna() removes them, along
# with any other row that contains a missing value.
drio = drio.dropna()

# Open a dedicated figure and plot onto its axes explicitly, then set the
# title. Setting the title before plotting would label whichever axes happen
# to be current, and Series.plot would otherwise reuse an already-open figure.
plt.figure(figsize=(20, 6))
drio["roll12M"].plot(ax=plt.gca())
plt.title("12-Month Rolling Average Temperature in Rio de Janeiro")
plt.show()
# pmdarima - ADF test - should we difference?
# ADF null hypothesis: the series is not stationary
def ADF_pmd(x):
    """Run pmdarima's ADF test on ``x`` and summarise the outcome as a dict.

    The test is created with the ``ALPHA`` threshold defined elsewhere in the
    file. A p-value above ``ALPHA`` is reported as "non-stationary",
    anything else as "stationary".

    Returns:
        dict with the differencing recommendation, the p-value and the
        conclusion string.
    """
    outcome = pmd.arima.stationarity.ADFTest(alpha=ALPHA).should_diff(x)
    p_value = outcome[0]
    if p_value > ALPHA:
        verdict = "non-stationary"
    else:
        verdict = "stationary"
    return {
        "should we difference? ": outcome[1],
        "p-value ": p_value,
        "conclusion": verdict,
    }
# call the ADF test:
# pmdarima - KPSS test - should we difference?
# null hypothesis: the series is at least trend stationary
def KPSS_pmd(x):
    """Run pmdarima's KPSS test on ``x`` and summarise the outcome as a dict.

    The test is created with the ``ALPHA`` threshold defined elsewhere in the
    file. A p-value at or below ``ALPHA`` is reported as "not stationary",
    anything else as "stationary".

    Returns:
        dict with the differencing recommendation, the p-value and the
        conclusion string.
    """
    outcome = pmd.arima.stationarity.KPSSTest(alpha=ALPHA).should_diff(x)
    p_value = outcome[0]
    if p_value <= ALPHA:
        verdict = "not stationary"
    else:
        verdict = "stationary"
    return {
        "should we difference? ": outcome[1],
        "p-value ": p_value,
        "conclusion": verdict,
    }
# call the KPSS test:
# Compare the ADF and KPSS results side by side: one row per result key,
# one column per test.
# NOTE(review): resADF / resKPSS are not assigned in the visible code;
# presumably they hold the dicts returned by ADF_pmd and KPSS_pmd - confirm.
test_values = zip(resADF.values(), resKPSS.values())
# Pair each key of resADF positionally with its (ADF, KPSS) value tuple.
dict_tests = dict(zip(resADF, test_values))
df_tests = pd.DataFrame(dict_tests).T
df_tests.columns = ["ADF", "KPSS"]
df_tests
# We apply the ADF and KPSS tests of statsmodels.stattools:
# statsmodels - ADF test
# null hypothesis: There is a unit root and the series is NOT stationary
# Low p-values are preferable
# get results as a dictionary
def ADF_statt(x):
    """Run the statsmodels ADF test on ``x`` and return the results as a dict.

    The null hypothesis is that a unit root is present (the series is NOT
    stationary), so a p-value above the ``ALPHA`` threshold defined elsewhere
    in the file is reported as "not stationary" and as a recommendation to
    difference.

    Previously the statistic and p-value were computed and then discarded;
    they are now returned in the same layout KPSS_statt uses.

    Returns:
        dict with the test statistic, the p-value, the differencing
        recommendation and the conclusion string.
    """
    # Only the first two entries of the result tuple are needed here.
    t_stat, p_value, *_ = adfuller(x, autolag="aic")
    conclusion = "not stationary" if p_value > ALPHA else "stationary"
    res_dict = {
        "ADF statistic": t_stat,
        "p-value": p_value,
        "should we difference?": (p_value > ALPHA),
        "conclusion": conclusion,
    }
    return res_dict
# statsmodels - KPSS test
# more detailed output than pmdarima
# null hypothesis: The series is (at least trend-)stationary
# High p-values are preferable
# get results as a dictionary
def KPSS_statt(x):
kpss_test = kpss(x)
t_stat, p_value, _, critical_values = kpss_test
conclusion = "stationary" if p_value > ALPHA else "not stationary"
res_dict = {"KPSS statistic":t_stat, "p-value":p_value, "should we difference?": (p_value < ALPHA), "conclusion": conclusion}