h3ik0th

## quack_1_read
# Read the source data file (relative path, resolved against the current
# working directory).
drio = pd.read_csv("RioDJ.csv")
drio

## quack_1_dateidx
# The frame must have exactly two columns; give them descriptive names.
drio.columns = ["Date", "Temp"]
# Convert the imported object/string dates to datetime values.
drio["Date"] = pd.to_datetime(drio["Date"])
# Use the dates as the index; drop=False keeps "Date" available as a column too.
drio.set_index("Date", drop=False, inplace=True)
# Calendar helper columns derived from the datetime index.
drio["year"] = drio.index.year
drio["month"] = drio.index.month
drio.info()

## quack_1_pivot
# Month-by-year table of mean temperatures, including an "Avg" margin.
# (Re)derive the calendar columns from the datetime index first.
drio["month"], drio["year"] = drio.index.month, drio.index.year

pivot = drio.pivot_table(
    values='Temp',
    index='month',
    columns='year',
    aggfunc='mean',
    margins=True,
    margins_name="Avg",
    fill_value="",  # empty cells are filled with an empty string
)
# Show years as rows and months as columns.
pivot.T

## quack_1_pivplot
# Plot the yearly mean temperature together with its 10-year rolling average
# to see whether there is a trend.

year_avg = pd.pivot_table(drio, values='Temp', index='year', aggfunc='mean')
# The rolling mean is NaN until a full 10-year window is available.
year_avg['10 Years RA'] = year_avg['Temp'].rolling(10).mean()

year_avg[['Temp', '10 Years RA']].plot(figsize=(20, 6))
plt.title('Average Temperature in Rio de Janeiro')
# One tick every ten years, counting back from the most recent year.
min_Y = drio['year'].min()
max_Y = drio['year'].max()
plt.xticks(list(range(max_Y, min_Y, -10)))
# Display this figure on its own (the 12-month chart also ends with
# plt.show()); otherwise later plt.* calls may keep drawing on these axes.
plt.show()

## quack_1_roll12M_chart
# Add the 12-month rolling mean of the temperature as its own column.
drio["roll12M"] = drio["Temp"].rolling(window=12).mean()
# Drop every row that contains a NaN (this includes the rows where the
# rolling window is not yet full); drio is rebound to the reduced frame.
drio = drio.dropna()
# Draw the rolling mean and title the chart in a single call.
drio["roll12M"].plot(
    figsize=(20, 6),
    title='12-Month Rolling Average Temperature in Rio de Janeiro',
)
plt.show()

## quack_2_pmd_ADF
# pmdarima - ADF test - should we difference?
# ADF null hypothesis: the series is not stationary
def ADF_pmd(x):
    """Run pmdarima's ADF test on ``x`` and summarise the outcome.

    The test is created with the module-level ``ALPHA`` and its
    ``should_diff`` result is reported as (p-value, differencing flag).

    Returns:
        dict with the keys "should we difference? ", "p-value " and
        "conclusion".
    """
    p_value, needs_diff = pmd.arima.stationarity.ADFTest(alpha=ALPHA).should_diff(x)
    # ADF null hypothesis: the series is not stationary, so a p-value above
    # ALPHA means the null cannot be rejected.
    if p_value > ALPHA:
        conclusion = "non-stationary"
    else:
        conclusion = "stationary"
    return {
        "should we difference? ": needs_diff,
        "p-value ": p_value,
        "conclusion": conclusion,
    }

# call the ADF test:

## quack_2_pmd_KPSS
# pmdarima - KPSS test -  should we difference?
# null hypothesis: the series is at least trend stationary
def KPSS_pmd(x):
    """Run pmdarima's KPSS test on ``x`` and summarise the outcome.

    The test is created with the module-level ``ALPHA`` and its
    ``should_diff`` result is reported as (p-value, differencing flag).

    Returns:
        dict with the keys "should we difference? ", "p-value " and
        "conclusion".
    """
    p_value, needs_diff = pmd.arima.stationarity.KPSSTest(alpha=ALPHA).should_diff(x)
    # KPSS null hypothesis: the series is stationary, so a p-value at or
    # below ALPHA rejects stationarity.
    if p_value <= ALPHA:
        conclusion = "not stationary"
    else:
        conclusion = "stationary"
    return {
        "should we difference? ": needs_diff,
        "p-value ": p_value,
        "conclusion": conclusion,
    }

# call the KPSS test:

## quack_2_pmd_compare
# Compare the ADF and KPSS results side by side: one row per result key,
# one column per test. Values are paired by position and the row labels are
# taken from the ADF result's keys.
test_values = zip(resADF.values(), resKPSS.values())
dict_tests = dict(zip(resADF.keys(), test_values))
# orient="index" turns each key into a row directly, so no throwaway
# DataFrame instance, transpose or column renaming is needed.
df_tests = pd.DataFrame.from_dict(dict_tests, orient="index", columns=["ADF", "KPSS"])
df_tests

## quack_2_statt_ADF
# We apply the ADF and KPSS tests of statsmodels.stattools:


# statsmodels - ADF test
# null hypothesis: There is a unit root and the series is NOT stationary
# Low p-values are preferable
# get results as a dictionary
def ADF_statt(x):
    """Run the statsmodels ADF test on ``x`` and return the results as a dict.

    The ADF null hypothesis is a unit root (series NOT stationary), so a
    p-value above the module-level ``ALPHA`` is reported as "non-stationary".

    Args:
        x: the series passed straight through to ``adfuller``.

    Returns:
        dict with the keys "ADF statistic", "p-value",
        "should we difference?" (bool) and "conclusion" (str).
    """
    # Only the test statistic and the p-value are reported; the remaining
    # elements of the adfuller result are ignored.
    t_stat, p_value, *_ = adfuller(x, autolag="aic")
    # Failing to reject the unit-root null means differencing is advisable.
    should_diff = bool(p_value > ALPHA)
    conclusion = "non-stationary" if should_diff else "stationary"
    res_dict = {
        "ADF statistic": t_stat,
        "p-value": p_value,
        "should we difference?": should_diff,
        "conclusion": conclusion,
    }
    return res_dict

## quack_2_statt_KPSS
# statsmodels - KPSS test
# more detailed output than pmdarima
# null hypothesis: The series is (at least trend-)stationary
# High p-values are preferable
# get results as a dictionary
def KPSS_statt(x):
    """Run the statsmodels KPSS test on ``x`` and return the results as a dict.

    The KPSS null hypothesis is that the series is stationary, so only a
    p-value above the module-level ``ALPHA`` is reported as "stationary".

    Args:
        x: the series passed straight through to ``kpss``.

    Returns:
        dict with the keys "KPSS statistic", "p-value",
        "should we difference?" (bool) and "conclusion" (str).
    """
    # kpss yields a 4-tuple; only the statistic and the p-value are used.
    t_stat, p_value, _, _ = kpss(x)
    # Derive the flag and the wording from one comparison so they cannot
    # disagree (previously p_value == ALPHA produced "not stationary"
    # together with "should we difference?" False).
    is_stationary = bool(p_value > ALPHA)
    conclusion = "stationary" if is_stationary else "not stationary"
    res_dict = {
        "KPSS statistic": t_stat,
        "p-value": p_value,
        "should we difference?": not is_stationary,
        "conclusion": conclusion,
    }
    # The dict was previously built but never handed back to the caller.
    return res_dict
	# Read the source data file (relative path, resolved against the current
	# working directory).
	drio = pd.read_csv("RioDJ.csv")
	drio
	# The frame must have exactly two columns; give them descriptive names.
	drio.columns = ["Date", "Temp"]
	# Convert the imported object/string dates to datetime values.
	drio["Date"] = pd.to_datetime(drio["Date"])
	# Use the dates as the index; drop=False keeps "Date" available as a column too.
	drio.set_index("Date", drop=False, inplace=True)
	# Calendar helper columns derived from the datetime index.
	drio["year"] = drio.index.year
	drio["month"] = drio.index.month
	drio.info()
	# Month-by-year table of mean temperatures, including an "Avg" margin.
	# (Re)derive the calendar columns from the datetime index first.
	drio["month"], drio["year"] = drio.index.month, drio.index.year

	pivot = drio.pivot_table(
		values='Temp',
		index='month',
		columns='year',
		aggfunc='mean',
		margins=True,
		margins_name="Avg",
		fill_value="",  # empty cells are filled with an empty string
	)
	# Show years as rows and months as columns.
	pivot.T
	# Plot the yearly mean temperature together with its 10-year rolling average
	# to see whether there is a trend.

	year_avg = pd.pivot_table(drio, values='Temp', index='year', aggfunc='mean')
	# The rolling mean is NaN until a full 10-year window is available.
	year_avg['10 Years RA'] = year_avg['Temp'].rolling(10).mean()

	year_avg[['Temp', '10 Years RA']].plot(figsize=(20, 6))
	plt.title('Average Temperature in Rio de Janeiro')
	# One tick every ten years, counting back from the most recent year.
	min_Y = drio['year'].min()
	max_Y = drio['year'].max()
	plt.xticks(list(range(max_Y, min_Y, -10)))
	# Display this figure on its own (the 12-month chart also ends with
	# plt.show()); otherwise later plt.* calls may keep drawing on these axes.
	plt.show()
	# Add the 12-month rolling mean of the temperature as its own column.
	drio["roll12M"] = drio["Temp"].rolling(window=12).mean()
	# Drop every row that contains a NaN (this includes the rows where the
	# rolling window is not yet full); drio is rebound to the reduced frame.
	drio = drio.dropna()
	# Draw the rolling mean and title the chart in a single call.
	drio["roll12M"].plot(
		figsize=(20, 6),
		title='12-Month Rolling Average Temperature in Rio de Janeiro',
	)
	plt.show()
	# pmdarima - ADF test - should we difference?
	# ADF null hypothesis: the series is not stationary
	def ADF_pmd(x):
		"""Run pmdarima's ADF test on ``x`` and summarise the outcome.

		The test is created with the module-level ``ALPHA`` and its
		``should_diff`` result is reported as (p-value, differencing flag).

		Returns:
			dict with the keys "should we difference? ", "p-value " and
			"conclusion".
		"""
		# The body must be indented under the def; it previously sat at the
		# same level, which is an IndentationError.
		adf_test = pmd.arima.stationarity.ADFTest(alpha=ALPHA)
		res = adf_test.should_diff(x)
		# ADF null hypothesis: the series is not stationary, so a p-value
		# above ALPHA means the null cannot be rejected.
		conclusion = "non-stationary" if res[0] > ALPHA else "stationary"
		resdict = {"should we difference? ": res[1], "p-value ": res[0], "conclusion": conclusion}
		return resdict

	# call the ADF test:
	# pmdarima - KPSS test - should we difference?
	# null hypothesis: the series is at least trend stationary
	def KPSS_pmd(x):
		"""Run pmdarima's KPSS test on ``x`` and summarise the outcome.

		The test is created with the module-level ``ALPHA`` and its
		``should_diff`` result is reported as (p-value, differencing flag).

		Returns:
			dict with the keys "should we difference? ", "p-value " and
			"conclusion".
		"""
		# The body must be indented under the def; it previously sat at the
		# same level, which is an IndentationError.
		kpss_test = pmd.arima.stationarity.KPSSTest(alpha=ALPHA)
		res = kpss_test.should_diff(x)
		# KPSS null hypothesis: the series is stationary, so a p-value at or
		# below ALPHA rejects stationarity.
		conclusion = "not stationary" if res[0] <= ALPHA else "stationary"
		resdict = {"should we difference? ": res[1], "p-value ": res[0], "conclusion": conclusion}
		return resdict

	# call the KPSS test:
	# Compare the ADF and KPSS results side by side: one row per result key,
	# one column per test. Values are paired by position and the row labels are
	# taken from the ADF result's keys.
	test_values = zip(resADF.values(), resKPSS.values())
	dict_tests = dict(zip(resADF.keys(), test_values))
	# orient="index" turns each key into a row directly, so no throwaway
	# DataFrame instance, transpose or column renaming is needed.
	df_tests = pd.DataFrame.from_dict(dict_tests, orient="index", columns=["ADF", "KPSS"])
	df_tests
	# We apply the ADF and KPSS tests of statsmodels.stattools:


	# statsmodels - ADF test
	# null hypothesis: There is a unit root and the series is NOT stationary
	# Low p-values are preferable
	# get results as a dictionary
	def ADF_statt(x):
		"""Run the statsmodels ADF test on ``x`` and return the results as a dict.

		The ADF null hypothesis is a unit root (series NOT stationary), so a
		p-value above the module-level ``ALPHA`` is reported as "non-stationary".

		Args:
			x: the series passed straight through to ``adfuller``.

		Returns:
			dict with the keys "ADF statistic", "p-value",
			"should we difference?" (bool) and "conclusion" (str).
		"""
		# Only the test statistic and the p-value are reported; the remaining
		# elements of the adfuller result are ignored.
		t_stat, p_value, *_ = adfuller(x, autolag="aic")
		# Failing to reject the unit-root null means differencing is advisable.
		should_diff = bool(p_value > ALPHA)
		conclusion = "non-stationary" if should_diff else "stationary"
		res_dict = {
			"ADF statistic": t_stat,
			"p-value": p_value,
			"should we difference?": should_diff,
			"conclusion": conclusion,
		}
		return res_dict
	# statsmodels - KPSS test
	# more detailed output than pmdarima
	# null hypothesis: The series is (at least trend-)stationary
	# High p-values are preferable
	# get results as a dictionary
	def KPSS_statt(x):
		"""Run the statsmodels KPSS test on ``x`` and return the results as a dict.

		The KPSS null hypothesis is that the series is stationary, so only a
		p-value above the module-level ``ALPHA`` is reported as "stationary".

		Args:
			x: the series passed straight through to ``kpss``.

		Returns:
			dict with the keys "KPSS statistic", "p-value",
			"should we difference?" (bool) and "conclusion" (str).
		"""
		# kpss yields a 4-tuple; only the statistic and the p-value are used.
		t_stat, p_value, _, _ = kpss(x)
		# Derive the flag and the wording from one comparison so they cannot
		# disagree (previously p_value == ALPHA produced "not stationary"
		# together with "should we difference?" False).
		is_stationary = bool(p_value > ALPHA)
		conclusion = "stationary" if is_stationary else "not stationary"
		res_dict = {
			"KPSS statistic": t_stat,
			"p-value": p_value,
			"should we difference?": not is_stationary,
			"conclusion": conclusion,
		}
		# The dict was previously built but never handed back to the caller.
		return res_dict