This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn import linear_model | |
#Read the data into a pandas DataFrame | |
df = pd.read_csv('southern_osc.csv', header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0]) | |
#add two columns containing the LAG=1 and LAG=2 version of the data to the DataFrame | |
df['T_(i-1)'] = df['T_i'].shift(1) | |
df['T_(i-2)'] = df['T_i'].shift(2) | |
#drop the top two rows as they contain NaNs after shifting | |
df = df.drop(df.index[[0,1]]) | |
#fit a linear regression model on T_i and T_i-1 and add it's predictions to the DataFrame as a new column | |
lm = linear_model.LinearRegression() | |
df_X = df[['T_(i-1)']] #Note the double brackets! [[]] | |
df_y = df['T_i'] #Note the single brackets! [] | |
model = lm.fit(df_X,df_y) | |
df['Predicted_T_i|T_(i-1)'] = lm.predict(df_X) | |
#create the time series of residuals corresponding to the predictions of this model and add it to the DataFrame. | |
# This gives us the first one of the two time series we need for calculating the PACF for X at LAG=2 | |
#Observed minus predicted | |
df['Residual_T_i|T_(i-1)'] = df['T_i'] - df['Predicted_T_i|T_(i-1)'] | |
#repeat the above procedure to calculate the second time series of residuals | |
lm = linear_model.LinearRegression() | |
df_X = df[['T_(i-1)']] #Note the double brackets! [[]] | |
df_y = df['T_(i-2)'] #Note the single brackets! [] | |
model = lm.fit(df_X,df_y) | |
df['Predicted_T_(i-2)|T_(i-1)'] = lm.predict(df_X) | |
#Observed minus predicted | |
df['Residual_T_(i-2)|T_(i-1)'] = df['T_(i-2)'] - df['Predicted_T_(i-2)|T_(i-1)'] | |
#Finally, apply the formula for Pearson's r to the two time series of residuals to get the value of the PACF at LAG=2 | |
print(df.corr(method='pearson')['Residual_T_i|T_(i-1)']['Residual_T_(i-2)|T_(i-1)']) | |
#prints: 0.29612303554627606 | |
#or cheat, like so: | |
from statsmodels.tsa.stattools import pacf | |
print(pacf(df['T_i'], nlags=2)[2]) | |
#prints: 0.2996545841351261 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment