Skip to content

Instantly share code, notes, and snippets.

@sachinsdate
Created October 26, 2019 18:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sachinsdate/6787c43663b7c1fac279c9d0c93bf1bc to your computer and use it in GitHub Desktop.
Save sachinsdate/6787c43663b7c1fac279c9d0c93bf1bc to your computer and use it in GitHub Desktop.
F-test for regression analysis. An illustrative example
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Create a pandas DataFrame for the DJIA data set. The first CSV column is
# parsed as dates and used as the index.
# NOTE: `infer_datetime_format` is intentionally not passed. It has been
# deprecated since pandas 2.0 (a strict version of format inference is now the
# default), so supplying it only produces a deprecation warning.
df = pd.read_csv('djia.csv', header=0, parse_dates=[0], index_col=[0])
################################
######## THE MEAN MODEL ########
################################
# The mean model predicts one constant for every observation: the mean of the
# 'Closing Price' column, rounded to 2 decimal places.
mean = round(df['Closing Price'].mean(), 2)
y_pred = np.full(len(df['Closing Price']), mean)
# Plot the actual closing prices against the constant prediction.
fig = plt.figure()
fig.suptitle('DJIA Closing Price')
# plt.plot returns a list of lines; the trailing comma unpacks the single line
# so it can be handed to the legend below.
actual, = plt.plot(df.index, df['Closing Price'], 'go-', label='Actual Closing Price')
predicted, = plt.plot(df.index, y_pred, 'ro-', label='Predicted Closing Price')
plt.xlabel('Date')
plt.ylabel('Closing Price (USD)')
plt.legend(handles=[predicted, actual])
plt.show()
###########################################
######## THE LAGGED VARIABLE MODEL ########
###########################################
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Read the data set into a pandas DataFrame. The first CSV column is parsed as
# dates and used as the index.
# NOTE: `infer_datetime_format` is intentionally not passed. It has been
# deprecated since pandas 2.0 (a strict version of format inference is now the
# default), so supplying it only produces a deprecation warning.
df = pd.read_csv('djia.csv', header=0, parse_dates=[0], index_col=[0])
# Add the lagged column: each row receives the previous row's closing price.
df['CP_LAGGED'] = df['Closing Price'].shift(1)
# Remove the first row, as shift(1) leaves a NaN there. The row is dropped by
# position rather than by label so that a repeated index label cannot remove
# more than that one row.
df_lagged = df.iloc[1:]
# Split by date: rows dated on or before the row at position
# round(0.8 * number_of_rows) form the training set, later rows the test set.
split_index = round(len(df_lagged)*0.8)
split_date = df_lagged.index[split_index]
df_train = df_lagged.loc[df_lagged.index <= split_date].copy()
df_test = df_lagged.loc[df_lagged.index > split_date].copy()
X_train = df_train['CP_LAGGED'].values
# Add a placeholder for the constant so that the model computes an intercept
# value, i.e. the regression equation takes the form: y = Beta_0 + Beta_1*x
X_train = sm.add_constant(X_train)
y_train = df_train['Closing Price'].values
X_test = df_test['CP_LAGGED'].values
# Add a placeholder for the constant so that the model computes an intercept value.
X_test = sm.add_constant(X_test)
y_test = df_test['Closing Price'].values
# Construct and fit the OLS regression model:
ols_model = sm.OLS(y_train, X_train)
ols_results = ols_model.fit()
# Use the fitted model to make predictions on the training and testing data
# sets (computed once; the original repeated these two statements verbatim):
y_pred_train = ols_results.predict(X_train)
y_pred_test = ols_results.predict(X_test)
# Plot the model's performance against the test data set:
fig = plt.figure()
fig.suptitle('DJIA Closing Price')
# plt.plot returns a list of lines; the trailing comma unpacks the single line
# so it can be handed to the legend below.
actual, = plt.plot(df_test.index, y_test, 'go-', label='Actual Closing Price')
predicted, = plt.plot(df_test.index, y_pred_test, 'ro-', label='Predicted Closing Price')
plt.xlabel('Date')
plt.ylabel('Closing Price (USD)')
plt.legend(handles=[predicted, actual])
plt.show()
# Print the summary of regression results
print(ols_results.summary())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment