Skip to content

Instantly share code, notes, and snippets.

# Tabulate each feature name next to the coefficient the fitted model gave it.
reg_summary = pd.DataFrame({"Features": Xi.columns.values})
reg_summary["Weights"] = regr.coef_

# Horizontal bar chart of the coefficients. Rows are ordered by absolute
# weight (largest magnitude first) so positive and negative weights are
# ranked together.
f, ax = plt.subplots(figsize=(15, 6))
sns.barplot(
    x="Weights",
    y="Features",
    data=reg_summary.sort_values("Weights", ascending=False, key=abs),
    label="Weights",
    color="b",
    ax=ax,
)
ax.set_title("Feature Weights in Linear Regression (Test)", fontsize=20)
# Build a table comparing test-set predictions with the true targets.
# The target's index is dropped first so that it lines up row-by-row with the
# freshly built prediction frame when assigned as a column.
y_test = y_test.reset_index(drop=True)
df_pf = pd.DataFrame(y_hat_test, columns=["Predicted"])
df_pf["Target"] = y_test

# Signed error, and absolute error expressed as a percentage of the target.
df_pf["Residual"] = df_pf["Target"] - df_pf["Predicted"]
df_pf["Residual%"] = (df_pf["Residual"] / df_pf["Target"] * 100).abs()

# Summary statistics of all four columns (displayed when run as a notebook cell).
df_pf.describe()
from matplotlib import pyplot as plt

# Evaluate the fitted model on the held-out split: predictions and the value
# returned by the estimator's score() method.
y_hat_test = regr.predict(X_test)
xi_r_sqr_test = regr.score(X_test, y_test)

# Predicted vs. actual scatter; low alpha keeps overlapping points readable.
plt.scatter(y_test, y_hat_test, alpha=0.2)
plt.xlabel('Work Life Balance Score Target (y_test)', size=16)
# The y-axis carries the model's predictions, so the label names y_hat_test.
plt.ylabel('Work Life Balance Score Predicted (y_hat_test)', size=16)
plt.title(f'Model Trained R Squared ={xi_r_sqr_test:.3f}', size=20)
import statsmodels.api as sm

# Add an explicit constant column to the training features, fit an OLS model
# on them, and print the full regression summary table.
X_train_Sm = sm.add_constant(X_train)
ls = sm.OLS(y_train, X_train_Sm).fit()
print(ls.summary())
# Goldfeld-Quandt test for heteroscedasticity.
# `residuals` and `X_train` must already be defined when this runs.
# NOTE(review): statsmodels documents the first argument of
# het_goldfeldquandt as the endogenous variable; residuals are passed here,
# and X_train has no constant column — confirm this is intended.
import statsmodels.stats.api as sms
from statsmodels.compat import lzip
name = ['F statistic', 'p-value']
test = sms.het_goldfeldquandt(residuals, X_train)
# Pair each label with the corresponding test output; the pairing stops at
# the shorter sequence, so only as many values as there are labels are shown.
lzip(name, test)
# Residuals-vs-fitted scatter used as a visual homoscedasticity check.
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally.
p = sns.scatterplot(x=y_pred, y=residuals)
plt.xlabel('y_pred/predicted values')
plt.ylabel('Residuals')
# Horizontal reference line at zero spanning the range of fitted values.
p = sns.lineplot(x=[y_pred.min(), y_pred.max()], y=[0, 0], color='blue')
p = plt.title('Residuals vs fitted values plot for homoscedasticity check')
# Histogram of the residuals with a kernel density overlay, used to eyeball
# whether the error terms look normally distributed. histplot replaces the
# deprecated distplot; stat="density" keeps the density-scaled y-axis that
# distplot drew when a KDE was requested.
# NOTE(review): assumes `residuals` is one-dimensional — confirm.
p = sns.histplot(residuals, kde=True, stat="density")
p = plt.title('Normality of error terms/residuals')
# Training residuals: observed target minus fitted value.
# NOTE(review): `y_pred` is not defined in the visible code — presumably the
# model's predictions for X_train; confirm.
residuals = y_train.values - y_pred

# Report the average residual.
mean_residuals = np.mean(residuals)
print(f"Mean of Residuals {mean_residuals}")
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn import linear_model

# Hold out a quarter of the rows for testing; the fixed random_state makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    Xi,
    yi,
    test_size=0.25,
    random_state=0,
)

# Create the linear regression estimator and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
regr = linear_model.LinearRegression().fit(X_train, y_train)
# Standardise both feature tables with scikit-learn's StandardScaler.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Each call fits the scaler on the table it is given and returns the scaled
# values, which are wrapped back into a DataFrame under the original column
# names. The same `sc` object is refit by the second call, so afterwards it
# holds the statistics of `xt`, not `xi`.
Xi = pd.DataFrame(
    sc.fit_transform(xi),
    columns=xi.columns,
)
Xt = pd.DataFrame(
    sc.fit_transform(xt),
    columns=xt.columns,
)