Instantly share code, notes, and snippets.

Embed
What would you like to do?
Understanding the difference between R_squared and Adjusted R_squared
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
def metrics(m,X,y):
yhat = m.predict(X)
print(yhat)
SS_Residual = sum((y-yhat)**2)
SS_Total = sum((y-np.mean(y))**2)
r_squared = 1 - (float(SS_Residual))/SS_Total
adj_r_squared = 1 - (1-r_squared)*(len(y)-1)/(len(y)-X.shape[1]-1)
return r_squared,adj_r_squared
data = pd.DataFrame({"x1": [1,2,3,4,5], "x2": [2.1,4,6.1,8,10.1]})
y = np.array([2.1, 4, 6.2, 8, 9])
model1 = linear_model.LinearRegression()
model1.fit( data.drop("x2", axis = 1),y)
metrics(model1,data.drop("x2", axis=1),y)
model2 = linear_model.LinearRegression()
model2.fit( data,y)
metrics(model2,data,y)
data = pd.DataFrame({"x1": [1,2,3,4,5], "x2": [2.1,4,6.1,8,10.1]} )
y = np.array([2.1, 4, 6.2, 8, 9])
model3 = linear_model.LinearRegression()
model3.fit( data,y)
metrics(model3,data,y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment