Last active
June 21, 2019 07:13
-
-
Save KhyatiMahendru/c0f9ea717912a36d9d2be87d4923f9bd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute R^2 and adjusted R^2 for a linear regression fitted on mtcars.csv.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler

# import dataset
data = pd.read_csv('mtcars.csv')

# remove string and categorical variables
cat_var = ['model', 'cyl', 'vs', 'am', 'gear', 'carb']
data = data.drop(cat_var, axis=1)

# scale the variables to prevent coefficients from becoming too large or too small
# NOTE: fit_transform returns a NumPy array, so columns are addressed by
# position from here on (column 0 is the target, mpg).
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

# name the target and the feature matrix once so that the fit, the prediction
# and the predictor count below all refer to exactly the same columns
X = data[:, 1:5]
y = data[:, 0]

# fit the linear regression model to predict mpg as a function of other variables
reg = LinearRegression()
model = reg.fit(X, y)

# calculate r2 score
# r2_score expects (y_true, y_pred); R^2 is not symmetric, so the order matters
r2 = r2_score(y, model.predict(X))

# adjusted r2 using formula adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
# n = number of observations, k = number of predictors the model was fitted on
# (taken from X rather than from data, which may hold columns the model ignores)
n, k = X.shape
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print(r2, adj_r2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment