# KhyatiMahendru/mtcars_regression.py

## mtcars_regression.py
# Fit an ordinary least-squares model that predicts mpg from the continuous
# mtcars variables, then print the in-sample R^2 and adjusted R^2.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler

# import dataset
data = pd.read_csv('mtcars.csv')

# remove string and categorical variables
cat_var = ['model', 'cyl', 'vs', 'am', 'gear', 'carb']
data = data.drop(cat_var, axis=1)

# scale the variables to prevent coefficients from becoming too large or too small
# (fit_transform returns a NumPy array, hence the positional indexing below)
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

# split into target and predictors
# NOTE(review): assumes mpg is the first remaining column, as the original
# indexing did -- confirm against the CSV header.
y = data[:, 0]
# Use every remaining column as a predictor. A fixed 1:5 slice keeps at most
# four columns, which disagrees with the k used in the adjusted-R^2 formula
# whenever more than four predictors survive the drop above.
X = data[:, 1:]

# fit the linear regression model to predict mpg as a function of other variables
reg = LinearRegression()
model = reg.fit(X, y)

# calculate r2 score; the signature is r2_score(y_true, y_pred) and the metric
# is not symmetric, so the observed values must come first
r2 = r2_score(y, model.predict(X))

# adjusted r2 using formula adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
# n = number of observations, k = number of predictors actually fitted
n, k = X.shape
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print(r2, adj_r2)
	# Fit an ordinary least-squares model that predicts mpg from the continuous
	# mtcars variables, then print the in-sample R^2 and adjusted R^2.
	import pandas as pd
	from sklearn.linear_model import LinearRegression
	from sklearn.metrics import r2_score
	from sklearn.preprocessing import MinMaxScaler

	# import dataset
	data = pd.read_csv('mtcars.csv')

	# remove string and categorical variables
	cat_var = ['model', 'cyl', 'vs', 'am', 'gear', 'carb']
	data = data.drop(cat_var, axis=1)

	# scale the variables to prevent coefficients from becoming too large or too small
	# (fit_transform returns a NumPy array, hence the positional indexing below)
	scaler = MinMaxScaler()
	data = scaler.fit_transform(data)

	# split into target and predictors
	# NOTE(review): assumes mpg is the first remaining column, as the original
	# indexing did -- confirm against the CSV header.
	y = data[:, 0]
	# Use every remaining column as a predictor. A fixed 1:5 slice keeps at most
	# four columns, which disagrees with the k used in the adjusted-R^2 formula
	# whenever more than four predictors survive the drop above.
	X = data[:, 1:]

	# fit the linear regression model to predict mpg as a function of other variables
	reg = LinearRegression()
	model = reg.fit(X, y)

	# calculate r2 score; the signature is r2_score(y_true, y_pred) and the metric
	# is not symmetric, so the observed values must come first
	r2 = r2_score(y, model.predict(X))

	# adjusted r2 using formula adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
	# n = number of observations, k = number of predictors actually fitted
	n, k = X.shape
	adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
	print(r2, adj_r2)