Skip to content

Instantly share code, notes, and snippets.

@ksv-muralidhar
Last active August 27, 2021 17:55
Show Gist options
  • Save ksv-muralidhar/5fad685b08ceee0a51a4f634881a6ec2 to your computer and use it in GitHub Desktop.
Save ksv-muralidhar/5fad685b08ceee0a51a4f634881a6ec2 to your computer and use it in GitHub Desktop.
r2
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.datasets import load_boston
# Load the Boston housing data once and reuse the returned bunch for both the
# feature matrix and the target (the dataset was previously loaded twice).
boston = load_boston()
X = boston['data'].copy()
y = boston['target'].copy()

# Fit ordinary least squares on the full data set and predict on the same
# rows, so the R-squared values reported later are in-sample scores.
linear_regression = LinearRegression()
linear_regression.fit(X, y)
prediction = linear_regression.predict(X)
def my_r2_score(y_true, y_hat):
    """Print R-squared for ``y_hat`` against ``y_true`` computed three ways.

    The three printed values are:

    * SS_explained / SS_total
    * 1 - (SS_residual / SS_total)
    * the result of scikit-learn's ``r2_score(y_true, y_hat)``

    Args:
        y_true: Observed target values (array-like supporting NumPy
            broadcasting against a scalar and against ``y_hat``).
        y_hat: Predicted target values, aligned with ``y_true``.

    Returns:
        None. The report is written to standard output.
    """
    target_mean = np.mean(y_true)

    # Sums of squares around the mean of the observed targets.
    total_ss = np.sum((y_true - target_mean) ** 2)
    explained_ss = np.sum((y_hat - target_mean) ** 2)
    residual_ss = np.sum((y_true - y_hat) ** 2)

    sklearn_value = r2_score(y_true, y_hat)

    explained_ratio = explained_ss / total_ss
    residual_based = 1 - (residual_ss / total_ss)

    # Adjacent f-strings inside the parentheses concatenate into one message.
    report = (
        f'R-squared (SS_explained / SS_Total) = {explained_ratio}\n'
        f'R-squared (1 - (SS_residual / SS_Total)) = {residual_based}\n'
        f"Scikit-Learn's R-squared = {sklearn_value}"
    )
    print(report)
# Report R-squared twice: first for the fitted model's predictions, then for
# an all-zeros prediction vector of the same length as the target.
for heading, estimate in (
    ('Positive R-squared\n', prediction),
    ('\n\nNegative R-squared\n', np.zeros(len(y))),
):
    print(heading)
    my_r2_score(y, estimate)
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.datasets import load_boston
from sklearn.feature_selection import mutual_info_regression
# NOTE(review): load_boston was removed in scikit-learn 1.2; this script needs
# an older scikit-learn release or a replacement dataset - confirm the target
# environment.
# Load the dataset once and reuse it for the data, column names and target.
boston = load_boston()
df = pd.DataFrame(boston['data'], columns=boston['feature_names'])
df['y'] = boston['target']

# RAD and CHAS are cast to integers; they are the columns flagged as discrete
# for the mutual-information estimate below.
df['RAD'] = df['RAD'].astype('int')
df['CHAS'] = df['CHAS'].astype('int')

X = df.drop(columns='y').copy()
y = df['y'].copy()

# Append three noise features. Each draw passes an explicit size so that every
# row gets its own value; without it, randint/normal return a single scalar
# that pandas broadcasts into a constant column.
np.random.seed(11)
n_rows = len(X)
X['random1'] = np.random.randn(n_rows)
X['random2'] = np.random.randint(n_rows, size=n_rows)
X['random3'] = np.random.normal(size=n_rows)

# Name the discrete columns explicitly. Comparing dtypes against np.int32 is
# platform-dependent: astype('int') yields int64 on most platforms, in which
# case no column would be marked discrete.
discrete_mask = X.columns.isin(['RAD', 'CHAS'])
mutual_info = mutual_info_regression(X, y, discrete_features=discrete_mask)

# Label the scores with their feature names and order them from most to least
# informative.
mutual_info = pd.Series(mutual_info, index=X.columns)
mutual_info.sort_values(ascending=False, inplace=True)
mutual_info
# Fit nested linear models on the first 1, 2, ..., k columns of X and record
# R-squared and adjusted R-squared for each model size.
#
# NOTE(review): columns are taken in X's original order; the sorted
# `mutual_info` series only supplies the number of features here. If the
# intent was to add features by decreasing mutual information, select
# X[mutual_info.index[:i]] instead - confirm with the author.
n_samples = len(X)
rows = []
for i in range(1, len(mutual_info) + 1):
    X_new = X.iloc[:, :i].copy()
    linear_regression = LinearRegression()
    linear_regression.fit(X_new, y)
    prediction = linear_regression.predict(X_new)
    r2 = r2_score(y_true=y, y_pred=prediction)
    # Adjusted R-squared penalises R-squared by the number of predictors i.
    adj_r2 = 1 - ((1 - r2) * (n_samples - 1) / (n_samples - i - 1))
    rows.append({'r2': r2, 'adj_r2': adj_r2})

# Build the frame once, indexed by number of features. DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every iteration.
result_df = pd.DataFrame(rows, index=range(1, len(rows) + 1), columns=['r2', 'adj_r2'])
result_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment