Skip to content

Instantly share code, notes, and snippets.

def xray(var):
    """Capture the source line of the call site, for logging purposes.

    Intended to report the name of a variable together with its state.
    The visible code looks one frame up the call stack and reads the first
    line of source context recorded for that frame, i.e. the text of the
    line that invoked ``xray``.

    NOTE(review): only the capture step is visible here; ``var`` is not
    used and nothing is returned, so callers currently receive ``None``.
    """
    import inspect
    import re

    this_frame = inspect.currentframe()
    # Entry 0 of the outer-frame list describes xray itself; entry 1 is
    # the record for whoever called it.
    caller_record = inspect.getouterframes(this_frame)[1]
    caller_frame = caller_record[0]
    call_line = inspect.getframeinfo(caller_frame).code_context[0]
def regression_roc_auc_score(y_true, y_pred, num_rounds = 10000):
"""
Computes Regression-ROC-AUC-score.
Parameters:
----------
y_true: array-like of shape (n_samples,). Binary or continuous target variable.
y_pred: array-like of shape (n_samples,). Target scores.
num_rounds: int or string. If integer, number of random pairs of observations.
If string, 'exact', all possible pairs of observations will be evaluated.
def naive_roc_auc_score(y_true, y_pred):
num_same_sign = 0
num_pairs = 0
for a in range(len(y_true)):
for b in range(len(y_true)):
if y_true[a] > y_true[b]:
num_pairs += 1
if y_pred[a] > y_pred[b]:
import pandas as pd
from sklearn.feature_selection import f_regression
# Inputs expected to be defined before this snippet runs:
# X: pandas.DataFrame, features
# y: pandas.Series, target variable
# K: number of features to select (not used in the lines visible here)
# Compute F-statistics and initialize correlation matrix.
# f_regression returns a (F-statistics, p-values) pair; [0] keeps only the
# F-statistics, which are then labelled with the feature names of X.
# NOTE(review): the correlation-matrix initialization is not visible here.
F = pd.Series(f_regression(X, y)[0], index = X.columns)
import pandas as pd
from sklearn.feature_selection import f_regression
# Inputs expected to be defined before this snippet runs:
# X: pandas.DataFrame, features
# y: pandas.Series, target variable
# K: number of features to select (not used in the lines visible here)
# Compute F-statistics and correlations.
# f_regression returns a (F-statistics, p-values) pair; [0] keeps only the
# F-statistics, which are then labelled with the feature names of X.
# NOTE(review): the correlation computation is not visible here.
F = pd.Series(f_regression(X, y)[0], index = X.columns)
# Weight-of-evidence (WoE) encoding of the categorical Series `x` against
# the 0/1 target Series `y`:
#     woe(level) = log( P(level | y == 1) / P(level | y == 0) )
#
# The per-level counts are looked up with `map` rather than `replace`:
# `replace` is a value-substitution tool, and on recent pandas versions
# replacing strings with numbers in an object column warns about
# downcasting and may leave the result as `object` dtype, which `np.log`
# cannot process. `map` + `astype(float)` always yields a numeric Series.
y_level_ones = x.map((y == 1).groupby(x).sum()).astype(float)
y_level_zeros = x.map((y == 0).groupby(x).sum()).astype(float)

# Overall number of positive and negative observations.
y_ones = (y == 1).sum()
y_zeros = (y == 0).sum()

# Share of all positives (resp. negatives) that fall in each row's level.
nominator = y_level_ones / y_ones
denominator = y_level_zeros / y_zeros

# NOTE: a level with no negatives gives +inf and a level with no positives
# gives -inf, because no smoothing is applied to the counts.
woe_encoder = np.log(nominator / denominator)
# Leave-one-out encoding: each row is encoded as the mean target of the
# OTHER rows that share its level of `x`.
def _same_level_targets_without_self(row):
    """Return the targets of all rows in this row's level, minus the row itself.

    `row` is one row of `x.to_frame()`; its level is read from the column
    named 'x' and its own index label (`row.name`) is dropped from `y`.
    """
    in_same_level = x == row['x']
    return y[in_same_level].drop(row.name).to_list()


# One list of "other" targets per row, then the mean of each list.
y_level_except_self = x.to_frame().apply(
    _same_level_targets_without_self,
    axis = 1,
)
leave_one_out_encoding = y_level_except_self.apply(np.mean)
# CatBoost-style (ordered) target encoding: each row is encoded from the
# targets of rows in the same level whose index label is smaller than its
# own, smoothed towards the global target mean.
# `a` is read from the enclosing scope (presumably the smoothing weight of
# the prior -- confirm where it is defined).
y_mean = y.mean()


def _earlier_same_level_targets(row):
    """Return targets of same-level rows whose index label precedes this row's."""
    in_same_level = x == row['x']
    comes_before = y.index < row.name
    return y[in_same_level & comes_before].to_list()


def _smoothed_mean(ylbs):
    """Blend the mean of `ylbs` with the global mean, weighting the prior by `a`."""
    return (sum(ylbs) + y_mean * a) / (len(ylbs) + a)


y_level_before_self = x.to_frame().apply(
    _earlier_same_level_targets,
    axis = 1,
)
catboost_encoding = y_level_before_self.apply(_smoothed_mean)
# GLMM encoding: fit an intercept-only linear mixed model with one random
# intercept per level of `x`, then encode each row as
# (fixed intercept + random effect of its level).
# The formula refers to a column named 'y', so `y.to_frame()` must produce
# that column (presumably `y.name == 'y'` -- confirm against the caller).
model = smf.mixedlm(formula = 'y ~ 1', data = y.to_frame(), groups = x).fit()
intercept = model.params['Intercept']

# `random_effects` maps each level to a pandas Series of its random effects.
# Take the single element explicitly with `.iloc[0]`: calling `float()` on a
# one-element Series is deprecated in recent pandas. The lookup uses `map`
# (not `replace`) so the result is a proper float Series.
level_effects = {k: float(v.iloc[0]) for k, v in model.random_effects.items()}
random_effect = x.map(level_effects).astype(float)

glmm_encoding = intercept + random_effect
# James-Stein encoding: each row is encoded as a weighted average of the
# target mean of its level and the global target mean.
y_mean = y.mean()
y_var = y.var()

# Per-level mean and variance of the target, broadcast back to each row.
# `map` (not `replace`) performs the lookup so the result is guaranteed to be
# a numeric Series; `replace` on an object column may keep `object` dtype on
# recent pandas versions.
y_level_mean = x.map(y.groupby(x).mean()).astype(float)
y_level_var = x.map(y.groupby(x).var()).astype(float)

# Number of distinct levels, computed once instead of twice.
n_levels = len(set(x))

# Weight given to the level mean; the remainder goes to the global mean.
# NOTE: levels with a single observation have an undefined (NaN) variance,
# so their weight and encoding are NaN.
weight = 1 - (y_level_var / (y_var + y_level_var) * (n_levels - 3) / (n_levels - 1))

james_stein_encoding = y_level_mean * weight + y_mean * (1 - weight)