Created
June 23, 2017 00:24
-
-
Save ackjake/d3c14ac6589e73573e63f9bbc4344979 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def leave_one_out(df_train, df_test, var, noise=.01, drop=True, target='y'):
    """Leave-one-out mean-target encode the categorical column ``var``.

    A new column named ``'mean_<var>'`` is added to both frames:

    * training rows get the mean of ``target`` over every *other* training
      row that shares the same ``var`` value, so a row's own target never
      leaks into its own feature;
    * test rows get the plain per-category mean of ``target`` computed on
      the whole training frame.

    Rows whose category has no other usable training rows (a category seen
    only once in train, a category absent from train, or a missing ``var``
    value) receive NaN.

    Args:
        df_train: Training frame containing the ``var`` and ``target`` columns.
        df_test: Test frame containing the ``var`` column.
        var: Name of the categorical column to encode.
        noise: Accepted for backward compatibility; currently unused, so the
            encoding is deterministic.
        drop: When true, remove the original ``var`` column from both
            returned frames.
        target: Name of the target column in ``df_train``.

    Returns:
        A ``(train, test)`` tuple of new DataFrames. The input frames are
        not modified, and the row order and index of both are preserved.
    """
    new_var = 'mean_{}'.format(var)

    # Work on copies so the caller's frames are never touched.
    df_train = df_train.copy()
    df_test = df_test.copy()

    # Per-category aggregates, computed once instead of once per row.
    y = df_train[target]
    by_category = df_train.groupby(var)[target]
    category_sum = by_category.transform('sum')      # NaN targets are skipped
    category_count = by_category.transform('count')  # non-NaN targets only
    category_mean = by_category.mean()

    # training set: remove each row's own contribution from its category.
    # A row with a missing target contributed nothing, so nothing is removed.
    has_target = y.notna()
    others_sum = category_sum - y.fillna(0)
    others_count = category_count - has_target.astype(int)
    # Mask instead of relying on 0/0 so "no other rows" is explicitly NaN.
    df_train[new_var] = (others_sum / others_count).where(others_count > 0)

    # test set: look up the full-train category mean. ``map`` keeps every
    # test row (unseen categories become NaN) and cannot create suffixed
    # duplicate columns the way a merge on an existing column name would.
    df_test[new_var] = df_test[var].map(category_mean)

    # drop column (optional)
    if drop:
        df_train = df_train.drop(var, axis=1)
        df_test = df_test.drop(var, axis=1)
    return df_train, df_test
# Encode the 'X0' column and rebind both names to the frames that are returned.
train, test = leave_one_out(df_train=train, df_test=test, var='X0')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment