Skip to content

Instantly share code, notes, and snippets.

@ackjake
Created June 23, 2017 00:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ackjake/d3c14ac6589e73573e63f9bbc4344979 to your computer and use it in GitHub Desktop.
Save ackjake/d3c14ac6589e73573e63f9bbc4344979 to your computer and use it in GitHub Desktop.
def leave_one_out(df_train, df_test, var, noise=.01, drop=True, target='y'):
    """Add a leave-one-out mean-target encoding of ``var`` to both frames.

    A new column named ``'mean_<var>'`` is added to each returned frame:

    * training rows get the mean of ``target`` over all *other* training
      rows that share the same ``var`` value (the row's own target is
      excluded);
    * test rows get the plain mean of ``target`` over every training row
      with the same ``var`` value.

    Rows whose encoding cannot be computed (a category that occurs only
    once in training, or a test category never seen in training) get NaN.
    Row order and index of both frames are preserved.

    Args:
        df_train: Training DataFrame containing the ``var`` and ``target``
            columns.
        df_test: Test DataFrame containing the ``var`` column.
        var: Name of the categorical column to encode.
        noise: Accepted for backward compatibility; currently unused.
            NOTE(review): presumably meant to jitter the training encoding
            -- confirm the intended form before implementing.
        drop: If true, remove the original ``var`` column from both
            returned frames.
        target: Name of the target column in ``df_train``.

    Returns:
        Tuple ``(df_train, df_test)`` of new DataFrames; the inputs are
        left unmodified.
    """
    new_var = 'mean_{}'.format(var)

    # Work on copies so the caller's frames are never modified.
    df_train = df_train.copy()
    df_test = df_test.copy()

    # training set
    # Leave-one-out mean == (group sum - own value) / (group size - 1),
    # which avoids re-scanning the whole frame once per row.
    y = df_train[target]
    per_group = df_train.groupby(var)[target]
    group_sum = per_group.transform('sum')      # NaN targets are skipped
    group_count = per_group.transform('count')  # counts non-NaN targets only
    # A row with a missing target contributed nothing to sum/count, so
    # nothing must be subtracted for it.
    own_value = y.fillna(0)
    own_count = y.notna().astype(int)
    # 0 / 0 (no other row in the group) yields NaN, i.e. "no estimate".
    df_train[new_var] = (group_sum - own_value) / (group_count - own_count)

    # test set
    # Map instead of merge: keeps every test row, its order and its index,
    # and cannot produce suffixed duplicate columns.
    group_means = per_group.mean()
    df_test[new_var] = df_test[var].map(group_means)

    # drop column (optional)
    if drop:
        df_train = df_train.drop(var, axis=1)
        df_test = df_test.drop(var, axis=1)

    return df_train, df_test
# Encode the 'X0' column and rebind both names to the returned frames.
train, test = leave_one_out(df_train=train, df_test=test, var='X0')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment