Skip to content

Instantly share code, notes, and snippets.

@Wann-Jiun
Last active January 20, 2017 01:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Wann-Jiun/b1121ab43b29235cb795099ec79a18cc to your computer and use it in GitHub Desktop.
Save Wann-Jiun/b1121ab43b29235cb795099ec79a18cc to your computer and use it in GitHub Desktop.
# Baseline model: an ExtraTreesRegressor with default hyper-parameters,
# fitted on the full munged training frame.
y_test = label_df  # training labels; reused below as the reference targets
et_regr = ExtraTreesRegressor().fit(train_df_munged, y_test)

# In-sample predictions: the RMSE printed here is measured on the same rows
# the model was fitted on, so it is only a rough sanity check.
y_pred = et_regr.predict(train_df_munged)
print(
    "Extra Trees Regressor score on training set: ",
    rmse(y_test, y_pred),
)

# Predictions for the Kaggle test set.
y_test_pred_et = et_regr.predict(test_df_munged)
# Scan every fitted feature importance as a candidate selection threshold,
# lowest first, refitting a fresh model on each reduced feature set and
# printing the threshold, the number of kept features, and the RMSE.
# NOTE(review): every RMSE below is computed on the training rows themselves
# (no hold-out split is visible here), so it likely understates the real
# error -- confirm before using these numbers to pick a threshold.
thresholds = sort(et_regr.feature_importances_)
for thresh in thresholds:
    # Select features using the threshold; prefit=True reuses the
    # already-fitted et_regr rather than fitting it again.
    selection = SelectFromModel(et_regr, threshold=thresh, prefit=True)
    select_X_train = selection.transform(train_df_munged)
    # Train a new model on the reduced feature set.
    selection_model = ExtraTreesRegressor()
    selection_model.fit(select_X_train, y_test)
    # Evaluate on the same training rows: the reduced matrix is reused
    # instead of running the identical transform a second time.
    select_X_test = select_X_train
    y_pred = selection_model.predict(select_X_test)
    print(
        "Thresh=%.3f, n=%d, RMSE= %.10f"
        % (thresh, select_X_train.shape[1], rmse(y_test, y_pred))
    )
# Final feature selection with a fixed importance threshold of 0.01, again
# reusing the already-fitted et_regr (prefit=True).
selection = SelectFromModel(et_regr, prefit=True, threshold=0.01)

# Reduce both the training frame and the Kaggle test frame with the same
# selector so their columns line up.
select_X_train = selection.transform(train_df_munged)
select_X_test = selection.transform(test_df_munged)

# Fit a fresh model on the reduced training features, then predict the
# Kaggle test set from the reduced test features.
selection_model = ExtraTreesRegressor().fit(select_X_train, y_test)
y_test_pred_et_selec = selection_model.predict(select_X_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment