Skip to content

Instantly share code, notes, and snippets.

@jay-trivedi
Last active June 12, 2017 11:08
Show Gist options
  • Save jay-trivedi/bd5f40dc28356c43995a3743e84a73cf to your computer and use it in GitHub Desktop.
Save jay-trivedi/bd5f40dc28356c43995a3743e84a73cf to your computer and use it in GitHub Desktop.
# Random Forest: train one forest per max_depth value and record the train /
# validation mean squared error plus the three top-ranked features.
seed = 1
depth_range = range(1, 30, 1)
# NOTE: the "*_acc" keys hold mean squared error (lower is better), not an
# accuracy; the key names are kept because later cells read them.
acc_vs_depth_result_rf = {
    "depth": [],
    "train_acc": [],
    "valid_acc": [],
    "top_feature": [],
    "second_feature": [],
    "third_feature": [],
}
for depth in depth_range:
    # The same model_id is reused on every iteration.
    model = H2ORandomForestEstimator(
        model_id="model",
        sample_rate=1,
        ntrees=200,
        max_depth=depth,
        seed=seed,
    )
    model.train(x=x, y=y, training_frame=train)

    predict_valid = model.predict(valid[x])
    predict_train = model.predict(train[x])
    # Put predictions next to the true target and pull them into pandas.
    t = predict_train["predict"].cbind(train["SalePrice"]).as_data_frame()
    v = predict_valid["predict"].cbind(valid["SalePrice"]).as_data_frame()

    acc_vs_depth_result_rf["depth"].append(depth)
    acc_vs_depth_result_rf["valid_acc"].append(
        mean_squared_error(y_true=v.SalePrice, y_pred=v.predict)
    )
    acc_vs_depth_result_rf["train_acc"].append(
        mean_squared_error(y_true=t.SalePrice, y_pred=t.predict)
    )

    # Fetch the variable-importance table once per model instead of three
    # times; rows are taken in the order returned, and element 0 of each row
    # is stored as the feature (presumably its name -- see H2O varimp docs).
    importances = model.varimp()
    acc_vs_depth_result_rf["top_feature"].append(importances[0][0])
    acc_vs_depth_result_rf["second_feature"].append(importances[1][0])
    acc_vs_depth_result_rf["third_feature"].append(importances[2][0])
# Build the results table directly in the desired column order.
cols = [
    "depth",
    "train_acc",
    "valid_acc",
    "top_feature",
    "second_feature",
    "third_feature",
]
acc_vs_depth_result_df_rf = pd.DataFrame(acc_vs_depth_result_rf, columns=cols)
# Bare expression: displays the table when run as the last line of a notebook cell.
acc_vs_depth_result_df_rf
# Plotting results: random-forest (RF) MSE curves against tree depth, drawn
# together with the curves labelled "DT" for comparison.
import os

fig = plt.figure(figsize=(10, 7))
plt.plot(acc_vs_depth_result_df_rf.depth, acc_vs_depth_result_df_rf.train_acc, label="train MSE (RF)")
plt.plot(acc_vs_depth_result_df_rf.depth, acc_vs_depth_result_df_rf.valid_acc, label="validation MSE (RF)")
# acc_vs_depth_result_df is not defined in this section; it is expected to
# already exist (presumably the decision-tree results from an earlier cell).
plt.plot(acc_vs_depth_result_df.depth, acc_vs_depth_result_df.train_acc, label="train MSE (DT)")
plt.plot(acc_vs_depth_result_df.depth, acc_vs_depth_result_df.valid_acc, label="validation MSE (DT)")
plt.legend(loc='upper left', frameon=False)
plt.xlabel('Tree Depth')
plt.ylabel('MSE')
# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/House_pricing_RF.png")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment