Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@linlincheng
Created September 21, 2016 19:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save linlincheng/5ae3203cd000c3c24849985bbe449f12 to your computer and use it in GitHub Desktop.
Save linlincheng/5ae3203cd000c3c24849985bbe449f12 to your computer and use it in GitHub Desktop.
#Linlin Cheng
#Proj 5. tuning file1
library(caret)
library(xgboost)
library(readr)
library(dplyr)
library(tidyr)
########################
#First round tuning:
# Booster settings for a 3-class softmax model scored by multiclass error
# rate. The list is not referenced again in the visible part of this script.
xgb_params_1 <- list(
  objective   = "multi:softmax",
  num_class   = 3,
  eta         = 0.1,
  max.depth   = 1,
  eval_metric = "merror"
)

# Pack every column of x_train except `lable` into an xgb.DMatrix, attach
# tlabel as the label vector, and treat NA as the missing-value marker.
# NOTE(review): `lable` is presumably the label column of x_train (spelled
# this way in the data) -- confirm against wherever x_train is built.
xgbtrain <- x_train %>%
  select(-lable) %>%
  data.matrix() %>%
  xgb.DMatrix(label = tlabel, missing = NA)
# Round-one search grid for caret's "xgbTree" method:
# 3 nrounds x 4 eta x 5 max_depth x 2 colsample_bytree = 120 candidates,
# with gamma and min_child_weight held fixed at 1.
#
# caret's xgbTree model declares `subsample` as a tuning parameter, and
# train() rejects a tuneGrid that omits a declared parameter. It is pinned
# to 1 (use every row, the xgboost default) so the search itself is the same
# as without the column.
xgb_grid_1 <- expand.grid(
  nrounds = c(100, 500, 1000),
  eta = c(0.3, 0.1, 0.01, 0.001),
  max_depth = c(2, 4, 6, 8, 10),
  gamma = 1,
  colsample_bytree = c(0.1, 0.5),
  min_child_weight = 1,
  subsample = 1
)
# Resampling scheme for round one: 5-fold cross-validation, scored by
# multiClassSummary on predicted class probabilities.
xgb_trcontrol_1 <- trainControl(
  method = "cv",
  number = 5,
  # Class probabilities must be computed for the summary function to use them.
  classProbs = TRUE,
  summaryFunction = multiClassSummary,
  # Keep the per-fold metrics for every grid row, not only the winner.
  returnResamp = "all",
  # Do not store a copy of the training data inside the fitted object.
  returnData = FALSE,
  # Print progress while the folds are fitted.
  verboseIter = TRUE,
  # Use a parallel backend if one has been registered.
  allowParallel = TRUE
)
# Round-one tuning run: fit caret's "xgbTree" model for every row of
# xgb_grid_1 under the xgb_trcontrol_1 resampling scheme.
#   x: all columns of x_train except `lable`, converted to a numeric matrix.
#   y: tlabel passed through make.names() so the class labels are
#      syntactically valid R names (needed because classProbs = TRUE names
#      probability columns after the classes).
xgb_train_1 <- train(
  x = x_train %>% select(-lable) %>% data.matrix(),
  y = make.names(as.factor(tlabel)),
  method = "xgbTree",
  tuneGrid = xgb_grid_1,
  trControl = xgb_trcontrol_1
)

# Show the winning grid row. Output recorded from the original run:
#    nrounds max_depth eta gamma colsample_bytree min_child_weight
# 90    1000        10 0.1     1              0.5                1
xgb_train_1[["bestTune"]]
# Bubble chart of the round-one grid results: eta (as discrete categories)
# on the x axis, max_depth on the y axis, with both point size and colour
# mapped to Mean_ROC.
# NOTE(review): assumes xgb_train_1$results contains a Mean_ROC column; the
# metric names emitted by multiClassSummary depend on the installed caret
# version -- confirm before relying on this plot.
ggplot(
  data = xgb_train_1$results,
  mapping = aes(
    x = as.factor(eta),
    y = max_depth,
    size = Mean_ROC,
    color = Mean_ROC
  )
) +
  geom_point() +
  # Size duplicates the colour scale, so its legend is suppressed.
  scale_size_continuous(guide = "none") +
  theme_bw()

# Variable-importance plot for the round-one fit, on the raw (unscaled)
# importance values.
plot(varImp(object = xgb_train_1, scale = FALSE))
#########################
#second round tuning:
# Round-two search grid: a narrower sweep around the round-one winner
# (nrounds fixed at 1000): 5 eta x 4 max_depth x 3 colsample_bytree = 60
# candidates, with gamma and min_child_weight held fixed at 1.
#
# caret's xgbTree model declares `subsample` as a tuning parameter, and
# train() rejects a tuneGrid that omits a declared parameter. It is pinned
# to 1 (use every row, the xgboost default) so the search itself is the same
# as without the column.
xgb_grid_2 <- expand.grid(
  nrounds = 1000,
  eta = c(0.15, 0.1, 0.09, 0.08, 0.07),
  max_depth = c(8, 9, 10, 11),
  gamma = 1,
  colsample_bytree = c(0.4, 0.5, 0.6),
  min_child_weight = 1,
  subsample = 1
)
# Resampling scheme for round two: 5-fold cross-validation, scored by
# multiClassSummary on predicted class probabilities (same settings as the
# round-one control object).
xgb_trcontrol_2 <- trainControl(
  method = "cv",
  number = 5,
  # Class probabilities must be computed for the summary function to use them.
  classProbs = TRUE,
  summaryFunction = multiClassSummary,
  # Keep the per-fold metrics for every grid row, not only the winner.
  returnResamp = "all",
  # Do not store a copy of the training data inside the fitted object.
  returnData = FALSE,
  # Print progress while the folds are fitted.
  verboseIter = TRUE,
  # Use a parallel backend if one has been registered.
  allowParallel = TRUE
)
# Round-two tuning run: fit caret's "xgbTree" model for every row of
# xgb_grid_2 under the xgb_trcontrol_2 resampling scheme, using the same
# predictors (x_train without `lable`) and make.names()-sanitised labels as
# round one.
xgb_train_2 <- train(
  x = x_train %>% select(-lable) %>% data.matrix(),
  y = make.names(as.factor(tlabel)),
  method = "xgbTree",
  tuneGrid = xgb_grid_2,
  trControl = xgb_trcontrol_2
)

# Show the winning grid row. Output recorded from the original run:
#    nrounds max_depth  eta gamma colsample_bytree min_child_weight
# 12    1000        11 0.07     1              0.6                1
xgb_train_2[["bestTune"]]

# Persist the fitted tuning object to the working directory; restore it
# later with load("xgb_tuning2.Rdata").
save(xgb_train_2, file = "xgb_tuning2.Rdata")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment