Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Grid for the first parameter search (3 x 3 x 3 = 27 candidate combinations).
# The guidelines for how to tune each parameter are commented per line and are
# taken from Owen Zhang:
# http://www.slideshare.net/OwenZhang2/tips-for-data-science-competitions
xgb_grid_1 <- expand.grid(
  eta = c(0.5, 1, 1.5),                 # rule of thumb: [2-10] / num trees
  max_depth = c(4, 6, 8),               # start with 6
  nrounds = 100,                        # fixed at 100 for this search
  gamma = 0,                            # usually OK to leave at 0
  colsample_bytree = c(0.3, 0.5, 0.7),  # suggested range 0.3 - 0.5
  min_child_weight = 1                  # start with 1 / sqrt(event rate)
)
# Cross-validation control settings shared by all the grid searches below.
xgb_trcontrol_1 <- trainControl(
  method = "cv",
  number = 5,                        # 5-fold cross-validation
  verboseIter = TRUE,                # print progress per fold/candidate
  returnData = FALSE,                # don't keep a copy of the training data
  returnResamp = "all",              # save losses across all models
  classProbs = TRUE,                 # must be TRUE for AUC to be computed
  summaryFunction = twoClassSummary, # reports ROC / Sens / Spec
  allowParallel = TRUE
)
# Train a model on each parameter combination in xgb_grid_1 and evaluate it
# with 5-fold cross-validation; caret keeps the combination with the best
# cross-validated ROC (AUC).
xgb_train_1 <- train(
  x = higgs.train.dummy,
  y = higgs.labels,
  trControl = xgb_trcontrol_1,
  tuneGrid = xgb_grid_1,
  method = "xgbTree",
  na.action = na.pass,     # keep rows containing NA instead of dropping them
  missing = NA,            # forwarded via ... to xgboost: treat NA as missing
  metric = "ROC",          # selection metric, computed by twoClassSummary
  weights = scaled.weight  # per-observation weights
)
###############################
#Best parameters of first grid search:
#eta = .5
#max_depth = 4
#nrounds = 100
#gamma = 0
#colsample_bytree = .7
#min_child_weight = 1
###############################
# Fifth grid search (searches 2-4 not shown here)
# eta, max_depth, and colsample_bytree are fixed at values settled on by the
# earlier searches; only nrounds (number of boosting rounds) is varied here.
xgb_grid_5 <- expand.grid(
  eta = 0.2,
  max_depth = 5,
  nrounds = c(100, 200, 400, 500, 1000),
  gamma = 0,               # usually OK to leave at 0
  colsample_bytree = 0.85,
  min_child_weight = 1
)
# Cross-validate each nrounds candidate in xgb_grid_5 using the same control
# settings (xgb_trcontrol_1) as the first search.
xgb_train_5 <- train(
  x = higgs.train.dummy,
  y = higgs.labels,
  trControl = xgb_trcontrol_1,
  tuneGrid = xgb_grid_5,
  method = "xgbTree",
  na.action = na.pass,     # keep rows containing NA instead of dropping them
  missing = NA,            # forwarded via ... to xgboost: treat NA as missing
  metric = "ROC",          # selection metric, computed by twoClassSummary
  weights = scaled.weight  # per-observation weights
)
#nrounds = 200 is still the best
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.