Skip to content

Instantly share code, notes, and snippets.

@Yankim
Created September 19, 2016 13:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Yankim/65f6505a70b33a9f288ccda51e03849a to your computer and use it in GitHub Desktop.
Save Yankim/65f6505a70b33a9f288ccda51e03849a to your computer and use it in GitHub Desktop.
# First tuning grid for the xgbTree parameter search.
# The per-parameter rules of thumb quoted below come from Owen Zhang's slides:
# http://www.slideshare.net/OwenZhang2/tips-for-data-science-competitions
# expand.grid() builds every combination: 3 eta x 3 max_depth x 3
# colsample_bytree = 27 candidate settings.
# NOTE(review): eta values of 1 and 1.5 are well above the quoted guideline
# ([2-10] / num trees) and above the [0, 1] range given in the xgboost
# parameter docs -- confirm these were intended.
xgb_grid_1 <- expand.grid(
  eta              = c(0.5, 1, 1.5),    # guideline: [2-10] / num trees
  max_depth        = c(4, 6, 8),        # guideline: start with 6
  nrounds          = 100,               # guideline: fix at 100
  gamma            = 0,                 # guideline: usually fine left at 0
  colsample_bytree = c(0.3, 0.5, 0.7),  # guideline: 0.3 - 0.5
  min_child_weight = 1                  # guideline: start at 1 / sqrt(event rate)
)
# Resampling / tuning control shared by the grid searches:
# 5-fold cross-validation scored with caret's two-class summary.
xgb_trcontrol_1 <- trainControl(
  method          = "cv",
  number          = 5,
  summaryFunction = twoClassSummary,
  classProbs      = TRUE,    # class probabilities are required for the AUC (ROC) metric
  returnResamp    = "all",   # keep the resampled losses for every candidate model
  returnData      = FALSE,   # do not store a copy of the training data in the result
  verboseIter     = TRUE,    # print progress while tuning
  allowParallel   = TRUE
)
# Fit an xgbTree model for every row of xgb_grid_1 and score each candidate
# by cross-validated ROC, using the resampling scheme in xgb_trcontrol_1.
xgb_train_1 <- train(
  x         = higgs.train.dummy,
  y         = higgs.labels,
  method    = "xgbTree",
  metric    = "ROC",
  trControl = xgb_trcontrol_1,
  tuneGrid  = xgb_grid_1,
  weights   = scaled.weight,  # per-observation case weights
  # Missing-value handling: keep rows containing NA and treat NA as the
  # missing-value marker.
  # NOTE(review): presumably both are forwarded through `...` to the
  # underlying xgboost fit -- confirm against the caret version in use.
  na.action = na.pass,
  missing   = NA
)
###############################
#Best parameters of first grid search:
#eta = .5
#max_depth = 4
#nrounds = 100
#gamma = 0
#colsample_bytree = .7
#min_child_weight = 1
###############################
# Grid search #5 (xgb_grid_5): every parameter is held at a single value
# except nrounds, so this search only compares the number of boosting rounds.
xgb_grid_5 <- expand.grid(
  eta              = 0.2,                             # fixed
  max_depth        = 5,                               # fixed
  nrounds          = c(100, 200, 400, 500, 1000),     # the only parameter varied here
  gamma            = 0,                               # fixed
  colsample_bytree = 0.85,                            # fixed
  min_child_weight = 1                                # fixed
)
# Fit an xgbTree model for every row of xgb_grid_5 and score each candidate
# by cross-validated ROC, reusing the xgb_trcontrol_1 resampling settings.
xgb_train_5 <- train(
  x         = higgs.train.dummy,
  y         = higgs.labels,
  method    = "xgbTree",
  metric    = "ROC",
  trControl = xgb_trcontrol_1,
  tuneGrid  = xgb_grid_5,
  weights   = scaled.weight,  # per-observation case weights
  # Missing-value handling: keep rows containing NA and treat NA as the
  # missing-value marker.
  # NOTE(review): presumably both are forwarded through `...` to the
  # underlying xgboost fit -- confirm against the caret version in use.
  na.action = na.pass,
  missing   = NA
)
#nrounds = 200 is still the best
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment