Skip to content

Instantly share code, notes, and snippets.

@mndrake
Last active August 29, 2015 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mndrake/7105b93d71ace38dc42b to your computer and use it in GitHub Desktop.
Save mndrake/7105b93d71ace38dc42b to your computer and use it in GitHub Desktop.
library(xgboost)
library(Matrix)
library(data.table)
library(caret)
source('gini.R')
# Load the training data and expand it into a design matrix without an
# intercept column, using Hazard as the response.
df <- read.csv("data/train.csv")
df.matrix <- model.matrix(Hazard ~ . - 1, data = df)

# Fix the RNG seed before partitioning so the split is reproducible.
set.seed(107)
trainIndex <- createDataPartition(
  df[["Hazard"]],
  p = 0.75,
  list = FALSE,
  times = 1
)

# Keep the feature rows (x) and their labels (y) together per partition:
# rows selected by trainIndex go to `train`, the remainder to `test`.
train <- list(
  x = df.matrix[trainIndex, ],
  y = df[["Hazard"]][trainIndex]
)
test <- list(
  x = df.matrix[-trainIndex, ],
  y = df[["Hazard"]][-trainIndex]
)

# Wrap both partitions for xgboost and register them in a watchlist.
dtrain <- xgb.DMatrix(data = train[["x"]], label = train[["y"]])
dtest <- xgb.DMatrix(data = test[["x"]], label = test[["y"]])
watchlist <- list(train = dtrain, test = dtest)
# Custom evaluation metric, passed to xgb.train() through `feval`.
#
# preds:  predictions to be scored.
# dtrain: object whose "label" field is read with getinfo().
#
# Returns list(metric = "error", value = v), where v is the negated
# NormalizedGini of the labels against the predictions.
evalerror <- function(preds, dtrain) {
  actual <- getinfo(dtrain, "label")
  negated_gini <- -NormalizedGini(actual, preds)
  list(metric = "error", value = negated_gini)
}
# Earlier configuration, kept commented out for reference:
#bst <- xgb.train(data=dtrain, max.depth=9, nround=1000,
# watchlist = watchlist,
# objective='rank:pairwise',
# feval = evalerror,
# eta=0.011, min_child_weight=100, subsample=0.7,
# colsample_bytree=0.7, scale_pos_weight=1.0)

# Train the booster on dtrain with the pairwise ranking objective. The
# watchlist entries are scored each round with the custom `evalerror`
# metric.
# `nrounds` is written out in full: the shortened `nround` only reached
# the formal argument through R's partial argument matching.
bst <- xgb.train(
  data = dtrain, max.depth = 8, nrounds = 2000,
  watchlist = watchlist,
  objective = "rank:pairwise",
  feval = evalerror,
  eta = 0.011, min_child_weight = 100, subsample = 0.7,
  colsample_bytree = 0.7, scale_pos_weight = 1.0
)
# Score the held-out partition with the trained booster and report the
# normalized Gini of the predictions against the held-out labels.
pred <- predict(bst, dtest)
NormalizedGini(test$y, pred)
library(pROC)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
# NOTE(review): pROC's ROC analysis expects a two-class response. If
# test$y (Hazard) takes more than two distinct values, confirm which
# levels are actually being compared before trusting the AUC below.
plot.roc(test$y, pred, print.auc = TRUE, print.auc.y = 0.5)
# Results log from earlier runs. The columns look like: metric, value,
# number of rounds, then values from repeat runs -- TODO confirm.
#AUC 0.5992 500 0.5992
#AUC 0.6028 750 0.6026
#AUC 0.6008 1000 0.5997
#AUC 0.6015 2000 0.6023
#AUC 0.6007 3000
NormalizedGini(test$y, pred)
#NG 0.3743120 500 0.3746527
#NG 0.3767384 750 0.3758139
#NG 0.3766062 1000 0.3783594 0.376777519
#NG 0.3713819 2000 0.3724358 0.37412506256842
#NG 0.3648366 3000
# prepare submission -------------------------
# Read the scoring data, expand it the same way as the training data,
# predict with the trained booster and write an Id/Hazard CSV.
submission <- read.csv("data/test.csv")
submission.matrix <- model.matrix(~ . - 1, submission)

# model.matrix() drops rows containing NA, and it derives dummy columns
# from the factor levels present in this file alone. Either effect would
# silently misalign the predictions (with the Ids, or with the features
# the model was trained on), so fail loudly instead.
if (nrow(submission.matrix) != nrow(submission)) {
  stop(
    "model.matrix() dropped rows from data/test.csv (missing values?); ",
    "predictions would not line up with submission$Id",
    call. = FALSE
  )
}
if (!identical(colnames(submission.matrix), colnames(df.matrix))) {
  stop(
    "columns of the submission matrix differ from the training matrix; ",
    "check factor levels in data/test.csv against data/train.csv",
    call. = FALSE
  )
}

submission.pred <- predict(bst, submission.matrix)
submission.tbl <- data.frame(Id = submission$Id, Hazard = submission.pred)

# Create the output directory if needed; write.csv() does not create it.
dir.create("out", showWarnings = FALSE, recursive = TRUE)
write.csv(submission.tbl, "out/6_xg.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment