# Source: GitHub gist mndrake/7105b93d71ace38dc42b
# Last active: August 29, 2015 14:28
#
# Note from the hosting page: this file contains bidirectional Unicode text
# that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Packages attached by this script. createDataPartition() comes from caret and
# xgb.DMatrix()/xgb.train() from xgboost.
library(xgboost)
library(Matrix)
library(data.table)
library(caret)

# gini.R presumably defines NormalizedGini(), which is used further down.
source("gini.R")

# Load the training data and expand it into a numeric design matrix: every
# column except Hazard becomes a predictor and "- 1" drops the intercept.
df <- read.csv("data/train.csv")
df.matrix <- model.matrix(Hazard ~ . - 1, df)

# model.matrix() silently drops rows containing NA under the default
# na.action; that would misalign the matrix rows with df$Hazard below.
if (nrow(df.matrix) != nrow(df)) {
  stop("model.matrix() dropped rows (missing values in data/train.csv?); ",
       "features and labels would be misaligned.", call. = FALSE)
}

# Fixed seed so the 75/25 train/hold-out partition is reproducible.
set.seed(107)
trainIndex <- createDataPartition(df$Hazard, p = 0.75, list = FALSE, times = 1)

# Feature matrix (x) and target vector (y) for each partition.
train <- list(x = df.matrix[trainIndex, ], y = df$Hazard[trainIndex])
test <- list(x = df.matrix[-trainIndex, ], y = df$Hazard[-trainIndex])

dtrain <- xgb.DMatrix(data = train$x, label = train$y)
dtest <- xgb.DMatrix(data = test$x, label = test$y)

# Named datasets handed to xgb.train() as its watchlist.
watchlist <- list(train = dtrain, test = dtest)
# Custom evaluation metric, passed to xgb.train() as `feval`.
#
# preds:  numeric predictions for the rows of `dtrain`.
# dtrain: the dataset being evaluated; its "label" field is read via getinfo().
#
# Returns list(metric = "error", value = <negated NormalizedGini>), so a lower
# reported value corresponds to a higher normalized Gini.
evalerror <- function(preds, dtrain) {
  actual <- getinfo(dtrain, "label")
  list(metric = "error", value = -NormalizedGini(actual, preds))
}
# Earlier configuration, kept for reference:
# bst <- xgb.train(data=dtrain, max.depth=9, nround=1000,
#                  watchlist = watchlist,
#                  objective='rank:pairwise',
#                  feval = evalerror,
#                  eta=0.011, min_child_weight=100, subsample=0.7,
#                  colsample_bytree=0.7, scale_pos_weight=1.0)

# Train the booster with a pairwise ranking objective. Booster parameters go
# through `params`, and `nrounds` is spelled out in full rather than relying on
# partial argument matching of `nround`. evalerror() is reported for every
# watchlist entry.
bst <- xgb.train(
  params = list(
    objective = "rank:pairwise",
    max_depth = 8,
    eta = 0.011,
    min_child_weight = 100,
    subsample = 0.7,
    colsample_bytree = 0.7,
    scale_pos_weight = 1.0
  ),
  data = dtrain,
  nrounds = 2000,
  watchlist = watchlist,
  feval = evalerror
)
# Score the hold-out partition and report the normalized Gini. print() is
# explicit so the value is also shown when the script is run via source().
pred <- predict(bst, dtest)
print(NormalizedGini(test$y, pred))

library(pROC)
# NOTE(review): if Hazard takes more than two distinct values, pROC only
# compares two response levels -- confirm the AUC below measures what is
# intended.
plot.roc(test$y, pred, print.auc = TRUE, print.auc.y = 0.5)

# Logged results; columns appear to be: metric value, nround, later re-run(s)
# -- TODO confirm.
#AUC 0.5992 500 0.5992
#AUC 0.6028 750 0.6026
#AUC 0.6008 1000 0.5997
#AUC 0.6015 2000 0.6023
#AUC 0.6007 3000

#NG 0.3743120 500 0.3746527
#NG 0.3767384 750 0.3758139
#NG 0.3766062 1000 0.3783594 0.376777519
#NG 0.3713819 2000 0.3724358 0.37412506256842
#NG 0.3648366 3000
# prepare submission -------------------------
# Build the design matrix for the unlabeled test file the same way as for the
# training data (all columns, no intercept), predict, and write Id/Hazard.
submission <- read.csv("data/test.csv")
submission.matrix <- model.matrix(~ . - 1, submission)

# The booster was fit on the columns of df.matrix. If the two files expand to
# different columns (e.g. a factor level present in only one of them), the
# features would be misaligned, so fail loudly instead.
if (!identical(colnames(submission.matrix), colnames(df.matrix))) {
  stop("Columns of the submission design matrix do not match the training ",
       "design matrix.", call. = FALSE)
}

submission.pred <- predict(bst, submission.matrix)
submission.tbl <- data.frame(Id = submission$Id, Hazard = submission.pred)

# Make sure the output directory exists so the write cannot fail after the
# long training run.
dir.create("out", showWarnings = FALSE, recursive = TRUE)
write.csv(submission.tbl, "out/6_xg.csv", row.names = FALSE)
# (GitHub page footer: "Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment")