Skip to content

Instantly share code, notes, and snippets.

@szilard
Created May 15, 2017 18:37
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save szilard/b5337394b51f2452ae64576469976807 to your computer and use it in GitHub Desktop.
caret slowdown issue
library(caret)
library(readr)
library(ROCR)
# Load the airline delay data and carve a 60/40 train/test split out of its
# first N rows. The seed is fixed so the sampled row indices are reproducible.
set.seed(123)
d <- read_csv("https://raw.githubusercontent.com/szilard/teach-data-science-UCLA-master-appl-stats/master/wk-06-ML/data/airline100K.csv")
# Work on a small subsample to keep the timing comparison quick; min() guards
# against a file that has fewer rows than requested.
N <- min(10000, nrow(d))
idx <- sample(seq_len(N), 0.6*N)
d_train <- d[idx,]
# Hold out only the remaining rows of the same N-row subsample. Negative
# indexing (`d[-idx, ]`) would also pull in every row of `d` beyond N.
d_test <- d[setdiff(seq_len(N), idx),]
# Optional parallel backend for caret (left disabled here):
#library(doMC)
#registerDoMC(cores = parallel::detectCores())

# Timing 1: caret::train() with method "glmnet" on an explicit one-row tuning
# grid (alpha = 1, lambda = 0) and `number = 1` passed to trainControl().
system.time({
  fixed_grid <- data.frame(alpha = 1, lambda = 0)
  fixed_ctrl <- trainControl(number = 1, verboseIter = TRUE)
  mds <- train(
    dep_delayed_15min ~ .,
    data = d_train,
    method = "glmnet",
    trControl = fixed_ctrl,
    tuneGrid = fixed_grid
  )
})
# Timing recorded by the original author:
#    user  system elapsed
#  10.963   0.044  11.004
# Timing 2: the same caret/glmnet call, but with random search and
# tuneLength = 1 instead of an explicit tuning grid.
system.time({
  random_ctrl <- trainControl(
    number = 1,
    search = "random",
    verboseIter = TRUE
  )
  mds <- train(
    dep_delayed_15min ~ .,
    data = d_train,
    method = "glmnet",
    trControl = random_ctrl,
    tuneLength = 1
  )
})
# Score the caret model. The original author's note on the test-set call reads
# "error new cats", i.e. this call is expected to fail (presumably on
# categories that never appear in d_train -- confirm). The error is caught and
# reported so the rest of the script still runs when sourced in one go.
phat <- tryCatch(
  predict(mds, newdata = d_test, type = "prob")[, "Y"],
  error = function(e) {
    message("Test-set prediction failed: ", conditionMessage(e))
    NULL
  }
)
# `newdata` is spelled out in full rather than relying on partial matching of
# `new`. Note the AUC below is in-sample: it is computed on the training rows.
phat <- predict(mds, newdata = d_train, type = "prob")[, "Y"]
rocr_pred <- prediction(phat, d_train$dep_delayed_15min)
performance(rocr_pred, "auc")@y.values[[1]]
library(glmnet)

# Timing 3: fit the binomial model directly with glmnet (lambda = 0) on a
# sparse design matrix built from the full data, for comparison with the
# caret timings. The `- 1` in the formula removes the intercept column.
X <- Matrix::sparse.model.matrix(
  dep_delayed_15min ~ . - 1,
  data = d
)
X_train <- X[idx, ]
system.time({
  md <- glmnet(
    x = X_train,
    y = d_train$dep_delayed_15min,
    family = "binomial",
    lambda = 0
  )
})
# Timing recorded by the original author:
#    user  system elapsed
#   0.092   0.000   0.092
# In-sample check of the direct glmnet fit: predicted responses on the
# training matrix, then the AUC via ROCR.
phat <- predict(md, newx = X_train, type = "response")
rocr_pred <- prediction(
  predictions = phat,
  labels = d_train$dep_delayed_15min
)
performance(rocr_pred, measure = "auc")@y.values[[1]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment