Created
February 23, 2019 21:44
-
-
Save statcompute/fe652c679e4abeae3a037878d23d5f82 to your computer and use it in GitHub Desktop.
Gradient-Free Optimization for glmnet Parameters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### gradient-free optimization for glmnet parameters ### | |
df1 <- read.csv("Downloads/credit_count.txt") | |
df2 <- df1[df1$CARDHLDR == 1, ] | |
X <- scale(df2[setdiff(colnames(df2), c("CARDHLDR", "DEFAULT"))]) | |
Y <- as.factor(as.matrix(df2["DEFAULT"])) | |
set.seed(2019) | |
sample <- sample(seq(nrow(df2)), size = nrow(df2) / 2, replace = FALSE) | |
### TRAINING SET ### | |
Y1 <- Y[sample] | |
X1 <- X[sample,] | |
### VALIDATION SET ### | |
Y2 <- Y[-sample] | |
X2 <- X[-sample,] | |
### OBJECTIVE FUNCTION TO MAXIMIZE AUC BY N-FOLD VALIDATION ### | |
glmnet.optim <- function(x) { | |
nfolds <- 10 | |
set.seed(1) | |
folds <- caret::createFolds(1:length(Y1), k = nfolds, list = FALSE) | |
glmnet.cv <- function(i) { | |
mdl <- glmnet::glmnet(X1[folds != i, ], Y1[folds != i], family = "binomial", standardize = FALSE, | |
alpha = x[1], lambda = x[2]) | |
data.frame(Ya = Y1[folds == i], Yp = as.numeric(predict(mdl, X1[folds == i, ], type = "response"))) | |
} | |
p <- do.call(rbind, parallel::mcMap(glmnet.cv, 1:nfolds, mc.cores = parallel::detectCores() - 1)) | |
r <- pROC::roc(p$Ya, p$Yp) | |
return(r$auc[1]) | |
} | |
### NELDER-MEAD OPTIMIZATION ### | |
nm_out <- dfoptim::nmkb(par = c(0.1, 0.01), fn = function(x) glmnet.optim(x), | |
upper = c(1, 100), lower = c(0, 0), | |
control = list(tol = 1e-10, maximize = T)) | |
nm_mdl <- glmnet::glmnet(X1, Y1, family = "binomial", alpha = nm_out$par[1], lambda = nm_out$par[2]) | |
coef(nm_mdl) | |
#(Intercept) -2.36444757 | |
#AGE . | |
#ACADMOS . | |
#ADEPCNT . | |
#MAJORDRG 0.03489905 | |
#MINORDRG 0.12017363 | |
#OWNRENT -0.10940849 | |
#INCOME -0.29822461 | |
#SELFEMPL . | |
#INCPER -0.09279876 | |
#EXP_INC . | |
#SPENDING . | |
#LOGSPEND -0.18790225 | |
pROC::roc(Y1, as.numeric(predict(nm_mdl, X1, type = "response"))) | |
# Area under the curve: 0.6529 | |
pROC::roc(Y2, as.numeric(predict(nm_mdl, X2, type = "response"))) | |
# Area under the curve: 0.6592 | |
### PARTICLE SWARM OPTIMIZATION ### | |
ps_out <- pso::psoptim(par = c(0.1, 0.01), upper = c(1, 100), lower = c(0, 0), | |
fn = function(x) -glmnet.optim(x), | |
control = list(maxit = 50, s = 10)) | |
ps_mdl <- glmnet::glmnet(X1, Y1, family = "binomial", alpha = ps_out$par[1], lambda = ps_out$par[2]) | |
coef(ps_mdl) | |
#(Intercept) -2.36448859 | |
#AGE . | |
#ACADMOS . | |
#ADEPCNT . | |
#MAJORDRG 0.03556278 | |
#MINORDRG 0.12033092 | |
#OWNRENT -0.11008422 | |
#INCOME -0.29728683 | |
#SELFEMPL . | |
#INCPER -0.09367351 | |
#EXP_INC . | |
#SPENDING . | |
#LOGSPEND -0.18814776 | |
pROC::roc(Y1, as.numeric(predict(ps_mdl, X1, type = "response"))) | |
# Area under the curve: 0.6529 | |
pROC::roc(Y2, as.numeric(predict(ps_mdl, X2, type = "response"))) | |
# Area under the curve: 0.6592 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
HI
where is credit_count.txt ??