Skip to content

Instantly share code, notes, and snippets.

@statcompute
Created February 23, 2019 21:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statcompute/fe652c679e4abeae3a037878d23d5f82 to your computer and use it in GitHub Desktop.
Save statcompute/fe652c679e4abeae3a037878d23d5f82 to your computer and use it in GitHub Desktop.
Gradient-Free Optimization for glmnet Parameters
### gradient-free optimization for glmnet parameters ###
df1 <- read.csv("Downloads/credit_count.txt")
df2 <- df1[df1$CARDHLDR == 1, ]
X <- scale(df2[setdiff(colnames(df2), c("CARDHLDR", "DEFAULT"))])
Y <- as.factor(as.matrix(df2["DEFAULT"]))
set.seed(2019)
sample <- sample(seq(nrow(df2)), size = nrow(df2) / 2, replace = FALSE)
### TRAINING SET ###
Y1 <- Y[sample]
X1 <- X[sample,]
### VALIDATION SET ###
Y2 <- Y[-sample]
X2 <- X[-sample,]
### OBJECTIVE FUNCTION TO MAXIMIZE AUC BY N-FOLD VALIDATION ###
glmnet.optim <- function(x) {
nfolds <- 10
set.seed(1)
folds <- caret::createFolds(1:length(Y1), k = nfolds, list = FALSE)
glmnet.cv <- function(i) {
mdl <- glmnet::glmnet(X1[folds != i, ], Y1[folds != i], family = "binomial", standardize = FALSE,
alpha = x[1], lambda = x[2])
data.frame(Ya = Y1[folds == i], Yp = as.numeric(predict(mdl, X1[folds == i, ], type = "response")))
}
p <- do.call(rbind, parallel::mcMap(glmnet.cv, 1:nfolds, mc.cores = parallel::detectCores() - 1))
r <- pROC::roc(p$Ya, p$Yp)
return(r$auc[1])
}
### NELDER-MEAD OPTIMIZATION ###
nm_out <- dfoptim::nmkb(par = c(0.1, 0.01), fn = function(x) glmnet.optim(x),
upper = c(1, 100), lower = c(0, 0),
control = list(tol = 1e-10, maximize = T))
nm_mdl <- glmnet::glmnet(X1, Y1, family = "binomial", alpha = nm_out$par[1], lambda = nm_out$par[2])
coef(nm_mdl)
#(Intercept) -2.36444757
#AGE .
#ACADMOS .
#ADEPCNT .
#MAJORDRG 0.03489905
#MINORDRG 0.12017363
#OWNRENT -0.10940849
#INCOME -0.29822461
#SELFEMPL .
#INCPER -0.09279876
#EXP_INC .
#SPENDING .
#LOGSPEND -0.18790225
pROC::roc(Y1, as.numeric(predict(nm_mdl, X1, type = "response")))
# Area under the curve: 0.6529
pROC::roc(Y2, as.numeric(predict(nm_mdl, X2, type = "response")))
# Area under the curve: 0.6592
### PARTICLE SWARM OPTIMIZATION ###
ps_out <- pso::psoptim(par = c(0.1, 0.01), upper = c(1, 100), lower = c(0, 0),
fn = function(x) -glmnet.optim(x),
control = list(maxit = 50, s = 10))
ps_mdl <- glmnet::glmnet(X1, Y1, family = "binomial", alpha = ps_out$par[1], lambda = ps_out$par[2])
coef(ps_mdl)
#(Intercept) -2.36448859
#AGE .
#ACADMOS .
#ADEPCNT .
#MAJORDRG 0.03556278
#MINORDRG 0.12033092
#OWNRENT -0.11008422
#INCOME -0.29728683
#SELFEMPL .
#INCPER -0.09367351
#EXP_INC .
#SPENDING .
#LOGSPEND -0.18814776
pROC::roc(Y1, as.numeric(predict(ps_mdl, X1, type = "response")))
# Area under the curve: 0.6529
pROC::roc(Y2, as.numeric(predict(ps_mdl, X2, type = "response")))
# Area under the curve: 0.6592
@Kurtanjek
Copy link

HI
where is credit_count.txt ??

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment