Skip to content

Instantly share code, notes, and snippets.

@he9qi
Last active August 29, 2015 14:04
Show Gist options
  • Save he9qi/408d564b672991eec74d to your computer and use it in GitHub Desktop.
Save he9qi/408d564b672991eec74d to your computer and use it in GitHub Desktop.
Machine Learning Algorithms in R
# bagged Lasso and elastic-net regularized generalized linear models
library(foreach)
library(glmnet)
# Bagged, penalised logistic regression (glmnet).
#
# Fits `iterations` binomial glmnet models, each on a random subsample of
# `data.train` drawn without replacement, predicts `data.test` with every
# model and averages the predictions row-wise.
#
# Args:
#   data.train:     data frame with a `label` column, an optional `id` column
#                   and the predictor columns.
#   data.test:      data frame with the predictor columns; `id` and `label`
#                   are dropped when present and are not required.
#   length_divisor: each subsample has floor(nrow(data.train) / length_divisor)
#                   rows.
#   iterations:     number of models to fit (must be >= 1).
#   alpha, lambda:  forwarded to glmnet(); `lambda` is also the `s` value used
#                   at prediction time so the two can never disagree.
#
# Returns:
#   The row means of the per-model predictions (one value per test row).
Tiqoo.bagging <- function(data.train, data.test, length_divisor = 5, iterations = 5000,
                          alpha = 0, lambda = 2^17) {
  if (iterations < 1) {
    stop("iterations must be at least 1", call. = FALSE)
  }

  non_features <- c("id", "label")
  n_train <- nrow(data.train)
  sample_size <- floor(n_train / length_divisor)
  train_features <- setdiff(names(data.train), non_features)

  # The test matrix is the same for every model, so build it once instead of
  # once per iteration. Selecting by name (rather than subset(select = -c(...)))
  # also works when the test set carries no `label` column.
  test_features <- setdiff(names(data.test), non_features)
  Z <- data.matrix(data.test[, test_features, drop = FALSE])

  predictions <- foreach(m = seq_len(iterations), .combine = cbind) %do% {
    # Sorted positions keep the sampled rows in their original order.
    rows <- sort(sample(n_train, size = sample_size))
    data.train.i <- data.train[rows, , drop = FALSE]

    # train
    X <- as.matrix(data.train.i[, train_features, drop = FALSE])
    y <- as.factor(data.train.i$label)
    fit <- glmnet(X, y, family = "binomial", alpha = alpha, lambda = lambda)

    # predict
    predict(fit, newx = Z, s = lambda, type = "response")
  }
  rowMeans(predictions)
}
# CV Folds
library(cvTools)
library(randomForest)
library(gbm)
library(ada)
library(glmnet)
library(AUC)
# train model using N folds ...
# Cross-validate `method` over 7 folds of `data.train`.
#
# For every fold the rows are split into a training part (all other folds) and
# a test part (the current fold), `method` is called on the two parts, and the
# `pred` element of its result is printed.
#
# Args:
#   data.train: data frame holding all rows to be split into folds.
#   method:     function(train, test, ...) returning a list with a `pred`
#               element (e.g. Tiqoo.glmnet, Tiqoo.train.randomForest).
#   withId:     if TRUE, the data is passed unchanged and the fold index is
#               supplied as a third positional argument to `method`;
#               if FALSE, the `id` column is dropped first and no fold index
#               is passed.
#   ...:        forwarded to `method` (for instance a model formula).
#
# Returns:
#   Invisibly, a list with one entry per fold holding whatever `method`
#   returned, so the fitted models and scores are not lost after printing.
Tiqoo.train <- function(data.train, method, withId = FALSE, ...) {
  numFolds <- 7
  folds <- cvFolds(nrow(data.train), numFolds)
  results <- vector("list", numFolds)

  for (fold.i in seq_len(numFolds)) {
    # Terminate the line so the score printed below does not run into it.
    cat("training fold: ", fold.i, "\n", sep = "")

    in_fold <- folds$which == fold.i
    indices.test <- sort(folds$subset[in_fold])
    indices.train <- sort(folds$subset[!in_fold])
    fold.train <- data.train[indices.train, , drop = FALSE]
    fold.test <- data.train[indices.test, , drop = FALSE]

    if (withId) {
      res <- method(fold.train, fold.test, fold.i, ...)
    } else {
      # Drop `id` by name; unlike subset(select = -c(id)) this cannot pick up
      # an unrelated variable called `id` from the calling environment.
      keep <- setdiff(names(data.train), "id")
      res <- method(fold.train[, keep, drop = FALSE],
                    fold.test[, keep, drop = FALSE], ...)
    }

    print(res$pred)
    # `[<-` with a wrapped value keeps the slot even if `res` is NULL.
    results[fold.i] <- list(res)
  }

  invisible(results)
}
# Fit a binomial glmnet model on `data.train` and score it on `data.test`.
#
# Args:
#   data.train:    data frame with a `label` column; every other column is
#                  used as a predictor.
#   data.test:     data frame with the same layout, used for evaluation.
#   alpha, lambda: forwarded to glmnet(); `lambda` is also used as `s` when
#                  predicting, so training and prediction always agree.
#                  Defaults reproduce the previous hard-coded values.
#
# Returns:
#   list(model = <glmnet fit>, pred = <AUC of the test predictions>).
Tiqoo.glmnet <- function(data.train, data.test, alpha = 0, lambda = 2^17) {
  # Select predictors by name instead of subset(select = -c(label)), which
  # relies on non-standard evaluation.
  train_features <- setdiff(names(data.train), "label")
  X <- as.matrix(data.train[, train_features, drop = FALSE])
  y <- as.factor(data.train$label)
  model.fit <- glmnet(X, y, family = "binomial", alpha = alpha, lambda = lambda)

  test_features <- setdiff(names(data.test), "label")
  Z <- data.matrix(data.test[, test_features, drop = FALSE])
  z <- as.factor(data.test$label)
  pred.prob <- predict(model.fit, newx = Z, s = lambda, type = "response")

  # Qualified like the call in Tiqoo.train.randomForest so the result does not
  # depend on which attached package happens to provide `roc`/`auc`.
  pred.roc <- AUC::roc(pred.prob, z)
  list(model = model.fit, pred = AUC::auc(pred.roc))
}
# Fit a gbm model with the AdaBoost loss and score it on `data.test`.
#
# Args:
#   data.train: data frame used to fit the model.
#   data.test:  data frame used for evaluation; must contain `label`.
#   formula:    model formula passed to gbm().
#   n.trees:    number of boosting iterations to fit.
#   cv.folds:   number of internal cross-validation folds used by gbm() to
#               pick the best iteration.
#
# The iteration chosen by gbm.perf(method = "cv") is printed and then used for
# prediction. NOTE(review): gbm.perf() draws a diagnostic plot unless called
# with plot.it = FALSE — confirm whether that plot is wanted here.
#
# Returns:
#   list(model = <gbm fit>, pred = <AUC of the test predictions>).
Tiqoo.train.adaboost <- function(data.train, data.test, formula,
                                 n.trees = 1000, cv.folds = 5) {
  # `distribution` is spelled out; the former `dist=` only worked through
  # partial argument matching.
  model.fit <- gbm(formula, data = data.train, distribution = "adaboost",
                   n.trees = n.trees, cv.folds = cv.folds)
  best.iter <- gbm.perf(model.fit, method = "cv")
  print(best.iter)

  # Scores are on the link scale; AUC depends only on their ordering.
  pred.score <- predict(model.fit, data.test, n.trees = best.iter, type = "link")
  pred.roc <- AUC::roc(pred.score, as.factor(data.test$label))
  list(model = model.fit, pred = AUC::auc(pred.roc))
}
# Fit a random forest and score it on `data.test`.
#
# Args:
#   data.train: data frame used to fit the forest.
#   data.test:  data frame used for evaluation; must contain `label`.
#   formula:    model formula passed to randomForest().
#   ntree:      number of trees to grow. Previously `ntree = 1500` was handed
#               to predict(), where it had no effect, so the forest was grown
#               with randomForest()'s default instead; it is now applied at
#               training time, where the argument belongs.
#
# NOTE(review): predict(type = "prob") needs a classification forest, i.e. a
# factor response — confirm `label` is a factor in the data passed in.
#
# Returns:
#   list(model = <randomForest fit>, pred = <AUC of the test predictions>).
Tiqoo.train.randomForest <- function(data.train, data.test, formula, ntree = 1500) {
  model.fit <- randomForest(formula, data = data.train, ntree = ntree)
  pred.prob <- predict(model.fit, data.test, type = "prob")

  # Second column of the class-probability matrix is used as the score.
  pred.roc <- AUC::roc(pred.prob[, 2], as.factor(data.test$label))
  list(model = model.fit, pred = AUC::auc(pred.roc))
}
# Fit an ada (boosted trees) model and score it on `data.test`.
#
# Args:
#   data.train: data frame used to fit the model.
#   data.test:  data frame used for evaluation; must contain `label`.
#   formula:    model formula passed to ada().
#
# Returns:
#   list(model = <ada fit>, pred = <AUC of the test predictions>).
Tiqoo.train.ada <- function(data.train, data.test, formula) {
  model.fit <- ada(formula, data = data.train)
  pred.prob <- predict(model.fit, data.test, type = "probs")

  # Second column of the probability matrix is used as the score
  # (presumably the second class level — confirm against the label coding).
  # roc/auc are qualified like the call in Tiqoo.train.randomForest so the
  # result does not depend on which attached package provides them.
  pred.roc <- AUC::roc(pred.prob[, 2], as.factor(data.test$label))
  list(model = model.fit, pred = AUC::auc(pred.roc))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment