Skip to content

Instantly share code, notes, and snippets.

@witwall
Forked from primaryobjects/nnet.R
Created November 24, 2017 09:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save witwall/c2580860e9f863d68dc32151c2d3192f to your computer and use it in GitHub Desktop.
Save witwall/c2580860e9f863d68dc32151c2d3192f to your computer and use it in GitHub Desktop.
Neural network (nnet) classification example in R using caret, with parallel processing.
library(caret)
library(doParallel)
# Register a parallel backend with two workers.
registerDoParallel(cores = 2)

# Read data.
data <- read.csv('train.csv')
test <- read.csv('test.csv')

# Fail early with a clear message if the response column is missing.
stopifnot('TARGET' %in% names(data))

# Set classification column to factor. make.names() turns the raw labels into
# syntactically valid R names, which caret requires when classProbs = TRUE.
y <- as.factor(make.names(data$TARGET))

# Remove columns with near zero variance.
# The columns are dropped by NAME rather than by negative position because:
#   * data[, -integer(0)] selects zero columns, so an empty nearZeroVar()
#     result would otherwise wipe out the whole data frame;
#   * positions computed on `data` need not line up with `test`, which has no
#     TARGET column.
# TARGET is never dropped here; it is replaced by the factor version below.
nzv <- nearZeroVar(data)
nzv_names <- setdiff(names(data)[nzv], 'TARGET')
data <- data[, !(names(data) %in% nzv_names), drop = FALSE]
test <- test[, !(names(test) %in% nzv_names), drop = FALSE]
data$TARGET <- y
##### Removing constant features
# A column is constant when it holds exactly one distinct value in the
# training data. Each such column is reported and then deleted from both the
# training frame and the test frame.
cat("\n## Removing the constants features.\n")
is_constant <- vapply(
  data,
  function(column) length(unique(column)) == 1,
  logical(1)
)
for (constant_col in names(data)[is_constant]) {
  cat(constant_col, "is constant in train. We delete it.\n")
  data[[constant_col]] <- NULL
  test[[constant_col]] <- NULL
}
##### Removing identical features
# Compare every pair of predictor columns and, for each group of duplicates,
# keep the first column and drop the later ones.
# TARGET is excluded from the comparison so the response can never be dropped.
candidates <- setdiff(names(data), 'TARGET')
# combn() errors when asked for pairs from fewer than two names.
features_pair <- if (length(candidates) >= 2) {
  combn(candidates, 2, simplify = FALSE)
} else {
  list()
}
toRemove <- character(0)
for (pair in features_pair) {
  f1 <- pair[1]
  f2 <- pair[2]
  # Scalar condition, so use the short-circuiting && rather than elementwise &.
  if (!(f1 %in% toRemove) && !(f2 %in% toRemove)) {
    col1 <- data[[f1]]
    col2 <- data[[f2]]
    # all(col1 == col2) is NA when a column has missing values, which makes
    # `if` fail. Two columns count as identical when their NAs sit in the same
    # rows and all remaining values are equal.
    same <- identical(is.na(col1), is.na(col2)) &&
      all(col1 == col2, na.rm = TRUE)
    if (same) {
      cat(f1, "and", f2, "are equals.\n")
      toRemove <- c(toRemove, f2)
    }
  }
}
feature.names <- setdiff(names(data), toRemove)
# drop = FALSE keeps a data frame even if only one column survives.
data <- data[, feature.names, drop = FALSE]
test <- test[, setdiff(feature.names, 'TARGET'), drop = FALSE]
# Fix the RNG state so the partition, the CV folds and the network's starting
# weights are repeatable from one run to the next.
set.seed(1234)

# Hold out a quarter of the labelled rows for a local evaluation.
inTrain <- createDataPartition(data$TARGET, p = 3/4)[[1]]
training <- data[inTrain,]
testing <- data[-inTrain,]
# Train on entire training set.
# training <- data

# 10-fold cross-validation with class probabilities and the two-class summary.
# NOTE(review): thresh / ICAcomp / k look like options for PCA, ICA and kNN
# imputation; only 'center' and 'scale' are requested below - confirm whether
# they are still needed.
numFolds <- trainControl(
  method = 'cv',
  number = 10,
  classProbs = TRUE,
  verboseIter = TRUE,
  summaryFunction = twoClassSummary,
  preProcOptions = list(thresh = 0.75, ICAcomp = 3, k = 5)
)

# Fit a single-hidden-layer network on every column except ID.
# metric = 'ROC' matches what twoClassSummary reports; without it train() asks
# for Accuracy, which is not in the summary, and falls back with a warning.
fit2 <- train(
  TARGET ~ . - ID,
  data = training,
  method = 'nnet',
  preProcess = c('center', 'scale'),
  trControl = numFolds,
  metric = 'ROC',
  tuneGrid = expand.grid(size = c(10), decay = c(0.1))
)

# Confusion matrices on the rows used for fitting and on the held-out rows.
# They are stored, not printed; inspect conf1 / conf2 after the run.
results1 <- predict(fit2, newdata = training)
conf1 <- confusionMatrix(results1, training$TARGET)
results2 <- predict(fit2, newdata = testing)
conf2 <- confusionMatrix(results2, testing$TARGET)

# Class probabilities for the unlabelled test set.
probs <- predict(fit2, newdata = test, type = 'prob')

# Assemble output format: ID, prob.
# The X1 column is the class named 'X1' by make.names() when TARGET was turned
# into a factor (presumably the raw label 1 - verify against the data).
output <- data.frame(ID = test$ID, TARGET = probs$X1)
write.csv(output, file = 'output.csv', row.names = FALSE, quote = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment