Created
March 31, 2016 20:26
-
-
Save sriyoda/c2c636af4429d0eb4ad26c37a4df6e94 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preprocess the training data: drop zero-variance predictors, split the
# remaining predictors into categorical (whole-number valued) and continuous
# sets, convert the former to factors, center/scale the latter, and recombine
# everything with the response into `train_standardized`.
library(caret)  # provides nearZeroVar() and preProcess()

train <- read.csv("train.csv", header = TRUE)
response <- train$TARGET
# Drop columns 1 and 371 by position.
# NOTE(review): presumably the row identifier and TARGET itself -- confirm
# against the CSV header before reusing this on a differently shaped file.
train <- train[-c(1, 371)]

# Remove no variance predictors ----
zero_var <- nearZeroVar(
  train,
  names = TRUE, freqCut = 95 / 5, uniqueCut = 10, saveMetrics = TRUE
)
# Logical indexing instead of `-which(...)`: a negative empty index selects
# nothing, so the old form dropped every column when no predictor was flagged.
train <- train[, !zero_var$zeroVar, drop = FALSE]

# Classify each predictor ----
# A column is treated as categorical when all of its values are whole numbers.
# Non-numeric columns are also treated as categorical (floor() would fail on
# them), and NAs are ignored so the test never yields NA.
is_categorical <- vapply(
  train,
  function(col) !is.numeric(col) || all(col == floor(col), na.rm = TRUE),
  logical(1)
)
train_cat_names <- names(train)[is_categorical]
train_num_names <- names(train)[!is_categorical]
idx <- match(train_cat_names, names(train))

# `drop = FALSE` keeps a data frame even when only one column is selected.
train_cat <- train[, is_categorical, drop = FALSE]
train_num <- train[, !is_categorical, drop = FALSE]

# Change categorical variables to factors ----
for (j in seq_along(train_cat)) {
  train_cat[[j]] <- as.factor(train_cat[[j]])
}

# Normalize continuous variables ----
if (ncol(train_num) > 0) {
  preproc <- preProcess(train_num, method = c("center", "scale"))
  train_num_standardized <- predict(preproc, train_num)
} else {
  # Nothing to center/scale; keep the (empty) frame so cbind() still works.
  preproc <- NULL
  train_num_standardized <- train_num
}

# Combine the *standardized* continuous predictors (not the raw `train_num`,
# which would discard the centering/scaling) with the factors and response.
train_standardized <- cbind(train_num_standardized, train_cat, response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am trying to run this code and need the names of the packages used to implement it.