Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Pre-process the training data:
#   1. read train.csv and set the response (TARGET) aside,
#   2. drop zero-variance predictors,
#   3. split the remaining predictors into whole-number-valued columns
#      (treated as categorical) and all other columns (treated as continuous),
#   4. convert the categorical columns to factors,
#   5. centre and scale the continuous columns,
#   6. bind standardized continuous + categorical + response together.
# caret functions are namespace-qualified so the script does not rely on
# caret having been attached elsewhere.
train <- read.csv("train.csv", header = TRUE)
response <- train$TARGET
# Drop columns 1 and 371 by position.
# NOTE(review): presumably an identifier column and TARGET itself -- confirm
# against the CSV layout.
train <- train[-c(1, 371)]

# Remove no variance predictors
zero_var <- caret::nearZeroVar(
  train,
  names = TRUE, freqCut = 95 / 5, uniqueCut = 10, saveMetrics = TRUE
)
# Use a logical mask rather than -which(...): a negative index built from an
# empty which() is an empty index and would drop every column when no
# predictor has zero variance.
train <- train[, !zero_var$zeroVar, drop = FALSE]

# A predictor is treated as categorical when every non-missing value is a
# whole number; non-numeric columns are treated as categorical as well.
# na.rm = TRUE keeps a column containing NA from turning the test into NA.
is_categorical <- vapply(
  train,
  function(column) {
    !is.numeric(column) || all(column == floor(column), na.rm = TRUE)
  },
  logical(1)
)
# Kept as lists of column names, matching the shape the script has always
# produced for these two variables.
train_cat_names <- as.list(names(train)[is_categorical])
train_num_names <- as.list(names(train)[!is_categorical])

# Positions of the categorical columns within train.
idx <- unname(which(is_categorical))
# drop = FALSE keeps single-column selections as data frames, and the logical
# mask avoids the empty negative-index problem when idx is empty.
train_cat <- train[, is_categorical, drop = FALSE]
train_num <- train[, !is_categorical, drop = FALSE]

# change categorical variables to factors
for (column_name in names(train_cat)) {
  train_cat[[column_name]] <- as.factor(train_cat[[column_name]])
}

# normalize continuous variables
preproc <- caret::preProcess(train_num, method = c("center", "scale"))
train_standardized <- predict(preproc, train_num)
# Bind the *standardized* continuous predictors (not the raw train_num) so the
# centring/scaling above is actually reflected in the final data set.
train_standardized <- cbind(train_standardized, train_cat, response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.