Skip to content

Instantly share code, notes, and snippets.

# 10-fold
# Resampling setup: repeated cross-validation, repeated 10 times. The number
# of folds is not set here, so trainControl()'s own default applies.
# Class probabilities are requested alongside the two-class summary function.
ctrl <- trainControl(
  method = "repeatedcv",
  repeats = 10,
  summaryFunction = twoClassSummary,
  classProbs = TRUE
)
##TRAINING
set.seed(5627)
orig_fit <- train(UserClass ~ ., data = imbal_train,
method = "svmLinear",
metric = "ROC",
## Load the project's helper functions.
source(file = "~/experiments/lib/functions.R")
## Restore the cached training, validation and test datasets.
load(file = "~/experiments/cache/socialMed.training-v1.RData")
load(file = "~/experiments/cache/socialMed.validation-v1.RData")
load(file = "~/experiments/cache/socialMed.test-v1.RData")
prepareFeatures <- function(dataset){
@zunman
zunman / smote_svm_7.R
Last active September 11, 2016 22:04
# SVM
# Fit on the predictor columns only. Passing the whole of `trainset` would put
# the `label` column into the feature matrix, i.e. hand the model the very
# value it is supposed to predict.
feature_cols <- setdiff(names(trainset), "label")
svm.model <- svm(trainset[, feature_cols, drop = FALSE],
                 as.factor(trainset$label),
                 cost = 100, gamma = 1)
# Predict on the test rows using exactly the columns the model was fitted on.
svm.pred <- predict(svm.model, testset[, feature_cols, drop = FALSE])
# View results: attach the predicted class to the test rows of the raw data.
testdata$pred <- svm.pred
@zunman
zunman / smote_svm_6.R
Last active September 13, 2016 18:06
# SMOTE
# The formula interface below uses `label` as the class, so make it a factor
# first.
trainset[["label"]] <- as.factor(trainset[["label"]])
# Rebalance the training set; only the training data is resampled here.
trainset <- SMOTE(
  form = label ~ .,
  data = trainset,
  perc.over = 200,
  perc.under = 100
)
# Show the class proportions after resampling.
prop.table(table(trainset[["label"]]))
@zunman
zunman / smote_svm_5.R
Last active September 11, 2016 21:35
# Print the relative class frequencies of the training and test partitions.
prop.table(table(trainset[["label"]]))
prop.table(table(testset[["label"]]))
# Perform split: a single 50/50 partition drawn on the label column.
splitIndex <- createDataPartition(
  y = data.DTM$label,
  p = 0.5,
  list = FALSE,
  times = 1
)
# Term-matrix rows selected by the partition train the model; the rest test it.
trainset <- data.DTM[splitIndex, ]
testset <- data.DTM[-splitIndex, ]
# Apply the same row split to the raw data so both views stay aligned.
traindata <- data[splitIndex, ]
testdata <- data[-splitIndex, ]
# Convert the sparse DTM to a dense data.frame (one column per matrix column).
data.DTM <- as.data.frame(
  as.matrix(sparse_DTM)
)
# Append the label column taken from the raw data.
data.DTM[["label"]] <- data$label
# Create the Document-Term matrix
DTM <- DocumentTermMatrix(
  MyCorpus,
  control = list(bounds = list(global = c(0, Inf)))
)
dim(DTM)
# Create a sparse matrix to put into SVM.
# The DTM's i / j / v components supply the row indices, column indices and
# values of the non-zero cells. sparseMatrix() is namespace-qualified because
# this script does not attach the Matrix package explicitly.
sparse_DTM <- Matrix::sparseMatrix(
  i = DTM$i,
  j = DTM$j,
  x = DTM$v,
  dims = dim(DTM),
  dimnames = list(rownames(DTM), colnames(DTM))
)
# Create the corpus: one document per element of data$text.
text_source <- VectorSource(data$text)
MyCorpus <- VCorpus(text_source, readerControl = list(language = "en"))
# Print the first document as a spot check.
content(MyCorpus[[1]])
# Some preprocessing: lower-case every document.
to_lower <- content_transformer(tolower)
MyCorpus <- tm_map(MyCorpus, to_lower)
# Print the first document again to confirm the transformation.
content(MyCorpus[[1]])
library(caret)
library(e1071)
library(rpart)
library(RTextTools)
library(tm)
library(DMwR)
set.seed(1234)