#Simple R algorithms
#Prepare training and test data
test_index <- which(1:nrow(iris) %% 5 == 0)
iris_train <- iris[-test_index, ]
iris_test <- iris[test_index, ]
library(car)
test_index <- which(1:nrow(Prestige) %% 4 == 0)
prestige_train <- Prestige[-test_index, ]
prestige_test <- Prestige[test_index, ]
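#The modulo splits above are deterministic (every 5th/4th row); a random
#split is a common alternative -- a minimal sketch (the seed value and the
#*_rand names are illustrative):
set.seed(42)
rand_index <- sample(nrow(iris), size = nrow(iris) %/% 5)
iris_train_rand <- iris[-rand_index, ]
iris_test_rand <- iris[rand_index, ]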
#Linear Regression -- predict prestige from all other variables
model.lm <- lm(prestige~., data = prestige_train)
predict.lm <- predict(model.lm , newdata = prestige_test)
summary(model.lm)
cor(predict.lm, prestige_test$prestige, use="complete.obs")  #NA-safe: type is missing for a few occupations
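#Besides correlation, hold-out RMSE is a common fit measure; a minimal
#sketch (na.rm guards against NA predictions for rows with missing type):
sqrt(mean((predict.lm - prestige_test$prestige)^2, na.rm=TRUE))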
#Logistic Regression
newcol <- data.frame(isSetosa=(iris_train$Species == 'setosa'))
traindata <- cbind(iris_train, newcol)
traindata[c(1,50,100),]
#Note: setosa is linearly separable from the other species, so glm may warn that fitted probabilities of 0 or 1 occurred
logisticModel <- glm(isSetosa ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=traindata, family="binomial")
prob <- predict(logisticModel, newdata=iris_test, type='response')
table(prob>=0.5, iris_test$Species == 'setosa')
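#A quick accuracy figure from the same 0.5 threshold; minimal sketch:
mean((prob >= 0.5) == (iris_test$Species == 'setosa'))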
#Use regularisation
library(glmnet)
#drop the response (prestige, col 4) and the factor column (type, col 6)
cv.fit <- cv.glmnet(as.matrix(prestige_train[,c(-4, -6)]), as.vector(prestige_train[,4]),
                    nlambda=100, alpha=0.7, family="gaussian")
plot(cv.fit)
coef(cv.fit)
prediction <- predict(cv.fit, newx=as.matrix(prestige_test[,c(-4, -6)]))
cor(prediction, as.vector(prestige_test[,4]))
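#coef() and predict() on a cv.glmnet fit default to the conservative
#lambda.1se; a minimal sketch comparing it with the CV-optimal lambda.min:
cv.fit$lambda.min
cv.fit$lambda.1se
prediction.min <- predict(cv.fit, newx=as.matrix(prestige_test[,c(-4, -6)]), s="lambda.min")
cor(prediction.min, as.vector(prestige_test[,4]))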
#Neural network
library(neuralnet)
nnet_iristrain <- cbind(iris_train, iris_train$Species == 'setosa')
nnet_iristrain <- cbind(nnet_iristrain, iris_train$Species == 'versicolor')
nnet_iristrain <- cbind(nnet_iristrain, iris_train$Species == 'virginica')
names(nnet_iristrain)[6:8] <- c('setosa', 'versicolor', 'virginica')
nn <- neuralnet(setosa+versicolor+virginica ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
                data=nnet_iristrain, hidden=c(3))
plot(nn)
mypredict <- compute(nn, iris_test[-5])$net.result
maxidx <- function(arr) {
return(which(arr == max(arr)))
}
idx <- apply(mypredict, c(1), maxidx)
prediction <- c('setosa', 'versicolor', 'virginica')[idx]
table(prediction, iris_test$Species)
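#Overall hold-out accuracy for the network; minimal sketch:
mean(prediction == iris_test$Species)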
#SVM
library(e1071)
tune <- tune.svm(Species~., data=iris_train, gamma=10^(-6:-1), cost=10^(1:4))
summary(tune)
model <- svm(Species~., data=iris_train, type="C-classification", kernel="radial", probability=TRUE, gamma=0.001, cost=10000)  #the argument is type=, not method=
prediction <- predict(model, iris_test, probability=TRUE)
table(prediction, iris_test$Species)
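#tune.svm already keeps the refitted winner, so gamma/cost need not be
#hard-coded; a minimal sketch reusing it:
best_model <- tune$best.model
table(predict(best_model, iris_test), iris_test$Species)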
#Naive Bayes
library(e1071)
model <- naiveBayes(Species~., data=iris_train)
prediction <- predict(model, iris_test[,-5])
table(prediction, iris_test[,5])
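#type="raw" returns class posterior probabilities instead of labels;
#minimal sketch:
posterior <- predict(model, iris_test[,-5], type="raw")
head(posterior)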
#K-Nearest Neighbours
library(class)
train_input <- as.matrix(iris_train[,-5])
train_output <- as.vector(iris_train[,5])
test_input <- as.matrix(iris_test[,-5])
prediction <- knn(train_input, test_input, train_output, k=3)
table(prediction, iris_test$Species)
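#k=3 is arbitrary; a minimal sketch scanning a few odd k values (accuracy
#on this single hold-out set, so only a rough guide):
sapply(c(1, 3, 5, 7, 9), function(k)
  mean(knn(train_input, test_input, train_output, k=k) == iris_test$Species))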
#Decision Tree
library(rpart)
treemodel <- rpart(Species~., data=iris_train)
plot(treemodel)
text(treemodel, use.n=TRUE)
prediction <- predict(treemodel, newdata=iris_test, type='class')
table(prediction, iris_test$Species)
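#rpart grows with cp=0.01 by default; a minimal sketch inspecting the
#complexity table and pruning back (the cp value here is illustrative):
printcp(treemodel)
pruned <- prune(treemodel, cp=0.02)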
#Random Forest
library(randomForest)
model <- randomForest(Species~., data=iris_train, ntree=500)  #the argument is ntree (lowercase); nTree is silently ignored
prediction <- predict(model, newdata=iris_test, type='class')
table(prediction, iris_test$Species)
importance(model)
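#varImpPlot is the graphical counterpart of importance(); minimal sketch:
varImpPlot(model)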
library(ggplot2)
qplot(Petal.Length, Petal.Width, color=Species, data=iris_train)
qplot(Sepal.Length, Sepal.Width, color=Species, data=iris_train)
#Boosting
library(gbm)
newcol <- data.frame(isVersicolor=as.numeric(iris_train$Species=='versicolor'))  #gbm's bernoulli distribution expects a 0/1 response
iris_train <- cbind(iris_train, newcol)
model <- gbm(isVersicolor ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=iris_train,
             n.trees=1000, interaction.depth=2, distribution="bernoulli")
prediction <- predict(model, iris_test, type="response", n.trees=1000)  #use predict(); calling predict.gbm directly is deprecated
table(prediction>=0.5, iris_test$Species == 'versicolor')
summary(model)
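#gbm.perf estimates the optimal number of trees (OOB is conservative, as
#gbm itself warns); a minimal sketch re-predicting at that iteration:
best_iter <- gbm.perf(model, method="OOB")
prediction <- predict(model, iris_test, type="response", n.trees=best_iter)
table(prediction >= 0.5, iris_test$Species == 'versicolor')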