Skip to content

Instantly share code, notes, and snippets.

@timmyshen
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timmyshen/17aa0af011da9b063bb4 to your computer and use it in GitHub Desktop.
Save timmyshen/17aa0af011da9b063bb4 to your computer and use it in GitHub Desktop.
# Step 1.
# Load the SeaFlow measurements and print a per-column summary.
# stringsAsFactors = TRUE reads text columns as factors. R >= 4.0 reads them
# as character by default, while randomForest() and svm() (used further down
# in this script) only perform classification when the response is a factor.
# NOTE(review): assumes `pop` is stored as text in the CSV -- confirm.
seaflow <- read.csv(
  'seaflow_21min.csv',
  header = TRUE,
  stringsAsFactors = TRUE
)
summary(seaflow)
# Step 2.
# Split the data into two disjoint halves.
# Earlier manual 80/20 split, kept for reference:
# a <- sample(dim(seaflow)[1], dim(seaflow)[1]*0.8)
# trainseaflow <- seaflow[a,]
# b <- 1:dim(seaflow)[1]
# test.df <- seaflow[setdiff(b, a),]
library(caret)
# Draw ONE partition of row indices (p = 0.5) and use its complement as the
# test set. Asking for times = 2 would return two independent 50% samples
# that overlap, so rows used for training would also be scored as test rows.
data.part <- createDataPartition(
  seaflow$pop,
  times = 1,
  p = 0.5,
  list = FALSE
)[, 1]
train.df <- seaflow[data.part, ]
test.df <- seaflow[-data.part, ]
summary(train.df)
# Step 3.
# Scatter plot of pe against chl_small for the full data set, colored by pop.
# Built with ggplot() + geom_point() because qplot() is deprecated in
# current ggplot2 releases.
# To plot only the training half, pass data = train.df instead.
library(ggplot2)
ggplot(data = seaflow, aes(x = chl_small, y = pe, color = pop)) +
  geom_point()
# Step 4.
# Fit a classification tree that predicts pop from the six measurement
# columns named in the formula, using the training rows only, and print it.
library(rpart)
fol <- pop ~ fsc_small + fsc_perp + fsc_big + pe + chl_big + chl_small
model <- rpart(formula = fol, data = train.df, method = "class")
print(model)
# Step 5.
# Score the tree on the test rows. predict() returns a matrix with one
# column per class; each row is labelled with the column holding its
# largest value (the last such column when several tie), and the labels are
# compared with the true pop values.
popfit <- predict(model, newdata = test.df)
k <- apply(popfit, 1, function(probs) {
  top <- max(probs, na.rm = TRUE)
  max(which(probs == top))
})
popfitname <- colnames(popfit)[k]
# Share of test rows whose predicted label equals the true label.
accuracy <- sum(popfitname == test.df[["pop"]]) / length(k)
# Step 6.
# Fit a random forest with the same formula on the training rows, print it,
# and score it on the test rows.
library(randomForest)
model <- randomForest(fol, data = train.df)
print(model)
popfit <- predict(model, test.df)
# Share of test rows whose prediction equals the true label. mean() takes
# its denominator from the comparison itself rather than borrowing the
# length of `k`, a leftover variable from the tree evaluation in Step 5.
accuracy <- mean(popfit == test.df$pop)
# Variable importance measures reported by the fitted forest.
imp_randomforest <- importance(model)
# Fit a support vector machine with the same formula on the training rows
# and score it on the test rows.
library(e1071)
model <- svm(fol, data = train.df)
popfit <- predict(model, test.df)
# Share of test rows whose prediction equals the true label. mean() avoids
# depending on `k`, a leftover variable from the tree evaluation in Step 5.
accuracy <- mean(popfit == test.df$pop)
# Cross-tabulation of predictions (rows) against true labels (columns).
svmct <- table(pred = popfit, true = test.df$pop)
# Repeat the SVM experiment after dropping every row with file_id 208.
# The filter compares the column vector itself (seaflow$file_id), not a
# one-column data frame, so the row index is a plain logical vector.
newseaflow <- seaflow[seaflow$file_id != 208, ]
# Random half of the remaining rows for training; floor() makes the sample
# size an explicit whole number when the row count is odd.
newa <- sample(nrow(newseaflow), floor(nrow(newseaflow) * 0.5))
trainnewseaflow <- newseaflow[newa, ]
# seq_len() yields an empty index for zero rows, where 1:n would give c(1, 0).
newb <- seq_len(nrow(newseaflow))
# Test set: every remaining row that was not sampled for training.
testnewseaflow <- newseaflow[setdiff(newb, newa), ]
library(e1071)
fol <- formula(pop ~ fsc_small + fsc_perp + fsc_big + pe + chl_big + chl_small)
model <- svm(fol, data = trainnewseaflow)
popfit <- predict(model, testnewseaflow)
# Share of test rows whose prediction equals the true label.
accuracy <- mean(popfit == testnewseaflow$pop)
# Cross-tabulation of predictions (rows) against true labels (columns).
svmct <- table(pred = popfit, true = testnewseaflow$pop)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment