# predictive_models_template.R
#Prediction models in R
#Packages needed:
#"car" (for the Prestige data and recode())
#"Deducer"
#"tree"
#"randomForest"
#"reshape"
lapply(c("car","Deducer","randomForest","tree","reshape"),
library, character.only=TRUE)
############################################################################
#### Logistic Regression
############################################################################
#recoding the outcome variable: split income at 5930.5 (the sample median)
Prestige$income<-car::recode(Prestige$income,"0:5930.5='low income';else='high income'")
Prestige$income<-as.factor(Prestige$income)
#fitting a logistic model predicting the income group from education, women,
#prestige, and census; glm() models P(income == "low income"), since
#"high income" is the alphabetically first (reference) level
fit = glm(income ~ education + women + prestige + census,
          family = binomial, data = Prestige)
summary(fit) # display results
confint(fit) # 95% CI for the coefficients
exp(coef(fit)) # exponentiated coefficients
exp(confint(fit)) # 95% CI for exponentiated coefficients
residuals(fit, type="deviance") # residuals
plot(predict(fit, type="response")) # fitted probabilities by observation index
rocplot(fit,pred.prob.labels=TRUE,prob.label.digits=3,AUC=TRUE)
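#A hedged addition, not in the original template: turn the fitted
#probabilities into class labels with a 0.5 cutoff and tabulate them against
#the observed classes (probabilities above 0.5 map to "low income", the
#modelled level).
pred_class <- ifelse(predict(fit, type="response") > 0.5,
                     "low income", "high income")
table(observed = Prestige$income, predicted = pred_class)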
############################################################################
#### Bootstrapping
############################################################################
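#This section was left empty in the original template. Below is a minimal
#sketch (an assumption, not the author's code) that bootstraps the
#logistic-regression coefficients with the boot package; add "boot" to the
#package list above if you use it.
library(boot)
boot_coef <- function(data, indices) {
  d <- data[indices, ] #resample rows with replacement
  coef(glm(income ~ education + women + prestige + census,
           family = binomial, data = d))
}
set.seed(1234)
boot_fit <- boot(Prestige, boot_coef, R = 1000)
boot_fit
boot.ci(boot_fit, type = "perc", index = 2) #percentile CI for the education coefficient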
############################################################################
#### Classification Tree and Random Forest Predictive Models
############################################################################
#split the data by outcome class (allows optional down-sampling below)
data_positive<-subset(Prestige,income=="high income")
dim(data_positive)
data_negative<-subset(Prestige,income=="low income")
dim(data_negative)
#data_positive <- data_positive[sample(1:nrow(data_positive), 50,
# replace=FALSE),]
data_sample<-rbind(data_positive,data_negative)
dim(data_sample)
data_treatment<-with(data_sample,data.frame(education,women,prestige,census,income))
set.seed(1234)
#random ~50/50 split into training and test sets
id <- sample(1:2,nrow(data_treatment),replace=TRUE)
List <- split(data_treatment,id)
names(List) <- c("Train","Test")
train = List$Train
test = List$Test
dim(train)
dim(test)
head(train)
#Tree Analysis
#library(tree)
tree1 = tree(income ~., data= train)
summary(tree1)
tree.screens()
plot(tree1)
text(tree1)
tile.tree(tree1, train$income, axes = TRUE)
close.screen(all = TRUE)
names(tree1)
tree1$frame
#pruning: cross-validate to choose the tree size, then prune to 2 terminal nodes
set.seed(1234)
par(mfrow=c(1,2))
plot(cv.tree(tree1,FUN=prune.tree,method="misclass"))
plot(cv.tree(tree1))
pruneTree <- prune.tree(tree1,best=2)
tree.screens()
plot(pruneTree)
text(pruneTree)
tile.tree(pruneTree, train$income, axes = TRUE)
close.screen(all = TRUE)
summary(pruneTree)
pruneTree
#Hold-out validation: evaluate the unpruned tree on the test set
pred1 = predict(tree1, test, type = "class")
#named conf_tree rather than "t", which would mask the base transpose function
conf_tree = table(observed=test[,'income'], predicted=pred1)
conf_tree
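#A hedged addition, not in the original template: the same hold-out check for
#the pruned tree, plus the overall test-set misclassification rate.
pred_prune = predict(pruneTree, test, type = "class")
table(observed = test$income, predicted = pred_prune)
1 - sum(diag(conf_tree)) / sum(conf_tree) #misclassification rate, unpruned tree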
#random forest analysis
#library(randomForest)
#note: ntree=100000 is far larger than usually needed (a few hundred to a few
#thousand trees typically suffice) and is slow with proximity=TRUE
rf1 = randomForest(income ~ ., importance = TRUE, proximity = TRUE,
                   ntree = 100000, data = train, na.action = na.omit)
rf1
varImpPlot(rf1, main = "Variable Importance")
rf1$importance
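#A hedged addition, not in the original template: since proximity=TRUE was
#requested above, the proximity matrix can be visualised with randomForest's
#multidimensional-scaling plot.
MDSplot(rf1, train$income)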
#predicting using random forest
pred = predict(rf1, newdata=test)
pred
table(observed = test$income, predicted = pred)
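#A hedged addition, not in the original template: overall test-set accuracy
#for the random forest as a single summary number.
mean(pred == test$income)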