Created
October 17, 2014 13:55
-
-
Save joaovissoci/3e765525765994fc576e to your computer and use it in GitHub Desktop.
predictive_models_template.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prediction models in R
#
# Packages attached by this script:
#   "Deducer"       - rocplot() for the logistic-regression ROC curve
#   "randomForest"  - randomForest(), varImpPlot()
#   "tree"          - tree(), cv.tree(), prune.tree(), tile.tree(), ...
#   "reshape"       - listed as a requirement by the original template
# The "car" package must also be installed, because car::recode() is called
# by namespace further down.
# NOTE(review): the Prestige data set is never loaded explicitly; presumably
# it becomes visible through car/carData once Deducer is attached -- confirm.
library(Deducer) # was misspelled "Deduces", which made library() fail
library(randomForest)
library(tree)
library(reshape)
############################################################################
#### Logistic Regression
############################################################################
# Recode the income column into a two-level outcome: values in the range
# 0 to 5930.5 become "low income", everything else becomes "high income".
# The guard keeps this step re-runnable: the recode specification is written
# for the original numeric column, not for the already-recoded factor.
if (is.numeric(Prestige$income)) {
  Prestige$income <- car::recode(
    Prestige$income,
    "0:5930.5='low income';else='high income'"
  )
}
Prestige$income <- as.factor(Prestige$income)

# Logistic regression of income class on education, women, prestige and
# census. For a factor response, binomial glm() treats the first level as
# failure; with the default alphabetical levels ("high income",
# "low income") the fitted probabilities are therefore for "low income".
fit <- glm(income ~ education + women + prestige + census,
           family = binomial, data = Prestige)
summary(fit)                          # coefficient table and fit statistics
confint(fit)                          # 95% CI for the coefficients
exp(coef(fit))                        # exponentiated coefficients
exp(confint(fit))                     # 95% CI for exponentiated coefficients
residuals(fit, type = "deviance")     # deviance residuals
plot(predict(fit, type = "response")) # fitted probabilities by row index
rocplot(fit, pred.prob.labels = TRUE, prob.label.digits = 3, AUC = TRUE)
############################################################################ | |
#### Bootstrapping | |
############################################################################ | |
############################################################################
#### Classification Tree and Random Forest Predictive Models
############################################################################
# Split the data by outcome class, report the size of each class, then stack
# the two classes back together (rows of the positive class come first).
data_positive <- subset(Prestige, income == "high income")
dim(data_positive)
data_negative <- subset(Prestige, income == "low income")
dim(data_negative)
# Optional down-sampling of the positive class to 50 rows (disabled):
# data_positive <- data_positive[sample(1:nrow(data_positive), 50,
#                                       replace = FALSE), ]
data_sample <- rbind(data_positive, data_negative)
dim(data_sample)

# Keep only the four predictors and the outcome used by the models below.
data_treatment <- with(
  data_sample,
  data.frame(education, women, prestige, census, income)
)

# Random train/test split: every row is independently assigned to group 1 or
# group 2, so the two parts are only roughly equal in size. The fixed seed
# makes the assignment reproducible.
set.seed(1234)
id <- sample(1:2, nrow(data_treatment), replace = TRUE)
List <- split(data_treatment, id)
names(List) <- c("Train", "Test") # group 1 -> Train, group 2 -> Test
train <- List$Train
test <- List$Test
dim(train)
dim(test)
head(train)
# Tree analysis
# library(tree)
# Fit a classification tree for income on all remaining columns of `train`.
tree1 <- tree(income ~ ., data = train)
summary(tree1)
# tree.screens() splits the device so the tree plot and the tile.tree()
# display of class counts per leaf can share one page; close.screen()
# releases the split afterwards.
tree.screens()
plot(tree1)
text(tree1)
tile.tree(tree1, train$income, axes = TRUE)
close.screen(all = TRUE)
names(tree1)
tree1$frame

# Pruning
# Cross-validated pruning curves, side by side: first with
# method = "misclass", then with cv.tree()'s defaults.
set.seed(1234)
old_par <- par(mfrow = c(1, 2))
plot(cv.tree(tree1, FUN = prune.tree, method = "misclass"))
plot(cv.tree(tree1))
# Restore the layout before tree.screens(): split.screen-based layouts are
# documented as incompatible with par(mfrow).
par(old_par)
pruneTree <- prune.tree(tree1, best = 2)
tree.screens()
plot(pruneTree)
text(pruneTree)
tile.tree(pruneTree, train$income, axes = TRUE)
close.screen(all = TRUE)
summary(pruneTree)
pruneTree

# Hold-out validation: confusion table of the tree on the test split.
# NOTE(review): this scores the unpruned tree1, not pruneTree -- confirm that
# is intended.
pred1 <- predict(tree1, test, type = "class")
# Named tree_confusion rather than `t` to avoid masking base::t().
tree_confusion <- table(observed = test[, "income"], predicted = pred1)
tree_confusion
# Random forest analysis
# library(randomForest)
# Fit a random forest for income on all remaining columns of `train`, keeping
# variable-importance and proximity measures; rows with NA are dropped.
# NOTE(review): ntree = 100000 together with proximity = TRUE is expensive;
# consider whether a smaller forest is sufficient.
rf1 <- randomForest(
  income ~ .,
  data = train,
  ntree = 100000,
  importance = TRUE,
  proximity = TRUE,
  na.action = na.omit
)
rf1
varImpPlot(rf1, main = "Variable Importance")
rf1$importance

# Predicting using random forest: predicted classes for the test split and
# the resulting confusion table.
pred <- predict(rf1, newdata = test)
pred
table(observed = test$income, predicted = pred)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment