Skip to content

Instantly share code, notes, and snippets.

@jurgen-dejager
Created August 16, 2016 03:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jurgen-dejager/7f96c61529ed72c3c846db018572c303 to your computer and use it in GitHub Desktop.
Save jurgen-dejager/7f96c61529ed72c3c846db018572c303 to your computer and use it in GitHub Desktop.
---
title: "Project3"
author: "Jurgen de Jager"
date: "August 12, 2016"
output: html_document
---
```{r, echo = FALSE}
library(randomForest)
library(ggplot2)
library(rpart)
library(rpart.plot)
library(rattle)
library(party)
library(RColorBrewer)
library(MASS)
library(GGally)
library(dplyr)
library(d3heatmap)
# Load the player data and store Position as character strings.
nba <- read.csv("nba.csv")
nba$Position <- as.character(nba$Position)

# Print the indices of rows whose Position label is longer than 3 characters.
which(nchar(nba$Position) > 3)

# Keep the first 342 rows, then drop four rows by index.
# NOTE(review): the dropped indices are presumably the rows reported by the
# which() call above -- confirm against nba.csv before changing the data.
nba <- nba[1:342, ]
nba <- nba[-c(159, 260, 324, 327), ]

# Random train/test split: rows with a uniform draw below 0.4 go to training,
# the rest to testing. The draw is sized from the data (not a hard-coded row
# count) and seeded so the split is the same on every knit.
set.seed(2016)
nba$randu <- runif(nrow(nba), 0, 1)
nba.train <- nba[nba$randu < .4, ]
nba.test <- nba[nba$randu >= .4, ]
```
# VIZ
```{r}
# Statistics shown in the pair plot and in the per-player heatmap.
stat_cols <- c("TRB", "AST", "STL", "BLK", "PER", "TOV", "USG", "TS")

# Pairwise plots of the selected statistics, coloured by position.
# The plot reads from `nba` directly: `nba3` is only created further down
# and has no Position column to colour by.
ggpairs(nba, columns = stat_cols, mapping = aes(colour = Position))

# Density of each statistic by position. `alpha` is a fixed setting, so it
# goes outside aes(); inside aes() it is treated as data and adds a
# meaningless "alpha" legend.
# PER
ggplot(nba, aes(x = PER)) + geom_density(aes(fill = Position), alpha = 0.2)
# TS (true shooting score)
ggplot(nba, aes(x = TS)) + geom_density(aes(fill = Position), alpha = 0.2)
# TRB
ggplot(nba, aes(x = TRB)) + geom_density(aes(fill = Position), alpha = 0.2)
# DRB (defensive rebounds)
ggplot(nba, aes(x = DRB)) + geom_density(aes(fill = Position), alpha = 0.2)
# STL (steals)
ggplot(nba, aes(x = STL)) + geom_density(aes(fill = Position), alpha = 0.2)

# Heatmap of per-position means.
viz.data <- nba[, c(1:2, 6:21)]
vd <- viz.data %>%
  group_by(Position) %>%
  summarise(across(everything(), mean))
position_names <- vd$Position
# Columns 2:17 drop the Position label column (it becomes the row names).
# NOTE(review): the summary has 18 columns, so 2:17 also drops the last
# one -- confirm that is intended.
# Convert to a plain data.frame first: setting row names on a tibble is
# deprecated.
vd <- as.data.frame(vd[, 2:17])
row.names(vd) <- position_names
d3heatmap(vd, scale = "column", dendrogram = "none")

## Heatmap of every player, with rows clustered into 5 groups.
nba3 <- nba[, c("Player", stat_cols)]
row.names(nba3) <- nba3$Player
d3heatmap(
  nba3[, stat_cols],
  scale = "column",
  yaxis_font_size = "4pt",
  k_row = 5,
  dendrogram = "row"
)
```
# Decision Tree - Good Fit
```{r}
# Classification tree with rpart's default control settings, fitted on
# Position plus columns 6:21 of the training split.
tree_cols <- c(2, 6:21)
fit <- rpart(
  Position ~ .,
  data = nba.train[, tree_cols],
  method = "class"
)
#fancyRpartPlot(fit)

# Predicted class for every row of the test split.
nba.test$Prediction <- predict(fit, newdata = nba.test, type = "class")

# Confusion table (rows: actual position, columns: predicted position),
# first as counts and then as row proportions.
tree_confusion <- table(nba.test$Position, nba.test$Prediction)
tree_confusion
prop.table(tree_confusion, margin = 1)
```
# Decision Tree - Over Fit
```{r}
# Same tree formula and data as before, but with minsplit = 3 and
# cp = 0.001 passed through rpart.control.
overfit_control <- rpart.control(minsplit = 3, cp = 0.001)
fit <- rpart(
  Position ~ .,
  data = nba.train[, c(2, 6:21)],
  method = "class",
  control = overfit_control
)
plot(fit$variable.importance)
#fancyRpartPlot(fit)

# Predicted class for every row of the test split.
nba.test$Prediction <- predict(fit, newdata = nba.test, type = "class")

# Confusion table (rows: actual position, columns: predicted position),
# first as counts and then as row proportions.
overfit_confusion <- table(nba.test$Position, nba.test$Prediction)
overfit_confusion
prop.table(overfit_confusion, margin = 1)
```
# RF
```{r}
# Random forest on Position plus columns 6:21 of the training split.
rf_cols <- c(2, 6:21)
# mtry is the square root of the number of predictors actually given to the
# model (rf_cols minus the response), not of every column in `nba`, which
# also holds columns the model never sees (e.g. the `randu` split helper).
n_predictors <- length(rf_cols) - 1
rf.model <- randomForest(
  as.factor(Position) ~ .,
  data = nba.train[, rf_cols],
  ntree = 500,
  mtry = round(sqrt(n_predictors))
)

# Out-of-bag error of the full forest. err.rate has one row per ensemble
# size and one column for OOB plus one per class, so averaging the whole
# matrix would mix unrelated quantities.
rf.model$err.rate[nrow(rf.model$err.rate), "OOB"]
plot(rf.model)

# Predict the test split.
nba.test$pred.pos.rf <- predict(rf.model, nba.test, type = "response")

# Confusion table (rows: actual, columns: predicted): counts, then row
# proportions.
rf_confusion <- table(nba.test$Position, nba.test$pred.pos.rf)
rf_confusion
prop.table(rf_confusion, 1)

# Error-rate plot again, this time with a legend in a narrow panel on the
# right.
layout(matrix(c(1, 2), nrow = 1), widths = c(4, 1))
old_par <- par(mar = c(5, 4, 4, 0)) # no margin on the right side
plot(rf.model)
par(mar = c(5, 0, 4, 2)) # no margin on the left side
plot(c(0, 1), type = "n", axes = FALSE, xlab = "", ylab = "")
# One legend entry per err.rate column, however many classes there are.
err_labels <- colnames(rf.model$err.rate)
legend(
  "top",
  err_labels,
  col = seq_along(err_labels),
  cex = 0.8,
  fill = seq_along(err_labels)
)
# Put the graphics state back so later plots are not affected.
par(old_par)
layout(1)
```
# Table
```{r, results = "asis"}
# summaryBy() comes from doBy and stargazer() from stargazer; neither is
# loaded elsewhere in this document.
library(doBy)
library(stargazer)

# stargazer emits LaTeX by default. Emit HTML when knitting to an HTML
# document; results = "asis" on the chunk lets the markup render as a table
# instead of being shown as raw text.
table_type <- if (knitr::is_html_output()) "html" else "latex"

# Mean of the selected columns for each position.
ave.table <- summaryBy(
  . ~ factor(Position),
  data = nba[, c(2, 6:21)],
  FUN = mean
)
stargazer(
  ave.table[, -c(2, 13, 14, 15)],
  type = table_type,
  summary = FALSE,
  font.size = "small",
  column.sep.width = "1pt",
  covariate.labels = c("ID", "Position", "PER", "TS", "ORB", "DRB", "TRB", "AST", "BLK", "TOV", "USG", "ORtg", "DRtg","STL")
)

# Summary statistics for the full data set.
stargazer(nba, type = table_type)
```
# Finding best parameters (ntree and mtry)
```{r}
# Search for the mtry value with the lowest OOB error, using columns 6:21 as
# predictors and Position as the response.
# `ntreeTry` is tuneRF's own argument for the number of trees per trial; the
# previous `ntree = 500` only reached it through partial argument matching.
# NOTE(review): tuning uses every row of `nba`, including the test split --
# confirm that is intended.
bestmtry <- tuneRF(
  nba[, 6:21],
  factor(nba$Position),
  ntreeTry = 500,
  stepFactor = 1.5,
  improve = 1e-5
)
print(bestmtry)
# Error-rate plot of the random forest fitted in the earlier chunk.
plot(rf.model)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment