# Take shot data from a Basketball-Reference player-season shooting page and create a ggplot showing the individual shots as points together with isobars (2-D density contour lines).
# Note from the hosting page: this file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
library(ggplot2)
library(stringr)
library(plyr)
library(car)
### the url of the player-season shooting page from basketball reference
url <- "http://www.basketball-reference.com/players/b/bryanko01/shooting/2015/"

### download the page and keep only the lines that carry a shot marker
### (lines containing an absolutely positioned <div>)
dat <- readLines(url)
dat <- dat[grep("<div style=\"position:absolute;top:", dat)]
if (length(dat) == 0) {
  # without this guard every parsing step below would fail with an
  # unrelated-looking subscript error
  stop("no shot markers found at ", url, call. = FALSE)
}
dat <- data.frame(text = dat, stringsAsFactors = FALSE)

### piece `k` of every element of `text` after splitting on `pattern` into at
### most `n` pieces. The fixed-width matrix keeps one row per shot, so a line
### with an unexpected number of pieces cannot shift the values of later shots.
text_piece <- function(text, pattern, n, k) {
  str_split_fixed(text, pattern, n)[, k]
}

### marker position: css "left"/"top" pixel offsets -> plot coordinates
left.px <- as.numeric(gsub("px", "", gsub("left:", "", text_piece(dat$text, ";", 4, 3))))
top.px <- as.numeric(gsub("px", "", gsub("top:", "", text_piece(dat$text, ";", 4, 2))))
dat$x <- -(left.px / 10 - 25)
dat$y <- top.px / 472 * 45 - 4

### game date, e.g. "Oct 28, 2014" (month abbreviation parsing depends on locale)
dat$date <- as.Date(substr(text_piece(dat$text, "=", 4, 4), 2, 13), "%b %d, %Y")

### space-separated tokens of the tooltip: team, vs/at, opponent, distance
words <- str_split_fixed(dat$text, " ", 18)
dat$team <- words[, 9]
dat$home <- car::recode(words[, 10], "'vs'=TRUE;'at'=FALSE")
dat$opp <- substr(words[, 11], 1, 3)

### shot result and value; stays NA when neither pattern is present
dat$fg.made <- rep(NA_real_, nrow(dat))
dat$fg.made[grepl(">Missed ", dat$text)] <- 0
dat$fg.made[grepl(">Made ", dat$text)] <- 1
dat$shot.2.or.3 <- rep(NA_real_, nrow(dat))
dat$shot.2.or.3[grepl(" 2-pointer", dat$text)] <- 2
dat$shot.2.or.3[grepl(" 3-pointer", dat$text)] <- 3
dat$distance <- as.numeric(substr(words[, 17], 1, 3))

### period number: quarters are 1-4, overtimes continue at 5
dat$qtr <- substr(text_piece(dat$text, ">", 4, 3), 1, 5)
dat$qtr <- car::recode(dat$qtr, "'1st Q'=1;'2nd Q'=2;'3rd Q'=3;'4th Q'=4;'1st O'=5;'2nd O'=6;'3rd O'=7;'4th O'=8")

### elapsed game minutes = minutes at the end of the period - time remaining
clock <- substr(text_piece(dat$text, ",", 8, 4), 2, 6)
clock.min <- as.numeric(gsub(":", "", substr(clock, 1, 2)))
clock.sec <- as.numeric(gsub(":", "", substr(clock, 3, 6)))
# four 12-minute quarters followed by 5-minute overtime periods
period.end <- c(12, 24, 36, 48, 53, 58, 63, 68, 73, 78)
dat$min <- period.end[dat$qtr] - (clock.min + clock.sec / 60)

### score line after the shot, with the trailing marker markup stripped
state <- text_piece(dat$text, "<br>", 4, 4)
state <- gsub("•</span></div>", "", state)
state <- gsub("×</span></div>", "", state)
state <- gsub("\\\">", "", state)
state <- gsub(" now", "", state)
dat$final.state <- state

### margin before the shot: (first score - second score) after the shot,
### minus the points this shot added (0 for a miss)
score <- str_split_fixed(gsub("[a-zA-Z ]", "", dat$final.state), "-", 2)
dat$previous.margin <- as.numeric(score[, 1]) - as.numeric(score[, 2]) -
  dat$fg.made * dat$shot.2.or.3

### the raw html is no longer needed
dat$text <- NULL
## ggplot parameters
line.col <- "grey"   # colour of the court markings
shot.alpha <- 0.09   # transparency of each shot point
shot.size <- 3       # point size of each shot
shot.shape <- 16     # point shape of each shot
jitterx <- 0         # jitter amount along x (0 = none)
jittery <- 0         # jitter amount along y (0 = none)
made.col <- 4        # colour of made shots
miss.col <- 2        # colour of missed shots
iso.width <- 0       # line size of the density contours
iso.col <- 1         # colour of the density contours
## x grid from -r to r in steps of 0.001 that skips 0 itself;
## `r.thousandths` is the half-width in thousandths (6000 -> -6 ... 6)
arc_x <- function(r.thousandths) {
  c((-r.thousandths):(-1), 1:r.thousandths) / 1000
}

## one court marking: a path through (x, y) drawn in `line.col`;
## further arguments (linetype, lineend, ...) are passed on to geom_path()
court_path <- function(x, y, ...) {
  geom_path(data = data.frame(x = x, y = y),
            aes(x = x, y = y),
            color = line.col,
            ...)
}

x6 <- arc_x(6000)
x4 <- arc_x(4000)
x22 <- arc_x(22000)
rim.x <- arc_x(750)

## NOTE: the names given to the markings below are inferred from their
## dimensions; all y values are negated so the plot is flipped vertically.
ggplot(data = dat, aes(x, y)) +
  ## outer rectangle
  court_path(c(-25, -25, 25, 25, -25), -c(-4, 47, 47, -4, -4)) +
  ## radius-6 circle centred at y = 15: one half solid, the other dashed
  court_path(x6, -(15 + sqrt(6^2 - x6^2))) +
  court_path(x6, -(15 - sqrt(6^2 - x6^2)), linetype = "dashed") +
  ## lane rectangles, 16 and 12 wide
  court_path(c(-8, -8, 8, 8, -8), -c(-4, 15, 15, -4, -4)) +
  court_path(c(-6, -6, 6, 6, -6), -c(-4, 15, 15, -4, -4)) +
  ## radius-4 arc centred at y = 1.25
  court_path(x4, -(1.25 + sqrt(4^2 - x4^2))) +
  ## radius-6 half circle at y = 47 (the far edge of the rectangle)
  court_path(x6, -(47 - sqrt(6^2 - x6^2))) +
  ## radius-0.75 full circle centred at y = 1.25 (presumably the rim):
  ## upper half left-to-right, then lower half right-to-left
  court_path(c(rim.x, rev(rim.x)),
             -c(1.25 + sqrt(0.75^2 - rim.x^2),
                1.25 - sqrt(0.75^2 - rev(rim.x)^2))) +
  ## straight segment at y = 0 (presumably the backboard)
  court_path(c(-3, 3), c(0, 0), lineend = "butt") +
  ## radius-23.75 arc joined to straight segments at x = -22 and x = 22
  ## (presumably the three-point line)
  court_path(c(-22, -22, x22, 22, 22),
             -c(-4, 169 / 12 - 4,
                1.25 + sqrt(23.75^2 - x22^2),
                169 / 12 - 4, -4)) +
  coord_fixed() +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.background = element_blank()) +
  facet_wrap(~team) +
  ## missed shots first, made shots on top. Each layer gets its own subset of
  ## `dat` via `data=`; the layer argument `subset=.()` is not available in
  ## ggplot2 >= 2.0. which() drops shots whose result is NA.
  geom_point(data = dat[which(dat$fg.made == 0), ],
             aes(-x, -y),
             alpha = shot.alpha,
             color = miss.col,
             size = shot.size,
             shape = shot.shape,
             position = position_jitter(width = jitterx, height = jittery)) +
  geom_point(data = dat[which(dat$fg.made == 1), ],
             aes(-x, -y),
             alpha = shot.alpha,
             color = made.col,
             size = shot.size,
             shape = shot.shape,
             position = position_jitter(width = jitterx, height = jittery)) +
  ## density contours over all shots (made and missed)
  geom_density2d(aes(-x, -y),
                 colour = iso.col,
                 size = iso.width)
# Hosting page footer: Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.