Skip to content

Instantly share code, notes, and snippets.

@edkupfer
Created March 4, 2015 14:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edkupfer/1b0fdcbaf86a6bca7d2e to your computer and use it in GitHub Desktop.
Save edkupfer/1b0fdcbaf86a6bca7d2e to your computer and use it in GitHub Desktop.
Takes shot data from a Basketball-Reference player-season shooting page and creates a ggplot showing the individual shot points overlaid with 2-D density contours ("isobars").
library(ggplot2)
library(stringr)
library(plyr)
library(car)
### The URL of the player-season shooting page from Basketball-Reference.
url <- "http://www.basketball-reference.com/players/b/bryanko01/shooting/2015/"

# Download the page and keep only the lines holding an absolutely-positioned
# <div>; each such line is treated as one shot marker and becomes one row.
dat <- readLines(url)
dat <- dat[grep("<div style=\"position:absolute;top:", dat)]
dat <- data.frame(text = dat)

# Every extraction below indexes into a flattened split of all lines, which
# fails with a cryptic subscript error on zero rows; fail clearly instead.
if (nrow(dat) == 0) {
  stop("No shot markers found at ", url, call. = FALSE)
}

# NOTE(review): all of the position-based extractions below assume that every
# marker line has the same number of ';', '=', ' ', '>' and ',' separated
# pieces. A change in the page markup will silently misalign the columns.

# Marker position: the 3rd ';'-piece is "left:<n>px", the 2nd is "top:<n>px".
# The pixel offsets are rescaled (presumably to feet, with x centred on the
# hoop -- confirm against the page's court image size).
dat$x <- -(as.numeric(gsub(
  "px", "",
  gsub("left:", "", unlist(str_split(dat$text, ";", 4))[(seq_along(dat$text) - 1) * 4 + 3])
)) / 10 - 25)
dat$y <- as.numeric(gsub(
  "px", "",
  gsub("top:", "", unlist(str_split(dat$text, ";", 4))[(seq_along(dat$text) - 1) * 4 + 2])
)) / 472 * 45 - 4

# Game date: characters 2-13 of the 4th '='-piece, e.g. "Nov 1, 2014".
# Parsing "%b" depends on the session locale using English month names.
dat$date <- as.Date(
  substr(unlist(str_split(dat$text, "="))[4 * seq_along(dat$text)], 2, 13),
  "%b %d, %Y"
)

# Team, home/away flag and opponent, taken from fixed space-separated slots.
dat$team <- unlist(str_split(dat$text, " ", 10))[9 + 10 * (seq_along(dat$text) - 1)]
dat$home <- unlist(str_split(dat$text, " ", 14))[10 + 14 * (seq_along(dat$text) - 1)]
dat$home <- recode(dat$home, "'vs'=TRUE;'at'=FALSE")
dat$opp <- substr(
  unlist(str_split(dat$text, " ", 14))[11 + 14 * (seq_along(dat$text) - 1)],
  1, 3
)

# Shot outcome (1 = made, 0 = missed) and shot value (2 or 3 points).
dat$fg.made <- NA
dat$fg.made[grep(">Missed ", dat$text)] <- 0
dat$fg.made[grep(">Made ", dat$text)] <- 1
dat$shot.2.or.3 <- NA
dat$shot.2.or.3[grep(" 2-pointer", dat$text)] <- 2
dat$shot.2.or.3[grep(" 3-pointer", dat$text)] <- 3

# Shot distance: first three characters of the 17th space-separated piece.
dat$distance <- as.numeric(substr(
  unlist(str_split(dat$text, " ", 18))[17 + 18 * (seq_along(dat$text) - 1)],
  1, 3
))

# Period: 1-4 for quarters, 5-8 for the 1st-4th overtime.
dat$qtr <- substr(unlist(str_split(dat$text, ">", 4))[3 + 4 * (seq_along(dat$text) - 1)], 1, 5)
dat$qtr <- recode(dat$qtr, "'1st Q'=1;'2nd Q'=2;'3rd Q'=3;'4th Q'=4;'1st O'=5;'2nd O'=6;'3rd O'=7;'4th O'=8")

# Clock text ("MM:SS") from the 4th ','-piece.
# NOTE(review): the split limit is 8 but the stride is 4, which is only
# consistent when every line contains exactly three commas -- confirm.
dat$min <- substr(unlist(str_split(dat$text, ",", 8))[4 + 4 * (seq_along(dat$text) - 1)], 2, 6)

# Convert the clock to minutes elapsed in the game: the minute at which the
# period ends (12-minute quarters, 5-minute overtimes) minus the clock value.
# The 8th entry was 58 in the original, duplicating the 2nd-overtime value;
# the 4th overtime ends at minute 68.
dat$min <- c(12, 24, 36, 48, 53, 58, 63, 68, 73, 78)[dat$qtr] -
  (as.numeric(gsub(":", "", substr(dat$min, 1, 2))) +
    as.numeric(gsub(":", "", substr(dat$min, 3, 6))) / 60)

# Score text after the shot: the 4th "<br>"-piece with trailing markup and
# the word " now" stripped.
dat$final.state <- gsub(
  "&bull;</span></div>", "",
  unlist(str_split(dat$text, "<br>", 4))[4 + 4 * (seq_along(dat$text) - 1)]
)
dat$final.state <- gsub("&times;</span></div>", "", dat$final.state)
dat$final.state <- gsub("\\\">", "", dat$final.state)
dat$final.state <- gsub(" now", "", dat$final.state)

# Margin before the shot: (first score - second score) in final.state, minus
# the points this shot added. Uses the full column name fg.made; the original
# `dat$fg` only worked through `$` partial matching.
dat$previous.margin <-
  as.numeric(unlist(str_split(gsub("[a-zA-Z ]", "", dat$final.state), "-"))[seq_len(nrow(dat)) * 2 - 1]) -
  as.numeric(unlist(str_split(gsub("[a-zA-Z ]", "", dat$final.state), "-"))[seq_len(nrow(dat)) * 2]) -
  dat$fg.made * dat$shot.2.or.3

# The raw HTML is no longer needed once every field has been extracted.
dat$text <- NULL
## Styling parameters consumed by the ggplot call below.

# Court line colour.
line.col <- "grey"

# Shot markers: transparency, size and point shape.
shot.alpha <- 0.09
shot.size <- 3
shot.shape <- 16

# Jitter amounts applied to the shot markers (0 disables jitter).
jitterx <- 0
jittery <- 0

# Colours for made and missed shots (indices into the active palette).
made.col <- 4
miss.col <- 2

# Density contour line width and colour.
iso.width <- 0
iso.col <- 1
# Shot chart: court markings drawn with geom_path, one facet per team, missed
# and made shots as separately coloured points, and 2-D density contours.
# All court y-values are negated to match the shots, which are plotted at
# (-x, -y). Coordinates are presumably in feet -- confirm against the scaling
# used when `dat` was built.
ggplot(
  data = dat,
  aes(x, y)
) +
  # Outer rectangle: x in [-25, 25], y from -4 to 47 (before negation).
  geom_path(
    data = data.frame(x = c(-25, -25, 25, 25, -25), y = -c(-4, 47, 47, -4, -4)),
    color = line.col
  ) +
  # Radius-6 circle centred at y = 15 (presumably the free-throw circle):
  # far half solid, near half dashed.
  geom_path(
    data = data.frame(
      x = c(-6000:(-1)/1000, 1:6000/1000),
      y = -c(15 + sqrt(6^2 - c(-6000:(-1)/1000, 1:6000/1000)^2))
    ),
    aes(x = x, y = y),
    color = line.col
  ) +
  geom_path(
    data = data.frame(
      x = c(-6000:(-1)/1000, 1:6000/1000),
      y = -c(15 - sqrt(6^2 - c(-6000:(-1)/1000, 1:6000/1000)^2))
    ),
    aes(x = x, y = y),
    linetype = "dashed",
    color = line.col
  ) +
  # Two nested rectangles from y = -4 to 15 (presumably the lane): half-widths
  # 8 and 6.
  geom_path(
    data = data.frame(x = c(-8, -8, 8, 8, -8), y = -c(-4, 15, 15, -4, -4)),
    color = line.col
  ) +
  geom_path(
    data = data.frame(x = c(-6, -6, 6, 6, -6), y = -c(-4, 15, 15, -4, -4)),
    color = line.col
  ) +
  # Radius-4 arc centred at y = 1.25 (presumably the restricted area).
  geom_path(
    data = data.frame(
      x = c(-4000:(-1)/1000, 1:4000/1000),
      y = -c(1.25 + sqrt(4^2 - c(-4000:(-1)/1000, 1:4000/1000)^2))
    ),
    aes(x = x, y = y),
    color = line.col
  ) +
  # Radius-6 arc centred at y = 47 (presumably the centre circle's visible half).
  geom_path(
    data = data.frame(
      x = c(-6000:(-1)/1000, 1:6000/1000),
      y = -c(47 - sqrt(6^2 - c(-6000:(-1)/1000, 1:6000/1000)^2))
    ),
    aes(x = x, y = y),
    color = line.col
  ) +
  # Full radius-0.75 circle centred at y = 1.25 (presumably the rim).
  geom_path(
    data = data.frame(
      x = c(-750:(-1)/1000, 1:750/1000, 750:1/1000, -1:-750/1000),
      y = -c(
        c(1.25 + sqrt(0.75^2 - c(-750:(-1)/1000, 1:750/1000)^2)),
        c(1.25 - sqrt(0.75^2 - c(750:1/1000, -1:-750/1000)^2))
      )
    ),
    aes(x = x, y = y),
    color = line.col
  ) +
  # Horizontal segment from x = -3 to 3 at y = 0 (presumably the backboard).
  geom_path(
    data = data.frame(x = c(-3, 3), y = c(0, 0)),
    lineend = "butt",
    color = line.col
  ) +
  # Straight segments at x = +/-22 joined by a radius-23.75 arc centred at
  # y = 1.25 (presumably the three-point line).
  geom_path(
    data = data.frame(
      x = c(-22, -22, -22000:(-1)/1000, 1:22000/1000, 22, 22),
      y = -c(
        -4, 169/12 - 4,
        1.25 + sqrt(23.75^2 - c(-22000:(-1)/1000, 1:22000/1000)^2),
        169/12 - 4, -4
      )
    ),
    aes(x = x, y = y),
    color = line.col
  ) +
  # Equal scaling on both axes so the court is not distorted.
  coord_fixed() +
  # Strip every axis, grid, legend and background element.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_blank()
  ) +
  # One panel per team; the court layers carry no `team` column and are
  # therefore repeated in every panel.
  facet_wrap(~team) +
  # Missed shots. The layer-level `subset=` argument is not supported by
  # current ggplot2 releases (it is ignored, so every shot would be drawn in
  # both layers); filter the data explicitly instead. which() drops rows whose
  # outcome is NA.
  geom_point(
    data = dat[which(dat$fg.made == 0), ],
    aes(-x, -y),
    alpha = shot.alpha,
    color = miss.col,
    size = shot.size,
    shape = shot.shape,
    # jitterx is horizontal (width), jittery is vertical (height); the
    # original passed them the other way round via partial argument names.
    position = position_jitter(width = jitterx, height = jittery)
  ) +
  # Made shots, drawn on top of the misses.
  geom_point(
    data = dat[which(dat$fg.made == 1), ],
    aes(-x, -y),
    alpha = shot.alpha,
    color = made.col,
    size = shot.size,
    shape = shot.shape,
    position = position_jitter(width = jitterx, height = jittery)
  ) +
  # 2-D kernel density contours over all shots (made and missed).
  geom_density2d(
    aes(-x, -y),
    col = iso.col,
    size = iso.width
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment