Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Created August 30, 2012 08:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save psychemedia/3524075 to your computer and use it in GitHub Desktop.
Save psychemedia/3524075 to your computer and use it in GitHub Desktop.
Mid-Season Review, 2012 F1/Formula One
F1 2012 Season Summary
========================
```{r}
require(ggplot2)
require(plyr)
# Turn a vector of team names into an ordered factor. The level order is the
# fixed list below; any name that is not in the list becomes NA.
orderTeams <- function(teams) {
  team_levels <- c(
    "Red Bull Racing-Renault",
    "McLaren-Mercedes",
    "Ferrari",
    "Mercedes",
    "Lotus-Renault",
    "Force India-Mercedes",
    "Sauber-Ferrari",
    "STR-Ferrari",
    "Williams-Renault",
    "Caterham-Renault",
    "HRT-Cosworth",
    "Marussia-Cosworth"
  )
  ordered(teams, levels = team_levels)
}
# Turn a vector of race names into an ordered factor. The level order is the
# fixed list below; any name that is not in the list becomes NA.
orderRaces <- function(races) {
  race_levels <- c(
    "AUSTRALIA",
    "MALAYSIA",
    "CHINA",
    "BAHRAIN",
    "SPAIN",
    "MONACO",
    "CANADA",
    "EUROPE",
    "GREAT BRITAIN",
    "GERMANY",
    "HUNGARY"
  )
  ordered(races, levels = race_levels)
}
# Plot colours, one per team. The vector is deliberately unnamed: it is passed
# to scale_colour_manual(values = ...) and so is matched to teams by position,
# i.e. in the level order produced by orderTeams().
teamcolours <- c(
  "blue",            # Red Bull Racing-Renault
  "darkgray",        # McLaren-Mercedes
  "red",             # Ferrari
  "lightsteelblue3", # Mercedes
  "goldenrod3",      # Lotus-Renault
  "darkorange",      # Force India-Mercedes
  "gray8",           # Sauber-Ferrari
  "firebrick4",      # STR-Ferrari
  "midnightblue",    # Williams-Renault
  "darkgreen",       # Caterham-Renault
  "gray0",           # HRT-Cosworth
  "darkred"          # Marussia-Cosworth
)
# Map car numbers to a code that tells the two drivers of a team apart
# (used as a plot colour key).
#   x > 13  -> x %% 2             (0 or 1)
#   x <= 13 -> ((x + 1) %% 2) + 2 (2 or 3)
# NOTE(review): the switch at 13 presumably exists because car number 13 is
# not allocated, which flips the odd/even pairing of team-mates - confirm.
#
# The function is applied to whole data-frame columns, so it must be
# vectorised: a scalar `if` on a vector is an error in R >= 4.2 (and older R
# only looked at the first element). NA inputs yield NA.
dc <- function(x) {
  ifelse(x > 13, x %% 2, ((x + 1) %% 2) + 2)
}
# Download one table of the ScraperWiki "f1comscraper" datastore as CSV and
# return it as a data frame.
#   table  - name of the SQLite table to select from
#   apikey - value appended to the `apikey=` query parameter (empty by default)
# The CSV is written to a temporary file that is removed again when the
# function exits. A non-zero status from download.file() is turned into an
# error instead of silently reading an empty or partial file.
fetchScraperwikiTable <- function(table, apikey = "") {
  url <- paste0(
    "https://api.scraperwiki.com/api/1.0/datastore/sqlite?format=csv&name=f1comscraper&query=select+*+from+`",
    table,
    "`&apikey=",
    apikey
  )
  csv_path <- tempfile()
  on.exit(unlink(csv_path), add = TRUE)
  status <- download.file(url, destfile = csv_path, method = "curl")
  if (status != 0) {
    stop("Download of table '", table, "' failed with status ", status,
         call. = FALSE)
  }
  read.csv(csv_path)
}

race <- fetchScraperwikiTable("raceResults")
quali <- fetchScraperwikiTable("qualiResults")
pits <- fetchScraperwikiTable("racePits")
qualiSpeeds <- fetchScraperwikiTable("qualiSpeeds")
# Coerce the result columns to numbers. Entries that are not numeric become
# NA (R reports this with a coercion warning).
race$pos <- as.numeric(as.character(race$pos))
race$grid <- as.numeric(as.character(race$grid))
race$driverNum <- as.numeric(as.character(race$driverNum))

# Order the driver-name factor levels by car number in each table.
race$driverName <- reorder(race$driverName, race$driverNum)
qualiSpeeds$driverName <- reorder(qualiSpeeds$driverName, qualiSpeeds$driverNum)
pits$driverName <- reorder(pits$driverName, pits$driverNum)

# For every race, record how many rows have no numeric finishing position.
race <- ddply(
  race, "race",
  function(d) data.frame(d, missing = sum(is.na(d$pos)))
)

# Apply the fixed team and race orderings to every table.
race$team <- orderTeams(race$team)
race$race <- orderRaces(race$race)
pits$race <- orderRaces(pits$race)
pits$team <- orderTeams(pits$team)
quali$race <- orderRaces(quali$race)
quali$team <- orderTeams(quali$team)
qualiSpeeds$race <- orderRaces(qualiSpeeds$race)
qualiSpeeds$team <- orderTeams(qualiSpeeds$team)

race$dc <- dc(race$driverNum)

# Final classification per race, one panel per team, coloured by driver code.
# opts()/theme_text() no longer exist in ggplot2; theme()/element_text() and
# ggtitle() are their replacements.
g <- ggplot(race) +
  geom_point(aes(x = race, y = pos, col = factor(dc)), size = 1) +
  facet_wrap(~team) +
  ggtitle("F1 2012 Classification") +
  xlab(NULL) +
  ylab(NULL) +
  scale_y_reverse() +
  theme(
    axis.text.x = element_text(angle = -90, size = 5),
    legend.position = "none"
  )
print(g)
# Minimum of d ignoring NAs; returns NA instead of a non-finite result
# (e.g. when d is empty or contains only NAs).
nullmin <- function(d) {
  smallest <- min(d, na.rm = TRUE)
  if (is.finite(smallest)) {
    smallest
  } else {
    NA
  }
}
# Fastest time recorded by each driver across the three qualifying sessions
# of each race (NA when the driver has no time at all).
minqx <- ddply(
  quali, c("race", "team", "driverName"),
  function(d) {
    data.frame(
      minqxt = nullmin(min(d$q1time, d$q2time, d$q3time, na.rm = TRUE))
    )
  }
)

# Per race: the fastest qualifying time over all drivers and sessions, and
# each driver's best time normalised by it.
minqx <- ddply(
  minqx, "race",
  function(d) {
    overall <- nullmin(d$minqxt)
    data.frame(d, minqxtoverall = overall, minqxtpc = d$minqxt / overall)
  }
)

# Per race and team: the team's fastest time, raw and normalised.
minqx <- ddply(
  minqx, c("race", "team"),
  function(d) {
    data.frame(
      d,
      minqxtbyteam = nullmin(d$minqxt),
      minqxtbyteampc = nullmin(d$minqxtpc)
    )
  }
)

# opts()/theme_text() no longer exist in ggplot2; theme()/element_text() and
# ggtitle() are their replacements.

# Loess fit per team through both drivers' normalised best times.
g <- ggplot(minqx) +
  stat_smooth(
    method = "loess",
    aes(x = race, y = minqxtpc, group = team, col = factor(team)),
    se = FALSE
  ) +
  ylim(0.99, 1.08) +
  ggtitle("F1 2012 Quali - Team Progress (Both Drivers)") +
  theme(axis.text.x = element_text(angle = -90)) +
  scale_colour_manual(name = "Teams", values = teamcolours) +
  xlab(NULL) +
  ylab("Av of both team drivers' quali laptimes as % of fastest overall quali time")
print(g)

# Loess fit per team through the team's single best normalised time.
g <- ggplot(minqx) +
  stat_smooth(
    method = "loess",
    aes(x = race, y = minqxtbyteampc, group = team, col = factor(team)),
    se = FALSE
  ) +
  ylim(0.99, 1.08) +
  ggtitle("F1 2012 Quali - Team Progress (Best Driver)") +
  theme(axis.text.x = element_text(angle = -90)) +
  scale_colour_manual(name = "Teams", values = teamcolours) +
  xlab(NULL) +
  ylab("Team's best quali laptime as % of fastest overall quali time")
print(g)
# Highest speed-trap reading of each race, attached to every row as `fastest`.
fastest_by_race <- aggregate(qualiSpeeds$qspeed, list(qualiSpeeds$race), max)
colnames(fastest_by_race) <- c("race", "fastest")
qualiSpeeds <- merge(fastest_by_race, qualiSpeeds, by = "race")

# opts()/theme_text() no longer exist in ggplot2; theme()/element_text() and
# ggtitle() are their replacements.
small_x_labels <- theme(axis.text.x = element_text(angle = -90, size = 5))

# Raw speed-trap readings per driver, one panel per race.
g <- ggplot(qualiSpeeds) +
  geom_point(aes(x = driverName, y = qspeed), size = 1) +
  facet_wrap(~race) +
  ggtitle("F1 2012 Quali Speed Trap") +
  small_x_labels +
  xlab(NULL) +
  ylab(NULL)
print(g)

# The same readings as a fraction of the fastest reading of the race.
g <- ggplot(qualiSpeeds) +
  geom_point(aes(x = driverName, y = qspeed / fastest), size = 1) +
  facet_wrap(~race) +
  ggtitle("F1 2012 Quali Speed Trap (Normalised)") +
  small_x_labels +
  xlab(NULL) +
  ylab(NULL)
print(g)

# Attach each driver's team for the per-team view.
# - `quali` holds one row per driver per race, so only the distinct
#   (team, driverName) pairs are merged; merging the full table would
#   duplicate every speed-trap row once per race.
# - The merge is skipped when qualiSpeeds already carries a `team` column
#   (the script assigns qualiSpeeds$team earlier on): merging a second `team`
#   column would produce team.x/team.y and leave no `team` to facet on.
if (!("team" %in% names(qualiSpeeds))) {
  driver_teams <- unique(quali[, c("team", "driverName")])
  qualiSpeeds <- merge(qualiSpeeds, driver_teams, by = "driverName")
}
qualiSpeeds$dc <- dc(qualiSpeeds$driverNum)

# Normalised readings per race, one panel per team, coloured by driver code.
g <- ggplot(qualiSpeeds) +
  geom_point(aes(x = race, y = qspeed / fastest, col = factor(dc)), size = 1) +
  facet_wrap(~team) +
  ggtitle("F1 2012 Quali Speed Trap (Normalised)") +
  small_x_labels +
  theme(legend.position = "none") +
  xlab(NULL) +
  ylab(NULL)
print(g)
# Position change per result row: positive when the driver finished ahead of
# the grid slot they started from.
race$pchange <- race$grid - race$pos

# One panel per driver: a line from grid slot to finishing position for each
# race, with the grid slot marked by a grey point.
# The axis breaks are passed to scale_y_reverse() itself: adding a separate
# scale_y_continuous(breaks = ...) first has no effect because the later y
# scale replaces it. opts()/theme_text() no longer exist in ggplot2, so
# theme()/element_text() and ggtitle() are used instead.
g <- ggplot(race) +
  geom_linerange(
    aes(ymin = grid, ymax = pos, x = race, group = driverName,
        color = factor(pchange > 0))
  ) +
  facet_wrap(~driverName) +
  geom_point(data = race, aes(x = race, y = grid), col = "gray50", size = 1) +
  ggtitle("F1 2012 Position Changes Grid-Final Classification") +
  theme(
    axis.text.x = element_text(angle = -90, size = 5),
    axis.text.y = element_text(size = 4)
  ) +
  xlab(NULL) +
  ylab("Position") +
  scale_y_reverse(breaks = seq(1, 24, by = 1))
g <- g + scale_colour_discrete(name = "Gained Places")
print(g)
# Distribution of pit times per race, limited to the 0-40 range on x.
# geom_histogram() is used because pitTime is continuous: current ggplot2's
# geom_bar() counts each distinct value instead of binning, whereas the
# ggplot2 of 2012 binned by default. opts() no longer exists in ggplot2, so
# the title is set with ggtitle().
g <- ggplot(pits) +
  geom_histogram(aes(x = pitTime)) +
  facet_wrap(~race) +
  xlim(0, 40)
g <- g +
  ggtitle("F1 2012 Pit time distributions") +
  xlab(NULL) +
  ylab(NULL)
print(g)
# Try to discard drive-through penalties from a vector of pit times so that
# min() of the result is the quickest genuine pit stop.
# Heuristic: a drive-through is at least `max_spread` seconds quicker than the
# fastest real stop, and the `window` quickest real stops are unlikely to span
# more than `max_spread` seconds. This further assumes fewer than `window`
# drive-throughs in a race.
#
#   x          - numeric vector of pit times
#   window     - how many of the quickest times to inspect (default 10)
#   max_spread - largest plausible range, in seconds, of those times
#                (default 2.5)
#
# Returns x unchanged when its `window` quickest times already lie within
# `max_spread`. Otherwise the quickest time is dropped repeatedly and the
# remaining sorted quickest times (NAs removed by sort()) are returned once
# their range is within `max_spread`.
#
# History: an earlier posted version returned x[-1] rather than x in the
# final branch; the original author flagged that as a suspected bug and
# changed it to x, without fully verifying the effect on the published plots.
xdt <- function(x, window = 10, max_spread = 2.5) {
  candidates <- x
  repeat {
    quickest <- head(sort(candidates), window)
    if (!((max(quickest) - min(quickest)) > max_spread)) {
      return(candidates)
    }
    # The quickest remaining time looks like a drive-through: drop it and
    # re-test what is left of the window.
    candidates <- quickest[-1]
  }
}
# Quickest pit stop of each race after xdt() has discarded suspected
# drive-through penalties, attached to every pit row as `minpit`.
min_pit_by_race <- aggregate(
  pits$pitTime, list(pits$race),
  function(times) min(xdt(times))
)
colnames(min_pit_by_race) <- c("race", "minpit")
pits <- merge(min_pit_by_race, pits, by = "race")
pits$dc <- dc(pits$driverNum)

# Pieces shared by the plots below. opts()/theme_text() no longer exist in
# ggplot2; theme()/element_text() and ggtitle() are their replacements.
pit_theme <- theme(
  axis.text.x = element_text(angle = -90),
  legend.position = "none"
)
pit_ylab <- ylab("Time(s) over fastest pitstop")

# Individual stops by team, one panel per race.
g <- ggplot(pits) +
  geom_point(aes(x = team, y = pitTime - minpit, col = factor(dc)), size = 1) +
  facet_wrap(~race) +
  ylim(0, 4) +
  pit_theme +
  ggtitle("F1 2012 Pit stops by race and team")
g <- g + xlab(NULL) + pit_ylab
print(g)

# The same data as box plots.
g <- ggplot(pits) +
  geom_boxplot(aes(x = team, y = pitTime - minpit)) +
  facet_wrap(~race) +
  ylim(0, 4) +
  pit_theme +
  ggtitle("F1 2012 Pit stops by race and team")
g <- g + xlab(NULL) + pit_ylab
print(g)

# Individual stops by race, one panel per team.
g <- ggplot(pits) +
  geom_point(aes(x = race, y = pitTime - minpit, col = factor(dc)), size = 1)
g <- g + facet_wrap(~team) + ylim(0, 4)
g <- g + xlab(NULL) + pit_ylab
g <- g + pit_theme + ggtitle("F1 2012 Pit stops by team and race")
print(g)

# The same data as box plots.
g <- ggplot(pits) +
  geom_boxplot(aes(x = race, y = pitTime - minpit)) +
  facet_wrap(~team) +
  ylim(0, 4) +
  pit_theme +
  ggtitle("F1 2012 Pit stops by team and race")
g <- g + xlab(NULL) + pit_ylab
print(g)

# Loess fit per team through all pit times (relative to the race's quickest).
g <- ggplot(pits) +
  stat_smooth(
    method = "loess",
    aes(x = race, y = pitTime - minpit, group = team, col = factor(team)),
    se = FALSE
  ) +
  ggtitle("F1 2012 - Pitting (loess fit)") +
  theme(axis.text.x = element_text(angle = -90)) +
  scale_colour_manual(name = "Teams", values = teamcolours) +
  xlab(NULL) +
  ylab("Pit times across team")
print(g)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment