public
Created

Mid-Season Review, 2012 F1/Formula One

  • Download Gist
f1_2012_midSeasonReview.R
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
F1 2012 Season Summary
========================
 
```{r}
require(ggplot2)
require(plyr)
 
# Turn a vector of team names into an ordered factor.
#   teams : vector of team names
# The level order is fixed by the list below; any name that is not in the
# list becomes NA.
orderTeams <- function(teams) {
  teamLevels <- c(
    "Red Bull Racing-Renault",
    "McLaren-Mercedes",
    "Ferrari",
    "Mercedes",
    "Lotus-Renault",
    "Force India-Mercedes",
    "Sauber-Ferrari",
    "STR-Ferrari",
    "Williams-Renault",
    "Caterham-Renault",
    "HRT-Cosworth",
    "Marussia-Cosworth"
  )
  factor(teams, levels = teamLevels, ordered = TRUE)
}
 
# Turn a vector of race names into an ordered factor.
#   races : vector of upper-case race names
# The level order is fixed by the list below; any name that is not in the
# list becomes NA.
orderRaces <- function(races) {
  raceLevels <- c(
    "AUSTRALIA",
    "MALAYSIA",
    "CHINA",
    "BAHRAIN",
    "SPAIN",
    "MONACO",
    "CANADA",
    "EUROPE",
    "GREAT BRITAIN",
    "GERMANY",
    "HUNGARY"
  )
  factor(races, levels = raceLevels, ordered = TRUE)
}
 
# Colour palette handed to scale_colour_manual() on the team-coloured plots.
# NOTE(review): presumably entry i is the colour for the i-th level of
# orderTeams() - confirm against the rendered legends.
teamcolours <- c(
  "blue",
  "darkgray",
  "red",
  "lightsteelblue3",
  "goldenrod3",
  "darkorange",
  "gray8",
  "firebrick4",
  "midnightblue",
  "darkgreen",
  "gray0",
  "darkred"
)
 
# Tag each car number with a driver code, used to tell driver 1 from driver 2
# within a team (the plots colour points by factor(dc)).
#   x : numeric vector of car numbers (a single number also works)
# Returns a numeric vector the same length as x:
#   - car numbers above 13 get x %% 2              (0 or 1)
#   - all other car numbers get ((x + 1) %% 2) + 2 (2 or 3)
# NA car numbers give NA.
# The rule is applied element-wise with ifelse(): a plain if () tests a single
# value only, so it cannot classify a whole column of car numbers.
dc <- function(x) ifelse(x > 13, x %% 2, ((x + 1) %% 2) + 2)
 
# Download one table from the f1comscraper ScraperWiki datastore and return it
# as a data frame.
#   table : name of the table to select from, e.g. "raceResults"
# The CSV is fetched with curl into a temporary file, which is removed again
# when the function exits (whether or not the download succeeded).
# NOTE(review): the apikey parameter is sent empty, exactly as in the query
# strings this helper replaces.
fetchScraperTable <- function(table) {
  url <- paste0(
    "https://api.scraperwiki.com/api/1.0/datastore/sqlite?format=csv&name=f1comscraper&query=select+*+from+`",
    table,
    "`&apikey="
  )
  csvFile <- tempfile()
  on.exit(unlink(csvFile), add = TRUE)
  download.file(url, destfile = csvFile, method = "curl")
  read.csv(csvFile)
}

# Race classification, qualifying results, race pit stops and qualifying
# speed-trap readings
race <- fetchScraperTable("raceResults")
quali <- fetchScraperTable("qualiResults")
pits <- fetchScraperTable("racePits")
qualiSpeeds <- fetchScraperTable("qualiSpeeds")
 
 
# Coerce the race columns that are used numerically; entries that are not
# numbers become NA.
race$pos <- as.numeric(as.character(race$pos))
race$grid <- as.numeric(as.character(race$grid))
race$driverNum <- as.numeric(as.character(race$driverNum))

# Order the driverName levels by driverNum so drivers appear in car-number
# order on axes and in facets.
race$driverName <- reorder(race$driverName, race$driverNum)
qualiSpeeds$driverName <- reorder(qualiSpeeds$driverName, qualiSpeeds$driverNum)
pits$driverName <- reorder(pits$driverName, pits$driverNum)

# For every race, add a `missing` column holding the number of rows in that
# race whose pos is NA.
race <- ddply(race, c("race"), function(d) {
  data.frame(d, missing = sum(is.na(d$pos)))
})

# Put teams and races into their fixed display order in every table.
race$team <- orderTeams(race$team)
race$race <- orderRaces(race$race)

pits$race <- orderRaces(pits$race)
pits$team <- orderTeams(pits$team)

quali$race <- orderRaces(quali$race)
quali$team <- orderTeams(quali$team)

qualiSpeeds$race <- orderRaces(qualiSpeeds$race)
qualiSpeeds$team <- orderTeams(qualiSpeeds$team)
 
# Classification by team: one panel per team, race on the x axis, finishing
# position on a reversed y axis, points coloured by driver code.
race$dc <- dc(race$driverNum)
g <- ggplot(race) +
  geom_point(aes(x = race, y = pos, col = factor(dc)), size = 1) +
  facet_wrap(~team)
# opts() and theme_text() no longer exist in current ggplot2; theme(),
# element_text() and ggtitle() are their replacements.
g <- g +
  ggtitle('F1 2012 Classification') +
  theme(axis.text.x = element_text(angle = -90, size = 5)) +
  xlab(NULL) +
  ylab(NULL)
g <- g + theme(legend.position = "none") + scale_y_reverse()
print(g)
 
 
# Smallest non-missing value of d, or NA when there is no finite minimum
# (for example when every value is missing or d is empty).
nullmin <- function(d) {
  smallest <- min(d, na.rm = TRUE)
  if (!is.finite(smallest)) {
    return(NA)
  }
  smallest
}
 
 
# Per race, team and driver: the quickest time across q1time, q2time and
# q3time (NA when the driver has no time at all).
minqx <- ddply(quali, c("race", "team", "driverName"), function(d) {
  data.frame(minqxt = nullmin(min(d$q1time, d$q2time, d$q3time, na.rm = TRUE)))
})

# Per race: the quickest of those driver times, i.e. the fastest qualifying
# lap by anyone in any session.
minqx <- ddply(minqx, c("race"), function(d) {
  data.frame(d, minqxtoverall = nullmin(d$minqxt))
})

# Per race: each driver's best time as a ratio of the race's fastest time.
minqx <- ddply(minqx, c("race"), function(d) {
  data.frame(d, minqxtpc = d$minqxt / d$minqxtoverall)
})

# Per race and team: the team's quickest time across its drivers.
minqx <- ddply(minqx, c("race", "team"), function(d) {
  data.frame(d, minqxtbyteam = nullmin(d$minqxt))
})

# Per race and team: the team's quickest normalised time across its drivers.
minqx <- ddply(minqx, c("race", "team"), function(d) {
  data.frame(d, minqxtbyteampc = nullmin(d$minqxtpc))
})
 
 
# Team progress in qualifying, drawn as one loess curve per team.
# opts() and theme_text() no longer exist in current ggplot2, so titles are
# set with ggtitle() and axis text with theme()/element_text().

# Loess model fitted to both drivers' normalised fastest quali times
g <- ggplot(minqx) +
  stat_smooth(
    method = "loess",
    aes(x = race, y = minqxtpc, group = team, col = factor(team)),
    se = FALSE
  ) +
  ylim(0.99, 1.08) +
  ggtitle("F1 2012 Quali - Team Progress (Both Drivers)") +
  theme(axis.text.x = element_text(angle = -90)) +
  scale_colour_manual(name = "Teams", values = teamcolours) +
  xlab(NULL) +
  ylab("Av of both team drivers' quali laptimes as % of fastest overall quali time")

print(g)

# Loess model fitted to each team's single best normalised quali time
g <- ggplot(minqx) +
  stat_smooth(
    method = "loess",
    aes(x = race, y = minqxtbyteampc, group = team, col = factor(team)),
    se = FALSE
  ) +
  ylim(0.99, 1.08) +
  ggtitle("F1 2012 Quali - Team Progress (Best Driver)") +
  theme(axis.text.x = element_text(angle = -90)) +
  scale_colour_manual(name = "Teams", values = teamcolours) +
  xlab(NULL) +
  ylab("Team's best quali laptime as % of fastest overall quali time")
print(g)
 
# Attach to every speed-trap row the highest qspeed recorded at the same race
# (column `fastest`), so speeds can be shown as a fraction of the race's best.
x1 <- aggregate(qualiSpeeds$qspeed, list(qualiSpeeds$race), max)
colnames(x1) <- c('race', 'fastest')
qualiSpeeds <- merge(x1, qualiSpeeds, by = 'race')

# opts() and theme_text() no longer exist in current ggplot2, so titles are
# set with ggtitle() and axis text with theme()/element_text().

# Raw speed-trap reading per driver, one panel per race
g <- ggplot(qualiSpeeds) +
  geom_point(aes(x = driverName, y = qspeed), size = 1) +
  facet_wrap(~race)
g <- g +
  ggtitle('F1 2012 Quali Speed Trap') +
  theme(axis.text.x = element_text(angle = -90, size = 5)) +
  xlab(NULL) +
  ylab(NULL)
print(g)


# Same layout, with each reading divided by the fastest reading at that race
g <- ggplot(qualiSpeeds) +
  geom_point(aes(x = driverName, y = qspeed / fastest), size = 1) +
  facet_wrap(~race)
g <- g +
  ggtitle('F1 2012 Quali Speed Trap (Normalised)') +
  theme(axis.text.x = element_text(angle = -90, size = 5)) +
  xlab(NULL) +
  ylab(NULL)
print(g)
 
# Look up each driver's team from the qualifying results. quali holds many
# rows per driver, so the lookup is reduced to its distinct team/driverName
# pairs first; merging against the raw rows would repeat every speed-trap row
# once per qualifying row of that driver.
# NOTE(review): a team column is also set on qualiSpeeds earlier in the
# script; if the downloaded table really has one, this merge yields
# team.x/team.y rather than team - confirm against the data.
driverTeams <- unique(subset(quali, select = c('team', 'driverName')))
qualiSpeeds <- merge(qualiSpeeds, driverTeams, by = 'driverName')
qualiSpeeds$dc <- dc(qualiSpeeds$driverNum)

# Normalised speed-trap reading per race, one panel per team, points coloured
# by driver code. opts() and theme_text() no longer exist in current ggplot2,
# hence ggtitle() and theme()/element_text().
g <- ggplot(qualiSpeeds) +
  geom_point(aes(x = race, y = qspeed / fastest, col = factor(dc)), size = 1) +
  facet_wrap(~team)
g <- g +
  ggtitle('F1 2012 Quali Speed Trap (Normalised)') +
  theme(axis.text.x = element_text(angle = -90, size = 5)) +
  xlab(NULL) +
  ylab(NULL)
g <- g + theme(legend.position = "none")
print(g)
 
 
# Position change per driver and race: grid slot minus finishing position, so
# a positive pchange means places were gained.
race <- ddply(race, c("race", "team", "driverName"), function(d) {
  data.frame(d, pchange = d$grid - d$pos)
})

# One panel per driver: a line from grid slot to finishing position for each
# race, coloured by whether places were gained, with a grey dot on the grid
# slot.
# The breaks are given to scale_y_reverse() directly: adding a separate
# scale_y_continuous(breaks = ...) first has no effect, because the later y
# scale replaces it.
# opts() and theme_text() no longer exist in current ggplot2, hence ggtitle()
# and theme()/element_text().
g <- ggplot(race) +
  geom_linerange(aes(
    ymin = grid, ymax = pos, x = race,
    group = driverName, color = factor(pchange > 0)
  )) +
  facet_wrap(~driverName) +
  geom_point(data = race, aes(x = race, y = grid), col = "gray50", size = 1) +
  ggtitle("F1 2012 Position Changes Grid-Final Classification") +
  theme(
    axis.text.x = element_text(angle = -90, size = 5),
    axis.text.y = element_text(size = 4)
  ) +
  xlab(NULL) +
  ylab("Position") +
  scale_y_reverse(breaks = seq(1, 24, by = 1))
g <- g + scale_colour_discrete(name = "Gained Places")
print(g)
 
# Distribution of pit times between 0 and 40, one panel per race.
# geom_histogram() is used so that the continuous pitTime values are binned;
# in current ggplot2 geom_bar() counts each distinct value instead.
# opts() no longer exists in current ggplot2, hence ggtitle().
g <- ggplot(pits) +
  geom_histogram(aes(x = pitTime)) +
  facet_wrap(~race) +
  xlim(0, 40)
g <- g + ggtitle('F1 2012 Pit time distributions') + xlab(NULL) + ylab(NULL)

print(g)
 
 
# Try to discard drive-through penalties from a vector of pit times, so that
# the quickest remaining value is a genuine pit stop.
# Heuristic:
#   - a drive-through is at least 2.5s quicker than the fastest real pit stop;
#   - the 10 quickest real pit stops are unlikely to span more than 2.5s;
#   - there are no more than 9 drive-through penalties in a given race.
# While the (up to) 10 quickest times span more than 2.5s, the quickest one is
# dropped and the check is repeated on the times that remain from that list.
# If nothing had to be dropped, x is returned unchanged; otherwise the trimmed
# list of quickest times is returned.
#
# BUG? (original author's note, still open): the originally posted version,
# used for the images in the f1datajunkie blog post, returned x[-1] in the
# final branch; returning x looks like the right thing, but a quick check
# showed odd results and this was never debugged.
xdt <- function(x) {
  remaining <- x
  repeat {
    quickest <- head(sort(remaining), 10)
    spread <- max(quickest) - min(quickest)
    if (!(spread > 2.5)) {
      return(remaining)
    }
    remaining <- quickest[-1]
  }
}
 
# For each race, take the quickest pit time left after xdt() has filtered the
# times, and attach it to every pit-stop row of that race as `minpit`.
x <- setNames(
  aggregate(pits$pitTime, list(pits$race), function(times) min(xdt(times))),
  c('race', 'minpit')
)
pits <- merge(x, pits, by = 'race')


# Driver code used to colour the two drivers of a team differently
pits$dc <- dc(pits$driverNum)
 
# Pit stop times relative to the race's reference fastest stop (minpit),
# limited to stops within 4s of it. Four views follow: points and boxplots
# panelled by race, then points and boxplots panelled by team.
# opts() and theme_text() no longer exist in current ggplot2, so titles are
# set with ggtitle() and axis text/legend with theme()/element_text().

# Points per team, one panel per race, coloured by driver code
g <- ggplot(pits) +
  geom_point(aes(x = team, y = pitTime - minpit, col = factor(dc)), size = 1) +
  facet_wrap(~race) +
  ylim(0, 4) +
  theme(axis.text.x = element_text(angle = -90), legend.position = "none") +
  ggtitle('F1 2012 Pit stops by race and team')
g <- g + xlab(NULL) + ylab("Time(s) over fastest pitstop")
print(g)

# Boxplot per team, one panel per race
g <- ggplot(pits) +
  geom_boxplot(aes(x = team, y = pitTime - minpit)) +
  facet_wrap(~race) +
  ylim(0, 4) +
  theme(axis.text.x = element_text(angle = -90), legend.position = "none") +
  ggtitle('F1 2012 Pit stops by race and team')
g <- g + xlab(NULL) + ylab("Time(s) over fastest pitstop")
print(g)

# Points per race, one panel per team, coloured by driver code
g <- ggplot(pits) +
  geom_point(aes(x = race, y = pitTime - minpit, col = factor(dc)), size = 1)
g <- g + facet_wrap(~team) + ylim(0, 4)
g <- g + xlab(NULL) + ylab("Time(s) over fastest pitstop")
g <- g +
  theme(axis.text.x = element_text(angle = -90), legend.position = "none") +
  ggtitle('F1 2012 Pit stops by team and race')
print(g)

# Boxplot per race, one panel per team
g <- ggplot(pits) +
  geom_boxplot(aes(x = race, y = pitTime - minpit)) +
  facet_wrap(~team) +
  ylim(0, 4) +
  theme(axis.text.x = element_text(angle = -90), legend.position = "none") +
  ggtitle('F1 2012 Pit stops by team and race')
g <- g + xlab(NULL) + ylab("Time(s) over fastest pitstop")
print(g)
 
# Loess model over all pit times (relative to each race's minpit), one curve
# per team across the races.
# opts() and theme_text() no longer exist in current ggplot2, hence ggtitle()
# and theme()/element_text().
g <- ggplot(pits) +
  stat_smooth(
    method = "loess",
    aes(x = race, y = pitTime - minpit, group = team, col = factor(team)),
    se = FALSE
  ) +
  ggtitle("F1 2012 - Pitting (loess fit)") +
  theme(axis.text.x = element_text(angle = -90)) +
  scale_colour_manual(name = "Teams", values = teamcolours) +
  xlab(NULL) +
  ylab("Pit times across team")

print(g)
 
```

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.