Skip to content

Instantly share code, notes, and snippets.

@jalapic
Last active August 29, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/2c3d5bf1224e39d75782 to your computer and use it in GitHub Desktop.
Save jalapic/2c3d5bf1224e39d75782 to your computer and use it in GitHub Desktop.
#### Note - this isn't the cleanest code, but it works fine
#### install packages
# The "user/repo" string already identifies the GitHub account, so no separate
# `username` argument is passed to install_github().
devtools::install_github("jalapic/engsoccerdata")
library(engsoccerdata)
devtools::install_github("jalapic/curleylab")
library(curleylab) # just for a ggplot theme
# The pipeline below uses %>%, filter(), mutate(), select() and arrange(),
# so dplyr is attached explicitly rather than relied on implicitly.
library(dplyr)

head(engsoccerdata2)

# Build a long table with one row per team per match: every fixture appears
# once from the home side's point of view and once from the visitor's, with
# the goal difference negated for the visitor. Season 1939 is filtered out
# (presumably the abandoned wartime season -- TODO confirm).
# Both halves select their columns in the same order so rbind() lines them up
# trivially.
df <-
  rbind(
    engsoccerdata2 %>%
      filter(Season != 1939) %>%
      mutate(venue = "home") %>%
      select(Date, Season, team = home, opp = visitor, FT, venue, gd = goaldif),
    engsoccerdata2 %>%
      filter(Season != 1939) %>%
      mutate(goaldif1 = -goaldif, venue = "away") %>%
      select(Date, Season, team = visitor, opp = home, FT, venue, gd = goaldif1)
  )
head(df)

# Dates are parsed as ISO year-month-day so they sort chronologically.
df$Date <- as.Date(df$Date, format = "%Y-%m-%d")
# Functions to get sequences
# Annotate every row of `tmp` with the run of consecutive results it is in.
#
# `tmp` is a data frame with a column `win`, already sorted in the order in
# which runs should be detected. Two columns are added:
#   winlengths - length of the run of identical consecutive `win` values
#                that contains the row
#   winvalues  - the `win` value shared by that run
# Returns `tmp` with those two columns appended.
fun1 <- function(tmp) {
  # Run-length encode once and reuse the result for both columns.
  runs <- rle(tmp$win)
  # Repeating each run's summary `length` times lines it back up with the
  # original rows, one entry per row.
  tmp$winlengths <- rep(runs$lengths, times = runs$lengths)
  tmp$winvalues <- rep(runs$values, times = runs$lengths)
  tmp
}
# Summarise the runs of consecutive TRUE values in `tmp$win`.
#
# `tmp` is a data frame with a column `win`, already sorted in the order in
# which runs should be detected. Returns a data frame with one row per run of
# TRUE values and two columns:
#   winlengths - number of consecutive rows in the run
#   winvalues  - the run's value (always TRUE after filtering)
# Rows are ordered from the longest run to the shortest; runs of equal length
# keep their original relative order. Runs whose value is FALSE or NA are
# dropped. Row names are reset to 1..n.
fun2 <- function(tmp) {
  # Run-length encode once and reuse the result for both columns.
  runs <- rle(tmp$win)
  ddf <- data.frame(winlengths = runs$lengths, winvalues = runs$values)
  # which() discards NA comparisons, so only genuine TRUE runs survive.
  ddf <- ddf[which(ddf$winvalues == TRUE), , drop = FALSE]
  # Ordering on the negated length gives a stable descending sort.
  ddf <- ddf[order(-ddf$winlengths), , drop = FALSE]
  rownames(ddf) <- NULL
  ddf
}
###############################
#### Win Sequences
library(ggplot2)

# A match counts towards a winning run when the goal difference is positive.
# Rows are ordered by team and then date so the run detection in fun1()/fun2()
# sees each team's matches chronologically. No grouping is applied: the
# per-team work is done on the split() pieces below.
df1 <- df %>%
  mutate(win = gd > 0) %>%
  arrange(team, Date)

# Split once and reuse the per-team pieces for both helpers.
by_team <- split(df1, df1$team)

# Per-row run annotations for every team, stacked back into one table.
df1x <- do.call("rbind", lapply(by_team, fun1))
head(df1x)

# One row per winning run, tagged with the team it belongs to. The team name
# is repeated to the number of runs so a team with no runs yields an empty
# piece instead of an error.
df1y <- Map(
  function(runs, team) {
    runs$team <- rep(team, nrow(runs))
    runs
  },
  lapply(by_team, fun2),
  names(by_team)
)
df1y <- do.call("rbind", df1y)
table(df1y$winlengths)
head(df1y)

# The x aesthetic is a factor, so the bar labels use stat = "count"
# (stat = "bin" only accepts a continuous x in ggplot2 >= 2.0).
ggplot(df1y, aes(x = factor(winlengths))) +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = -.7) +
  curleytheme() +
  theme(panel.grid.major.x = element_blank()) +
  xlab("Consecutive Wins") +
  theme(
    axis.text.x = element_text(size = 20),
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 20)
  ) +
  ggtitle("Frequency of Winning Runs in English Football") +
  ylab("")
##### Frequency of Unbeaten Runs
library(ggplot2)

# A match counts towards an unbeaten run when the goal difference is zero or
# positive (a draw or a win). The `win` column name is kept because fun1()
# and fun2() read that column. Rows are ordered by team and then date so the
# run detection sees each team's matches chronologically. No grouping is
# applied: the per-team work is done on the split() pieces below.
df1 <- df %>%
  mutate(win = gd >= 0) %>%
  arrange(team, Date)

# Split once and reuse the per-team pieces for both helpers.
by_team <- split(df1, df1$team)

# Per-row run annotations for every team, stacked back into one table.
df1x <- do.call("rbind", lapply(by_team, fun1))
head(df1x)

# One row per unbeaten run, tagged with the team it belongs to. The team name
# is repeated to the number of runs so a team with no runs yields an empty
# piece instead of an error.
df1y <- Map(
  function(runs, team) {
    runs$team <- rep(team, nrow(runs))
    runs
  },
  lapply(by_team, fun2),
  names(by_team)
)
df1y <- do.call("rbind", df1y)
table(df1y$winlengths)
head(df1y)

# The x aesthetic is a factor, so the bar labels use stat = "count"
# (stat = "bin" only accepts a continuous x in ggplot2 >= 2.0).
# The fixed y limit leaves headroom for the count labels above the bars.
ggplot(df1y, aes(x = factor(winlengths))) +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = -.7) +
  curleytheme() +
  theme(panel.grid.major.x = element_blank()) +
  xlab("Consecutive Games Unbeaten") +
  theme(
    axis.text.x = element_text(size = 20),
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 20)
  ) +
  ggtitle("Frequency of Unbeaten Runs in English Football") +
  ylim(0, 38000) +
  ylab("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment