Skip to content

Instantly share code, notes, and snippets.

@jalapic
Created April 15, 2019 22:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/67df6f97996a2bfa9e6997f2792c36c6 to your computer and use it in GitHub Desktop.
Save jalapic/67df6f97996a2bfa9e6997f2792c36c6 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(magrittr)
library(rvest)
# Download the Wikipedia page listing Tiger Woods' tournament performances.
webpage <- read_html("https://en.wikipedia.org/wiki/List_of_tournament_performances_by_Tiger_Woods")

# Parse every table with class "wikitable" on the page.
l <- webpage %>%
  html_nodes("table.wikitable") %>%
  html_table()

# Keep only the tables whose first column is named "Tournament".
# identical() always yields a strict TRUE/FALSE (never NA), and vapply()
# guarantees a logical vector for the subsetting.
l <- l[vapply(
  l,
  function(x) identical(colnames(x)[1], "Tournament"),
  logical(1)
)]

# One table per season: 1992 is the first one in the list and there were no
# tournaments in 2016.
season_years <- c(1992:2015, 2017:2019)

# The season labels are hard-coded, so fail loudly if the page does not hold
# exactly one table per expected season. Without this check `names<-` pads
# missing names with NA and the seasons are silently mislabelled.
if (length(l) != length(season_years)) {
  stop(
    "Expected ", length(season_years), " season tables but found ",
    length(l), "; update `season_years` to match the page.",
    call. = FALSE
  )
}
names(l) <- season_years

# Add the season to each table as a `year` column (character, because it is
# taken from the list names).
l <- Map(cbind, l, year = names(l))

# Stack the per-season tables into a single table.
df <- data.table::rbindlist(l)

# Short, consistent column names.
colnames(df) <- c(
  "tournament", "r1", "r2", "r3", "r4",
  "score", "par", "place", "money", "year"
)
# Drop match-play events: their first-round cell reads "see below" instead of
# holding a score.
df <- df[df[["r1"]] != "see below", ]

# Drop individual rows by position. The removals are applied one after
# another, so each index refers to the table as it stands after the previous
# removal: rows 60 and 78 (Stableford event), then row 258 (withdrawn event).
df <- df[-60, ][-78, ][-258, ]

# Number the remaining events in table order.
df <- mutate(df, event = row_number())

# The Las Vegas Invitational has five rounds; its fourth and fifth scores
# share one cell separated by "/", so split that cell into r4 and r5.
df <- separate(df, col = "r4", into = c("r4", "r5"), sep = "/")
# Derive a numeric finishing position from `place` by removing the letter "T"
# (used to mark ties). Entries that are not numbers, such as "CUT", become NA
# and as.numeric() warns about the coercion.
df$place1 <- as.numeric(gsub("T", "", df$place))

# Bucket each event by its result. Conditions are tried from top to bottom and
# the first match wins; everything left over (finishes outside the top 10 and
# entries without a numeric place) is labelled "made cut".
df$group <- case_when(
  df$place == "CUT" ~ "cut",
  df$place1 == 1 ~ "first",
  df$place1 <= 5 ~ "top5",
  df$place1 <= 10 ~ "top10",
  TRUE ~ "made cut"
)

# Show how many events fall into each bucket.
table(df$group)
# Round-score columns, referred to by name rather than by position so the
# reshape does not depend on the column order.
round_cols <- c("r1", "r2", "r3", "r4", "r5")

# Make sure the round columns are numeric. Cells that do not hold a number
# become NA (as.numeric() warns about the coercion).
df <- df %>%
  mutate(across(all_of(round_cols), as.numeric))

# Reshape to one row per round played: stack the round columns into
# `round`/`value`, order by event and round label, drop rounds without a
# score, then renumber `round` as a running count across all events.
df <- df %>%
  pivot_longer(all_of(round_cols), names_to = "round", values_to = "value") %>%
  arrange(event, round) %>%
  filter(!is.na(value)) %>%
  mutate(round = row_number())

# Mark the last recorded round of each event. ungroup() afterwards so the
# grouping by event does not leak into later steps.
df <- df %>%
  group_by(event) %>%
  mutate(finalrd = ifelse(row_number() == n(), "yes", "no")) %>%
  ungroup()
# Plot 1: all non-final rounds in grey, final rounds coloured by the
# five-level result group. The colours are unnamed, so ggplot2 assigns them to
# the group levels in order (usually alphabetical).
ggplot() +
  geom_point(
    data = filter(df, finalrd == "no"),
    mapping = aes(x = round, y = value),
    colour = "gray75", alpha = .9, size = 3
  ) +
  geom_point(
    data = filter(df, finalrd == "yes"),
    mapping = aes(x = round, y = value, colour = group),
    size = 3
  ) +
  scale_colour_manual(
    values = c("blue", "red", "lightsalmon", "sienna1", "orangered")
  ) +
  theme_minimal()

# Collapse the result groups to three levels: "first", "cut", and "made cut"
# for everything else. A missing group stays missing.
df$group1 <- case_when(
  df$group == "first" ~ "first",
  df$group == "cut" ~ "cut",
  !is.na(df$group) ~ "made cut"
)

# Plot 2: every round coloured by the three-level group.
ggplot() +
  geom_point(
    data = df,
    mapping = aes(x = round, y = value, colour = group1),
    alpha = .9, size = 3
  ) +
  scale_colour_manual(values = c("blue", "red", "gray88")) +
  theme_minimal()

# Plot 3: non-final rounds in grey, final rounds coloured by the three-level
# group.
ggplot() +
  geom_point(
    data = filter(df, finalrd == "no"),
    mapping = aes(x = round, y = value),
    colour = "gray75", alpha = .9, size = 3
  ) +
  geom_point(
    data = filter(df, finalrd == "yes"),
    mapping = aes(x = round, y = value, colour = group1),
    size = 3
  ) +
  scale_colour_manual(values = c("blue", "red", "gray88")) +
  theme_minimal()
# Identify the majors.
# Print every tournament name with its row count, to check the spellings used
# in `majors` below.
table(df$tournament)
majors <- c(
  "PGA Championship", "U.S. Open", "Open Championship",
  "The Open Championship", "Masters Tournament"
)

# Flag each row as a major or a regular ("pga") event.
df$major <- if_else(df$tournament %in% majors, "major", "pga")

# Same as group1, except that wins at a major get their own label.
df$group2 <- replace(
  df$group1,
  which(df$major == "major" & df$group1 == "first"),
  "major-first"
)
table(df$group2)

# Base plot: non-final rounds in grey; final rounds coloured by the
# three-level group, leaving out major wins (they are drawn separately below).
p <- ggplot() +
  geom_point(
    data = filter(df, finalrd == "no"),
    mapping = aes(x = round, y = value),
    colour = "gray80", alpha = .9, size = 3
  ) +
  geom_point(
    data = filter(df, finalrd == "yes", group2 != "major-first"),
    mapping = aes(x = round, y = value, colour = group1),
    size = 3
  ) +
  scale_colour_manual(values = c("blue", "red", "gray70")) +
  theme_minimal()

# Overlay the final rounds of major wins as red points with a black outline.
p1 <- p +
  geom_point(
    data = filter(df, finalrd == "yes", group2 == "major-first"),
    mapping = aes(x = round, y = value),
    shape = 21, colour = "black", fill = "red", stroke = 2, size = 2
  )
p1
# Rolling average score over the last 20 rounds (right-aligned window). Rounds
# without a full 20-round window are filled with NA.
df$ravg <- zoo::rollmean(df$value, k = 20, fill = NA, align = "right")
p2 <- p1 +
  geom_line(data = df, mapping = aes(x = round, y = ravg), lwd = 1)

# Labels and grid lines: y axis limited to 60-90 with breaks every 5 strokes
# and no minor grid lines. (An earlier version used ylim(60, 90) instead.)
p3 <- p2 +
  scale_y_continuous(
    limits = c(60, 90),
    breaks = seq(60, 90, 5),
    minor_breaks = NULL
  ) +
  labs(
    x = "Round Number",
    y = "Score",
    title = "Tiger Woods PGA Tour Scoring by Round"
  )
p3

# First row of each year, used to mark where every season starts.
yearsdf <- df %>%
  group_by(year) %>%
  filter(row_number() == 1L)

# Dashed vertical line at the start of each season, plus a rotated year label.
# The 2017 label is left out (NOTE(review): presumably to avoid overlapping
# the neighbouring label - confirm).
p4 <- p3 +
  geom_vline(
    data = yearsdf,
    mapping = aes(xintercept = round),
    lty = 2, colour = "gray67"
  ) +
  geom_text(
    data = filter(yearsdf, year != 2017),
    mapping = aes(x = round, y = 87, label = year),
    angle = 90, hjust = 0, size = 5
  )
p4

# Pink dashed line and label at the round stored in row 38 of df, annotated
# as the point where he turned professional.
p5 <- p4 +
  geom_vline(
    data = df[38, ],
    mapping = aes(xintercept = round),
    lty = 2, colour = "pink"
  ) +
  geom_text(
    mapping = aes(x = 38, y = 87),
    label = "Turned Pro", colour = "pink",
    angle = 90, hjust = 0, size = 5
  )

# Larger axis and title text for the exported figure.
p6 <- p5 +
  theme(
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  )
# Write the final figure to an SVG file.
library(svglite)
svglite("plot.svg", width = 30, height = 10)
# print() is needed here: a bare `p6` is only drawn when R auto-prints
# top-level values, which does not happen when the script is run through
# source(), and the SVG would then be written without the plot.
print(p6)
dev.off()

# Also save the figure as a PNG.
ggsave("tigerplot.png", plot = p6, width = 25, height = 8)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment