Skip to content

Instantly share code, notes, and snippets.

@jalapic
Created April 9, 2015 13:16
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/eb70a8395d112950a87d to your computer and use it in GitHub Desktop.
Save jalapic/eb70a8395d112950a87d to your computer and use it in GitHub Desktop.
golf major winners
### Cumulative Golf Major Winners
library(XML)
# Download a page with base R and parse every HTML table found in it.
#
# XML::readHTMLTable() cannot fetch https:// URLs on its own, and Wikipedia
# redirects plain http:// requests to HTTPS, so the page is first saved to a
# temporary file and then parsed from disk.
#
#   url      Address of the page to scrape.
#   returns  The result of readHTMLTable() on the saved page (indexed below
#            with [[ ]] as a list of tables).
read_wiki_tables <- function(url) {
  page <- tempfile(fileext = ".html")
  on.exit(unlink(page), add = TRUE)
  status <- download.file(url, destfile = page, quiet = TRUE, mode = "wb")
  if (status != 0) {
    stop("Failed to download ", url, call. = FALSE)
  }
  readHTMLTable(page)
}

# NOTE(review): the table positions ([[4]], [[2]], [[5]], [[3]], [[4]]) and the
# column picks are hard-coded against the page layout at the time of writing;
# confirm them against the live pages before trusting the output.
# Column 1 is later converted to a number and column 2 is later cleaned into
# the player name.
masters <- read_wiki_tables("https://en.wikipedia.org/wiki/Masters_Tournament")
masters <- masters[[4]][, 1:2]
usopen <- read_wiki_tables("https://en.wikipedia.org/wiki/U.S._Open_(golf)")
usopen <- usopen[[2]][, 1:2]
theopen <- read_wiki_tables("https://en.wikipedia.org/wiki/The_Open_Championship")
# The Open table keeps columns 1 and 3 rather than the first two.
theopen <- theopen[[5]][c(1, 3)]
# The PGA winners are taken from two separate tables on the page.
pga <- read_wiki_tables("https://en.wikipedia.org/wiki/PGA_Championship")
pga1 <- pga[[3]][, 1:2]
pga2 <- pga[[4]][, 1:2]
# Tidy up ----
# Drop unwanted rows from each scraped table. All positions refer to the rows
# as scraped, so each table is trimmed with a single negative-index vector.
masters <- masters[-c(1, 71), ]
usopen <- usopen[-c(1, 71, 95), ]
theopen <- theopen[-c(1, 71, 92, 136), ]
pga1 <- pga1[-1, ]
pga2 <- pga2[-c(15, 40, 41), ]
# Add major ----
# Label every table with the championship it came from; both PGA tables
# share the same label.
masters[["major"]] <- "masters"
usopen[["major"]] <- "usopen"
theopen[["major"]] <- "theopen"
pga1[["major"]] <- "pga"
pga2[["major"]] <- "pga"

library(dplyr)

# Add a `player` column to each table: the text of column 2 with any
# parenthesised part (and the spaces around it) removed.
golf <- lapply(
  list(masters, usopen, theopen, pga1, pga2),
  function(tbl) {
    mutate(tbl, player = gsub(" *\\(.*?\\) *", "", tbl[, 2]))
  }
)

# Stack the five tables into one data frame.
golf <- do.call(rbind, golf)

# Column 1 goes through character before numeric so that a factor column
# yields its labels rather than its internal integer codes.
golf[[1]] <- as.numeric(as.character(golf[[1]]))
head(golf)
# Running count of majors per player: every row counts as one win, summed
# cumulatively within each player after sorting by Year.
golf <- group_by(golf, player)
golf <- arrange(golf, Year)
golf <- mutate(golf, value = 1, total = cumsum(value))

# Players whose running total ever exceeds 7 (golf is still grouped by
# player here, so max() is evaluated per player).
topgolfers <- unique(filter(golf, max(total) > 7)$player)

# Flag rows that belong to those players (1) versus everyone else (0).
golf$grp <- as.numeric(golf$player %in% topgolfers)
golf1 <- ungroup(filter(golf, grp == 1))

# Each line starts at a player's first win (total = 1); drawing lines up from
# zero would require adding zero-win rows to the data frame.
library(ggplot2)
# One gray path per player for the whole data set, with the players in golf1
# drawn again on top as thicker blue paths.
ggplot(data = golf, mapping = aes(x = Year, y = total)) +
  geom_path(mapping = aes(group = player), color = "gray55", lwd = 1) +
  geom_path(
    data = golf1,
    mapping = aes(x = Year, y = total, group = player),
    color = "dodgerblue",
    lwd = 2
  ) +
  scale_x_continuous(breaks = seq(from = 1860, to = 2020, by = 20)) +
  labs(y = "Total Majors", title = "Cumulative Golf Majors by Player") +
  theme(
    # Title and general text
    plot.title = element_text(hjust = 0, vjust = 1, size = rel(3.3)),
    text = element_text(color = "gray20", size = 10),
    # Backgrounds and grid lines: blank backgrounds, major grid only
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major.y = element_line(color = "gray65"),
    panel.grid.major.x = element_line(color = "gray65"),
    panel.grid.minor = element_blank(),
    # Axis text, titles and ticks
    axis.text = element_text(size = rel(1.0)),
    axis.text.x = element_text(
      color = "gray20", size = rel(2.5), angle = 90, vjust = 1
    ),
    axis.text.y = element_text(color = "gray20", size = rel(2.5)),
    axis.title.x = element_text(size = rel(2.5), vjust = 0),
    axis.title.y = element_text(size = rel(2.5), vjust = 1),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment