Skip to content

Instantly share code, notes, and snippets.

@jalapic
Created July 18, 2015 02:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/02521d38c55d57540221 to your computer and use it in GitHub Desktop.
Save jalapic/02521d38c55d57540221 to your computer and use it in GitHub Desktop.
### Getting golf major winners
library(dplyr)
library("rvest")
# Scrape the winners table of each golf major from its Wikipedia page.
# Every result is the list that html_table() returns for the nodes matched by
# the XPath; the tidy-up step further down takes the first element of each.
# read_html() is used because html() was deprecated in rvest 0.3.0 and has
# since been removed from the package.
# NOTE(review): the table[N] positions depend on the page layout at the time
# the script was written -- confirm they still select the intended tables.
url <- "http://en.wikipedia.org/wiki/Masters_Tournament"
masters <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[4]') %>%
  html_table()

url1 <- "http://en.wikipedia.org/wiki/U.S._Open_(golf)"
usopen <- url1 %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[2]') %>%
  html_table()

url2 <- "http://en.wikipedia.org/wiki/The_Open_Championship"
theopen <- url2 %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[5]') %>%
  html_table()

url3 <- "http://en.wikipedia.org/wiki/PGA_Championship"
# Two tables are taken from the PGA page, so download and parse it only once.
pga_page <- read_html(url3)
pga1 <- pga_page %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[3]') %>%
  html_table()
# fill = TRUE pads rows that have fewer cells than the widest row with NA.
pga2 <- pga_page %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[4]') %>%
  html_table(fill = TRUE)
# Tidy up: from each scrape keep only the first table, and from that table
# only the two columns used later on.
str(masters)  # show the structure of the raw Masters scrape before trimming

# Take the first table of a scrape result and keep the given column positions.
keep_cols <- function(tables, cols) {
  tables[[1]][, cols]
}

masters <- keep_cols(masters, 1:2)
usopen <- keep_cols(usopen, 1:2)
theopen <- keep_cols(theopen, c(1, 3))  # this table uses columns 1 and 3
pga1 <- keep_cols(pga1, 1:2)
pga2 <- keep_cols(pga2, 1:2)
# Tidy up: drop unwanted rows from each table by position.
# NOTE(review): presumably these are header/separator rows embedded in the
# scraped tables -- confirm against the current pages.
# Each negative index vector lists positions in the table as it stands before
# any row is removed, so one subsetting call per table drops the same rows as
# removing them one at a time from the highest position downwards.
masters <- masters[-71, ]
usopen <- usopen[-c(71, 95), ]
theopen <- theopen[-c(1, 71, 92, 136), ]
pga1 <- pga1[-1, ]
pga2 <- pga2[-c(15, 40, 41), ]
# Add major: label every row with the tournament its table came from. Both
# PGA tables get the same "pga" label.
masters[["major"]] <- "masters"
usopen[["major"]] <- "usopen"
theopen[["major"]] <- "theopen"
pga1[["major"]] <- "pga"
pga2[["major"]] <- "pga"

# Collect the five tables so they can be processed together.
golf <- list(masters, usopen, theopen, pga1, pga2)
library(dplyr)
# Build one long table of major wins with a running win count per player.

# Derive `player` from the second column of each table by stripping any
# parenthesised text together with the spaces around it.
# x[[2]] always yields a plain vector. x[, 2] yields a one-column tibble when
# the table is a tibble (which html_table() returns in rvest >= 1.0), and
# gsub() would then collapse the whole column into a single deparsed string.
golf <- lapply(golf, function(x) {
  x %>% mutate(player = gsub(" *\\(.*?\\) *", "", x[[2]]))
})

# rbind() on data frames matches columns by name and stops if the names
# differ, so give every table the column names of the first one before
# stacking. This changes nothing when the scraped headers already agree.
golf <- lapply(golf, setNames, nm = names(golf[[1]]))
golf <- do.call("rbind", golf)

# The first column holds the year; coerce it to numeric (going through
# character so a factor column converts by label rather than by level code).
golf[[1]] <- as.numeric(as.character(golf[[1]]))
head(golf)

# One row per win: ordered by Year, the cumulative sum of 1s within a player
# is that player's running total of majors.
golf <- golf %>%
  group_by(player) %>%
  arrange(Year) %>%
  mutate(value = 1, total = cumsum(value))

# Players whose final total is more than 7 majors; golf is still grouped by
# player here, so max(total) is evaluated per player.
topgolfers <- golf %>%
  filter(max(total) > 7) %>%
  .$player %>%
  unique()

# Flag the rows of those players and keep them in a separate, ungrouped table.
golf$grp <- ifelse(golf$player %in% topgolfers, 1, 0)
golf1 <- golf %>%
  filter(grp == 1) %>%
  ungroup()
# Note: each player's line starts at their first win (total = 1); drawing the step from 0 to 1 wins would require adding zero-win rows to the data frame.
library(ggplot2)
# Plot cumulative majors against year, one path per player: every player in
# gray, with the players held in golf1 drawn on top in a thicker blue line.
ggplot(data = golf, mapping = aes(x = Year, y = total)) +
  geom_path(mapping = aes(group = player), color = "gray55", lwd = 1) +
  geom_path(
    mapping = aes(x = Year, y = total, group = player),
    data = golf1,
    color = "dodgerblue",
    lwd = 2
  ) +
  scale_x_continuous(breaks = seq(from = 1860, to = 2020, by = 20)) +
  ylab("Total Majors") +
  ggtitle("Cumulative Golf Majors by Player") +
  theme(
    # Text
    text = element_text(color = "gray20", size = 10),
    plot.title = element_text(hjust = 0, vjust = 1, size = rel(3.3)),
    # Axes: large labels, x tick labels rotated, no tick marks
    axis.text = element_text(size = rel(1.0)),
    axis.text.x = element_text(
      color = "gray20", size = rel(2.5), angle = 90, vjust = 1
    ),
    axis.text.y = element_text(color = "gray20", size = rel(2.5)),
    axis.title.x = element_text(size = rel(2.5), vjust = 0),
    axis.title.y = element_text(size = rel(2.5), vjust = 1),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    # Backgrounds and grid: blank backgrounds, gray major grid lines only
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.grid.major.x = element_line(color = "gray65"),
    panel.grid.major.y = element_line(color = "gray65"),
    panel.grid.minor = element_blank(),
    # No legend
    legend.position = "none"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment