Last active
August 29, 2015 14:23
-
-
Save mneedham/8c2b46d960c2208e9961 to your computer and use it in GitHub Desktop.
wimbledon.2013.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
winner | winner_seeding | loser | loser_seeding | score | round | year | |
---|---|---|---|---|---|---|---|
Grega Zemlja | NA | Michael Russell | NA | 673 64 64 61 | Round of 128 | 2013 | |
Jimmy Wang | (Q) | Wayne Odesnik | (Q) | 765 46 62 36 75 | Round of 128 | 2013 | |
Fernando Verdasco | NA | Xavier Malisse | NA | 675 61 64 63 | Round of 128 | 2013 | |
Jan-Lennard Struff | (Q) | Blaz Kavcic | NA | 64 61 63 | Round of 128 | 2013 | |
Radek Stepanek | NA | Matt Reid | (Q) | 62 62 64 | Round of 128 | 2013 | |
Sergiy Stakhovsky | NA | Rogerio Dutra Silva | NA | 64 60 64 | Round of 128 | 2013 | |
Go Soeda | (Q) | Andreas Haider-Maurer | NA | 766 75 61 | Round of 128 | 2013 | |
Igor Sijsling | NA | Alex Kuznetsov | (Q) | 63 64 64 | Round of 128 | 2013 | |
Guillaume Rufin | NA | Marinko Matosevic | NA | 61 46 64 63 | Round of 128 | 2013 | |
Stephane Robert | (Q) | Alejandro Falla | NA | 63 765 75 | Round of 128 | 2013 | |
Bobby Reynolds | (Q) | Steve Johnson | (WC) | 16 764 63 674 64 | Round of 128 | 2013 | |
Julian Reister | (Q) | Lukas Rosol | NA | 63 46 765 674 64 | Round of 128 | 2013 | |
Rajeev Ram | NA | Lukas Lacko | NA | 75 64 672 62 | Round of 128 | 2013 | |
Michal Przysiezny | (Q) | Philipp Petzschner | NA | 63 766 60 | Round of 128 | 2013 | |
Vasek Pospisil | NA | Marc Gicquel | (Q) | 63 62 763 | Round of 128 | 2013 | |
Leonardo Mayer | NA | Aljaz Bedene | NA | 62 63 64 | Round of 128 | 2013 | |
Paul-Henri Mathieu | NA | Ricardas Berankis | NA | 764 75 673 64 | Round of 128 | 2013 | |
Adrian Mannarino | NA | Pablo Andujar | NA | 61 62 63 | Round of 128 | 2013 | |
Nicolas Mahut | (WC) | Jan Hajek | NA | 62 64 63 | Round of 128 | 2013 | |
Yen-Hsun Lu | NA | James Ward | (WC) | 674 64 7611 764 | Round of 128 | 2013 | |
Michael Llodra | NA | Jarkko Nieminen | NA | 763 64 63 | Round of 128 | 2013 | |
Jesse Levine | NA | Guido Pella | NA | 64 62 46 36 43 (RET) | Round of 128 | 2013 | |
Andrey Kuznetsov | NA | Albert Montanes | NA | 63 64 36 63 | Round of 128 | 2013 | |
Denis Kudla | (Q) | James Duckworth | (Q) | 64 62 36 46 61 | Round of 128 | 2013 | |
Lukasz Kubot | NA | Igor Andreev | NA | 61 75 62 | Round of 128 | 2013 | |
Ernests Gulbis | NA | Edouard Roger-Vasselin | NA | 761 64 75 | Round of 128 | 2013 | |
Santiago Giraldo | NA | Horacio Zeballos | NA | 36 764 676 61 63 | Round of 128 | 2013 | |
Kenny De Schepper | NA | Paolo Lorenzi | NA | 766 64 62 | Round of 128 | 2013 | |
Dustin Brown | (Q) | Guillermo Garcia-Lopez | NA | 63 63 63 | Round of 128 | 2013 | |
Daniel Brands | NA | Daniel Gimeno-Traver | NA | 765 674 675 61 64 | Round of 128 | 2013 | |
James Blake | NA | Thiemo de Bakker | NA | 61 63 62 | Round of 128 | 2013 | |
Roberto Bautista Agut | NA | Teymuraz Gabashvili | (Q) | 63 64 763 | Round of 128 | 2013 | |
Tommy Robredo | (32) | Alex Bogomolov Jr. | NA | 62 62 64 | Round of 128 | 2013 | |
Julien Benneteau | (31) | Tobias Kamke | NA | 64 675 64 62 | Round of 128 | 2013 | |
Jurgen Melzer | NA | Fabio Fognini | (30) | 675 75 63 62 | Round of 128 | 2013 | |
Grigor Dimitrov | (29) | Simone Bolelli | NA | 61 64 63 | Round of 128 | 2013 | |
Jeremy Chardy | (28) | Ryan Harrison | NA | 764 46 75 62 | Round of 128 | 2013 | |
Kevin Anderson | (27) | Olivier Rochus | (LL) | 64 62 61 | Round of 128 | 2013 | |
Alexandr Dolgopolov | (26) | Gastao Elias | NA | 61 762 62 | Round of 128 | 2013 | |
Benoit Paire | (25) | Adrian Ungur | NA | 64 46 63 61 | Round of 128 | 2013 | |
Jerzy Janowicz | (24) | Kyle Edmund | (WC) | 62 62 64 | Round of 128 | 2013 | |
Andreas Seppi | (23) | Denis Istomin | NA | 766 763 57 36 63 | Round of 128 | 2013 | |
Juan Monaco | (22) | Bastian Knittel | (Q) | 64 62 63 | Round of 128 | 2013 | |
Bernard Tomic | NA | Sam Querrey | (21) | 766 763 36 26 63 | Round of 128 | 2013 | |
Mikhail Youzhny | (20) | Robin Haase | NA | 64 75 75 | Round of 128 | 2013 | |
Feliciano Lopez | NA | Gilles Simon | (19) | 62 64 7611 | Round of 128 | 2013 | |
John Isner | (18) | Evgeny Donskoy | NA | 61 765 763 | Round of 128 | 2013 | |
Milos Raonic | (17) | Carlos Berlocq | NA | 64 63 63 | Round of 128 | 2013 | |
Ivan Dodig | NA | Philipp Kohlschreiber | (16) | 46 676 763 63 21 (RET) | Round of 128 | 2013 | |
Nicolas Almagro | (15) | Jurgen Zopp | NA | 64 762 75 | Round of 128 | 2013 | |
Viktor Troicki | NA | Janko Tipsarevic | (14) | 63 64 765 | Round of 128 | 2013 | |
Tommy Haas | (13) | Dmitry Tursunov | NA | 63 75 75 | Round of 128 | 2013 | |
Kei Nishikori | (12) | Matthew Ebden | (WC) | 62 64 63 | Round of 128 | 2013 | |
Lleyton Hewitt | NA | Stan Wawrinka | (11) | 64 75 63 | Round of 128 | 2013 | |
Marin Cilic | (10) | Marcos Baghdatis | NA | 63 64 64 | Round of 128 | 2013 | |
Richard Gasquet | (9) | Marcel Granollers | NA | 672 64 75 64 | Round of 128 | 2013 | |
Juan Martin Del Potro | (8) | Albert Ramos-Vinolas | NA | 62 75 61 | Round of 128 | 2013 | |
Tomas Berdych | (7) | Martin Klizan | NA | 63 64 64 | Round of 128 | 2013 | |
Jo-Wilfried Tsonga | (6) | David Goffin | NA | 764 64 63 | Round of 128 | 2013 | |
Steve Darcis | NA | Rafael Nadal | (5) | 764 768 64 | Round of 128 | 2013 | |
David Ferrer | (4) | Martin Alund | NA | 61 46 75 62 | Round of 128 | 2013 | |
Roger Federer | (3) | Victor Hanescu | NA | 63 62 60 | Round of 128 | 2013 | |
Andy Murray | (2) | Benjamin Becker | NA | 64 63 62 | Round of 128 | 2013 | |
Novak Djokovic | (1) | Florian Mayer | NA | 63 75 64 | Round of 128 | 2013 | |
Viktor Troicki | NA | Andrey Kuznetsov | NA | 64 63 64 | Round of 64 | 2013 | |
Bernard Tomic | NA | James Blake | NA | 63 64 75 | Round of 64 | 2013 | |
Jurgen Melzer | NA | Julian Reister | (Q) | 36 762 765 62 | Round of 64 | 2013 | |
Feliciano Lopez | NA | Paul-Henri Mathieu | NA | 63 51 (RET) | Round of 64 | 2013 | |
Lukasz Kubot | NA | Steve Darcis | NA | (W/O) | Round of 64 | 2013 | |
Ivan Dodig | NA | Denis Kudla | (Q) | 61 764 75 | Round of 64 | 2013 | |
Dustin Brown | (Q) | Lleyton Hewitt | NA | 64 64 673 62 | Round of 64 | 2013 | |
Tommy Robredo | (32) | Nicolas Mahut | (WC) | 763 61 765 | Round of 64 | 2013 | |
Fernando Verdasco | NA | Julien Benneteau | (31) | 761 764 64 | Round of 64 | 2013 | |
Grega Zemlja | NA | Grigor Dimitrov | (29) | 36 764 36 64 119 | Round of 64 | 2013 | |
Jeremy Chardy | (28) | Jan-Lennard Struff | (Q) | 62 57 766 764 | Round of 64 | 2013 | |
Kevin Anderson | (27) | Michal Przysiezny | (Q) | 64 762 64 | Round of 64 | 2013 | |
Alexandr Dolgopolov | (26) | Santiago Giraldo | NA | 64 75 63 | Round of 64 | 2013 | |
Benoit Paire | (25) | Stephane Robert | (Q) | 64 75 64 | Round of 64 | 2013 | |
Jerzy Janowicz | (24) | Radek Stepanek | NA | 62 53 (RET) | Round of 64 | 2013 | |
Andreas Seppi | (23) | Michael Llodra | NA | 75 (RET) | Round of 64 | 2013 | |
Juan Monaco | (22) | Rajeev Ram | NA | 57 62 64 62 | Round of 64 | 2013 | |
Mikhail Youzhny | (20) | Vasek Pospisil | NA | 62 673 767 36 64 | Round of 64 | 2013 | |
Adrian Mannarino | NA | John Isner | (18) | 11 (RET) | Round of 64 | 2013 | |
Igor Sijsling | NA | Milos Raonic | (17) | 75 64 764 | Round of 64 | 2013 | |
Nicolas Almagro | (15) | Guillaume Rufin | NA | 75 676 63 64 | Round of 64 | 2013 | |
Tommy Haas | (13) | Jimmy Wang | (Q) | 63 62 75 | Round of 64 | 2013 | |
Kei Nishikori | (12) | Leonardo Mayer | NA | 765 64 62 | Round of 64 | 2013 | |
Kenny De Schepper | NA | Marin Cilic | (10) | (W/O) | Round of 64 | 2013 | |
Richard Gasquet | (9) | Go Soeda | (Q) | 60 63 675 63 | Round of 64 | 2013 | |
Juan Martin Del Potro | (8) | Jesse Levine | NA | 62 767 63 | Round of 64 | 2013 | |
Tomas Berdych | (7) | Daniel Brands | NA | 766 64 62 | Round of 64 | 2013 | |
Ernests Gulbis | NA | Jo-Wilfried Tsonga | (6) | 36 63 63 (RET) | Round of 64 | 2013 | |
David Ferrer | (4) | Roberto Bautista Agut | NA | 63 36 764 75 | Round of 64 | 2013 | |
Sergiy Stakhovsky | NA | Roger Federer | (3) | 675 765 75 765 | Round of 64 | 2013 | |
Andy Murray | (2) | Yen-Hsun Lu | NA | 63 63 75 | Round of 64 | 2013 | |
Novak Djokovic | (1) | Bobby Reynolds | (Q) | 762 63 61 | Round of 64 | 2013 | |
Fernando Verdasco | NA | Ernests Gulbis | NA | 62 64 64 | Round of 32 | 2013 | |
Jurgen Melzer | NA | Sergiy Stakhovsky | NA | 62 26 75 63 | Round of 32 | 2013 | |
Adrian Mannarino | NA | Dustin Brown | (Q) | 64 62 75 | Round of 32 | 2013 | |
Ivan Dodig | NA | Igor Sijsling | NA | 60 61 10 (RET) | Round of 32 | 2013 | |
Lukasz Kubot | NA | Benoit Paire | (25) | 61 63 64 | Round of 32 | 2013 | |
Kenny De Schepper | NA | Juan Monaco | (22) | 64 768 64 | Round of 32 | 2013 | |
Mikhail Youzhny | (20) | Viktor Troicki | NA | 63 64 75 | Round of 32 | 2013 | |
Jerzy Janowicz | (24) | Nicolas Almagro | (15) | 766 63 64 | Round of 32 | 2013 | |
Tommy Haas | (13) | Feliciano Lopez | NA | 46 62 75 64 | Round of 32 | 2013 | |
Andreas Seppi | (23) | Kei Nishikori | (12) | 36 62 674 61 64 | Round of 32 | 2013 | |
Bernard Tomic | NA | Richard Gasquet | (9) | 767 57 75 765 | Round of 32 | 2013 | |
Juan Martin Del Potro | (8) | Grega Zemlja | NA | 75 763 60 | Round of 32 | 2013 | |
Tomas Berdych | (7) | Kevin Anderson | (27) | 36 63 64 75 | Round of 32 | 2013 | |
David Ferrer | (4) | Alexandr Dolgopolov | (26) | 676 762 26 61 62 | Round of 32 | 2013 | |
Andy Murray | (2) | Tommy Robredo | (32) | 62 64 75 | Round of 32 | 2013 | |
Novak Djokovic | (1) | Jeremy Chardy | (28) | 63 62 62 | Round of 32 | 2013 | |
Fernando Verdasco | NA | Kenny De Schepper | NA | 64 64 64 | Round of 16 | 2013 | |
Lukasz Kubot | NA | Adrian Mannarino | NA | 46 63 36 63 64 | Round of 16 | 2013 | |
Jerzy Janowicz | (24) | Jurgen Melzer | NA | 36 761 64 46 64 | Round of 16 | 2013 | |
Juan Martin Del Potro | (8) | Andreas Seppi | (23) | 64 762 63 | Round of 16 | 2013 | |
Tomas Berdych | (7) | Bernard Tomic | NA | 764 675 64 64 | Round of 16 | 2013 | |
David Ferrer | (4) | Ivan Dodig | NA | 673 766 61 61 | Round of 16 | 2013 | |
Andy Murray | (2) | Mikhail Youzhny | (20) | 64 765 61 | Round of 16 | 2013 | |
Novak Djokovic | (1) | Tommy Haas | (13) | 61 64 764 | Round of 16 | 2013 | |
Jerzy Janowicz | (24) | Lukasz Kubot | NA | 75 64 64 | Quarter-Finals | 2013 | |
Juan Martin Del Potro | (8) | David Ferrer | (4) | 62 64 765 | Quarter-Finals | 2013 | |
Andy Murray | (2) | Fernando Verdasco | NA | 46 36 61 64 75 | Quarter-Finals | 2013 | |
Novak Djokovic | (1) | Tomas Berdych | (7) | 765 64 63 | Quarter-Finals | 2013 | |
Andy Murray | (2) | Jerzy Janowicz | (24) | 672 64 64 63 | Semi-Finals | 2013 | |
Novak Djokovic | (1) | Juan Martin Del Potro | (8) | 75 46 762 676 63 | Semi-Finals | 2013 | |
Andy Murray | (2) | Novak Djokovic | (1) | 64 75 64 | Finals | 2013 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(dplyr) | |
library(stringr) | |
# Scrape the Wimbledon 2013 results page into one row per match, keep only the
# main-draw matches and save them as CSV.
s <- html_session("http://www.atpworldtour.com/en/scores/archive/wimbledon/540/2013/results")
rows <- s %>% html_nodes("div#scoresResultsContent tr")

# Trimmed text of the <span> inside the k-th seeding cell of a row, or NA.
seeding_text <- function(cells, k) {
  ifelse(!is.na(cells[k]),
         cells[k] %>% html_node("span") %>% html_text() %>% str_trim(),
         NA)
}

# Rows are collected in a pre-allocated list and bound once after the loop;
# calling rbind() on every iteration copies the whole data frame each time.
match_rows <- vector("list", length(rows))
n_matches <- 0L

# Name of the round the following match rows belong to. It is taken from the
# most recent row that has no score link. Starting at NA avoids picking up
# base::round() if a match row is seen before any such header row.
current_round <- NA_character_

for (row in rows) {
  players <- row %>% html_nodes("td.day-table-name a")
  seedings <- row %>% html_nodes("td.day-table-seed")
  score <- row %>% html_node("td.day-table-score a")

  # Older rvest returns NULL when nothing matches; newer versions return a
  # missing node instead, so both are treated as "this row has no score".
  has_score <- !(is.null(score) || inherits(score, "xml_missing"))

  if (has_score) {
    n_matches <- n_matches + 1L
    match_rows[[n_matches]] <- data.frame(
      winner = players[1] %>% html_text() %>% str_trim(),
      winner_seeding = seeding_text(seedings, 1),
      loser = players[2] %>% html_text() %>% str_trim(),
      loser_seeding = seeding_text(seedings, 2),
      score = score %>% html_text() %>% str_trim(),
      round = current_round
    )
  } else {
    current_round <- row %>% html_node("th") %>% html_text()
  }
}

# The original loop prepended every new match, so the combined data frame is
# in reverse page order; rev() keeps that ordering.
if (n_matches > 0L) {
  matches <- do.call(rbind, rev(match_rows[seq_len(n_matches)]))
} else {
  matches <- data.frame()
}

main_matches <- matches %>%
  filter(!grepl("Qualifying", round)) %>%
  mutate(year = 2013)

write.csv(main_matches, "/tmp/wimbledon.2013.csv", row.names = FALSE)
# FIXME (original author's note): a mistake was made in the scraping step
# further up - when only one player in a match is a qualifier, the marker
# always gets attached to the winner even if it belongs to the loser.

# Translate the bracketed entry markers in the seeding columns into a readable
# entry type. Values that are not in the lookup (numeric seeds such as "(12)",
# or a missing seeding) come out as NA.
entry_types <- c(
  "(WC)" = "wildcard",
  "(Q)" = "qualifier",
  "(LL)" = "lucky loser"
)
main_matches$winner_type <- unname(entry_types[as.character(main_matches$winner_seeding)])
main_matches$loser_type <- unname(entry_types[as.character(main_matches$loser_seeding)])
# Strip the surrounding brackets from seeding labels and blank out the
# non-numeric entry markers.
#
# seeding: vector of labels such as "(12)", "(Q)", "(WC)", "(LL)" or NA.
# Returns a character vector of the same length: the label without brackets,
# or NA where the label contains "WC", "Q" or "L" (or was already NA).
tidy_seeding <- function(seeding) {
  stripped <- gsub("[()]", "", seeding)
  stripped[grepl("WC|Q|L", stripped)] <- NA
  stripped
}
# Seedings become plain numbers; wildcard / qualifier / lucky-loser markers
# and unseeded players end up as NA.
main_matches$winner_seeding <- as.numeric(tidy_seeding(main_matches$winner_seeding))
main_matches$loser_seeding <- as.numeric(tidy_seeding(main_matches$loser_seeding))

# Give the rounds their tournament order so they can be sorted on.
round_order <- c(
  "Round of 128", "Round of 64", "Round of 32", "Round of 16",
  "Quarter-Finals", "Semi-Finals", "Finals"
)
main_matches$round <- factor(main_matches$round, levels = round_order)

# The underdog won
main_matches %>%
  filter((is.na(winner_seeding) & !is.na(loser_seeding)) | (winner_seeding > loser_seeding))

# Seeds who lost in the first round
main_matches %>%
  filter(!is.na(loser_seeding), round == "Round of 128")

# How far did the unseeded players get?
main_matches %>%
  filter(is.na(loser_seeding)) %>%
  arrange(desc(round)) %>%
  head(5)

# How far did the wildcards/qualifier/lucky losers get?
main_matches %>%
  filter(loser_type == "wildcard") %>%
  arrange(desc(round)) %>%
  head(5)

main_matches %>%
  filter(loser_type == "qualifier") %>%
  arrange(desc(round)) %>%
  head(5)

main_matches %>%
  filter(loser_type == "lucky loser") %>%
  arrange(desc(round)) %>%
  head(5)

# Player names as plain character vectors for the comparisons further down.
name_columns <- c("winner", "loser")
main_matches[name_columns] <- lapply(main_matches[name_columns], as.character)
# How far did a particular player get?
#
# player:       name of the player to look up.
# main_matches: data frame with `winner`, `loser` and `round` columns; `round`
#               must sort in tournament order (e.g. a factor whose levels run
#               from the first round to the final).
#
# Returns "Winner" when the player won the furthest match they appear in,
# otherwise the name of the round of that match. Returns NA_character_ when the
# player does not appear in `main_matches` at all (previously a zero-length
# vector, which cannot be used as a column value inside mutate()).
round_reached <- function(player, main_matches) {
  # which() drops comparisons that evaluate to NA, as dplyr::filter() would.
  involved <- which(main_matches$winner == player | main_matches$loser == player)
  if (length(involved) == 0) {
    return(NA_character_)
  }

  played <- main_matches[involved, , drop = FALSE]
  # Latest round first; ties keep their original order.
  latest <- order(-xtfrm(played$round))[1]
  furthest_match <- played[latest, , drop = FALSE]

  ifelse(furthest_match$winner == player,
         "Winner",
         as.character(furthest_match$round))
}
# Furthest match that involves any of the listed players.
player <- c("Rafael Nadal", "Andy Murray")
main_matches %>%
  filter(winner %in% player | loser %in% player) %>%
  # This pipe was missing, which ended the pipeline here and left the
  # arrange() call below running without any data.
  group_by(winner, loser) %>%
  arrange(desc(round)) %>%
  head(1)

round_reached("Novak Djokovic", main_matches)
round_reached("Rafael Nadal", main_matches)
round_reached("Andy Murray", main_matches)

# Round reached by each of the top eight seeds. A seed can show up on either
# side of a match, so both sides are collected and de-duplicated.
rbind(
  main_matches %>%
    filter(winner_seeding %in% 1:8) %>%
    mutate(name = winner, seeding = winner_seeding),
  main_matches %>%
    filter(loser_seeding %in% 1:8) %>%
    mutate(name = loser, seeding = loser_seeding)
) %>%
  select(name, seeding) %>%
  distinct() %>%
  arrange(seeding) %>%
  # round_reached() looks up one player at a time, so evaluate it per name.
  group_by(name) %>%
  mutate(round_reached = round_reached(name, main_matches)) %>%
  ungroup()

# Which seeds didn't get as far as they should have?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment