Created
June 5, 2022 16:02
-
-
Save jtrecenti/65a657c39451f645d39182a2cdcf085a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Author: | |
#' Subject: | |
# Import ----------------------------------------------------------------------- | |
## Não funciona com selenium! | |
# u <- "https://www.atptour.com/en/rankings/singles" | |
# ses <- RSelenium::rsDriver(browser = "firefox") | |
# ses$client$navigate(u) | |
# | |
# tabela <- ses$client$getPageSource() |> | |
# dplyr::first() |> | |
# rvest::read_html() |> | |
# rvest::html_element(xpath = "//*[@id='player-rank-detail-ajax']") | |
# | |
# dados_tabela <- tabela |> | |
# rvest::html_table() |> | |
# janitor::clean_names() |> | |
# dplyr::transmute( | |
# rank, player, age, | |
# points = readr::parse_number(points), | |
# tourn_played | |
# ) | |
# | |
# dados_tabela$link <- tabela |> | |
# rvest::html_elements(xpath = ".//a[contains(@href, 'rankings-breakdown')]") |> | |
# rvest::html_attr("href") |> | |
# paste0("https://www.atptour.com", ... = _) | |
# | |
# # Quero apenas os dados dos 20 primeiros | |
# dados_tabela_20 <- dplyr::slice_head(dados_tabela, n = 20) | |
# | |
# dados_jogador <- function(link, ses) { | |
# ses$client$navigate(link) | |
# } | |
## Baixei manualmente | |
arqs <- fs::dir_ls("/Users/julio/Downloads/", glob = "*.htm") | |
parse_file <- function(arq) { | |
html <- arq |> | |
rvest::read_html() | |
tabelas <- html |> | |
rvest::html_table(convert = FALSE) |> | |
magrittr::extract(-c(1,2)) | |
titulos <- html |> | |
rvest::html_elements(".module-title") |> | |
rvest::html_text() |> | |
stringr::str_squish() |> | |
stringr::str_subset("^$", negate = TRUE) | |
tabelas |> | |
purrr::set_names(titulos) |> | |
dplyr::bind_rows(.id = "type") |> | |
dplyr::filter(!stringr::str_detect(type, "Non-Count")) |> | |
purrr::set_names( | |
"type", "date", "tourn", "x1", "x2", "x3", "round", "points", | |
"drop_date", "x4" | |
) |> | |
dplyr::select(-dplyr::starts_with("x")) |> | |
dplyr::mutate( | |
points = readr::parse_number(points), | |
drop_date = lubridate::ymd(drop_date) | |
) | |
} | |
da_breakdown <- purrr::map_dfr(arqs, parse_file, .id = "player") |> | |
dplyr::mutate(player = stringr::str_extract( | |
basename(player), ".*(?= Rankings)" | |
)) |> | |
dplyr::arrange(drop_date) |> | |
dplyr::group_by(player) |> | |
dplyr::mutate( | |
total_points = sum(points), | |
points_time = total_points - cumsum(points) | |
) |> | |
dplyr::ungroup() |> | |
dplyr::mutate( | |
player = forcats::fct_reorder(player, total_points, max, .desc = TRUE) | |
) |> | |
dplyr::mutate(ranking = as.integer(player)) | |
da_hoje <- da_breakdown |> | |
dplyr::distinct(player, .keep_all = TRUE) |> | |
dplyr::mutate(drop_date = Sys.Date()) | |
da_final <- dplyr::bind_rows(da_breakdown, da_hoje) | |
# transformação dos dados ------------------------------------------------- | |
pegar_rankings <- function(month) { | |
da_final |> | |
dplyr::mutate(drop_date = lubridate::floor_date(drop_date, "month")) |> | |
dplyr::filter(drop_date <= month) |> | |
dplyr::arrange(points_time) |> | |
dplyr::distinct(player, .keep_all = TRUE) |> | |
dplyr::mutate( | |
player = forcats::fct_reorder(player, points_time, max, .desc = TRUE), | |
ranking = as.integer(player) | |
) | |
} | |
proximos_meses <- lubridate::floor_date(Sys.Date() + months(0:6), "month") | |
rankings_tempo <- proximos_meses |> | |
purrr::set_names() |> | |
purrr::map_dfr(pegar_rankings, .id = "mes") |> | |
dplyr::mutate(mes = as.Date(mes)) |> | |
dplyr::arrange(mes, ranking) | |
foco <- "Rafael Nadal" | |
rankings_tempo |> | |
dplyr::mutate( | |
player = factor(player, levels = levels(da_final$player)), | |
player = forcats::fct_rev(player), | |
foco = ranking == dplyr::first(ranking[player == foco]) | |
) |> | |
ggplot2::ggplot() + | |
ggplot2::aes( | |
x = mes, y = player, group = factor(ranking) | |
) + | |
ggplot2::geom_line(size = 2) + | |
ggplot2::geom_point(size = 3) + | |
ggplot2::scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y") + | |
ggplot2::theme_minimal(14) + | |
ggplot2::theme(panel.grid.minor = ggplot2::element_blank()) + | |
gghighlight::gghighlight(foco) | |
# Tidy ------------------------------------------------------------------------- | |
# Visualize -------------------------------------------------------------------- | |
library(shiny) | |
library(miniUI) | |
readr::write_rds(rankings_tempo, "data/rankings_tempo.rds") | |
myGadgetFunc <- function(rankings_tempo) { | |
ui <- miniPage( | |
gadgetTitleBar("Gráfico"), | |
miniContentPanel( | |
shiny::selectInput( | |
"foco", | |
label = "Jogador", | |
choices = unique(rankings_tempo$player) | |
), | |
shiny::plotOutput("plot") | |
) | |
) | |
server <- function(input, output, session) { | |
# Define reactive expressions, outputs, etc. | |
# When the Done button is clicked, return a value | |
observeEvent(input$done, { | |
returnValue <- "obrigado!" | |
stopApp(returnValue) | |
}) | |
output$plot <- shiny::renderPlot({ | |
foco_r <- dplyr::first(rankings_tempo$ranking[rankings_tempo$player == input$foco]) | |
suppressWarnings({ | |
rankings_tempo |> | |
dplyr::mutate( | |
player = factor(player, levels = levels(da_final$player)), | |
player = forcats::fct_rev(player), | |
foco = ranking == foco_r | |
) |> | |
ggplot2::ggplot() + | |
ggplot2::aes( | |
x = mes, y = player, group = factor(ranking) | |
) + | |
ggplot2::geom_line(size = 2) + | |
ggplot2::geom_point(size = 3) + | |
ggplot2::scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y") + | |
ggplot2::theme_minimal(14) + | |
ggplot2::theme(panel.grid.minor = ggplot2::element_blank()) + | |
gghighlight::gghighlight(foco) | |
}) | |
}) | |
} | |
runGadget(ui, server) | |
} | |
myGadgetFunc(rankings_tempo) | |
# Model ------------------------------------------------------------------------ | |
# Export ----------------------------------------------------------------------- | |
# readr::write_rds(d, "") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment