Skip to content

Instantly share code, notes, and snippets.

@jtrecenti
Created June 5, 2022 16:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jtrecenti/65a657c39451f645d39182a2cdcf085a to your computer and use it in GitHub Desktop.
Save jtrecenti/65a657c39451f645d39182a2cdcf085a to your computer and use it in GitHub Desktop.
#' Author:
#' Subject:
# Import -----------------------------------------------------------------------
## Não funciona com selenium!
# u <- "https://www.atptour.com/en/rankings/singles"
# ses <- RSelenium::rsDriver(browser = "firefox")
# ses$client$navigate(u)
#
# tabela <- ses$client$getPageSource() |>
# dplyr::first() |>
# rvest::read_html() |>
# rvest::html_element(xpath = "//*[@id='player-rank-detail-ajax']")
#
# dados_tabela <- tabela |>
# rvest::html_table() |>
# janitor::clean_names() |>
# dplyr::transmute(
# rank, player, age,
# points = readr::parse_number(points),
# tourn_played
# )
#
# dados_tabela$link <- tabela |>
# rvest::html_elements(xpath = ".//a[contains(@href, 'rankings-breakdown')]") |>
# rvest::html_attr("href") |>
# paste0("https://www.atptour.com", ... = _)
#
# # Quero apenas os dados dos 20 primeiros
# dados_tabela_20 <- dplyr::slice_head(dados_tabela, n = 20)
#
# dados_jogador <- function(link, ses) {
# ses$client$navigate(link)
# }
## Baixei manualmente
arqs <- fs::dir_ls("/Users/julio/Downloads/", glob = "*.htm")
parse_file <- function(arq) {
html <- arq |>
rvest::read_html()
tabelas <- html |>
rvest::html_table(convert = FALSE) |>
magrittr::extract(-c(1,2))
titulos <- html |>
rvest::html_elements(".module-title") |>
rvest::html_text() |>
stringr::str_squish() |>
stringr::str_subset("^$", negate = TRUE)
tabelas |>
purrr::set_names(titulos) |>
dplyr::bind_rows(.id = "type") |>
dplyr::filter(!stringr::str_detect(type, "Non-Count")) |>
purrr::set_names(
"type", "date", "tourn", "x1", "x2", "x3", "round", "points",
"drop_date", "x4"
) |>
dplyr::select(-dplyr::starts_with("x")) |>
dplyr::mutate(
points = readr::parse_number(points),
drop_date = lubridate::ymd(drop_date)
)
}
da_breakdown <- purrr::map_dfr(arqs, parse_file, .id = "player") |>
dplyr::mutate(player = stringr::str_extract(
basename(player), ".*(?= Rankings)"
)) |>
dplyr::arrange(drop_date) |>
dplyr::group_by(player) |>
dplyr::mutate(
total_points = sum(points),
points_time = total_points - cumsum(points)
) |>
dplyr::ungroup() |>
dplyr::mutate(
player = forcats::fct_reorder(player, total_points, max, .desc = TRUE)
) |>
dplyr::mutate(ranking = as.integer(player))
da_hoje <- da_breakdown |>
dplyr::distinct(player, .keep_all = TRUE) |>
dplyr::mutate(drop_date = Sys.Date())
da_final <- dplyr::bind_rows(da_breakdown, da_hoje)
# transformação dos dados -------------------------------------------------
pegar_rankings <- function(month) {
da_final |>
dplyr::mutate(drop_date = lubridate::floor_date(drop_date, "month")) |>
dplyr::filter(drop_date <= month) |>
dplyr::arrange(points_time) |>
dplyr::distinct(player, .keep_all = TRUE) |>
dplyr::mutate(
player = forcats::fct_reorder(player, points_time, max, .desc = TRUE),
ranking = as.integer(player)
)
}
proximos_meses <- lubridate::floor_date(Sys.Date() + months(0:6), "month")
rankings_tempo <- proximos_meses |>
purrr::set_names() |>
purrr::map_dfr(pegar_rankings, .id = "mes") |>
dplyr::mutate(mes = as.Date(mes)) |>
dplyr::arrange(mes, ranking)
foco <- "Rafael Nadal"
rankings_tempo |>
dplyr::mutate(
player = factor(player, levels = levels(da_final$player)),
player = forcats::fct_rev(player),
foco = ranking == dplyr::first(ranking[player == foco])
) |>
ggplot2::ggplot() +
ggplot2::aes(
x = mes, y = player, group = factor(ranking)
) +
ggplot2::geom_line(size = 2) +
ggplot2::geom_point(size = 3) +
ggplot2::scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y") +
ggplot2::theme_minimal(14) +
ggplot2::theme(panel.grid.minor = ggplot2::element_blank()) +
gghighlight::gghighlight(foco)
# Tidy -------------------------------------------------------------------------
# Visualize --------------------------------------------------------------------
library(shiny)
library(miniUI)
readr::write_rds(rankings_tempo, "data/rankings_tempo.rds")
myGadgetFunc <- function(rankings_tempo) {
ui <- miniPage(
gadgetTitleBar("Gráfico"),
miniContentPanel(
shiny::selectInput(
"foco",
label = "Jogador",
choices = unique(rankings_tempo$player)
),
shiny::plotOutput("plot")
)
)
server <- function(input, output, session) {
# Define reactive expressions, outputs, etc.
# When the Done button is clicked, return a value
observeEvent(input$done, {
returnValue <- "obrigado!"
stopApp(returnValue)
})
output$plot <- shiny::renderPlot({
foco_r <- dplyr::first(rankings_tempo$ranking[rankings_tempo$player == input$foco])
suppressWarnings({
rankings_tempo |>
dplyr::mutate(
player = factor(player, levels = levels(da_final$player)),
player = forcats::fct_rev(player),
foco = ranking == foco_r
) |>
ggplot2::ggplot() +
ggplot2::aes(
x = mes, y = player, group = factor(ranking)
) +
ggplot2::geom_line(size = 2) +
ggplot2::geom_point(size = 3) +
ggplot2::scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y") +
ggplot2::theme_minimal(14) +
ggplot2::theme(panel.grid.minor = ggplot2::element_blank()) +
gghighlight::gghighlight(foco)
})
})
}
runGadget(ui, server)
}
myGadgetFunc(rankings_tempo)
# Model ------------------------------------------------------------------------
# Export -----------------------------------------------------------------------
# readr::write_rds(d, "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment