Skip to content

Instantly share code, notes, and snippets.

@herbps10
Created June 20, 2017 01:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save herbps10/c3282c5b869d7a33d601079e66eba490 to your computer and use it in GitHub Desktop.
Save herbps10/c3282c5b869d7a33d601079e66eba490 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rvest)
library(survival)
library(GGally)
library(stringr)
library(survminer)
# Download the season listing page for seasons 1-33 from j-archive.com, pull
# the first <table> node out of each page, and stack the parsed tables into a
# single data frame (one row per table row, tagged with its season number).
game_data <- tibble(season = 1:33) %>%
  mutate(
    link = paste0("http://j-archive.com/showseason.php?season=", season),
    # One HTTP request per season; read_html() errors if a page is unreachable.
    html = map(link, read_html),
    table = map(html, html_node, "table"),
    dat = map(table, html_table)
  ) %>%
  select(season, dat) %>%
  # Name the list-column explicitly: calling unnest() without specifying the
  # column is deprecated in tidyr >= 1.0 and emits a warning.
  unnest(dat)
# Build one row per first-listed contestant with the number of non-tournament
# games they are listed first in, stored as `wins`.
# NOTE(review): X1/X2/X3 are presumably the default column names html_table()
# assigns to the header-less season tables -- confirm against the scraped data.
# NOTE(review): treating "listed first" as the returning champion is an
# assumption about j-archive's ordering -- confirm.
results <- game_data %>%
  transmute(
    airdate = X1,
    contestants = X2,
    extra = str_to_lower(X3)
  ) %>%
  # Drop special events so only regular-play games are counted.
  filter(
    !str_detect(extra, "championship"),
    !str_detect(extra, "tournament")
  ) %>%
  # Split "A vs. B vs. C" into three columns. The delimiter swallows the
  # whitespace around "vs." so names carry no leading/trailing blanks and the
  # grouping key below does not depend on incidental spacing.
  separate(
    contestants,
    into = paste0("contestant", 1:3),
    sep = "\\s*vs\\.\\s*"
  ) %>%
  group_by(contestant1) %>%
  summarize(wins = n())
# Fit a Kaplan-Meier curve over the per-contestant win counts and plot it.
# Surv() is given only a time, so every observation is treated as an observed
# event (no censoring).
surv <- Surv(time = results$wins)
fit <- survfit(surv ~ 1, data = results)

# ggsurvplot() returns a list; its `plot` element is the ggplot object, which
# can then be extended with ordinary ggplot2 layers.
ggsurvplot(fit, data = results)$plot +
  geom_point() +
  xlab("Wins") +
  labs(
    title = "Jeopardy! Survival Curve",
    # `results` has one row per contestant, so nrow(results) would report the
    # number of contestants; the number of games is the total of the counts.
    subtitle = paste0(sum(results$wins), " games"),
    caption = "Data: j-archive.com\nherbsusmann.com"
  ) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  theme(
    plot.caption = element_text(size = 10),
    legend.position = "none"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment