Skip to content

Instantly share code, notes, and snippets.

@jkaupp
Last active February 14, 2019 15:24
Show Gist options
  • Save jkaupp/93487d2f1bbe496c09b4f9f1e670061a to your computer and use it in GitHub Desktop.
Scraping tables from NFL.com
# Dependencies ----
library(tidyverse)  # dplyr, purrr, stringr, readr, tibble, ggplot2
library(lubridate)
library(glue)       # string interpolation for building URLs
library(rvest)      # HTML scraping (read_html, html_nodes, html_table)
library(janitor)    # clean_names(); fixed typo — original `libray(janitor)` would error
#' Scrape a single page of an NFL.com category-stats table.
#'
#' @param url  Base category-stats URL (already containing the query string).
#' @param page Page number to fetch; `d-447263-p` is NFL.com's pagination
#'   query parameter.
#' @return A tibble with snake_case column names, stat columns coerced to
#'   numeric, and the comma-formatted `yds` column parsed to a number.
scrape_nfl_table <- function(url, page) {
  glue("{url}&d-447263-p={page}") %>%
    read_html() %>%
    html_nodes("#result") %>%
    html_table() %>%
    flatten_df() %>%
    # Coerce the stat columns to numeric by position; column 9 stays
    # character (mixed content). `across()` replaces superseded `mutate_at()`.
    mutate(across(c(1, 5:8, 10:15, 17:19), as.numeric)) %>%
    mutate(across(9, as.character)) %>%
    clean_names() %>%
    # "yds" is comma-formatted (e.g. "1,234"); parse_number strips the commas.
    mutate(across("yds", parse_number))
}
#' Pull every page of one NFL.com category-stats query and row-bind them.
#'
#' @param archive Ignored on input: NFL.com requires `archive=true` for any
#'   non-current season, so the value is recomputed from `season` below.
#'   Kept in the signature so `pmap()` over the scaffold tibble still works.
#' @param conference        e.g. "null", "AFC", "NFC".
#' @param statisticCategory e.g. "PASSING".
#' @param season            Four-digit season year.
#' @param seasonType        e.g. "REG", "POST".
#' @return A single tibble of all result pages for the query.
pull_nfl_statistics <- function(archive, conference, statisticCategory, season, seasonType) {
  # Throttle requests so repeated calls don't hammer NFL.com.
  Sys.sleep(5)

  # 2018 was the current season when this was written; anything earlier
  # must be requested from the archive.
  archive <- if (season != 2018) "true" else "false"

  url <- glue("http://www.nfl.com/stats/categorystats?archive={archive}&conference={conference}&statisticCategory={statisticCategory}&season={season}&seasonType={seasonType}&experience=&tabSeq=0&qualified=true&Submit=Go")

  # Pull the page numbers out of the pagination widget. Use "\\d+" to match
  # whole numbers — the original "\\d" split multi-digit page numbers into
  # single digits and broke for categories with 10+ pages.
  pages <- read_html(url) %>%
    html_nodes("#main-content > div.c > div.grid > div.col.span-12 > form > span:nth-child(4)") %>%
    html_text() %>%
    str_extract_all("\\d+", simplify = TRUE)

  # Scrape each page and row-bind the results into one tibble.
  map_dfr(pages, ~scrape_nfl_table(url, .x))
}
# One row per stats query to run; pmap() passes each row's columns to
# pull_nfl_statistics() as named arguments.
scaffold <- tibble(
  archive           = "true",
  conference        = "null",
  statisticCategory = "PASSING",
  season            = 2018,
  seasonType        = "REG"
)

# Run every query; `output` is a list with one element per scaffold row.
output <- pmap(scaffold, pull_nfl_statistics)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment