Skip to content

Instantly share code, notes, and snippets.

@jeffgswanson
Created January 10, 2021 17:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffgswanson/6461f41c2ef1ca51e51aba3b377c9a1f to your computer and use it in GitHub Desktop.
Save jeffgswanson/6461f41c2ef1ca51e51aba3b377c9a1f to your computer and use it in GitHub Desktop.
Reproducible Scores Code
# Activate these packages
library(shiny)
library(DT)
library(shinythemes)
library(rvest)
library(expss)
library(dplyr)
library(tidyr)
library(stringr)
library(sqldf)
library(scales)
# Class A schools and their enrollment figures, entered by hand.
# Source list of schools and classes:
## https://nsaa-static.s3.amazonaws.com/textfile/bask/bbbclass.pdf
# The two vectors are positional pairs: School[i] goes with Enrollment[i].
School <- c(
  "Omaha South", "Omaha Central", "Grand Island", "Millard North",
  "Millard South", "Lincoln High", "Lincoln East", "Millard West",
  "North Star", "Omaha North", "Creighton Prep", "Lincoln Southeast",
  "Lincoln Southwest", "Burke", "Omaha Westside", "Papillion-LaVista South",
  "Bryan", "Papillion-LaVista", "Omaha Northwest", "Lincoln Northeast",
  "Kearney", "Bellevue West", "Bellevue East", "Gretna",
  "Fremont", "Elkhorn South", "Benson", "Norfolk",
  "Columbus", "Pius X", "North Platte"
)
Enrollment <- c(
  2226, 2094, 1970, 1923,
  1903, 1754, 1734, 1687,
  1623, 1584, 1562, 1557,
  1557, 1547, 1462, 1447,
  1439, 1381, 1316, 1278,
  1215, 1145, 1123, 1121,
  1113, 1076, 1076, 1038,
  965, 898, 896
)
# One row per school, with columns named School and Enrollment
classA <- data.frame(School = School, Enrollment = Enrollment)
# Scrape raw game-score text from MaxPreps, one request per game date.
# Each page URL is base URL + date + fixed query parameters.
# The game dates below were added manually.
maxprep_baseURL <- "https://www.maxpreps.com/list/schedules_scores.aspx?date="
maxprep_paramURL <- "&ssid=8f5a3f82-d7d8-4f76-a137-3c884eae5b54&statedivisionid=07ba312e-1587-4455-b373-ca343d52f4b2"
game_dates <- c(
  "12/3/2020", "12/4/2020", "12/5/2020", "12/7/2020",
  "12/10/2020", "12/11/2020", "12/12/2020", "12/15/2020",
  "12/16/2020", "12/17/2020", "12/18/2020", "12/19/2020",
  "12/21/2020", "12/22/2020", "12/28/2020", "12/29/2020",
  "12/31/2020", "1/2/2021", "1/5/2021", "1/7/2021",
  "1/8/2021", "1/9/2021", "1/12/2021", "1/14/2021",
  "1/15/2021", "1/16/2021", "1/19/2021", "1/21/2021",
  "1/22/2021", "1/23/2021", "1/26/2021", "1/28/2021",
  "1/29/2021", "1/30/2021", "2/2/2021", "2/4/2021",
  "2/5/2021", "2/6/2021", "2/9/2021", "2/11/2021",
  "2/12/2021", "2/13/2021", "2/16/2021", "2/18/2021",
  "2/19/2021", "2/20/2021", "2/26/2021", "2/27/2021",
  "3/1/2021", "3/2/2021", "3/11/2021", "3/12/2021",
  "3/13/2021"
)
# One full URL per game date, held as a list
maxprep_page_list <- as.list(
  paste0(maxprep_baseURL, game_dates, maxprep_paramURL)
)
# For every page, keep the text of the nodes whose data-contest-state
# attribute is 'boxscore'; the result is a list with one character vector
# per page.
maxprep_html <- lapply(maxprep_page_list, function(page_url) {
  page <- read_html(page_url)
  boxscore_nodes <- html_nodes(page, "[data-contest-state='boxscore']")
  html_text(boxscore_nodes)
})
# Turn the scraped box-score strings into a table of games with a winner.
# Result: data frame `scores` with character columns Away_Score, Away_Team,
# Home_Score, Home_Team and Winner.

# Flatten the per-page list into a single character vector of game strings
scores <- unlist(maxprep_html)
# Remove the literal "Final" label from each game string
scores <- gsub("Final", "", scores)
# Drop every entry that contains a "#"
# NOTE(review): presumably these are entries with ranked teams, whose rank
# digits would confuse the digit-based split below -- confirm.
scores <- grep("#", scores, invert = TRUE, value = TRUE)
scores <- data.frame(scores)
colnames(scores) <- c("V1")
# Wrap every run of digits in semicolons, then split on the semicolons
# (swallowing surrounding spaces). The piece before the first number is
# discarded (NA); the remaining four pieces are number, text, number, text.
scores <- scores %>%
  mutate(V1 = gsub("(\\d+)", ";\\1;", V1)) %>%
  separate(V1, c(NA, "No1", "Let1", "No2", "Let2"), sep = " *; *")
colnames(scores) <- c("Away_Score", "Away_Team", "Home_Score", "Home_Team")
# separate() yields character columns, and comparing strings with `>` is
# lexicographic ("9" > "10" is TRUE). Compare the scores as numbers so the
# winner is correct when the scores have different digit counts. The score
# columns themselves are left as character. Ties go to the home team.
scores$Winner <- ifelse(
  as.numeric(scores$Away_Score) > as.numeric(scores$Home_Score),
  scores$Away_Team,
  scores$Home_Team
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment