Created
January 10, 2021 17:14
-
-
Save jeffgswanson/6461f41c2ef1ca51e51aba3b377c9a1f to your computer and use it in GitHub Desktop.
Reproducible Scores Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the packages this script depends on
library(shiny) | |
library(DT) | |
library(shinythemes) | |
library(rvest) | |
library(expss) | |
library(dplyr) | |
library(tidyr) | |
library(stringr) | |
library(sqldf) | |
library(scales) | |
# Class A schools and their enrollment figures, entered by hand.
# Source list of schools and classes:
## https://nsaa-static.s3.amazonaws.com/textfile/bask/bbbclass.pdf
# School[i] and Enrollment[i] describe the same school; both vectors are kept
# in the same order (descending enrollment as listed).
School <- c(
  "Omaha South", "Omaha Central", "Grand Island", "Millard North",
  "Millard South", "Lincoln High", "Lincoln East", "Millard West",
  "North Star", "Omaha North", "Creighton Prep", "Lincoln Southeast",
  "Lincoln Southwest", "Burke", "Omaha Westside",
  "Papillion-LaVista South", "Bryan", "Papillion-LaVista",
  "Omaha Northwest", "Lincoln Northeast", "Kearney", "Bellevue West",
  "Bellevue East", "Gretna", "Fremont", "Elkhorn South", "Benson",
  "Norfolk", "Columbus", "Pius X", "North Platte"
)
Enrollment <- c(
  2226, 2094, 1970, 1923, 1903, 1754, 1734, 1687,
  1623, 1584, 1562, 1557, 1557, 1547, 1462, 1447,
  1439, 1381, 1316, 1278, 1215, 1145, 1123, 1121,
  1113, 1076, 1076, 1038, 965, 898, 896
)
# One row per school, columns named School and Enrollment
classA <- data.frame(School = School, Enrollment = Enrollment)
# Scrape the game-score text from MaxPreps, one request per game date.
# The game dates below were entered manually.
maxprep_baseURL <- "https://www.maxpreps.com/list/schedules_scores.aspx?date="
maxprep_paramURL <-
  "&ssid=8f5a3f82-d7d8-4f76-a137-3c884eae5b54&statedivisionid=07ba312e-1587-4455-b373-ca343d52f4b2"
game_dates <- c(
  # December 2020
  "12/3/2020", "12/4/2020", "12/5/2020", "12/7/2020", "12/10/2020",
  "12/11/2020", "12/12/2020", "12/15/2020", "12/16/2020", "12/17/2020",
  "12/18/2020", "12/19/2020", "12/21/2020", "12/22/2020", "12/28/2020",
  "12/29/2020", "12/31/2020",
  # January 2021
  "1/2/2021", "1/5/2021", "1/7/2021", "1/8/2021", "1/9/2021",
  "1/12/2021", "1/14/2021", "1/15/2021", "1/16/2021", "1/19/2021",
  "1/21/2021", "1/22/2021", "1/23/2021", "1/26/2021", "1/28/2021",
  "1/29/2021", "1/30/2021",
  # February 2021
  "2/2/2021", "2/4/2021", "2/5/2021", "2/6/2021", "2/9/2021",
  "2/11/2021", "2/12/2021", "2/13/2021", "2/16/2021", "2/18/2021",
  "2/19/2021", "2/20/2021", "2/26/2021", "2/27/2021",
  # March 2021
  "3/1/2021", "3/2/2021", "3/11/2021", "3/12/2021", "3/13/2021"
)
# One full URL per date: base URL + date + fixed query parameters
maxprep_page_list <- lapply(game_dates, function(game_date) {
  paste0(maxprep_baseURL, game_date, maxprep_paramURL)
})
# For every page, download the HTML and keep the text of each element whose
# data-contest-state attribute equals 'boxscore'. The result is a list with
# one character vector per date.
maxprep_html <- lapply(maxprep_page_list, function(page_url) {
  page <- read_html(page_url)
  boxscore_nodes <- html_nodes(page, "[data-contest-state='boxscore']")
  html_text(boxscore_nodes)
})
# Turn the scraped per-date text into a table of games with one row per game:
# Away_Score, Away_Team, Home_Score, Home_Team and Winner.

# Flatten the list of per-date character vectors into one character vector
scores <-
  unlist(maxprep_html)
# Strip the literal "Final" label from every entry
scores <-
  gsub("Final", "", scores)
# Discard every entry that contains "#"
# NOTE(review): presumably these entries carry ranking markers -- confirm
scores <-
  grep("#", scores, invert = TRUE, value = TRUE)
scores <-
  data.frame(V1 = scores)
# Wrap each run of digits in ";" so that an entry can be split on ";" into
# alternating score / name fields. The piece in front of the first score is
# dropped (the NA column name).
# NOTE(review): a team name that itself contains digits would yield extra
# pieces and misalign the columns -- verify against the scraped data.
scores <-
  scores %>%
  mutate(V1 = gsub("(\\d+)", ";\\1;", V1)) %>%
  separate(V1, c(NA, "No1", "Let1", "No2", "Let2"), sep = " *; *")
colnames(scores) <-
  c("Away_Score", "Away_Team", "Home_Score", "Home_Team")
# separate() leaves the score columns as character strings. Comparing them
# directly is a string comparison ("9" > "10" is TRUE), which picks the wrong
# winner whenever the two scores have a different number of digits. Compare
# the numeric values instead; the stored columns keep their original type.
# As before, equal scores fall through to the home team.
scores$Winner <-
  ifelse(
    as.numeric(scores$Away_Score) > as.numeric(scores$Home_Score),
    scores$Away_Team,
    scores$Home_Team
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment