Skip to content

Instantly share code, notes, and snippets.

@jeffgswanson
Created January 28, 2020 05:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffgswanson/18d7fc4f7452a2e25680b1aed21b3861 to your computer and use it in GitHub Desktop.
Save jeffgswanson/18d7fc4f7452a2e25680b1aed21b3861 to your computer and use it in GitHub Desktop.
Nebraska High School Basketball Rankings
library(dplyr)
library(tidyr)
library(rvest)
library(tabulizer)
# Manual Class A names.
# Entries are paired by position with the Enrollment vector when the two are
# combined into df1, so the order here must not change independently.
# Names are later matched by exact string equality against the scraped
# winner names, so they must not carry stray leading/trailing whitespace
# (the "Fremont" entry previously had a trailing space).
School <- c(
  "Omaha South",
  "Omaha Central",
  "Grand Island",
  "Millard North",
  "Millard South",
  "Millard West",
  "Lincoln East",
  "Lincoln High",
  "North Star",
  "Creighton Prep",
  "Omaha North",
  "Lincoln Southeast",
  "Burke",
  "Lincoln Southwest",
  "Bryan",
  "Omaha Westside",
  "Papillion-LaVista South",
  "Papillion-LaVista",
  "Lincoln Northeast",
  "Bellevue West",
  "Omaha Northwest",
  "Kearney",
  "Fremont",
  "Bellevue East",
  "Benson",
  "Gretna",
  "Elkhorn",
  "Elkhorn South",
  "Norfolk",
  "Columbus",
  "North Platte",
  "Lincoln Pius X",
  "South Sioux City"
)
# Manual Class A enrollment figures, one per school, listed in the same
# order as the School vector (the two are combined positionally into df1).
# Values are in descending order.
Enrollment <- c(
  2166, 2051, 1982, 1920, 1881, 1783, 1695, 1692, 1571, 1548, 1522,
  1515, 1514, 1501, 1480, 1452, 1442, 1368, 1280, 1240, 1236, 1188,
  1113, 1099, 1062, 1050, 1026, 1008, 1005, 971, 905, 897, 860
)
# Combine the School and Enrollment vectors into one data frame, pairing
# them by position (row i = School[i], Enrollment[i]).
# stringsAsFactors = FALSE keeps School as character on every R version;
# before R 4.0 the default would turn it into a factor, which is then
# joined against the character winner names further down.
df1 <- data.frame(
  School = School,
  Enrollment = Enrollment,
  stringsAsFactors = FALSE
)
# MaxPreps schedules/scores page for a single day
# (query string: date=1/11/2020, boys basketball, state=ne).
maxprep_page <- "https://www.maxpreps.com/list/schedules_scores.aspx?date=1/11/2020&gendersport=boys,basketball&state=ne&statedivisionid=85757869-a232-41b9-a6b3-727edb24825e"

# Download and parse the page.
maxprep_html <- read_html(maxprep_page)

# Pull the text of every node flagged with data-contest-state='boxscore';
# the result is a character vector with one element per matched node.
df2 <- html_text(
  html_nodes(maxprep_html, "[data-contest-state='boxscore']")
)
# Clean data: remove every "Final" label from the scraped strings, discard
# any entry that contains a "#", and store what is left in a one-column
# data frame whose column is named V1.
df2 <- data.frame(
  V1 = grep("#", gsub("Final", "", df2), invert = TRUE, value = TRUE)
)
# Separate each scraped string into score/team columns.
# Every run of digits is wrapped in ";" so it can serve as a delimiter;
# the piece before the first number is discarded (the NA slot).
# NOTE(review): this assumes each entry reads <score><team><score><team>
# and that team names contain no digits - confirm against the live page.
df2 <-
  df2 %>%
  mutate(V1 = gsub("(\\d+)", ";\\1;", V1)) %>%
  separate(V1, c(NA, "No1", "Let1", "No2", "Let2"), sep = " *; *") %>%
  mutate(
    # separate() returns character columns; scores must be numeric, otherwise
    # the comparisons below are lexicographic (e.g. "100" < "99").
    No1 = as.integer(No1),
    No2 = as.integer(No2),
    # Strip any leftover surrounding whitespace so team names can be matched
    # exactly against the School list.
    Let1 = trimws(Let1),
    Let2 = trimws(Let2)
  )
colnames(df2) <- c("Away Score", "Away Team", "Home Score", "Home Team")

# Must stay in sync with the date= parameter of the maxprep_page URL.
df2$Date <- "1/11/2020"

# Winner is the away team when it outscored the home team, otherwise the
# home team; Loser is the away team when it scored less, otherwise the
# home team. (An equal score would label the home team as both.)
df2$Winner <-
  if_else(df2$`Away Score` > df2$`Home Score`, df2$`Away Team`, df2$`Home Team`)
df2$Loser <-
  if_else(df2$`Away Score` < df2$`Home Score`, df2$`Away Team`, df2$`Home Team`)
# View tables.
# View() opens a data viewer, so only call it in an interactive session;
# when the script is run non-interactively (e.g. via Rscript) the tables
# are skipped instead of attempting to open a viewer.
if (interactive()) {
  View(df1)
  View(df2)
}
# Wins per Class A school for the scraped day (result is printed, not stored).
# 1. Attach every game to the school that won it (schools without a win get
#    NA game columns).
# 2. Drop rows containing any NA, which removes the schools with no wins.
# 3. Count the remaining rows per school -> Wins.
# 4. Join back onto the full school list so schools with no wins reappear,
#    using an explicit key rather than relying on dplyr guessing the common
#    column.
# 5. Replace the missing win counts with 0.
df1 %>%
  left_join(df2, by = c("School" = "Winner")) %>%
  na.omit() %>%
  count(School, name = "Wins") %>%
  right_join(df1, by = "School") %>%
  mutate(Wins = replace(Wins, is.na(Wins), 0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment