Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Nebraska High School Basketball Rankings
library(dplyr)
library(tidyr)
library(rvest)
library(tabulizer)
# Class A school names, entered by hand.
# Order matters: element i is paired positionally with element i of
# `Enrollment` when the two vectors are combined into a data frame.
# These names are later used as a join key against the scraped winner names,
# so each entry must be free of leading/trailing whitespace.
School <- c(
  "Omaha South",
  "Omaha Central",
  "Grand Island",
  "Millard North",
  "Millard South",
  "Millard West",
  "Lincoln East",
  "Lincoln High",
  "North Star",
  "Creighton Prep",
  "Omaha North",
  "Lincoln Southeast",
  "Burke",
  "Lincoln Southwest",
  "Bryan",
  "Omaha Westside",
  "Papillion-LaVista South",
  "Papillion-LaVista",
  "Lincoln Northeast",
  "Bellevue West",
  "Omaha Northwest",
  "Kearney",
  "Fremont",
  "Bellevue East",
  "Benson",
  "Gretna",
  "Elkhorn",
  "Elkhorn South",
  "Norfolk",
  "Columbus",
  "North Platte",
  "Lincoln Pius X",
  "South Sioux City"
)
# Class A enrollment figures, entered by hand.
# Positional companion to `School`: the i-th value here is combined with the
# i-th school name when the data frame is built, so keep both in step.
Enrollment <- c(
  2166, 2051, 1982, 1920, 1881,
  1783, 1695, 1692, 1571, 1548,
  1522, 1515, 1514, 1501, 1480,
  1452, 1442, 1368, 1280, 1240,
  1236, 1188, 1113, 1099, 1062,
  1050, 1026, 1008, 1005, 971,
  905, 897, 860
)
# Pair every school with its enrollment: one row per school, with columns
# `School` and `Enrollment` taken positionally from the two vectors.
df1 <- data.frame(School = School, Enrollment = Enrollment)
# MaxPreps schedule/scores page for a single day (1/11/2020), boys
# basketball, state "ne". The division id in the query string is opaque here.
maxprep_page <- "https://www.maxpreps.com/list/schedules_scores.aspx?date=1/11/2020&gendersport=boys,basketball&state=ne&statedivisionid=85757869-a232-41b9-a6b3-727edb24825e"

# Download and parse the page (requires network access).
maxprep_html <- read_html(maxprep_page)

# Keep the text of every node whose data-contest-state attribute is
# 'boxscore'; the result is a character vector with one element per node.
df2 <- html_text(
  html_nodes(maxprep_html, "[data-contest-state='boxscore']")
)
# Clean data ----
# Remove the literal "Final" label from each game's text.
df2 <- gsub("Final", "", df2, fixed = TRUE)

# Drop every entry containing "#".
# NOTE(review): presumably these are ranking markers such as "#3", whose
# digits would be mistaken for a score by the split below -- confirm.
df2 <- grep("#", df2, invert = TRUE, value = TRUE, fixed = TRUE)

# One-column data frame of raw game strings.
df2 <- data.frame(V1 = df2, stringsAsFactors = FALSE)

# Separate into multiple columns ----
# Wrap every run of digits in ";" so that splitting on ";" yields
#   <leading text> ; score ; team ; score ; team
# The leading piece is discarded (NA in the column spec).
df2 <- df2 %>%
  mutate(V1 = gsub("(\\d+)", ";\\1;", V1)) %>%
  separate(V1, c(NA, "No1", "Let1", "No2", "Let2"), sep = " *; *") %>%
  mutate(
    # separate() returns character columns; scores must be numeric, otherwise
    # the comparisons below are lexicographic (e.g. "9" > "55").
    No1 = as.integer(No1),
    No2 = as.integer(No2),
    # Team names are later matched against hand-typed school names, so strip
    # any surrounding whitespace/newlines left over from the page text.
    Let1 = trimws(Let1),
    Let2 = trimws(Let2)
  )
colnames(df2) <- c("Away Score", "Away Team", "Home Score", "Home Team")

# Date of the scraped games; keep in sync with the `date=` query parameter
# of the page URL.
df2$Date <- "1/11/2020"

# Winner / loser by numeric score. A tie or an unparsable score yields NA
# instead of crediting the home team with both outcomes.
df2$Winner <- case_when(
  df2$`Away Score` > df2$`Home Score` ~ df2$`Away Team`,
  df2$`Away Score` < df2$`Home Score` ~ df2$`Home Team`
)
df2$Loser <- case_when(
  df2$`Away Score` < df2$`Home Score` ~ df2$`Away Team`,
  df2$`Away Score` > df2$`Home Score` ~ df2$`Home Team`
)
# View tables.
# View() opens a data viewer window, so only call it in an interactive
# session; this lets the script also run non-interactively (e.g. Rscript).
if (interactive()) {
  View(df1)
  View(df2)
}
# Wins per Class A school for the scraped day.
# Count games per winner, then right-join onto df1 so that every school in
# df1 appears (and only those); schools with no recorded win get 0.
# The join key is spelled out to avoid the implicit "Joining, by = ..."
# behaviour, and wins are counted directly from df2 so a game is not dropped
# because of an NA in an unrelated column.
df2 %>%
  count(School = Winner, name = "Wins") %>%
  right_join(df1, by = "School") %>%
  mutate(Wins = coalesce(Wins, 0L))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment