Created
January 28, 2020 05:47
-
-
Save jeffgswanson/18d7fc4f7452a2e25680b1aed21b3861 to your computer and use it in GitHub Desktop.
Nebraska High School Basketball Rankings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(tidyr) | |
library(rvest) | |
library(tabulizer) | |
# Manual Class A names ----
# Hand-maintained list of Class A school names. The order matters: it is
# paired position-by-position with `Enrollment` when the two vectors are
# combined into a data frame.
# Names must not carry stray leading/trailing whitespace, otherwise they will
# never match a team name when joined against scraped results.
School <- c(
  "Omaha South",
  "Omaha Central",
  "Grand Island",
  "Millard North",
  "Millard South",
  "Millard West",
  "Lincoln East",
  "Lincoln High",
  "North Star",
  "Creighton Prep",
  "Omaha North",
  "Lincoln Southeast",
  "Burke",
  "Lincoln Southwest",
  "Bryan",
  "Omaha Westside",
  "Papillion-LaVista South",
  "Papillion-LaVista",
  "Lincoln Northeast",
  "Bellevue West",
  "Omaha Northwest",
  "Kearney",
  "Fremont",
  "Bellevue East",
  "Benson",
  "Gretna",
  "Elkhorn",
  "Elkhorn South",
  "Norfolk",
  "Columbus",
  "North Platte",
  "Lincoln Pius X",
  "South Sioux City"
)
# Manual Class A enrollment ----
# Hand-entered enrollment figures, listed in descending order. Entry i belongs
# to the i-th name in `School`; the two vectors are combined positionally.
Enrollment <- c(
  2166, 2051, 1982, 1920, 1881, 1783,
  1695, 1692, 1571, 1548, 1522, 1515,
  1514, 1501, 1480, 1452, 1442, 1368,
  1280, 1240, 1236, 1188, 1113, 1099,
  1062, 1050, 1026, 1008, 1005, 971,
  905, 897, 860
)
# Combine the School and Enrollment vectors into one data frame ----
# The vectors are paired by position, so they must be the same length;
# data.frame() would otherwise silently recycle the shorter one whenever the
# lengths happen to divide evenly.
stopifnot(length(School) == length(Enrollment))

# stringsAsFactors = FALSE keeps School as character on R < 4.0 as well, so
# later joins against character team names do not hit factor/character
# coercion.
df1 <- data.frame(School, Enrollment, stringsAsFactors = FALSE)
# MaxPreps data for a single day ----
# The query string selects date=1/11/2020, boys basketball, state=ne and one
# specific statedivisionid.
maxprep_page <- "https://www.maxpreps.com/list/schedules_scores.aspx?date=1/11/2020&gendersport=boys,basketball&state=ne&statedivisionid=85757869-a232-41b9-a6b3-727edb24825e"

# Download and parse the page.
maxprep_html <- read_html(maxprep_page)

# Pull the text of every node flagged with data-contest-state='boxscore'.
# The result is a character vector with one element per matched node.
df2 <- html_text(
  html_nodes(maxprep_html, "[data-contest-state='boxscore']")
)
# Clean data ----
# Strip the "Final" label from each scraped entry, then discard every entry
# that contains a "#" character.
df2 <- gsub("Final", "", df2)
df2 <- grep("#", df2, invert = TRUE, value = TRUE)

# One-column data frame; keep the text as character on R < 4.0 too.
df2 <- data.frame(V1 = df2, stringsAsFactors = FALSE)

# Separate into multiple columns ----
# Every run of digits is wrapped in ";" so it can act as a field delimiter.
# The split assumes each entry reads "<score> <team> <score> <team>": the
# piece before the first score is dropped (NA in `into`) and the remaining
# four pieces become the score/team columns.
# NOTE(review): a team name that itself contains digits would produce extra
# pieces here -- confirm against the live page.
df2 <-
  df2 %>%
  mutate(V1 = gsub("(\\d+)", ";\\1;", V1)) %>%
  separate(
    V1,
    c(NA, "Away Score", "Away Team", "Home Score", "Home Team"),
    sep = " *; *"
  ) %>%
  mutate(
    # separate() yields character columns; compare scores as numbers, not
    # strings ("100" sorts before "99" as text and would flip the winner).
    `Away Score` = as.integer(`Away Score`),
    `Home Score` = as.integer(`Home Score`),
    # Remove stray whitespace so team names can be matched in joins.
    `Away Team` = trimws(`Away Team`),
    `Home Team` = trimws(`Home Team`),
    # mutate() (rather than `$<-`) also works when no games were scraped.
    Date = "1/11/2020",
    # On equal scores both Winner and Loser resolve to the home team.
    Winner = if_else(`Away Score` > `Home Score`, `Away Team`, `Home Team`),
    Loser = if_else(`Away Score` < `Home Score`, `Away Team`, `Home Team`)
  )
# View tables ----
# View() opens a data viewer, so only call it in an interactive session;
# this lets the script also run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(df1)
  View(df2)
}

# Wins per school ----
# 1. left_join attaches each game to the school that won it; schools with no
#    win get a single row whose game columns are all NA.
# 2. na.omit drops those NA rows, leaving one row per win.
# 3. count tallies the remaining rows per school into `Wins`.
# 4. right_join brings back every school in df1 (and its Enrollment); the
#    join key is stated explicitly instead of relying on a natural join.
# 5. Schools without a win have Wins = NA after the join; set them to 0.
# The result is printed, not assigned.
df1 %>%
  left_join(df2, by = c("School" = "Winner")) %>%
  na.omit() %>%
  count(School, name = "Wins") %>%
  right_join(df1, by = "School") %>%
  mutate(Wins = replace(Wins, is.na(Wins), 0))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment