Skip to content

Instantly share code, notes, and snippets.

@charliejhadley
Created May 11, 2022 16:26
Show Gist options
  • Save charliejhadley/d67e42145c4e4394ae2a8e1ad1b62e2c to your computer and use it in GitHub Desktop.
Save charliejhadley/d67e42145c4e4394ae2a8e1ad1b62e2c to your computer and use it in GitHub Desktop.
replace-na-shenanigans.R
library(tidyverse)
library(janitor)
library(readxl)
# Fetch the raw enrollment workbooks and read every input sheet ----

# download.file() does not create missing directories, so make sure the
# target folder exists before writing into it.
dir.create("data-raw", showWarnings = FALSE, recursive = TRUE)

download.file(
  url = "https://github.com/rfortherestofus/going-deeper/raw/master/data-raw/enrollment-18-19.xlsx",
  mode = "wb", # binary mode so the .xlsx is not corrupted on Windows
  destfile = "data-raw/enrollment-18-19.xlsx"
)
download.file(
  url = "https://github.com/rfortherestofus/going-deeper/raw/master/data-raw/enrollment-17-18.xlsx",
  mode = "wb",
  destfile = "data-raw/enrollment-17-18.xlsx"
)

# The code further down refers to `district_id` and strips prefixes such as
# "x2018_19_", i.e. it expects janitor-style snake_case column names, so the
# enrollment sheets are passed through clean_names() just like the districts
# sheet. NOTE(review): presumably the raw headers are not snake_case already;
# if they are, clean_names() leaves them unchanged.
enrollment_18_19 <- read_excel(
  path = "data-raw/enrollment-18-19.xlsx",
  sheet = "Sheet 1"
) %>%
  clean_names()

enrollment_17_18 <- read_excel(
  path = "data-raw/enrollment-17-18.xlsx",
  sheet = "Sheet 1"
) %>%
  clean_names()

# NOTE(review): oregon-districts.xlsx is not downloaded by this script; it
# must already be present in data-raw/ or read_excel() will fail.
oregon_districts <- read_excel(
  path = "data-raw/oregon-districts.xlsx",
  sheet = "Sheet1"
) %>%
  clean_names()
# Exploratory pass over the 2018-19 sheet: drop the grade, kindergarten and
# percent columns, reshape to one row per district and column, then turn "-"
# placeholders and missing counts into "0". The result is printed, not stored.
enrollment_18_19 %>%
  select(-contains("grade")) %>%
  select(-contains("kindergarten")) %>%
  select(-contains("percent")) %>%
  pivot_longer(
    cols = -district_id,
    names_to = "race_ethnicity",
    values_to = "number_of_students",
    # Coerce every count column to character while pivoting: columns read as
    # numeric and columns read as text (because they contain "-") can then be
    # stacked, and na_if()/replace_na() below always compare character with
    # character (dplyr >= 1.1.0 refuses to compare a numeric column to "-").
    values_transform = list(number_of_students = as.character)
  ) %>%
  mutate(number_of_students = na_if(number_of_students, "-")) %>%
  mutate(number_of_students = replace_na(number_of_students, "0"))
#' Reshape one year of district enrollment counts into long form
#'
#' Drops every column whose name contains "grade", "kindergarten" or
#' "percent", pivots the remaining columns (all except `district_id`) into
#' `race_ethnicity` / `number_of_students` pairs, treats "-" and missing
#' counts as zero, relabels the race/ethnicity groups and computes each
#' group's share of its district total.
#'
#' @param raw_data Data frame with a `district_id` column plus one count
#'   column per race/ethnicity group. Count columns may be character,
#'   numeric, or a mix of both.
#' @param data_year Value stored in the `year` column of the result.
#' @param race_ethnicity_remove_text Pattern handed to `str_remove()` and
#'   stripped from the pivoted column names (e.g. `"x2018_19_"`).
#' @return A data frame with columns `district_id`, `race_ethnicity`,
#'   `number_of_students` (numeric), `pct` and `year`. Groups not listed in
#'   the relabelling table get `NA` as `race_ethnicity`; `pct` is `NaN` for
#'   districts whose counts sum to zero.
clean_enrollment_data <- function(raw_data, data_year, race_ethnicity_remove_text) {
  raw_data %>%
    select(-contains("grade")) %>%
    select(-contains("kindergarten")) %>%
    select(-contains("percent")) %>%
    pivot_longer(
      cols = -district_id,
      names_to = "race_ethnicity",
      values_to = "number_of_students",
      # Convert to character *before* the columns are stacked and before
      # na_if() runs: mixed numeric/character columns can then be combined,
      # and na_if(x, "-") compares like with like (dplyr >= 1.1.0 errors
      # when a numeric column is compared with a string).
      values_transform = list(number_of_students = as.character)
    ) %>%
    mutate(
      number_of_students = na_if(number_of_students, "-"),
      number_of_students = replace_na(number_of_students, "0"),
      # Any other non-numeric text becomes NA here (with a warning).
      number_of_students = as.numeric(number_of_students)
    ) %>%
    mutate(race_ethnicity = str_remove(race_ethnicity, race_ethnicity_remove_text)) %>%
    mutate(race_ethnicity = case_when(
      race_ethnicity == "american_indian_alaska_native" ~ "American Indian Alaska Native",
      race_ethnicity == "asian" ~ "Asian",
      race_ethnicity == "black_african_american" ~ "Black/African American",
      race_ethnicity == "hispanic_latino" ~ "Hispanic/Latino",
      race_ethnicity == "multiracial" ~ "Multi-Racial",
      race_ethnicity == "native_hawaiian_pacific_islander" ~ "Pacific Islander",
      race_ethnicity == "white" ~ "White"
    )) %>%
    # Share of each group within its own district.
    group_by(district_id) %>%
    mutate(pct = number_of_students / sum(number_of_students)) %>%
    ungroup() %>%
    mutate(year = data_year)
}
# Run the shared cleaning helper once per school year ----
enrollment_by_race_ethnicity_18_19 <- clean_enrollment_data(
  raw_data = enrollment_18_19,
  data_year = "2018-2019",
  race_ethnicity_remove_text = "x2018_19_"
)

enrollment_by_race_ethnicity_17_18 <- clean_enrollment_data(
  raw_data = enrollment_17_18,
  data_year = "2017-2018",
  race_ethnicity_remove_text = "x2017_18_"
)

# Stack both years (older year first), attach the district lookup via the
# district id, rename the share column and move the identifying columns to
# the front.
enrollment_by_race_ethnicity <- list(
  enrollment_by_race_ethnicity_17_18,
  enrollment_by_race_ethnicity_18_19
) %>%
  bind_rows() %>%
  left_join(
    oregon_districts,
    by = c(district_id = "attending_district_institutional_id")
  ) %>%
  rename(percent_of_total_at_school = pct) %>%
  select(district_id, district, everything())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment