Skip to content

Instantly share code, notes, and snippets.

@jmcastagnetto
Created January 12, 2022 15:48
Show Gist options
  • Save jmcastagnetto/066559c43154371cdc904b3f1cd62400 to your computer and use it in GitHub Desktop.
Save jmcastagnetto/066559c43154371cdc904b3f1cd62400 to your computer and use it in GitHub Desktop.
Cruzar datos de positivos con vacunados (COVID-19, Peru - basado en datos abiertos del MINSA)
library(tidyverse)
library(arrow)
# Positive-case records, read from the RDS file produced by
# https://github.com/jmcastagnetto/covid-19-peru-limpiar-datos-minsa
# Only the person id, result date and diagnostic method are kept; exact
# duplicate rows are dropped and the id is coerced to integer.
positivos <- readRDS("tmp/positivos_covid_aumentado.rds") %>%
  select(all_of(c("id_persona", "fecha_resultado", "metododx"))) %>%
  distinct() %>%
  mutate(id_persona = as.integer(id_persona))
# Intermediate dataset generated while processing the vaccination data
vacunados_ds <- open_dataset(
  "tmp/arrow_augmented_data/"
)
# NOTE(review): `vacunados_df` is never read in the visible script; it is
# kept only in case code outside this view relies on it.
vacunados_df <- tibble()

# For each parquet file of the vaccination dataset: keep the rows flagged as
# general vaccination, left-join the positive results by person id, and write
# the result to the same relative path under "vacunados_positivos".
for (fn in vacunados_ds$files) {
  cat(fn, "\n")
  # Output path mirrors the input path, with the dataset directory swapped
  ofn <- str_replace(fn, "arrow_augmented_data", "vacunados_positivos")
  df <- read_parquet(
    fn,
    col_select = c(
      "id_persona",
      "fecha_vacunacion",
      "dosis",
      "flag_vacunacion_general"
    )
  ) %>%
    filter(flag_vacunacion_general == TRUE) %>%
    select(-flag_vacunacion_general)

  if (nrow(df) == 0) {
    # Nothing is written for this file, so do not report it as done
    cat("Skipped (no rows): ", fn, "\n")
    next
  }

  # Left join: every vaccination row is kept; the result columns are NA for
  # persons without a matching row in `positivos`
  tmp_df <- df %>%
    left_join(
      positivos,
      by = "id_persona"
    )

  # write_parquet() does not create missing parent directories, so make sure
  # the mirrored partition directory exists before writing
  dir.create(dirname(ofn), recursive = TRUE, showWarnings = FALSE)
  write_parquet(
    tmp_df,
    sink = ofn
  )
  cat("Done: ", ofn, "\n")
}
# Re-open the joined output as a dataset and export each of its parquet files
# as a CSV named after the "epi_year=YYYY" / "epi_week=W" parts of its path.
vac_pos <- open_dataset("tmp/vacunados_positivos/")
for (fn in vac_pos$files) {
  # "epi_year=2021" -> "epi_year-2021" (same transformation for the week)
  epiyear <- str_replace(str_extract(fn, "epi_year=\\d{4}"), "=", "-")
  epiweek <- str_replace(str_extract(fn, "epi_week=\\d{1,2}"), "=", "-")
  ofn <- glue::glue("tmp/vacunados_positivos_{epiyear}_{epiweek}.csv")
  tmp <- read_parquet(fn)
  write_csv(tmp, ofn)
}
# Load the whole joined dataset into memory and take the distinct person ids
# it contains.
vac_pos_all <- collect(vac_pos)
vac_pos_ids <- unique(vac_pos_all[["id_persona"]])

# Positive cases whose person id does not appear in the joined dataset are
# written out as the "positives, not vaccinated" CSV.
novac_pos <- filter(positivos, !(id_persona %in% vac_pos_ids))
write_csv(novac_pos, "tmp/covid19_peru_positivos_no_vacunados.csv")
@jmcastagnetto
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment