Skip to content

Instantly share code, notes, and snippets.

@timriffe
Created June 28, 2022 10:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timriffe/7e9ece9d86489396baa333208a285b47 to your computer and use it in GitHub Desktop.
Save timriffe/7e9ece9d86489396baa333208a285b47 to your computer and use it in GitHub Desktop.
toy example research code that we want to convert into a reproducibility repository. Used in SICSS Covenant workshop, summer 2022
"https://covid.ourworldindata.org/data/owid-covid-data.csv"
download.file("https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/EXCEL_FILES/3_Mortality/WPP2019_MORT_F07_2_LIFE_EXPECTANCY_0_MALE.xlsx",
destfile = "Data/wpp2019_male_e0.xlsx")
download.file("https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/EXCEL_FILES/3_Mortality/WPP2019_MORT_F07_3_LIFE_EXPECTANCY_0_FEMALE.xlsx",
destfile = "Data/wpp2019_female_e0.xlsx")
library(readr)
library(readxl)
library(tidyverse)
library(lubridate)
library(janitor)
# what's the UN code for Nigeria?
Nigeria_code_UN <-
countrycode::countrycode("Nigeria",
origin = 'country.name',
destination = "un" )
e0m <-
read_excel("Data/wpp2019_male_e0.xlsx",
skip = 16,
na = "...") %>%
filter(`Country code` == Nigeria_code_UN) %>%
pivot_longer(`1950-1955`:`2015-2020`,
names_to = "period",
values_to = "e0") %>%
mutate(sex = "Male") %>%
select(period, sex, e0)
e0f <-
read_excel("Data/wpp2019_female_e0.xlsx",
skip = 16,
na = "...") %>%
filter(`Country code` == Nigeria_code_UN) %>%
pivot_longer(`1950-1955`:`2015-2020`,
names_to = "period",
values_to = "e0") %>%
mutate(sex = "Female") %>%
select(period, sex, e0)
e0wpp <-
bind_rows(e0m, e0f) %>%
separate(period,
into = c("year", NA),
sep = "-",
convert = TRUE) %>%
mutate(source = "WPP2019",
year = year + 2.5) # mid period reference
# -------------------------------------------- #
# compare with WHO
download.file("http://apps.who.int/gho/athena/data/GHO/WHOSIS_000001,WHOSIS_000015,WHOSIS_000002,WHOSIS_000007?filter=COUNTRY:*&ead=&x-sideaxis=COUNTRY;YEAR&x-topaxis=GHO;SEX&profile=crosstable&format=csv",
destfile = "Data/WHO_e0.csv")
e0who <- read_csv("Data/WHO_e0.csv",
show_col_types = FALSE) %>%
select(1, 2, contains("birth")) %>%
select(1, 2, !contains("HALE")) %>%
row_to_names(1) %>%
select(Country, Year, Male, Female) %>%
pivot_longer(c(Male, Female),
names_to = "sex",
values_to = "e0") %>%
clean_names() %>%
mutate(source = "WHO",
e0 = as.double(e0),
year = as.double(year) + .5) %>%
filter(country == "Nigeria")
e0 <-
bind_rows(e0who,
e0wpp)
# how big are differences
e0 %>%
filter(year > 2000) %>%
ggplot(aes(x = year,
y = e0,
color = sex,
linetype = source,
group = interaction(sex, source))) +
geom_line(size = 1.5) +
labs(title = "wildly different life expectancy estimates for Nigeria")+
theme_minimal()
# what if we want to compare point estimates?
# may as well do a linear interpolation to match UN to WHO time references.
who_time <- e0who %>%
pull(year) %>%
unique() %>%
sort()
library(Hmisc)
my_interp <- function(chunk){
who_time <- c(2000.5, 2010.5, 2015.5, 2019.5)
approxExtrap(x = chunk$year,
y = chunk$e0,
xout = who_time) %>%
as_tibble() %>%
rename(year = x, e0 = y)
}
e0diff <-
e0wpp %>%
group_by(sex, source) %>%
# this function is in a happy middle-ground
# between mutate() and summarize()
group_modify(~ my_interp(.x)) %>%
mutate(country = "Nigeria") %>%
bind_rows(e0who) %>%
pivot_wider(names_from = source, values_from = e0) %>%
mutate(e0diff = WHO - WPP2019)
# plot the difference
e0diff %>%
ggplot(aes(x = year, y = e0diff, color = sex)) +
geom_line(size = 1.5) +
theme_minimal() +
labs(y = "UN - WHO e0 estimates",
title = "Large discrepancies in life expectacy estimates for Nigeria")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment