Skip to content

Instantly share code, notes, and snippets.

@giocomai
Created August 7, 2018 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save giocomai/0a623ea43c09d782b02eaf49edbc3416 to your computer and use it in GitHub Desktop.
Save giocomai/0a623ea43c09d782b02eaf49edbc3416 to your computer and use it in GitHub Desktop.
Replicate NY times birth-age gap with Eurostat data
---
title: "Birth age gap"
author: "Giorgio Comai"
date: "7 August 2018"
output: html_document
---
```{r setup, include=FALSE}
# Show each chunk's source code in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
# Load the packages used in this document through pacman::p_load().
pacman::p_load("tidyverse")
pacman::p_load("eurostat")
pacman::p_load("stringr")
pacman::p_load("lubridate")
pacman::p_load("remotes")
# gganimate is attached with library(); the commented-out call below shows
# how to install it from GitHub with remotes.
#remotes::install_github(repo = "https://github.com/thomasp85/gganimate")
library("gganimate")
```
This document replicates the New York Times birth-age gap analysis with Eurostat data: <https://www.nytimes.com/interactive/2018/08/04/upshot/up-birth-age-gap.html>
# Relevant datasets
```{r}
# Search the Eurostat table of contents for datasets about births by the
# mother's age, then render the matches as a table.
id <- search_eurostat(pattern = "births by mother's age")
knitr::kable(x = id)
```
## 1980 vs 2016
```{r message=FALSE}
# Look the dataset up by title; the download below passes the dataset code
# directly rather than using this search result.
# NOTE(review): the pattern contains a typographic apostrophe (’) - confirm
# that it matches the title in the Eurostat table of contents.
id <- search_eurostat(pattern = "Live births by mother’s age and legal marital status")
# Download the dataset, then replace its codes with human-readable labels.
demo_fagec <- get_eurostat(id = "demo_fagec") %>%
  label_eurostat()
```
```{r demo_fagec_italy_gg_animated}
# Total live births in Italy, one row per mother's age and year.
demo_fagec_italy <- demo_fagec %>%
  select(-unit) %>%
  filter(geo == "Italy") %>%
  mutate(age = as.character(age)) %>%
  # Keep only rows whose age label starts with two digits followed by
  # " years"; every other age category is dropped.
  filter(stringr::str_detect(age, "^[[:digit:]][[:digit:]] years")) %>%
  # Convert the label to a number by extracting its first two digits.
  mutate(age = as.numeric(stringr::str_extract(age, "[[:digit:]][[:digit:]]"))) %>%
  # Keep mothers aged 44 or younger; this matches the upper limit of the
  # fill scale used in the plot below.
  filter(age < 45) %>%
  # Keep only the "Live births - total" indicator.
  filter(indic_de == "Live births - total") %>%
  select(-indic_de) %>%
  mutate(year = lubridate::year(time)) %>%
  select(-geo, -time)

# Bar chart of births by mother's age, animated over the years.
demo_fagec_italy_gg_animated <- demo_fagec_italy %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  # frame_time is interpolated between years, so round it for the title.
  labs(title = "Number of live births by age of mother in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes('linear')

# Optional rendering / export calls, kept for reference:
# animate(plot = demo_fagec_italy_gg_animated, renderer = ffmpeg_renderer())
# anim_save(filename = "demo_fagec_italy_gg_animated.mp4", animation = animate(plot = demo_fagec_italy_gg_animated, renderer = ffmpeg_renderer()))
# anim_save(filename = "demo_fagec_italy_gg_animated.gif", animation = animate(plot = demo_fagec_italy_gg_animated))
#knitr::kable(demo_fagec_italy %>% filter(year==2016))
demo_fagec_italy_gg_animated
```
```{r demo_fagec_italy_gg_animated_1980_2016}
# Animation that switches between the 1980 and 2016 distributions of births
# by mother's age in Italy.
demo_fagec_italy_gg_animated_1980_2016 <- demo_fagec_italy %>%
  filter(year %in% c(1980, 2016)) %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by age of mother in Italy in {closest_state}") +
  # Each year is one state of the animation.
  transition_states(year, transition_length = 0.5, state_length = 1) +
  enter_fade() +
  exit_shrink() +
  ease_aes('sine-in-out')

demo_fagec_italy_gg_animated_1980_2016
```
## By marriage status
```{r}
# Live births in Italy in 2016 by mother's age, keeping the "In marriage" and
# "Outside marriage" indicators as separate rows.
demo_fagec_italy_2016_marriage <- demo_fagec %>%
  select(-unit) %>%
  filter(geo == "Italy") %>%
  mutate(age = as.character(age)) %>%
  # Keep only rows whose age label starts with two digits followed by " years".
  filter(stringr::str_detect(age, "^[[:digit:]][[:digit:]] years")) %>%
  # Convert the label to a number by extracting its first two digits.
  mutate(age = as.numeric(stringr::str_extract(age, "[[:digit:]][[:digit:]]"))) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  mutate(indic_de = as.character(indic_de)) %>%
  filter(indic_de %in% c("In marriage", "Outside marriage")) %>%
  mutate(year = lubridate::year(time)) %>%
  filter(year == 2016) %>%
  select(-geo, -time)
```
```{r demo_fagec_italy_gg_animated_1980_2016_marriage}
# Animation that switches between the values of indic_de (marriage status)
# for births by mother's age in Italy. Despite the "1980_2016" in the object
# name, the title and the name of the input data refer to 2016 only.
demo_fagec_italy_gg_animated_1980_2016_marriage <- demo_fagec_italy_2016_marriage %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by marriage status of the mother in Italy in 2016:\n{closest_state}") +
  # Each value of indic_de is one state of the animation.
  transition_states(indic_de, transition_length = 0.5, state_length = 1) +
  enter_fade() +
  exit_shrink() +
  ease_aes('sine-in-out')

demo_fagec_italy_gg_animated_1980_2016_marriage
```
## By education
```{r byEducationInRomania}
# extract exact code
# Look the dataset up by title; the download below passes the dataset code
# directly rather than using this search result.
id <- search_eurostat("Live births by mother's age and educational attainment level")
# Download the dataset, then replace its codes with human-readable labels.
demo_faeduc <- get_eurostat(id = "demo_faeduc") %>%
  label_eurostat()

# Live births in Romania by mother's age and education level.
demo_faeduc_ro <- demo_faeduc %>%
  select(-unit) %>%
  filter(geo == "Romania") %>%
  mutate(age = as.character(age)) %>%
  # Keep only rows whose age label starts with two digits followed by " years".
  filter(stringr::str_detect(age, "^[[:digit:]][[:digit:]] years")) %>%
  # Convert the label to a number by extracting its first two digits.
  mutate(age = as.numeric(stringr::str_extract(age, "[[:digit:]][[:digit:]]"))) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  mutate(year = lubridate::year(time)) %>%
  select(-geo, -time) %>%
  # Recode the three education levels of interest to shorter labels, in
  # ascending order. Every other value becomes NA and is dropped right after,
  # so the list of levels is written only once.
  mutate(isced11 = factor(
    x = as.character(isced11),
    levels = c(
      "Less than primary, primary and lower secondary education (levels 0-2)",
      "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
      "Tertiary education (levels 5-8)"
    ),
    labels = c(
      "Less than primary, primary and lower secondary education",
      "Upper secondary and post-secondary non-tertiary education",
      "Tertiary education"
    )
  )) %>%
  filter(!is.na(isced11))

# Animation that switches between education levels.
# NOTE(review): no year filter is applied, so if the dataset covers several
# years geom_col() stacks them and the y axis shows the total over all years
# rather than births "per year" - confirm that this is intended.
demo_faeduc_ro_gganim <- demo_faeduc_ro %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by education level of the mother in Romania:\n{closest_state}") +
  # Each education level is one state of the animation.
  transition_states(isced11, transition_length = 0.5, state_length = 1) +
  enter_fade() +
  exit_shrink() +
  ease_aes('sine-in-out')

demo_faeduc_ro_gganim
```
```{r demo_faeduc_see_gganim}
# Live births by mother's age and education level for a set of South-East
# European countries.
demo_faeduc_see <- demo_faeduc %>%
  # Shorten the long country label so that it fits in the facet strip.
  mutate(geo = stringr::str_replace(
    string = as.character(geo),
    pattern = "Former Yugoslav Republic of Macedonia, the",
    replacement = "Macedonia"
  )) %>%
  select(-unit) %>%
  filter(geo %in% c(
    "Romania", "Serbia", "Bulgaria", "Albania",
    # "Georgia", "Armenia", "Azerbaijan",
    "Croatia", "Macedonia", "Slovenia"
  )) %>%
  mutate(age = as.character(age)) %>%
  # Keep only rows whose age label starts with two digits followed by " years".
  filter(stringr::str_detect(age, "^[[:digit:]][[:digit:]] years")) %>%
  # Convert the label to a number by extracting its first two digits.
  mutate(age = as.numeric(stringr::str_extract(age, "[[:digit:]][[:digit:]]"))) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  mutate(year = lubridate::year(time)) %>%
  select(-time) %>%
  # Recode the three education levels of interest to shorter labels, in
  # ascending order. Every other value becomes NA and is dropped right after,
  # so the list of levels is written only once.
  mutate(isced11 = factor(
    x = as.character(isced11),
    levels = c(
      "Less than primary, primary and lower secondary education (levels 0-2)",
      "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
      "Tertiary education (levels 5-8)"
    ),
    labels = c(
      "Less than primary, primary and lower secondary education",
      "Upper secondary and post-secondary non-tertiary education",
      "Tertiary education"
    )
  )) %>%
  filter(!is.na(isced11))

# One facet per country; the animation switches between education levels.
# NOTE(review): no year filter is applied, so if the dataset covers several
# years geom_col() stacks them and the y axis shows the total over all years
# rather than births "per year" - confirm that this is intended.
demo_faeduc_see_gganim <- demo_faeduc_see %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  facet_grid(geo ~ .) +
  # The previous title ("marriage status ... in Romania") was copied from
  # another chunk; this plot shows education levels for several countries.
  labs(title = "Number of live births by education level of the mother in South-East Europe:\n{closest_state}") +
  transition_states(isced11, transition_length = 0.5, state_length = 1) +
  enter_fade() +
  exit_shrink() +
  ease_aes('sine-in-out')

demo_faeduc_see_gganim
```
```{r demo_faeduc_see_gganim_pct}
# Share of births at each mother's age within every country and education
# level. The plot below does not split by year, so geom_col() stacks all
# years in one bar; normalising within geo and isced11 only (not per year)
# makes the stacked bars of each facet and state add up to 100%.
# na.rm = TRUE stops a single missing value from turning a whole group into
# NA, and ungroup() keeps the grouping from leaking into later steps.
demo_faeduc_see_pct <- demo_faeduc_see %>%
  group_by(geo, isced11) %>%
  mutate(pct = values / sum(values, na.rm = TRUE)) %>%
  ungroup()

# One facet per country; the animation switches between education levels.
demo_faeduc_see_gganim_pct <- demo_faeduc_see_pct %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Share of births by age of mother", labels = scales::percent) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  facet_grid(geo ~ .) +
  labs(
    title = "Share of births by age of mother and education in SEE\n{closest_state}",
    caption = "Source: Eurostat, indicator demo_faeduc"
  ) +
  transition_states(isced11, transition_length = 0.5, state_length = 1) +
  enter_fade() +
  exit_shrink() +
  ease_aes('sine-in-out')

demo_faeduc_see_gganim_pct
```
## By birth order
```{r}
# Download the dataset, then replace its codes with human-readable labels.
demo_fordagec <- get_eurostat(id = "demo_fordagec") %>%
  label_eurostat()
```
```{r}
# First births (ord_brth == "First") in Italy, one row per mother's age and
# year.
demo_fordagec_italy <- demo_fordagec %>%
  select(-unit) %>%
  filter(geo == "Italy") %>%
  filter(ord_brth == "First") %>%
  mutate(age = as.character(age)) %>%
  # Keep only rows whose age label starts with two digits followed by " years".
  filter(stringr::str_detect(age, "^[[:digit:]][[:digit:]] years")) %>%
  # Convert the label to a number by extracting its first two digits.
  mutate(age = as.numeric(stringr::str_extract(age, "[[:digit:]][[:digit:]]"))) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  mutate(year = lubridate::year(time)) %>%
  select(-geo, -time)
#knitr::kable(demo_fordagec_italy %>% filter(year==2016))
```
```{r eval=FALSE}
# Static bar chart of the 2016 rows, by mother's age.
ggplot(
  data = filter(demo_fordagec_italy, year == 2016),
  mapping = aes(x = age, y = values)
) +
  geom_col()
```
```{r demo_fordagec_italy_gg_animated}
# Bar chart of births to first-time mothers in Italy by age, animated over
# the years.
demo_fordagec_italy_gg_animated <- demo_fordagec_italy %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Ages of first-time mothers") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  # frame_time is interpolated between years, so round it for the title.
  labs(title = "Number of live births by age of first-time mothers in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes('linear')

demo_fordagec_italy_gg_animated
# Optional rendering / export calls, kept for reference:
# animate(plot = demo_fordagec_italy_gg_animated, renderer = ffmpeg_renderer())
# anim_save(filename = "demo_fordagec_italy_gg_animated.mp4", animation = animate(plot = demo_fordagec_italy_gg_animated, renderer = ffmpeg_renderer()))
# anim_save(filename = "demo_fordagec_italy_gg_animated.gif", animation = animate(plot = demo_fordagec_italy_gg_animated))
```
```{r}
# Share of each year's first births that falls on every mother's age.
# na.rm = TRUE stops a single missing value from turning a whole year into
# NA, and ungroup() keeps the per-year grouping from leaking into later steps.
demo_fordagec_italy_pct <- demo_fordagec_italy %>%
  group_by(year) %>%
  mutate(pct = values / sum(values, na.rm = TRUE)) %>%
  ungroup()
```
```{r demo_fordagec_italy_gg_animated_pct}
# Age distribution of first-time mothers in Italy as a share of each year's
# first births, animated over the years.
demo_fordagec_italy_gg_animated_pct <- demo_fordagec_italy_pct %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Percent of live births per year", labels = scales::percent) +
  scale_x_continuous(name = "Ages of first-time mothers") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  # frame_time is interpolated between years, so round it for the title.
  labs(title = "Ages of first-time mothers in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes('linear')

demo_fordagec_italy_gg_animated_pct
```
```{r demo_fordagec_italy_gg_animated_1980_2016_pct}
# Animation that switches between the 1980 and 2016 age distributions of
# first-time mothers in Italy, expressed as shares.
demo_fordagec_italy_gg_animated_1980_2016_pct <- demo_fordagec_italy_pct %>%
  filter(year %in% c(1980, 2016)) %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Share of live births per year", labels = scales::percent) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.0.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Ages of first-time mothers in Italy in {closest_state}") +
  # Each year is one state of the animation.
  transition_states(year, transition_length = 0.5, state_length = 1) +
  enter_fade() +
  exit_shrink() +
  ease_aes('sine-in-out')

demo_fordagec_italy_gg_animated_1980_2016_pct
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment