Created
August 7, 2018 20:01
-
-
Save giocomai/0a623ea43c09d782b02eaf49edbc3416 to your computer and use it in GitHub Desktop.
Replicate NY times birth-age gap with Eurostat data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Birth age gap" | |
author: "Giorgio Comai" | |
date: "7 August 2018" | |
output: html_document | |
--- | |
```{r setup, include=FALSE} | |
# Show the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Load all CRAN dependencies through a single pacman call.
pacman::p_load("tidyverse", "eurostat", "stringr", "lubridate", "remotes")

# gganimate can be installed from GitHub with:
# remotes::install_github(repo = "https://github.com/thomasp85/gganimate")
library("gganimate")
``` | |
https://www.nytimes.com/interactive/2018/08/04/upshot/up-birth-age-gap.html | |
# Relevant datasets | |
```{r} | |
# Search the Eurostat catalogue for datasets about births by mother's age
# and print the hits as a table.
id <- search_eurostat(pattern = "births by mother's age")
knitr::kable(x = id)
``` | |
## 1980 VS 2016 | |
```{r message=FALSE} | |
# The search result is kept for reference only; the download below uses the
# dataset code "demo_fagec" directly.
id <- search_eurostat(pattern = "Live births by mother’s age and legal marital status")

# Download the dataset, then replace Eurostat codes with readable labels.
demo_fagec <- get_eurostat(id = "demo_fagec") %>%
  label_eurostat()
``` | |
```{r demo_fagec_italy_gg_animated} | |
# Total live births in Italy, one row per mother's age (in years) and year.
demo_fagec_italy <- demo_fagec %>%
  select(-unit) %>%
  filter(geo == "Italy") %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with a two-digit age followed by " years";
  # every other age label is dropped.
  filter(stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")) %>%
  # Turn the label into a number by extracting its two leading digits.
  mutate(age = as.numeric(stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]"))) %>%
  filter(age < 45) %>% # keep mothers aged 44 or younger
  filter(indic_de == "Live births - total") %>% # all live births
  select(-indic_de) %>%
  mutate(year = lubridate::year(time)) %>%
  select(-geo, -time)

# Bar chart of births by mother's age, animated over calendar years.
demo_fagec_italy_gg_animated <- demo_fagec_italy %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by age of mother in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes("linear")

# Optional exports:
# animate(plot = demo_fagec_italy_gg_animated, renderer = ffmpeg_renderer())
# anim_save(filename = "demo_fagec_italy_gg_animated.mp4", animation = animate(plot = demo_fagec_italy_gg_animated, renderer = ffmpeg_renderer()))
# anim_save(filename = "demo_fagec_italy_gg_animated.gif", animation = animate(plot = demo_fagec_italy_gg_animated))
# knitr::kable(demo_fagec_italy %>% filter(year == 2016))

demo_fagec_italy_gg_animated
``` | |
```{r demo_fagec_italy_gg_animated_1980_2016} | |
# Animation that alternates between the 1980 and the 2016 age distribution
# of live births in Italy.
demo_fagec_italy_gg_animated_1980_2016 <-
  demo_fagec_italy %>%
  filter(year %in% c(1980, 2016)) %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by age of mother in Italy in {closest_state}") +
  # Each year is one state of the animation.
  transition_states(
    year,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

demo_fagec_italy_gg_animated_1980_2016
``` | |
## By marriage status | |
```{r} | |
# Live births in Italy in 2016 by mother's age, split by whether the birth
# occurred in or outside marriage.
demo_fagec_italy_2016_marriage <- demo_fagec %>%
  select(-unit) %>%
  filter(geo == "Italy") %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with a two-digit age followed by " years".
  filter(stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")) %>%
  mutate(
    # Numeric age taken from the two leading digits of the label.
    age = as.numeric(stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")),
    indic_de = as.character(indic_de),
    year = lubridate::year(time)
  ) %>%
  filter(
    age < 45, # mothers aged 44 or younger
    indic_de %in% c("In marriage", "Outside marriage"),
    year == 2016
  ) %>%
  select(-geo, -time)
``` | |
```{r demo_fagec_italy_gg_animated_1980_2016_marriage} | |
# Animation that alternates between births in and outside marriage
# (Italy, 2016 data only).
demo_fagec_italy_gg_animated_1980_2016_marriage <-
  demo_fagec_italy_2016_marriage %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by marriage status of the mother in Italy in 2016:\n{closest_state}") +
  # Each marriage status is one state of the animation.
  transition_states(
    indic_de,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

demo_fagec_italy_gg_animated_1980_2016_marriage
``` | |
## By education | |
```{r byEducationInRomania} | |
# Look up the dataset code; the result is for reference only, because the
# download below uses "demo_faeduc" directly.
id <- search_eurostat(pattern = "Live births by mother's age and educational attainment level")

# Download the dataset, then replace Eurostat codes with readable labels.
demo_faeduc <- get_eurostat(id = "demo_faeduc") %>%
  label_eurostat()

# Live births in Romania by mother's age and education level.
demo_faeduc_ro <- demo_faeduc %>%
  select(-unit) %>%
  filter(geo == "Romania") %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with a two-digit age followed by " years".
  filter(stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")) %>%
  mutate(
    # Numeric age taken from the two leading digits of the label.
    age = as.numeric(stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")),
    year = lubridate::year(time),
    isced11 = as.character(isced11)
  ) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  select(-geo, -time) %>%
  # Keep the three aggregated education levels only.
  filter(
    isced11 %in% c(
      "Less than primary, primary and lower secondary education (levels 0-2)",
      "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
      "Tertiary education (levels 5-8)"
    )
  ) %>%
  # Order the levels from lowest to highest and shorten their labels.
  mutate(
    isced11 = factor(
      x = isced11,
      levels = c(
        "Less than primary, primary and lower secondary education (levels 0-2)",
        "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
        "Tertiary education (levels 5-8)"
      ),
      labels = c(
        "Less than primary, primary and lower secondary education",
        "Upper secondary and post-secondary non-tertiary education",
        "Tertiary education"
      )
    )
  )
# Animation that cycles through the education levels of mothers in Romania.
# NOTE(review): the data are not filtered by year, so geom_col() stacks the
# bars of every available year for each age - confirm this is intended.
demo_faeduc_ro_gganim <- demo_faeduc_ro %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by education level of the mother in Romania:\n{closest_state}") +
  # Each education level is one state of the animation.
  transition_states(
    isced11,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

demo_faeduc_ro_gganim
``` | |
```{r demo_faeduc_see_gganim} | |
# Live births by mother's age and education level for a set of
# South East European countries.
demo_faeduc_see <- demo_faeduc %>%
  # Shorten the long official country label so it fits the facet strips.
  mutate(geo = stringr::str_replace(string = as.character(geo), pattern = "Former Yugoslav Republic of Macedonia, the", replacement = "Macedonia")) %>%
  select(-unit) %>%
  filter(
    geo %in% c(
      "Romania",
      "Serbia",
      "Bulgaria",
      "Albania",
      # "Georgia",
      # "Armenia",
      # "Azerbaijan",
      "Croatia",
      "Macedonia",
      "Slovenia"
    )
  ) %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with a two-digit age followed by " years".
  filter(stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")) %>%
  mutate(
    # Numeric age taken from the two leading digits of the label.
    age = as.numeric(stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")),
    year = lubridate::year(time),
    isced11 = as.character(isced11)
  ) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  select(-time) %>%
  # Keep the three aggregated education levels only.
  filter(
    isced11 %in% c(
      "Less than primary, primary and lower secondary education (levels 0-2)",
      "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
      "Tertiary education (levels 5-8)"
    )
  ) %>%
  # Order the levels from lowest to highest and shorten their labels.
  mutate(
    isced11 = factor(
      x = isced11,
      levels = c(
        "Less than primary, primary and lower secondary education (levels 0-2)",
        "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
        "Tertiary education (levels 5-8)"
      ),
      labels = c(
        "Less than primary, primary and lower secondary education",
        "Upper secondary and post-secondary non-tertiary education",
        "Tertiary education"
      )
    )
  )
# Animation that cycles through mothers' education levels, with one facet row
# per country.
# NOTE(review): the data are not filtered by year, so geom_col() stacks the
# bars of every available year for each age - confirm this is intended.
demo_faeduc_see_gganim <- demo_faeduc_see %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    guide = "none"
  ) +
  theme_minimal() +
  facet_grid(geo ~ .) +
  # The previous title was copied from another chart ("marriage status ... in
  # Romania"); this chart shows education levels for several countries.
  labs(title = "Number of live births by education level of the mother in SEE:\n{closest_state}") +
  # Each education level is one state of the animation.
  transition_states(
    isced11,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

demo_faeduc_see_gganim
``` | |
```{r demo_faeduc_see_gganim_pct} | |
# Share of births at each mother's age within every country, year and
# education level.
demo_faeduc_see_pct <- demo_faeduc_see %>%
  group_by(geo, year, isced11) %>%
  # na.rm = TRUE: one missing count must not turn the whole group's shares
  # into NA.
  mutate(pct = values / sum(values, na.rm = TRUE)) %>%
  # Drop the grouping so later operations do not silently run per group.
  ungroup()
# Animation of the age distribution (as shares) of mothers by education
# level, with one facet row per country.
# NOTE(review): shares are computed per year but the plot does not filter by
# year, so stacked bars can add up to more than 100% - confirm this is
# intended.
demo_faeduc_see_gganim_pct <- demo_faeduc_see_pct %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Share of births by age of mother", labels = scales::percent) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    guide = "none"
  ) +
  theme_minimal() +
  facet_grid(geo ~ .) +
  labs(
    title = "Share of births by age of mother and education in SEE\n{closest_state}",
    caption = "Source: Eurostat, indicator demo_faeduc"
  ) +
  # Each education level is one state of the animation.
  transition_states(
    isced11,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

demo_faeduc_see_gganim_pct
``` | |
## By birth order | |
```{r} | |
# Download the dataset with code "demo_fordagec", then replace Eurostat codes
# with readable labels.
demo_fordagec <- get_eurostat(id = "demo_fordagec") %>%
  label_eurostat()
``` | |
```{r} | |
# First births in Italy, one row per mother's age (in years) and year.
demo_fordagec_italy <- demo_fordagec %>%
  select(-unit) %>%
  filter(geo == "Italy") %>%
  filter(ord_brth == "First") %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with a two-digit age followed by " years".
  filter(stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")) %>%
  mutate(
    # Numeric age taken from the two leading digits of the label.
    age = as.numeric(stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")),
    year = lubridate::year(time)
  ) %>%
  filter(age < 45) %>% # mothers aged 44 or younger
  select(-geo, -time)

# knitr::kable(demo_fordagec_italy %>% filter(year == 2016))
``` | |
```{r eval=FALSE} | |
# Static bar chart of the 2016 first-birth distribution (this chunk is marked
# eval=FALSE, so it is not run when knitting).
ggplot(
  data = filter(demo_fordagec_italy, year == 2016),
  mapping = aes(x = age, y = values)
) +
  geom_col()
``` | |
```{r demo_fordagec_italy_gg_animated} | |
# Bar chart of first births by mother's age in Italy, animated over years.
demo_fordagec_italy_gg_animated <- demo_fordagec_italy %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Number of live births per year", labels = scales::comma) +
  scale_x_continuous(name = "Ages of first-time mothers") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Number of live births by age of first-time mothers in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes("linear")

demo_fordagec_italy_gg_animated

# Optional exports:
# animate(plot = demo_fordagec_italy_gg_animated, renderer = ffmpeg_renderer())
# anim_save(filename = "demo_fordagec_italy_gg_animated.mp4", animation = animate(plot = demo_fordagec_italy_gg_animated, renderer = ffmpeg_renderer()))
# anim_save(filename = "demo_fordagec_italy_gg_animated.gif", animation = animate(plot = demo_fordagec_italy_gg_animated))
``` | |
```{r} | |
# Share of each year's first births that falls on each mother's age.
demo_fordagec_italy_pct <- demo_fordagec_italy %>%
  group_by(year) %>%
  # na.rm = TRUE: one missing count must not turn the whole year's shares
  # into NA.
  mutate(pct = values / sum(values, na.rm = TRUE)) %>%
  # Drop the grouping so later operations do not silently run per year.
  ungroup()
``` | |
```{r demo_fordagec_italy_gg_animated_pct} | |
# Age distribution of first-time mothers in Italy as shares of each year's
# first births, animated over years.
demo_fordagec_italy_gg_animated_pct <- demo_fordagec_italy_pct %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Percent of live births per year", labels = scales::percent) +
  scale_x_continuous(name = "Ages of first-time mothers") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Ages of first-time mothers in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes("linear")

demo_fordagec_italy_gg_animated_pct
``` | |
```{r demo_fordagec_italy_gg_animated_1980_2016_pct} | |
# Animation that alternates between the 1980 and the 2016 age distribution
# (as shares) of first-time mothers in Italy.
demo_fordagec_italy_gg_animated_1980_2016_pct <-
  demo_fordagec_italy_pct %>%
  filter(year %in% c(1980, 2016)) %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Share of live births per year", labels = scales::percent) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated).
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", midpoint = 25, high = "#9bbd84",
    guide = "none"
  ) +
  theme_minimal() +
  labs(title = "Ages of first-time mothers in Italy in {closest_state}") +
  # Each year is one state of the animation.
  transition_states(
    year,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

demo_fordagec_italy_gg_animated_1980_2016_pct
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment