Animated sankey plots for UN migrant stock data (IMS2020) related to https://guyabel.com/post/animated-sankey/ blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(readxl) | |
library(tweenr) | |
library(ggforce) | |
library(gridExtra) | |
library(animation) | |
library(magick) | |
# download the migrant stock origin-destination xlsx file from the UN site | |
x <- tempfile(fileext = ".xlsx") | |
download.file( | |
url = "https://www.un.org/development/desa/pd/sites/www.un.org.development.desa.pd/files/undesa_pd_2020_ims_stock_by_sex_destination_and_origin.xlsx", | |
destfile = x, mode = "wb" | |
) | |
# read in data for all years | |
d <- read_excel(path = x, sheet = 2, range = "B11:N37062", na = "..", guess_max = 1e5) | |
d | |
# # read in data for all years, including "Other" origin for unknowns, not in sheet 2 | |
# d <- tibble(year = seq(from = 2020, to = 1990, by = -5), | |
# sheet = 3:9) %>% | |
# mutate(m = map(.x = sheet, .f = ~read_excel(path = x, sheet = .x, skip = 10))) | |
# d | |
# codes for world bank income groups and "unknown" origin | |
c1 <- c(1503:1500, 2003) | |
# world bank income group data frame | |
d1 <- d %>% | |
select(-2, -4) %>% | |
rename(dest = 1, | |
dest_code = 2, | |
orig = 3, | |
orig_code = 4) %>% | |
filter(orig_code %in% c1, | |
dest_code %in% c1) %>% | |
pivot_longer(cols = -(1:4), names_to = "year", values_to = "stock") %>% | |
mutate_if(is.character, ~str_remove(string = ., pattern = "-income countries")) %>% | |
mutate(year = as.numeric(year), | |
stock = stock/1e6) %>% | |
select(-contains("code")) | |
d1 | |
# # code for world bank income group data frame if read in data with "unknown" origin | |
# d0 <- d %>% | |
# unnest(m) %>% | |
# select(-sheet, -3, -5) %>% | |
# rename(dest = 2, dest_code = 3) %>% | |
# pivot_longer(cols = -(1:3), names_to = "orig_code", values_to = "stock") %>% | |
# filter(dest_code %in% c1, | |
# orig_code %in% c1) %>% | |
# mutate(orig_code = as.numeric(orig_code), | |
# dest = str_remove(string = dest, | |
# pattern = "-income countries")) | |
# | |
# # origin names to join on to data frame | |
# c1 <- d0 %>% | |
# distinct(dest, dest_code) %>% | |
# set_names(nm = str_replace(string = names(.), | |
# pattern = "dest", | |
# replacement = "orig")) | |
# | |
# # join on origin names | |
# d1 <- d0 %>% | |
# left_join(c1) %>% | |
# replace_na(list(orig = "Unknown")) %>% | |
# mutate(stock = stock/1e6) %>% | |
# select(-contains("code")) | |
# tween data frame | |
d2 <- d1 %>% | |
mutate(corridor = paste(orig, dest, sep = " -> ")) %>% | |
select(corridor, year, stock) %>% | |
mutate(ease = "linear") %>% | |
tween_elements(time = "year", group = c("corridor"), ease = "ease", nframes = 50) %>% | |
as_tibble() %>% | |
separate(col = .group, into = c("orig", "dest"), sep = " -> ") %>% | |
relocate(orig, dest) | |
d2 | |
# format data for sankey plot using gather_set_data() and factors for plot ordering | |
d2 <- d2 %>% | |
mutate(orig = factor(orig, levels = unique(d1$orig)), | |
dest = factor(dest, levels = unique(d1$orig))) %>% | |
arrange(orig, dest, .frame) %>% | |
gather_set_data(x = 1:2) %>% | |
mutate(x = fct_inorder(x)) | |
d2 | |
# set up data frame for position of labels; h for the horizontal justification, n for nudge away from center | |
p <- d2 %>% | |
distinct(x, y) %>% | |
mutate(h = as.numeric(x == "orig"), | |
n = h * -0.1 + 0.05) | |
p | |
# plots within for loop | |
g <- NULL | |
for(f in unique(d2$.frame)){ | |
# year for title | |
y <- d2 %>% | |
filter(.frame == f) %>% | |
pull(year) %>% | |
unique() %>% | |
`/`(5) %>% | |
round() %>% | |
`*`(5) | |
# plot for each frame | |
g[[f + 1]] <- | |
ggplot(data = filter(d2, .frame == f), | |
mapping = aes(x = x, id = id, value = stock, split = y)) + | |
geom_parallel_sets(mapping = aes(fill = orig), axis.width = -0.05, alpha = 0.5) + | |
geom_parallel_sets_axes(axis.width = 0.05, fill = "transparent", colour = "black") + | |
geom_parallel_sets_labels(angle = 0, size = 3, lineheight = 0.8, hjust = p$h, position = position_nudge(x = p$n)) + | |
guides(fill = FALSE) + | |
scale_fill_brewer(palette = "Set1") + | |
scale_x_discrete(labels = c(orig = "Place of Birth", dest = "Place of Residence")) + | |
# uncomment ylim for fixed limit plot, 320 chosen after inspecting initial plots, use 350 when including "unknown" origin data | |
# ylim(c(0, 320)) + | |
theme_minimal() + | |
theme(plot.subtitle = element_text(color = "darkgrey", size = 8)) + | |
labs(x = "", y = "Migrants (millions)", | |
title = paste("Migrant Population by World Bank Income Groups", y), | |
subtitle = "Plot by @guyabelguyabel", | |
caption = "Data from http://unmigration.org") | |
} | |
# create Grob for saving multiple plots to multi-page PDF | |
m <- marrangeGrob(g, nrow = 1, ncol = 1, top = NULL) | |
# save the Grob to PDF | |
ggsave(filename = "migrant_sankey.pdf", plot = m, height = 7, width = 5) | |
# set up frames for animation, pauses on first and last plot | |
ff <- c(rep(1, 5), 2:50, rep(51, 5)) | |
# bring in image files from each page of PDF | |
pp <- image_read_pdf("migrant_sankey.pdf") | |
# image_info(pp) | |
# create animated version of images in the PDF | |
saveVideo(expr = { | |
for(j in ff){ | |
img1 <- pp[j] | |
par(mar = rep(0,4)) | |
plot(as.raster(img1)) | |
}}, | |
ani.width = 1500, ani.height = 2100, n = length(ff), loop = TRUE, interval = 1/10, | |
ffmpeg = "C:/ffmpeg/bin/ffmpeg.exe", | |
video.name = "migrant_sankey.mp4") | |
file.show("migrant_sankey.mp4") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment