Skip to content

Instantly share code, notes, and snippets.

@guyabel
Last active June 13, 2021 17:00
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save guyabel/f7c844f18c4d11916a6ee000532d0e8e to your computer and use it in GitHub Desktop.
Animated sankey plots for UN migrant stock data (IMS2020) related to https://guyabel.com/post/animated-sankey/ blog post
library(tidyverse)
library(readxl)
library(tweenr)
library(ggforce)
library(gridExtra)
library(animation)
library(magick)
# download the migrant stock origin-destination xlsx file from the UN site
x <- tempfile(fileext = ".xlsx")
download.file(
url = "https://www.un.org/development/desa/pd/sites/www.un.org.development.desa.pd/files/undesa_pd_2020_ims_stock_by_sex_destination_and_origin.xlsx",
destfile = x, mode = "wb"
)
# read in data for all years
d <- read_excel(path = x, sheet = 2, range = "B11:N37062", na = "..", guess_max = 1e5)
d
# # read in data for all years, including "Other" origin for unknowns, not in sheet 2
# d <- tibble(year = seq(from = 2020, to = 1990, by = -5),
# sheet = 3:9) %>%
# mutate(m = map(.x = sheet, .f = ~read_excel(path = x, sheet = .x, skip = 10)))
# d
# codes for world bank income groups and "unknown" origin
c1 <- c(1503:1500, 2003)
# world bank income group data frame
d1 <- d %>%
select(-2, -4) %>%
rename(dest = 1,
dest_code = 2,
orig = 3,
orig_code = 4) %>%
filter(orig_code %in% c1,
dest_code %in% c1) %>%
pivot_longer(cols = -(1:4), names_to = "year", values_to = "stock") %>%
mutate_if(is.character, ~str_remove(string = ., pattern = "-income countries")) %>%
mutate(year = as.numeric(year),
stock = stock/1e6) %>%
select(-contains("code"))
d1
# # code for world bank income group data frame if read in data with "unknown" origin
# d0 <- d %>%
# unnest(m) %>%
# select(-sheet, -3, -5) %>%
# rename(dest = 2, dest_code = 3) %>%
# pivot_longer(cols = -(1:3), names_to = "orig_code", values_to = "stock") %>%
# filter(dest_code %in% c1,
# orig_code %in% c1) %>%
# mutate(orig_code = as.numeric(orig_code),
# dest = str_remove(string = dest,
# pattern = "-income countries"))
#
# # origin names to join on to data frame
# c1 <- d0 %>%
# distinct(dest, dest_code) %>%
# set_names(nm = str_replace(string = names(.),
# pattern = "dest",
# replacement = "orig"))
#
# # join on origin names
# d1 <- d0 %>%
# left_join(c1) %>%
# replace_na(list(orig = "Unknown")) %>%
# mutate(stock = stock/1e6) %>%
# select(-contains("code"))
# tween data frame
d2 <- d1 %>%
mutate(corridor = paste(orig, dest, sep = " -> ")) %>%
select(corridor, year, stock) %>%
mutate(ease = "linear") %>%
tween_elements(time = "year", group = c("corridor"), ease = "ease", nframes = 50) %>%
as_tibble() %>%
separate(col = .group, into = c("orig", "dest"), sep = " -> ") %>%
relocate(orig, dest)
d2
# format data for sankey plot using gather_set_data() and factors for plot ordering
d2 <- d2 %>%
mutate(orig = factor(orig, levels = unique(d1$orig)),
dest = factor(dest, levels = unique(d1$orig))) %>%
arrange(orig, dest, .frame) %>%
gather_set_data(x = 1:2) %>%
mutate(x = fct_inorder(x))
d2
# set up data frame for position of labels; h for the horizontal justification, n for nudge away from center
p <- d2 %>%
distinct(x, y) %>%
mutate(h = as.numeric(x == "orig"),
n = h * -0.1 + 0.05)
p
# plots within for loop
g <- NULL
for(f in unique(d2$.frame)){
# year for title
y <- d2 %>%
filter(.frame == f) %>%
pull(year) %>%
unique() %>%
`/`(5) %>%
round() %>%
`*`(5)
# plot for each frame
g[[f + 1]] <-
ggplot(data = filter(d2, .frame == f),
mapping = aes(x = x, id = id, value = stock, split = y)) +
geom_parallel_sets(mapping = aes(fill = orig), axis.width = -0.05, alpha = 0.5) +
geom_parallel_sets_axes(axis.width = 0.05, fill = "transparent", colour = "black") +
geom_parallel_sets_labels(angle = 0, size = 3, lineheight = 0.8, hjust = p$h, position = position_nudge(x = p$n)) +
guides(fill = FALSE) +
scale_fill_brewer(palette = "Set1") +
scale_x_discrete(labels = c(orig = "Place of Birth", dest = "Place of Residence")) +
# uncomment ylim for fixed limit plot, 320 chosen after inspecting initial plots, use 350 when including "unknown" origin data
# ylim(c(0, 320)) +
theme_minimal() +
theme(plot.subtitle = element_text(color = "darkgrey", size = 8)) +
labs(x = "", y = "Migrants (millions)",
title = paste("Migrant Population by World Bank Income Groups", y),
subtitle = "Plot by @guyabelguyabel",
caption = "Data from http://unmigration.org")
}
# create Grob for saving multiple plots to multi-page PDF
m <- marrangeGrob(g, nrow = 1, ncol = 1, top = NULL)
# save the Grob to PDF
ggsave(filename = "migrant_sankey.pdf", plot = m, height = 7, width = 5)
# set up frames for animation, pauses on first and last plot
ff <- c(rep(1, 5), 2:50, rep(51, 5))
# bring in image files from each page of PDF
pp <- image_read_pdf("migrant_sankey.pdf")
# image_info(pp)
# create animated version of images in the PDF
saveVideo(expr = {
for(j in ff){
img1 <- pp[j]
par(mar = rep(0,4))
plot(as.raster(img1))
}},
ani.width = 1500, ani.height = 2100, n = length(ff), loop = TRUE, interval = 1/10,
ffmpeg = "C:/ffmpeg/bin/ffmpeg.exe",
video.name = "migrant_sankey.mp4")
file.show("migrant_sankey.mp4")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment