Skip to content

Instantly share code, notes, and snippets.

@ikashnitsky
Last active April 28, 2023 12:31
Show Gist options
  • Save ikashnitsky/c221601f821670a0d427522b27108314 to your computer and use it in GitHub Desktop.
Save ikashnitsky/c221601f821670a0d427522b27108314 to your computer and use it in GitHub Desktop.
Scholarly Migration Database launch meeting -- examples
#===============================================================================
# 2023-04-12 -- SMD lightning talk
# Usecase of SMD
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com, @ikashnitsky
#===============================================================================
library(tidyverse)
library(magrittr)
library(prismatic)
library(janitor)
library(patchwork)
library(paletteer)
library(hrbrthemes)
library(cowplot)
# devtools::install_github("liamgilbey/ggwaffle")
library(ggwaffle)
library(ggflags)
library(countrycode)
library(sf)
library(rmapshaper)
library(treemapify)
library(ggrepel)
# Session setup: number formatting, fonts, and the default ggplot2 theme.
# A large scipen penalty makes R prefer fixed over scientific notation
options(scipen = 999)
library(showtext)
# register two Google Fonts families under the short names "rc" and "ah"
sysfonts::font_add_google("Roboto Condensed", "rc")
sysfonts::font_add_google("Atkinson Hyperlegible", "ah")
# let showtext render text on graphics devices opened from here on
showtext_auto()
# set ggplot2 theme
# NOTE(review): this sources and runs code from a remote gist at run time;
# theme_ik() is presumably defined there -- confirm, and consider pinning a
# revision so the script stays reproducible
devtools::source_gist("653e1040a07364ae82b1bb312501a184")
theme_set(theme_ik())
# read the data
# raw_s: country-level table; the code below reads countrycode, year,
#        mean_n_researchers and gdp_per_capita from it
# raw_f: country-to-country flow table; the code below reads migrationfrom,
#        migrationto, migrationyearpadding and n_migrations from it
# NOTE(review): both paths are hard-coded under the user's home directory
raw_s <- read_csv("~/data/smd/2022_V1_scholarlymigration_country.csv")
raw_f <- read_csv("~/data/smd/2022_V1_scholarlymigration_country_flow.csv")
# Researcher population per country: countries above the size cutoff keep
# their own code, every other country is pooled into a single "rest" category
# NOTE(review): the 17e3 cutoff presumably selects the same 24 countries that
# `biggest_24` takes by rank -- confirm against the data
biggest_research <- raw_s %>%
  distinct(countrycode, mean_n_researchers) %>%
  # sort first so that factor levels follow descending population
  arrange(desc(mean_n_researchers)) %>%
  mutate(
    cntr = fct_relevel(
      as_factor(
        case_when(
          mean_n_researchers > 17e3 ~ countrycode,
          TRUE ~ "rest"
        )
      ),
      "rest",
      after = Inf # "rest" always goes last
    )
  ) %>%
  group_by(cntr) %>%
  summarise(n = sum(mean_n_researchers)) %>%
  arrange(cntr)
# Codes of the 24 countries with the largest population of scholars,
# ordered from the largest down
biggest_24 <- raw_s %>%
  distinct(countrycode, mean_n_researchers) %>%
  arrange(desc(mean_n_researchers)) %>%
  slice_head(n = 24) %>%
  pull(countrycode)
# Top 5 donor (origin) countries for each of the 24 biggest destinations;
# all remaining origins of a destination are pooled into "rest".
# Result: one row per destination x (top-5 origin or "rest") with the summed
# number of moves `n` and a lower-cased destination country name `name`.
# The result is returned ungrouped so later steps do not inherit a hidden
# grouping by `dest`.
top_5_24 <- raw_f %>%
  filter(migrationto %in% biggest_24) %>%
  # total moves over all years for every destination-origin pair
  group_by(dest = migrationto, origin = migrationfrom) %>%
  summarise(n = sum(n_migrations), .groups = "drop") %>%
  group_by(dest) %>%
  # arrange() ignores the grouping: rows are sorted globally, which still
  # leaves every destination's origins in descending order of `n`
  arrange(desc(n)) %>%
  mutate(
    # the first five rows within a destination are its largest donors
    top_5 = case_when(
      seq_along(n) < 6 ~ origin,
      TRUE ~ "rest"
    ) %>%
      as_factor() %>%
      fct_relevel("rest", after = Inf)
    # NOTE(review): the factor is built per `dest` group, so "rest" is moved
    # last only within each group's levels; the level order of the combined
    # column is presumably the union in order of appearance -- verify before
    # relying on "rest" being the last level here
  ) %>%
  group_by(dest, top_5) %>%
  summarise(n = sum(n), .groups = "drop") %>%
  mutate(
    # destination codes are upper-cased before the ISO3 lookup
    name = dest %>%
      toupper() %>%
      countrycode(origin = "iso3c", destination = "country.name") %>%
      tolower()
  )
# Fixed colour dictionary: one colour per country that is shown individually
# in any treemap, plus light grey for the pooled "rest" category.
# Columns: cntr (factor, "rest" last) and col (hex colour).
# The seed keeps the random colour assignment reproducible between runs.
set.seed(911)
pal_dict <- tibble(
  # NOTE(review): combining two factors with c() yields a factor only in
  # R >= 4.1; older versions return integer codes -- confirm the R version
  cntr = c(biggest_research$cntr, top_5_24$top_5 %>% unique())
) %>%
  distinct() %>%
  mutate(
    cntr = cntr %>%
      as_factor() %>%
      fct_relevel("rest", after = Inf)
  ) %>%
  # after sorting, "rest" is the last row and receives the grey colour
  arrange(cntr) %>%
  mutate(
    # draw exactly one colour per non-"rest" row instead of a hard-coded 30,
    # so the dictionary still builds when the set of countries changes; the
    # palette holds 36 colours, which is the upper limit for this draw
    col = c(
      paletteer_d("Polychrome::palette36", 36) %>% sample(n() - 1),
      "#dadada"
    )
  )
# Treemap of researcher population: one tile per big country plus "rest",
# tile area proportional to `n`, fill taken from the fixed colour dictionary.
# The join key is named explicitly, matching the faceted treemap below, so
# the join cannot silently pick up additional shared columns.
biggest_research %>%
  left_join(pal_dict, by = "cntr") %>%
  ggplot(aes(area = n)) +
  geom_treemap(
    aes(fill = col),
    color = NA, start = "topleft",
    radius = unit(2, "pt")
  ) +
  geom_treemap_text(
    aes(label = cntr),
    color = "#264444", fontface = 2, alpha = 3/4,
    place = "centre", start = "topleft", grow = TRUE
  ) +
  # `col` already holds hex colours, so use them as they are
  scale_fill_identity() +
  # coord_equal()+
  labs(
    title = "Biggest countries by researchers' population"
  ) +
  theme(
    plot.title = element_text(size = 20)
  )

# keep the plot object and export it
tree_pop <- last_plot()
ggsave("~/downloads//tree_pop.pdf", tree_pop, width = 6.4, height = 3.6)
# Small-multiple treemaps: one panel per destination country, tiles are the
# five largest origin countries plus "rest", coloured from the dictionary
top_5_24 %>%
  left_join(pal_dict, by = c("top_5" = "cntr")) %>%
  ggplot(aes(area = n)) +
  geom_treemap(
    aes(fill = col),
    start = "topleft",
    radius = unit(2, "pt"),
    color = NA
  ) +
  geom_treemap_text(
    aes(label = top_5),
    start = "topleft", place = "centre", grow = TRUE,
    fontface = 2, color = "#264444", alpha = 3/4
  ) +
  # fill values are literal hex colours
  scale_fill_identity() +
  facet_wrap(~ name, ncol = 6) +
  theme(
    strip.text = element_text(face = 2)
  )

# store the plot and write it to disk
tree_24 <- last_plot()
ggsave("~/downloads//tree_24.pdf", tree_24, width = 6.4, height = 3.6)
# calculate various quantities ------------------------------------------------------------------

# Total out-migration per origin country over the whole flow table,
# largest senders first, with a lower-cased country name attached
cum_from <- raw_f %>%
  group_by(cntr = migrationfrom) %>%
  summarise(n_from = sum(n_migrations)) %>%
  arrange(desc(n_from)) %>%
  mutate(
    name = tolower(
      countrycode(
        toupper(cntr),
        origin = "iso3c",
        destination = "country.name"
      )
    )
  )
# Total in-migration per destination country over the whole flow table,
# largest receivers first, with a lower-cased country name attached
cum_to <- raw_f %>%
  group_by(cntr = migrationto) %>%
  summarise(n_to = sum(n_migrations)) %>%
  arrange(desc(n_to)) %>%
  mutate(
    name = tolower(
      countrycode(
        toupper(cntr),
        origin = "iso3c",
        destination = "country.name"
      )
    )
  )
# Country-level totals of outflow and inflow with their ratio.
# Columns: cntr, n_from, name, n_to, iso2c, inout (= n_to / n_from).
# Only countries with a resolvable name and at least 10 moves in each
# direction are kept.
df <- left_join(
  cum_from, cum_to,
  # both tables carry the same two identifying columns; naming them keeps
  # the join from depending on whatever columns happen to be shared
  by = c("cntr", "name")
) %>%
  drop_na(name) %>%
  # a missing n_to (no recorded inflow) fails the comparison and is dropped
  filter(n_from > 9, n_to > 9) %>%
  mutate(
    # convert the ISO3 code directly rather than regex-matching the
    # lower-cased English name back to a code
    iso2c = cntr %>%
      toupper() %>%
      countrycode(origin = "iso3c", destination = "iso2c"),
    # values above 1 mean more scholars arrive than leave
    inout = n_to / n_from
  )
# Data for the IN / OUT treemaps: countries present in the colour dictionary
# keep their own tile, all others are pooled into "rest".
# Columns: colored, n_to, n_from, col.
# The colour is joined once, after aggregation; an earlier join on `cntr`
# added a `col` column that summarise() discarded, so it is omitted.
df_inout <- df %>%
  mutate(
    colored = case_when(
      cntr %in% pal_dict$cntr ~ cntr,
      TRUE ~ "rest"
    ) %>%
      as_factor() %>%
      fct_relevel("rest", after = Inf)
  ) %>%
  group_by(colored) %>%
  summarise(
    n_to = sum(n_to),
    n_from = sum(n_from)
  ) %>%
  left_join(pal_dict, by = c("colored" = "cntr"))
# Treemap of inflows: tile area is the number of arriving scholars
df_inout %>%
  ggplot(aes(area = n_to)) +
  geom_treemap(
    aes(fill = col),
    start = "topleft",
    radius = unit(2, "pt"),
    color = NA
  ) +
  geom_treemap_text(
    aes(label = colored),
    start = "topleft", place = "centre", grow = TRUE,
    fontface = 2, color = "#264444", alpha = 3/4
  ) +
  # fill values are literal hex colours
  scale_fill_identity() +
  labs(title = "IN-migration")

tree_in <- last_plot()
# Treemap of outflows: tile area is the number of departing scholars
df_inout %>%
  ggplot(aes(area = n_from)) +
  geom_treemap(
    aes(fill = col),
    start = "topleft",
    radius = unit(2, "pt"),
    color = NA
  ) +
  geom_treemap_text(
    aes(label = colored),
    start = "topleft", place = "centre", grow = TRUE,
    fontface = 2, color = "#264444", alpha = 3/4
  ) +
  # fill values are literal hex colours
  scale_fill_identity() +
  labs(title = "OUT-migration")

tree_out <- last_plot()

# place the two treemaps side by side (patchwork) and export
tree_inout <- tree_in + tree_out
ggsave("~/downloads//tree-inout.pdf", tree_inout, width = 6.4, height = 3.6)
# world sf data -----------------------------------------------------------

# World polygons re-projected to Robinson (ESRI:54030), without Antarctica.
# `%in%` never returns NA, so features whose iso_a2 is missing stay on the
# map; a plain `==` comparison yields NA for them and filter() drops NA rows.
world_outline_robinson <- spData::world %>%
  st_as_sf() %>%
  st_transform(crs = "ESRI:54030") %>%
  filter(!(iso_a2 %in% "AQ"))

# produce borders layer: only the lines shared between neighbouring polygons
country_borders <- world_outline_robinson %>%
  rmapshaper::ms_innerlines()

# merge the data and borders
# rows of `df` without an ISO2 code are left out of the join so that a
# missing code on the map side can never be matched to a missing code on
# the data side
df_map <- world_outline_robinson %>%
  left_join(
    df %>% filter(!is.na(iso2c)),
    by = c("iso_a2" = "iso2c")
  )
# map in-out ratio --------------------------------------------------------

# Choropleth of the inflow / outflow ratio, binned at fixed breaks around 1,
# with thin inner country borders drawn on top of the filled polygons
map_ratio <- ggplot(df_map) +
  geom_sf(aes(fill = inout), color = NA) +
  geom_sf(data = country_borders, color = "#ccffff", size = .1) +
  scale_fill_viridis_b(
    option = "H",
    breaks = c(.67, .8, 1, 1.25, 1.5)
  ) +
  labs(
    title = "Ratio of inflow and outflow of researchers",
    caption = "Data: https://scholarlymigration.org | Design: @ikashnitsky",
    fill = NULL
  ) +
  theme(
    plot.title = element_text(size = 22),
    axis.text = element_blank(),
    # legend placed inside the panel, in relative coordinates
    legend.position = c(.15, .4)
  )

ggsave("~/downloads//map-ratio.pdf", map_ratio, width = 6.4, height = 3.6)
# weighted GDP of the sending countries -----------------------------------

# For every destination country: the GDP per capita of its origin countries
# averaged with the number of movers as weights (`w_gpd_origin`), its own
# mean GDP per capita (`gdp_dest`) and its mean researcher population
# (`n_res`), joined to the map table, which supplies `inout` among others.
# The joins to `raw_s` name their keys explicitly so they cannot silently
# start matching on additional shared columns.
weighted_gdp <- raw_f %>%
  transmute(
    dest = migrationto,
    origin = migrationfrom,
    year = migrationyearpadding,
    n = n_migrations
  ) %>%
  # GDP of the origin country in the year of the move
  left_join(
    raw_s %>%
      transmute(
        year,
        origin = countrycode,
        gdp_origin = gdp_per_capita
      ),
    by = c("origin", "year")
  ) %>%
  # GDP and researcher population of the destination country in that year
  left_join(
    raw_s %>%
      transmute(
        year,
        dest = countrycode,
        gdp_dest = gdp_per_capita,
        n_res = mean_n_researchers
      ),
    by = c("dest", "year")
  ) %>%
  # keep only flows where every value is known
  drop_na() %>%
  group_by(dest) %>%
  summarise(
    w_gpd_origin = weighted.mean(gdp_origin, w = n),
    gdp_dest = mean(gdp_dest),
    n_res = mean(n_res)
  ) %>%
  left_join(df_map, by = c("dest" = "cntr")) %>%
  # NOTE(review): this drop_na() checks every column brought in by df_map,
  # so a destination is removed when any of those columns is missing --
  # confirm that this is intended
  drop_na() %>%
  # order the big countries first
  arrange(desc(n_res)) %>%
  mutate(
    dest = dest %>% as_factor() %>% fct_inorder(),
    # label only the countries that have their own colour in the dictionary;
    # all other rows get NA
    cntr_label = case_when(
      dest %in% pal_dict$cntr ~ dest
    )
  )
# Scatter plot on log-log axes: destination GDP per capita (x) against the
# mover-weighted GDP per capita of the origin countries (y); point colour is
# the inflow / outflow ratio and point area the researcher population
gpd_ratio <- ggplot(
  weighted_gdp,
  aes(gdp_dest, w_gpd_origin, color = inout, size = n_res)
) +
  # slightly larger pink discs underneath the labelled countries act as a
  # highlight ring around their points
  geom_point(
    data = function(d) drop_na(d, cntr_label),
    aes(size = n_res * 1.2),
    color = "#E91E63"
  ) +
  geom_point() +
  geom_text_repel(
    aes(label = cntr_label),
    color = alpha("#264444",0.5)
  ) +
  scale_color_viridis_b(
    option = "H",
    breaks = c(.67, .8, 1, 1.25, 1.5),
    guide = guide_colorbar(barwidth = 20, title.position = "top")
  ) +
  scale_size_area(max_size = 20, guide = "none") +
  scale_x_comma(trans = "log10") +
  scale_y_comma(trans = "log10", limits = c(15e3, 50e3)) +
  labs(
    title = "GDP per capita in destination and origin countries",
    subtitle = "The values for origin countries are averages, weighted by the number of moving scholars",
    x = "GDP per capita in the destination country",
    y = "GDP per capita in the origin countries, weighted",
    color = "Ratio of inflow and outflow of researchers"
  ) +
  theme(
    plot.title = element_text(size = 21),
    axis.text.x = element_text(hjust = 1)
  )

ggsave("~/downloads//gdp-ratio.pdf", gpd_ratio, width = 8, height = 4.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment