Last active
April 28, 2023 12:31
-
-
Save ikashnitsky/c221601f821670a0d427522b27108314 to your computer and use it in GitHub Desktop.
Scholarly Migration Database launch meeting -- examples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#===============================================================================
# 2023-04-12 -- SMD lightning talk
# Use case of SMD
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com, @ikashnitsky
#===============================================================================
library(tidyverse) | |
library(magrittr) | |
library(prismatic) | |
library(janitor) | |
library(patchwork) | |
library(paletteer) | |
library(hrbrthemes) | |
library(cowplot) | |
# devtools::install_github("liamgilbey/ggwaffle") | |
library(ggwaffle) | |
library(ggflags) | |
library(countrycode) | |
library(sf) | |
library(rmapshaper) | |
library(treemapify) | |
library(ggrepel) | |
# text rendering: load showtext before registering the fonts
library(showtext)
# strongly penalise scientific notation when numbers are printed
options(scipen = 999)
# register two Google fonts under the short family aliases "rc" and "ah"
sysfonts::font_add_google(name = "Roboto Condensed", family = "rc")
sysfonts::font_add_google(name = "Atkinson Hyperlegible", family = "ah")
# switch showtext rendering on
showtext_auto()
# set ggplot2 theme
# NOTE(review): the sourced gist is expected to define theme_ik(); it is
# remote code fetched at run time without a pinned hash
devtools::source_gist(id = "653e1040a07364ae82b1bb312501a184")
theme_set(theme_ik())
# read the data
# raw_s comes from the country file, raw_f from the country-flow file
raw_s <- readr::read_csv(
  file = "~/data/smd/2022_V1_scholarlymigration_country.csv"
)
raw_f <- readr::read_csv(
  file = "~/data/smd/2022_V1_scholarlymigration_country_flow.csv"
)
# researcher totals for the largest countries (mean_n_researchers above 17e3);
# every other country is pooled into one final "rest" category
biggest_research <- raw_s %>%
  distinct(countrycode, mean_n_researchers) %>%
  arrange(desc(mean_n_researchers)) %>%
  mutate(
    cntr = case_when(
      mean_n_researchers > 17e3 ~ countrycode,
      TRUE ~ "rest"
    ),
    # factor levels follow the sorted row order; "rest" is moved to the end
    cntr = fct_relevel(as_factor(cntr), "rest", after = Inf)
  ) %>%
  group_by(cntr) %>%
  summarise(n = sum(mean_n_researchers)) %>%
  arrange(cntr)
# country codes of the 24 rows with the highest mean_n_researchers
biggest_24 <- raw_s %>%
  distinct(countrycode, mean_n_researchers) %>%
  arrange(desc(mean_n_researchers)) %>%
  slice_head(n = 24) %>%
  pull(countrycode)
# top 5 donors in these 24 countries
# For every destination in biggest_24: sum the flows per origin, keep the five
# largest origins under their own code and pool the remaining ones into "rest".
# The result is an ungrouped tibble with columns dest, top_5, n and name.
top_5_24 <- raw_f %>%
  filter(migrationto %in% biggest_24) %>%
  group_by(dest = migrationto, origin = migrationfrom) %>%
  summarise(n = sum(n_migrations), .groups = "drop") %>%
  # global sort by flow size, so rows are in descending order of n inside each
  # destination group formed on the next line
  arrange(desc(n)) %>%
  group_by(dest) %>%
  mutate(
    # built per destination group on purpose: the order in which levels first
    # appear decides the level order of the combined factor
    top_5 = case_when(
      row_number() <= 5 ~ origin,
      TRUE ~ "rest"
    ) %>%
      as_factor() %>%
      fct_relevel("rest", after = Inf)
  ) %>%
  group_by(dest, top_5) %>%
  # drop the grouping explicitly instead of leaving the result grouped by dest
  summarise(n = sum(n), .groups = "drop") %>%
  mutate(
    # lower-case country name of the destination, derived from its ISO3 code
    name = dest %>%
      toupper() %>%
      countrycode(origin = "iso3c", destination = "country.name") %>%
      tolower()
  )
# set fixed colors for countries
# One row per label used in the treemaps, with "rest" sorted last. Every row
# but the last receives a randomly drawn Polychrome colour (reproducible via
# the seed); the last row receives light grey.
set.seed(911)
pal_dict <- tibble(
  cntr = c(biggest_research$cntr, unique(top_5_24$top_5))
) %>%
  distinct() %>%
  mutate(
    cntr = cntr %>%
      as_factor() %>%
      fct_relevel("rest", after = Inf)
  ) %>%
  arrange(cntr) %>%
  mutate(
    # draw as many colours as there are rows before the last one rather than a
    # hard-coded 30, so the script does not break when the set of labels
    # changes; the palette is requested with 36 colours, which caps the draw
    col = c(
      sample(paletteer_d("Polychrome::palette36", 36), n() - 1),
      "#dadada"
    )
  )
# treemap of the biggest countries by the number of researchers
tree_pop <- biggest_research %>%
  left_join(pal_dict) %>%
  ggplot(aes(area = n)) +
  geom_treemap(
    aes(fill = col),
    start = "topleft", radius = unit(2, "pt"), color = NA
  ) +
  geom_treemap_text(
    aes(label = cntr),
    start = "topleft", place = "centre", grow = TRUE,
    fontface = 2, alpha = 3 / 4, color = "#264444"
  ) +
  # the col column already holds colour codes, so they are used as they are
  scale_fill_identity() +
  labs(title = "Biggest countries by researchers' population") +
  theme(plot.title = element_text(size = 20))
# display the plot
tree_pop
ggsave("~/downloads//tree_pop.pdf", tree_pop, width = 6.4, height = 3.6)
# faceted treemap: one panel per destination country, tiles are its origins
tree_24 <- top_5_24 %>%
  left_join(pal_dict, by = c("top_5" = "cntr")) %>%
  ggplot(aes(area = n)) +
  geom_treemap(
    aes(fill = col),
    start = "topleft", radius = unit(2, "pt"), color = NA
  ) +
  geom_treemap_text(
    aes(label = top_5),
    start = "topleft", place = "centre", grow = TRUE,
    fontface = 2, alpha = 3 / 4, color = "#264444"
  ) +
  scale_fill_identity() +
  facet_wrap(vars(name), ncol = 6) +
  theme(strip.text = element_text(face = 2))
# display the plot
tree_24
ggsave("~/downloads//tree_24.pdf", tree_24, width = 6.4, height = 3.6)
# calculate various quantities ------------------------------------------------------------------
# from
# total n_migrations per origin country, largest first, with a lower-case
# country name looked up from the upper-cased code
cum_from <- raw_f %>%
  group_by(cntr = migrationfrom) %>%
  summarise(n_from = sum(n_migrations)) %>%
  arrange(desc(n_from)) %>%
  mutate(
    name = tolower(
      countrycode(toupper(cntr), origin = "iso3c", destination = "country.name")
    )
  )
# to
# total n_migrations per destination country, largest first, with a lower-case
# country name looked up from the upper-cased code
cum_to <- raw_f %>%
  group_by(cntr = migrationto) %>%
  summarise(n_to = sum(n_migrations)) %>%
  arrange(desc(n_to)) %>%
  mutate(
    name = tolower(
      countrycode(toupper(cntr), origin = "iso3c", destination = "country.name")
    )
  )
# join
# outflow and inflow totals side by side (natural join on the shared columns);
# keeps rows with a resolved name and more than 9 moves in each direction
df <- cum_from %>%
  left_join(cum_to) %>%
  drop_na(name) %>%
  filter(n_from > 9, n_to > 9) %>%
  mutate(
    iso2c = countrycode(name, origin = "country.name", destination = "iso2c"),
    # above 1: more arrivals than departures
    inout = n_to / n_from
  )
# df for in out treemaps
# Countries that have an entry in pal_dict keep their own code; all others are
# pooled into "rest". Flows are summed per label and the colour is attached
# afterwards. (A join with pal_dict before the aggregation would be wasted
# work: summarise() keeps only the grouping column and the two sums.)
df_inout <- df %>%
  mutate(
    colored = case_when(
      cntr %in% pal_dict$cntr ~ cntr,
      TRUE ~ "rest"
    ) %>%
      as_factor() %>%
      fct_relevel("rest", after = Inf)
  ) %>%
  group_by(colored) %>%
  summarise(
    n_to = sum(n_to),
    n_from = sum(n_from)
  ) %>%
  left_join(pal_dict, by = c("colored" = "cntr"))
# layers shared by the IN and OUT treemaps; only the area mapping and the
# title differ between the two plots
inout_layers <- list(
  geom_treemap(
    aes(fill = col),
    start = "topleft", radius = unit(2, "pt"), color = NA
  ),
  geom_treemap_text(
    aes(label = colored),
    start = "topleft", place = "centre", grow = TRUE,
    fontface = 2, alpha = 3 / 4, color = "#264444"
  ),
  scale_fill_identity()
)
# treemap IN
tree_in <- ggplot(df_inout, aes(area = n_to)) +
  inout_layers +
  labs(title = "IN-migration")
tree_in
# treemap OUT
tree_out <- ggplot(df_inout, aes(area = n_from)) +
  inout_layers +
  labs(title = "OUT-migration")
tree_out
# place both treemaps side by side and save them as one figure
tree_inout <- tree_in + tree_out
ggsave("~/downloads//tree-inout.pdf", tree_inout, width = 6.4, height = 3.6)
# world sf data -----------------------------------------------------------
# let's use a fancy projection
# World polygons reprojected to ESRI:54030, without Antarctica. `%in%` is used
# for the test because `iso_a2 == "AQ"` evaluates to NA for rows with a
# missing ISO code, and filter() drops such rows together with Antarctica.
world_outline_robinson <- spData::world %>%
  st_as_sf() %>%
  st_transform(crs = "ESRI:54030") %>%
  filter(!iso_a2 %in% "AQ")
# produce borders layer
country_borders <- world_outline_robinson %>%
  rmapshaper::ms_innerlines()
# merge the data and borders
# na_matches = "never": a polygon with a missing ISO code must not pick up
# data rows whose iso2c is missing as well
df_map <- world_outline_robinson %>%
  left_join(df, by = c("iso_a2" = "iso2c"), na_matches = "never")
# map in-out ratio --------------------------------------------------------
# choropleth of inout (n_to / n_from), binned at the given breaks, with the
# inner country borders drawn on top
map_ratio <- ggplot(df_map) +
  geom_sf(aes(fill = inout), color = NA) +
  geom_sf(data = country_borders, color = "#ccffff", size = .1) +
  scale_fill_viridis_b(breaks = c(.67, .8, 1, 1.25, 1.5), option = "H") +
  labs(
    title = "Ratio of inflow and outflow of researchers",
    caption = "Data: https://scholarlymigration.org | Design: @ikashnitsky",
    fill = NULL
  ) +
  theme(
    plot.title = element_text(size = 22),
    axis.text = element_blank(),
    legend.position = c(.15, .4)
  )
ggsave("~/downloads//map-ratio.pdf", map_ratio, width = 6.4, height = 3.6)
# weighted GDP of the sending countries -----------------------------------
# Per destination: the mean gdp_per_capita of the origin countries weighted by
# the number of migrations, next to the destination's own mean GDP per capita
# and mean number of researchers.
weighted_gdp <- raw_f %>%
  transmute(
    dest = migrationto,
    origin = migrationfrom,
    year = migrationyearpadding,
    n = n_migrations
  ) %>%
  # attach the origin's GDP (natural join on the shared columns)
  left_join(
    select(raw_s, year, origin = countrycode, gdp_origin = gdp_per_capita)
  ) %>%
  # attach the destination's GDP and researcher count
  left_join(
    select(
      raw_s,
      year,
      dest = countrycode,
      gdp_dest = gdp_per_capita,
      n_res = mean_n_researchers
    )
  ) %>%
  # drop rows with a missing value in any column
  drop_na() %>%
  group_by(dest) %>%
  summarise(
    w_gpd_origin = weighted.mean(gdp_origin, w = n),
    gdp_dest = mean(gdp_dest),
    n_res = mean(n_res)
  ) %>%
  ungroup() %>%
  # bring in the columns of df_map (among them inout)
  left_join(df_map, by = c("dest" = "cntr")) %>%
  drop_na() %>%
  # order the big countries first
  arrange(desc(n_res)) %>%
  mutate(
    dest = fct_inorder(as_factor(dest)),
    # label only destinations that have an entry in pal_dict; NA otherwise
    cntr_label = case_when(
      dest %in% pal_dict$cntr ~ dest
    )
  )
# scatterplot: destination GDP per capita against the flow-weighted GDP per
# capita of the origin countries; colour shows inout, point area shows n_res
gpd_ratio <- ggplot(
  weighted_gdp,
  aes(x = gdp_dest, y = w_gpd_origin, color = inout, size = n_res)
) +
  # slightly larger pink points underneath the labelled countries only
  geom_point(
    data = function(d) drop_na(d, cntr_label),
    aes(size = n_res * 1.2),
    color = "#E91E63"
  ) +
  geom_point() +
  geom_text_repel(
    aes(label = cntr_label),
    color = alpha("#264444", 0.5)
  ) +
  scale_color_viridis_b(
    breaks = c(.67, .8, 1, 1.25, 1.5),
    option = "H",
    guide = guide_colorbar(barwidth = 20, title.position = "top")
  ) +
  scale_size_area(max_size = 20, guide = "none") +
  scale_x_comma(trans = "log10") +
  scale_y_comma(trans = "log10", limits = c(15e3, 50e3)) +
  labs(
    title = "GDP per capita in destination and origin countries",
    subtitle = "The values for origin countries are averages, weighted by the number of moving scholars",
    x = "GDP per capita in the destination country",
    y = "GDP per capita in the origin countries, weighted",
    color = "Ratio of inflow and outflow of researchers"
  ) +
  theme(
    plot.title = element_text(size = 21),
    axis.text.x = element_text(hjust = 1)
  )
ggsave("~/downloads//gdp-ratio.pdf", gpd_ratio, width = 8, height = 4.5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment