Skip to content

Instantly share code, notes, and snippets.

@PaulC91
Last active March 4, 2018 10:22
Show Gist options
  • Save PaulC91/462f133e4af11b8da796becfba149be6 to your computer and use it in GitHub Desktop.
Save PaulC91/462f133e4af11b8da796becfba149be6 to your computer and use it in GitHub Desktop.
Example of using a function to create a d3 Sankey diagram from multiple categorical variables plus one numeric value variable of a tidy data frame
library(tidyverse)
library(tidygraph)
library(igraph)
library(networkD3)
# Data source: https://www.kaggle.com/unitednations/refugee-data/data
asylum_seekers_raw <- read_csv("asylum_seekers.csv")

# Origins holding the ten largest summed 'Total decisions' values for 2016
top_orig <- asylum_seekers_raw %>%
  filter(Year == 2016) %>%
  group_by(Origin) %>%
  summarise(total = sum(`Total decisions`, na.rm = TRUE)) %>%
  top_n(10, total) %>%
  dplyr::pull(Origin)

# Destination countries to keep in the diagram
euro_countries <- c(
  "Germany", "Sweden", "France", "United Kingdom",
  "Netherlands", "Switzerland", "Italy", "Belgium"
)

# Restrict to 2016 rows for the chosen origins and destinations, keep the
# needed columns (picked by position), then stack the two value columns into
# Decision / Weight pairs
asylum_seekers <- asylum_seekers_raw %>%
  filter(
    Origin %in% top_orig,
    `Country / territory of asylum/residence` %in% euro_countries,
    Year == 2016
  ) %>%
  select(1, 3, 2, 8, 10) %>%
  select(-Year) %>%
  drop_na() %>%
  gather(key = "Decision", value = "Weight", 3:4)
############################################## SANKEY FUNCTION ######################################
#' Build a d3 Sankey diagram from a tidy data frame.
#'
#' Every column of `data` other than `val_col` is treated as one stage of the
#' diagram, in column order. Each pair of neighbouring stages is collapsed into
#' from/to links, and the values of `val_col` are summed per link. Links are
#' coloured by their target node.
#'
#' Nodes are identified by their label alone, so a label that occurs in more
#' than one stage column ends up as a single node.
#'
#' @param data A data frame holding at least two stage (categorical) columns
#'   and one value column.
#' @param val_col Unquoted name of the value column. It may sit at any
#'   position in `data`.
#' @param ... Currently ignored.
#' @return The widget returned by `sankeyNetwork()`.
sankey_func <- function(data, val_col, ...) {
  stopifnot(is.data.frame(data))

  weight <- enquo(val_col)

  # Resolve the value column to its name so that it can be excluded from the
  # stage columns wherever it sits, instead of assuming it is the last column.
  value_name <- names(select(ungroup(data), !! weight))
  if (length(value_name) != 1) {
    stop("`val_col` must identify exactly one column of `data`.",
         call. = FALSE)
  }

  stage_cols <- setdiff(names(data), value_name)
  if (length(stage_cols) < 2) {
    stop("`data` needs at least two columns besides `val_col`.",
         call. = FALSE)
  }

  # Collapse the data frame into 3 columns: from, to, weight. One slice per
  # pair of neighbouring stage columns, stacked on top of each other.
  links <- seq_len(length(stage_cols) - 1) %>%
    map_df(function(i) {
      tibble(
        from = data[[stage_cols[i]]],
        to = data[[stage_cols[i + 1]]],
        weight = data[[value_name]]
      )
    }) %>%
    drop_na() %>%
    group_by(from, to) %>%
    summarise(weight = sum(weight)) %>%
    ungroup() %>%
    mutate(colour = to)

  # Let igraph derive the node list and the integer edge indices.
  graph <- as_tbl_graph(igraph::graph_from_data_frame(links))

  # The graph indices are 1-based; sankeyNetwork() is given 0-based ones.
  nodes <- graph %>%
    as_tibble() %>%
    rowid_to_column("id") %>%
    mutate(id = id - 1) %>%
    as.data.frame()

  edges <- graph %>%
    activate(edges) %>%
    as_tibble() %>%
    mutate(from = from - 1, to = to - 1) %>%
    as.data.frame()

  sankeyNetwork(
    Links = edges, Nodes = nodes, Source = "from", Target = "to",
    NodeID = "name", Value = "weight", LinkGroup = "colour",
    fontSize = 14, fontFamily = "Roboto"
  )
}
########################################## END OF FUNCTION ##########################################
# Draw the Sankey diagram for the cleaned asylum seeker data,
# using the Weight column as the link value
asylum_seekers %>%
  sankey_func(val_col = Weight)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment