Skip to content

Instantly share code, notes, and snippets.

@jschoeley
Created January 15, 2018 10:47
Show Gist options
  • Save jschoeley/a7f71813b7d68d79254c6e7517564f6b to your computer and use it in GitHub Desktop.
Save jschoeley/a7f71813b7d68d79254c6e7517564f6b to your computer and use it in GitHub Desktop.
Tricolore: Experimenting with different legend styles for centered ternary color scales
#' ---
#' title: Different legend styles for centered ternary color scales
#' author: Jonas Schöley, Ilya Kashnitsky
#' output:
#' pdf_document
#' ---
#+echo=FALSE
knitr::opts_chunk$set(warning=FALSE, message=FALSE,
fig.width = 15, fig.height = 15)
# Ilyas code -- preparing the map -----------------------------------------
# ikashnitsky.github.io 2017-06-30
# load required packages
library(tidyverse) # data manipulation and viz
library(lubridate) # easy manipulations with dates
library(forcats) # good for dealing with factors
library(stringr) # good for dealing with text strings
library(eurostat) # grab data
library(rgdal) # deal with shapefiles
library(rgeos)
library(maptools)
# Download geodata
# Eurostat official shapefiles for regions
# http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units
# geodata will be stored in a directory "geodata"
ifelse(!dir.exists('geodata'),
dir.create('geodata'),
paste("Directory already exists"))
f <- tempfile()
download.file("http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_20M_SH.zip", destfile = f)
unzip(f, exdir = "geodata/.")
NUTS_raw <- readOGR("geodata/NUTS_2013_20M_SH/data/.", "NUTS_RG_20M_2013")
# colnames to lower case
names(NUTS_raw@data) <- tolower(names(NUTS_raw@data))
# change coordinate system to LAEA Europe (EPSG:3035)
# check out https://epsg.io
epsg3035 <- "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"
NUTS <- spTransform(NUTS_raw, CRS(epsg3035))
# create borders between countries
NUTS0 <- NUTS[NUTS$stat_levl_==0,]
identify_borders <- function(SPolyDF){
borders <- gDifference(
as(SPolyDF,"SpatialLines"),
as(gUnaryUnion(SPolyDF),"SpatialLines"),
byid=TRUE)
df <- data.frame(len = sapply(1:length(borders),
function(i) gLength(borders[i, ])))
rownames(df) <- sapply(1:length(borders),
function(i) borders@lines[[i]]@ID)
SLDF <- SpatialLinesDataFrame(borders, data = df)
return(SLDF)
}
Sborders <- identify_borders(NUTS0)
bord <- fortify(Sborders)
# subset NUTS-3 regions
NUTS3 <- NUTS[NUTS$stat_levl_==3,]
# remote areas to remove (NUTS-2)
remote <- c(paste0('ES',c(63,64,70)),paste('FRA',1:5,sep=''),'PT20','PT30')
# make the geodata ready for ggplot
gd3 <- fortify(NUTS3, region = "nuts_id") %>%
filter(!str_sub(id, 1, 4) %in% remote,
!str_sub(id, 1, 2) == "AL")
# let's add neighbouring countries
f <- tempfile()
download.file("http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/CNTR_2010_20M_SH.zip", destfile = f)
unzip(f, exdir = "geodata/.")
WORLD <- readOGR("geodata/CNTR_2010_20M_SH/CNTR_2010_20M_SH/Data/.",
"CNTR_RG_20M_2010")
# colnames to lower case
names(WORLD@data) <- tolower(names(WORLD@data))
# filter only Europe and the neighbouring countries
neigh_subset <- c("AT", "BE", "BG", "CH", "CZ", "DE", "DK",
"EE", "EL", "ES", "FI", "FR", "HU", "IE", "IS", "IT", "LT", "LV",
"NL", "NO", "PL", "PT", "SE", "SI", "SK", "UK", "IM", "FO", "GI",
"LU", "LI", "AD", "MC", "MT", "VA", "SM", "HR", "BA", "ME", "MK",
"AL", "RS", "RO", "MD", "UA", "BY", "RU", "TR", "CY", "EG", "LY",
"TN", "DZ", "MA", "GG", "JE", "KZ", "AM", "GE", "AZ", "SY", "IQ",
"IR", "IL", "JO", "PS", "SA", "LB", "MN", "LY", "EG")
NEIGH <- WORLD[WORLD$cntr_id %in% neigh_subset,]
# reproject the shapefile to a pretty projection for mapping Europe
Sneighbors <- spTransform(NEIGH, CRS(epsg3035))
# cut of everything behing the borders
rect <- rgeos::readWKT(
"POLYGON((20e5 10e5,
80e5 10e5,
80e5 60e5,
20e5 60e5,
20e5 10e5))"
)
Sneighbors <- rgeos::gIntersection(Sneighbors,rect,byid = T)
# create a blank map
basemap <- ggplot() +
geom_polygon(data = fortify(Sneighbors),
aes(x = long, y = lat, group = group),
fill = "grey90",color = "grey90") +
coord_equal(ylim = c(1350000,5550000), xlim = c(2500000, 7500000)) +
theme_void() +
theme(panel.border = element_rect(color = "black",size = .5,fill = NA),
legend.position = c(1, 1),
legend.justification = c(1, 1),
legend.background = element_rect(colour = NA, fill = NA),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15))+
scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0)) +
labs(x = NULL, y = NULL)
# Get stat data
# Find the needed dataset code
# http://ec.europa.eu/eurostat/web/regions/data/database
# download the data on broad pop structures at NUTS-3 level
df <- get_eurostat("demo_r_pjanaggr3")
# if the automated download does not work, the data can be grabbed manually at
# http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing
# filter NUTS-3, 2015, both sex, calculate shares
both15 <-
df %>%
filter(sex=="T", nchar(paste(geo))==5,
!str_sub(geo, 1, 4) %in% remote,
!str_sub(geo, 1, 2) %in% c("AL", "MK"),
year(time)==2015) %>%
droplevels() %>%
transmute(id = geo, age, value = values) %>%
spread(age, value)
# a function to overcome the problem of mapping nested polygons
# check out https://stackoverflow.com/questions/21748852
gghole <- function(fort){
poly <- fort[fort$id %in% fort[fort$hole,]$id,]
hole <- fort[!fort$id %in% fort[fort$hole,]$id,]
out <- list(poly,hole)
names(out) <- c('poly','hole')
return(out)
}
# Tricolore: raw proportions ----------------------------------------------
devtools::install_github('jschoeley/tricolore@0.0.3')
library(tricolore)
library(ggtern)
# color-codes and legend
tric <- Tricolore(both15, p1 = 'Y_LT15', p2 = 'Y15-64', p3 = 'Y_GE65', show_center = FALSE)
# customize legend
tric_legend <-
tric$legend +
scale_L_continuous('<15') +
scale_T_continuous('15-64') +
scale_R_continuous('65+') +
labs(subtitle = 'Age structure in Europe 2015\nUncentered proportions',
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1% 65+') +
theme(plot.background = element_rect(color = 'black'))
# add color-coded proportions to map
both15$rgb <- tric$hexsrgb
# plot color-coded map
p1 <-
basemap +
geom_map(map = gghole(gd3)[[1]], data = both15,
aes(map_id = id, fill = rgb))+
geom_map(map = gghole(gd3)[[2]], data = both15,
aes(map_id = id, fill = rgb))+
scale_fill_identity()+
geom_path(data = bord, aes(long, lat, group = group),
color = "white", size = .5)+
theme(legend.position = "none") +
annotation_custom(ggplotGrob(tric_legend),
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5)
p1
# Tricolore -- deviations from EU average ---------------------------------
# EU-28 age-structure 2015
center = c(0.155, 0.654, 0.191)
# coordinates and labels for the centered gridlines of a ternary diagram
TernaryCentroidGrid <- function (center) {
# center percent difference labels
labels <- seq(-1, 1, 0.1)
labels <- data.frame(
L = labels[labels >= -center[1]][1:10],
T = labels[labels >= -center[2]][1:10],
R = labels[labels >= -center[3]][1:10]
)
# breaks of uncentered grid
breaks = data.frame(
L = labels$L + center[1],
T = labels$T + center[2],
R = labels$R + center[3]
)
list(labels = labels, breaks = breaks)
}
# color-codes and legend
tric <- Tricolore(both15, p1 = 'Y_LT15', p2 = 'Y15-64', p3 = 'Y_GE65',
center = center, scale = 3, hue = 0.3)
# percent-point difference grid
legend_grid <- TernaryCentroidGrid(center)
# customize legend
tric_legend <-
tric$legend +
scale_L_continuous('<15',
breaks = legend_grid$breaks$L,
labels = legend_grid$labels$L) +
scale_T_continuous('15-64',
breaks = legend_grid$breaks$T,
labels = legend_grid$labels$T) +
scale_R_continuous('65+',
breaks = legend_grid$breaks$R,
labels = legend_grid$labels$R) +
labs(subtitle = 'Age structure in Europe 2015\nPercent-point diff. from EU avg.',
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1% 65+') +
theme(plot.background = element_rect(color = 'black'))
# add color-coded proportions to map
both15$rgb <- tric$hexsrgb
# plot color-coded map
p2 <-
basemap +
geom_map(map = gghole(gd3)[[1]], data = both15,
aes(map_id = id, fill = rgb))+
geom_map(map = gghole(gd3)[[2]], data = both15,
aes(map_id = id, fill = rgb))+
scale_fill_identity()+
geom_path(data = bord, aes(long, lat, group = group),
color = "white", size = .5)+
theme(legend.position = "none") +
annotation_custom(ggplotGrob(tric_legend),
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5)
p2
# Tricolore -- zoomed deviations from EU average ---------------------------
# Same colors as before, we just zoom into the legend a bit.
# Ternary limits spanning a regular triangle around a given center
CenterZoomLimits <- function (center = rep(1/3, 3), scale = 1) {
rbind(
center-scale*1/3*(1-max(center)),
center+scale*2/3*(1-max(center))
)
}
legend_limits <- CenterZoomLimits(center)
tric_legend <-
tric$legend +
scale_L_continuous('<15',
breaks = legend_grid$breaks$L,
labels = legend_grid$labels$L,
limits = legend_limits[,1]) +
scale_T_continuous('15-64',
breaks = legend_grid$breaks$T,
labels = legend_grid$labels$T,
limits = legend_limits[,2]) +
scale_R_continuous('65+',
breaks = legend_grid$breaks$R,
labels = legend_grid$labels$R,
limits = legend_limits[,3]) +
labs(subtitle = 'Age structure in Europe 2015\nPercent point diff. from EU avg.',
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1%') +
theme(plot.background = element_rect(color = 'black'))
# plot color-coded map
p3 <-
p2 +
annotation_custom(ggplotGrob(tric_legend),
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5)
p3
# Tricolore -- zoomed deviations from EU average ---------------------------
# Same colors as before, we just use the original proportion labels on the
# zoomed legend.
tric_legend <-
tric$legend +
scale_L_continuous('<15',
limits = legend_limits[,1]) +
scale_T_continuous('15-64',
limits = legend_limits[,2]) +
scale_R_continuous('65+',
limits = legend_limits[,3]) +
labs(subtitle = 'Age structure in Europe 2015\nColors show deviations from EU avg.',
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1%') +
theme(plot.background = element_rect(color = 'black'))
# plot color-coded map
p4 <-
p3 + annotation_custom(ggplotGrob(tric_legend),
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5)
p4
grid.arrange(p1, p2, p3, p4, ncol = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment