Skip to content

Instantly share code, notes, and snippets.

@chris-prener
Last active August 28, 2019 05:18
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save chris-prener/cd6b0aa0b2096daf13e545f4264ea6bb to your computer and use it in GitHub Desktop.
Save chris-prener/cd6b0aa0b2096daf13e545f4264ea6bb to your computer and use it in GitHub Desktop.
Adaptation of Grossenbacher/Zehr Bivariate Tutorial
# Bivariate Choropleth Map
## this is based on Timo Grossenbacher and Angelo Zehr's tutorial -
## https://timogrossenbacher.ch/2019/04/bivariate-maps-with-ggplot2-and-sf/
# dependencies
library(dplyr) # data wrangling
library(ggplot2) # plotting
library(purrr) # iteration
library(tidyr) # data wrangling
library(cowplot) # plot construction
library(sf) # spatial tools
library(tidycensus) # demographic data
library(tigris) # geometric data
# create map theme
## I modified Timo and Angelo's theme a bit to suite my taste and design style
## I also hard-coded the font name and color values in to the theme
theme_map <- function(...) {
theme_minimal() +
theme(
# text defaults
text = element_text(family = "sans", color = "#000000"),
# remove all axes
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
# add a grid that blends into plot background
panel.grid.major = element_line(color = "#ebebeb", size = 0.2),
panel.grid.minor = element_blank(),
# background colors
plot.background = element_rect(fill = "#ebebeb", color = NA),
panel.background = element_rect(fill = "#ebebeb", color = NA),
legend.background = element_rect(fill = "#ebebeb", color = NA),
# borders and margins
plot.margin = unit(c(.5, .5, .2, .5), "cm"),
panel.border = element_blank(),
panel.spacing = unit(c(-.1, 0.2, .2, 0.2), "cm"),
# titles
legend.title = element_text(size = 11),
legend.text = element_text(size = 9, hjust = 0, color = "#222222"),
plot.title = element_text(size = 24, hjust = 0.5, color = "#222222", face = "bold"),
plot.subtitle = element_text(size = 18, hjust = 0.5, color = "#222222",
margin = margin(b = -0.1, t = -0.1, l = 2, unit = "cm"),
face = "bold", debug = FALSE),
# captions
plot.caption = element_text(size = 10, hjust = .5, margin = margin(t = 0.2, b = 0, unit = "cm"),
color = "#939184"),
...
)
}
# download median household income data for St. Louis City and County
c(189, 510) %>%
map_df(~get_acs(year = 2017, geography = "tract", variables = "B19019_001", state = 29, county = .x)) %>%
select(GEOID, estimate) %>%
rename(medInc = estimate) -> medianInc
# download race data for St. Louis City and County and calculate percent white
c(189, 510) %>%
map_df(~get_acs(year = 2017, geography = "tract", table = "B02001", state = 29, county = .x, output = "wide")) %>%
select(GEOID, B02001_001E, B02001_002E) %>%
rename(total = B02001_001E, white = B02001_002E) %>%
mutate(pctWhite = white/total*100) -> race
# download tract geometry
tracts <- tracts(state = 29, class = "sf") %>%
filter(COUNTYFP %in% c(189, 510)) %>%
select(GEOID)
# combine tract data with geometry
tracts <- tracts %>%
left_join(., race, by = "GEOID") %>%
left_join(., medianInc, by = "GEOID")
# clean-up enviornment
# rm(race, medianInc)
# download country geometry
counties <- counties(state = 29, class = "sf") %>%
filter(COUNTYFP %in% c(189, 510)) %>%
select(NAMELSAD)
# create three bins for percent white
quantiles_white <- tracts %>%
pull(pctWhite) %>%
quantile(probs = seq(0, 1, length.out = 4))
# create three bins for mean income
quantiles_medInc <- tracts %>%
pull(medInc) %>%
quantile(probs = seq(0, 1, length.out = 4))
# create color scale that encodes two variables
# red for percent white and blue for median household income
bivariate_color_scale <- tibble(
"3 - 3" = "#3F2949", # high % white, high income
"2 - 3" = "#435786",
"1 - 3" = "#4885C1", # low % white, high income
"3 - 2" = "#77324C",
"2 - 2" = "#806A8A", # medium % white, medium income
"1 - 2" = "#89A1C8",
"3 - 1" = "#AE3A4E", # high % white, low income
"2 - 1" = "#BC7C8F",
"1 - 1" = "#CABED0" # low % white, low income
) %>%
gather("group", "fill")
# cut into groups defined above and join fill
tracts <- tracts %>%
mutate(pctWhite_quantiles = cut(pctWhite, breaks = quantiles_white, include.lowest = TRUE)) %>%
mutate(medInc_quantiles = cut(medInc, breaks = quantiles_medInc, include.lowest = TRUE)) %>%
mutate(group = paste(as.numeric(pctWhite_quantiles), "-", as.numeric(medInc_quantiles))) %>%
left_join(., bivariate_color_scale, by = "group")
# create high median income tract object
highTracts <- filter(tracts, medInc >= 150000) %>%
select(GEOID)
# re-project
## !! annotations will not work with decimal degrees, use UTM coordinates instead
tracts <- st_transform(tracts, crs = 26915)
# create annotations
## coordinates for arrows must be specified in UTM coordinates
## use https://www.geoplaner.com to find needed coordinate pairs - it is an iterative
## process of testing coordinate pairs until the desired cartographic effect is achieved
annotations <- tibble(
label = c(
"Grey areas have\nlow % white and\nlow income",
"Violet areas have\nhigh % white and\nhigh income",
"Blue areas have\nlow % white and\nhigh income",
"The only high\n% white and low\nincome tract",
"Gray-violet areas\nhave both moderate\n% white and income",
"Red outlined tracts have\na median household income \ngreater than $150,000"
),
arrow_from = c(
"745750, 4291382", # grey
"713518, 4262733", # violet
"741149, 4307104", # blue
"718167, 4297318", # bright red
"725925, 4302593", # gray violent
"701042, 4289956" # high tracts
),
arrow_to = c(
"739153, 4286241", # grey
"711985, 4266273", # violet
"737122, 4302125", # blue
"721222, 4294786", # bright red
"727449, 4298328", # gray violet
"702816, 4281282" # high tracts
),
curvature = c(
0.2, # grey
-0.2, # violent
0.2, # blue
0.2, # bright red
-0.2, # gray violet
0.3 # high tracts
),
nudge = c(
"500,500", # grey
"500,500", # violent
"500,500", # blue
"500,500", # bright red
"500,500", # gray violet
"500,500" # high tracts
),
just = c(
"0,1", # grey
"0,1", # violent
"0,1", # blue
"1,0", # bright red
"1,0", # gray violet
"0,1" # high tracts
)
) %>%
separate(arrow_from, into = c("x", "y")) %>%
separate(arrow_to, into = c("xend", "yend")) %>%
separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>%
separate(just, into = c("hjust", "vjust"), sep = "\\,")
# create initial map object
map <- ggplot() +
# color municipalities according to their race / income combination, with white boarders
geom_sf(data = tracts, aes(fill = fill), color = "white", size = 0.1) +
# as the sf object municipality_prod_geo has a column with name "fill" that
# contains the literal color as hex code for each municipality, we can use
# scale_fill_identity here
scale_fill_identity() +
# add in high median income tract data that highlight boundaries of those tracts
geom_sf(data = highTracts, fill = NA, color = "#ff0000", size = .5) +
# overlay county coundaries
geom_sf(data = counties, fill = NA, color = "white", size = .75) +
# labels
labs(
title = "Racial and Income Segregation (2017)",
subtitle = "St. Louis City and County Census Tracts",
y = "",
x = "",
caption = "Map by Christopher Prener, Ph.D.\nData via the U.S. Census Bureau's 2017 ACS 5-year Estimates \nBreaks for percent white are 41.7% and 84.4%. Breaks for \nmedian household income are $41,250 and $62,408."
) +
# apply theme created above
theme_map()
# add annotations one by one by walking over the annotations data frame
# this is necessary because we cannot define nudge_x, nudge_y and curvature
# in the aes in a data-driven way like as with x and y, for example
annotations %>%
pwalk(function(...) {
# collect all values in the row in a one-rowed data frame
current <- tibble(...)
# convert all columns from x to vjust to numeric
# as pwalk apparently turns everything into a character (why???)
current <- current %>%
mutate_at(vars(x:vjust), as.numeric)
# update the plot object with global assignment
map <<- map +
# for each annotation, add an arrow
geom_curve(data = current, aes(x = x, xend = xend, y = y, yend = yend),
# that's the whole point of doing this loop:
curvature = current %>% pull(curvature),
size = 0.2,
arrow = arrow(length = unit(0.005, "npc"))
) +
# for each annotation, add a label
geom_text(data = current, aes(x = x, y = y, label = label, hjust = hjust, vjust = vjust),
# that's the whole point of doing this loop:
nudge_x = current %>% pull(nudge_x),
nudge_y = current %>% pull(nudge_y),
# other styles
family = "sans",
color = "#000000",
size = 3.5
)
})
# use if annotations extend of map
map <- map + coord_sf(clip = 'off')
# separate the groups used for plotting
bivariate_color_scale <- bivariate_color_scale %>%
separate(group, into = c("pctWhite", "medInc"), sep = " - ") %>%
mutate(pctWhite = as.integer(pctWhite),
medInc = as.integer(medInc))
# draw legend
## the legend syntax with the right arrow on the original tutorial did not work for me
## the following workaround draws the arrows without using unicode symbols
legend <- ggplot() +
geom_tile(data = bivariate_color_scale, mapping = aes(x = pctWhite, y = medInc, fill = fill)) +
scale_fill_identity() +
labs(x = expression(paste("Higher % White ", ""%->%"")),
y = expression(paste("Higher Income ", ""%->%""))) +
theme_map() +
# make font small enough
theme(axis.title = element_text(size = 12)) +
# quadratic tiles
coord_fixed()
# combine map with legend
finalPlot <- ggdraw() +
draw_plot(map, 0, 0, 1, 1) +
draw_plot(legend, 0.18, .7, 0.2, 0.2)
# save plot
## !! replace with ggsave - the prener package is a set of personal functions for Chris
## ggsave(filename = "plot.png", x, dpi = 1000)
prener::cp_plotSave(filename = "plot.png", finalPlot, preset = "lg")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment