chris-prener/bivariateMapping.R

## bivariateMapping.R
# Bivariate Choropleth Map
## this is based on Timo Grossenbacher and Angelo Zehr's tutorial -
## https://timogrossenbacher.ch/2019/04/bivariate-maps-with-ggplot2-and-sf/

# dependencies
library(dplyr)      # data wrangling
library(ggplot2)    # plotting
library(purrr)      # iteration
library(tidyr)      # data wrangling

library(cowplot)    # plot construction

library(sf)         # spatial tools
library(tidycensus) # demographic data
library(tigris)     # geometric data

# create map theme
## I modified Timo and Angelo's theme a bit to suite my taste and design style
## I also hard-coded the font name and color values in to the theme
theme_map <- function(...) {
  theme_minimal() +
    theme(

      # text defaults
      text = element_text(family = "sans", color = "#000000"),

      # remove all axes
      axis.line = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks = element_blank(),

      # add a grid that blends into plot background
      panel.grid.major = element_line(color = "#ebebeb", size = 0.2),
      panel.grid.minor = element_blank(),

      # background colors
      plot.background = element_rect(fill = "#ebebeb", color = NA),
      panel.background = element_rect(fill = "#ebebeb", color = NA),
      legend.background = element_rect(fill = "#ebebeb", color = NA),

      # borders and margins
      plot.margin = unit(c(.5, .5, .2, .5), "cm"),
      panel.border = element_blank(),
      panel.spacing = unit(c(-.1, 0.2, .2, 0.2), "cm"),

      # titles
      legend.title = element_text(size = 11),
      legend.text = element_text(size = 9, hjust = 0, color = "#222222"),
      plot.title = element_text(size = 24, hjust = 0.5, color = "#222222", face = "bold"),
      plot.subtitle = element_text(size = 18, hjust = 0.5, color = "#222222",
                                   margin = margin(b = -0.1, t = -0.1, l = 2, unit = "cm"),
                                   face = "bold", debug = FALSE),

      # captions
      plot.caption = element_text(size = 10, hjust = .5, margin = margin(t = 0.2, b = 0, unit = "cm"),
                                  color = "#939184"),
      ...
    )
}

# download median household income data for St. Louis City and County
c(189, 510) %>%
  map_df(~get_acs(year = 2017, geography = "tract", variables = "B19019_001", state = 29, county = .x)) %>%
  select(GEOID, estimate) %>%
  rename(medInc = estimate) -> medianInc

# download race data for St. Louis City and County and calculate percent white
c(189, 510) %>%
  map_df(~get_acs(year = 2017, geography = "tract", table = "B02001", state = 29, county = .x, output = "wide")) %>%
  select(GEOID, B02001_001E, B02001_002E) %>%
  rename(total = B02001_001E, white = B02001_002E) %>%
  mutate(pctWhite = white/total*100) -> race

# download tract geometry
tracts <- tracts(state = 29, class = "sf") %>%
  filter(COUNTYFP %in% c(189, 510)) %>%
  select(GEOID)

# combine tract data with geometry
tracts <- tracts %>%
  left_join(., race, by = "GEOID") %>%
  left_join(., medianInc, by = "GEOID")

# clean-up enviornment
# rm(race, medianInc)

# download country geometry
counties <- counties(state = 29, class = "sf") %>%
  filter(COUNTYFP %in% c(189, 510)) %>%
  select(NAMELSAD)

# create three bins for percent white
quantiles_white <- tracts %>%
  pull(pctWhite) %>%
  quantile(probs = seq(0, 1, length.out = 4))

# create three bins for mean income
quantiles_medInc <- tracts %>%
  pull(medInc) %>%
  quantile(probs = seq(0, 1, length.out = 4))

# create color scale that encodes two variables
# red for percent white and blue for median household income
bivariate_color_scale <- tibble(
  "3 - 3" = "#3F2949", # high % white, high income
  "2 - 3" = "#435786",
  "1 - 3" = "#4885C1", # low % white, high income
  "3 - 2" = "#77324C",
  "2 - 2" = "#806A8A", # medium % white, medium income
  "1 - 2" = "#89A1C8",
  "3 - 1" = "#AE3A4E", # high % white, low income
  "2 - 1" = "#BC7C8F",
  "1 - 1" = "#CABED0" # low % white, low income
) %>%
  gather("group", "fill")

# cut into groups defined above and join fill
tracts <- tracts %>%
  mutate(pctWhite_quantiles = cut(pctWhite, breaks = quantiles_white, include.lowest = TRUE)) %>%
  mutate(medInc_quantiles = cut(medInc, breaks = quantiles_medInc, include.lowest = TRUE)) %>%
  mutate(group = paste(as.numeric(pctWhite_quantiles), "-", as.numeric(medInc_quantiles))) %>%
  left_join(., bivariate_color_scale, by = "group")

# create high median income tract object
highTracts <- filter(tracts, medInc >= 150000) %>%
  select(GEOID)

# re-project
## !! annotations will not work with decimal degrees, use UTM coordinates instead
tracts <- st_transform(tracts, crs = 26915)

# create annotations
## coordinates for arrows must be specified in UTM coordinates
## use https://www.geoplaner.com to find needed coordinate pairs - it is an iterative
## process of testing coordinate pairs until the desired cartographic effect is achieved
annotations <- tibble(
  label = c(
    "Grey areas have\nlow % white and\nlow income",
    "Violet areas have\nhigh % white and\nhigh income",
    "Blue areas have\nlow % white and\nhigh income",
    "The only high\n% white and low\nincome tract",
    "Gray-violet areas\nhave both moderate\n% white and income",
    "Red outlined tracts have\na median household income \ngreater than $150,000"
  ),
  arrow_from = c(
    "745750, 4291382", # grey
    "713518, 4262733", # violet
    "741149, 4307104", # blue
    "718167, 4297318",  # bright red
    "725925, 4302593", # gray violent
    "701042, 4289956" # high tracts
  ),
  arrow_to = c(
    "739153, 4286241", # grey
    "711985, 4266273", # violet
    "737122, 4302125",  # blue
    "721222, 4294786",  # bright red
    "727449, 4298328", # gray violet
    "702816, 4281282" # high tracts
  ),
  curvature = c(
    0.2, # grey
    -0.2, # violent
    0.2, # blue
    0.2,  # bright red
    -0.2,  # gray violet
    0.3 # high tracts
  ),
  nudge = c(
    "500,500", # grey
    "500,500", # violent
    "500,500", # blue
    "500,500", # bright red
    "500,500", # gray violet
    "500,500" # high tracts
  ),
  just = c(
    "0,1", # grey
    "0,1", # violent
    "0,1", # blue
    "1,0",  # bright red
    "1,0",  # gray violet
    "0,1" # high tracts
  )
) %>%
  separate(arrow_from, into = c("x", "y")) %>%
  separate(arrow_to, into = c("xend", "yend")) %>%
  separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>%
  separate(just, into = c("hjust", "vjust"), sep = "\\,")

# create initial map object
map <- ggplot() +

  # color municipalities according to their race / income combination, with white boarders
  geom_sf(data = tracts, aes(fill = fill), color = "white", size = 0.1) +

  # as the sf object municipality_prod_geo has a column with name "fill" that
  # contains the literal color as hex code for each municipality, we can use
  # scale_fill_identity here
  scale_fill_identity() +

  # add in high median income tract data that highlight boundaries of those tracts
  geom_sf(data = highTracts, fill = NA, color = "#ff0000", size = .5) +

  # overlay county coundaries
  geom_sf(data = counties, fill = NA, color = "white", size = .75) +

  # labels
  labs(
    title = "Racial and Income Segregation (2017)",
    subtitle = "St. Louis City and County Census Tracts",
    y = "",
    x = "",
    caption = "Map by Christopher Prener, Ph.D.\nData via the U.S. Census Bureau's 2017 ACS 5-year Estimates \nBreaks for percent white are 41.7% and 84.4%. Breaks for \nmedian household income are $41,250 and $62,408."
  ) +

  # apply theme created above
  theme_map()

# add annotations one by one by walking over the annotations data frame
# this is necessary because we cannot define nudge_x, nudge_y and curvature
# in the aes in a data-driven way like as with x and y, for example
annotations %>%
  pwalk(function(...) {

    # collect all values in the row in a one-rowed data frame
    current <- tibble(...)

    # convert all columns from x to vjust to numeric
    # as pwalk apparently turns everything into a character (why???)
    current <- current %>%
      mutate_at(vars(x:vjust), as.numeric)

    # update the plot object with global assignment
    map <<- map +

      # for each annotation, add an arrow
      geom_curve(data = current, aes(x = x, xend = xend, y = y, yend = yend),

                 # that's the whole point of doing this loop:
                 curvature = current %>% pull(curvature),
                 size = 0.2,
                 arrow = arrow(length = unit(0.005, "npc"))
      ) +

      # for each annotation, add a label
      geom_text(data = current, aes(x = x, y = y, label = label, hjust = hjust, vjust = vjust),

                # that's the whole point of doing this loop:
                nudge_x = current %>% pull(nudge_x),
                nudge_y = current %>% pull(nudge_y),

                # other styles
                family = "sans",
                color = "#000000",
                size = 3.5
      )
  })

# use if annotations extend of map
map <- map + coord_sf(clip = 'off')

# separate the groups used for plotting
bivariate_color_scale <- bivariate_color_scale %>%
  separate(group, into = c("pctWhite", "medInc"), sep = " - ") %>%
  mutate(pctWhite = as.integer(pctWhite),
         medInc = as.integer(medInc))

# draw legend
## the legend syntax with the right arrow on the original tutorial did not work for me
## the following workaround draws the arrows without using unicode symbols
legend <- ggplot() +
  geom_tile(data = bivariate_color_scale, mapping = aes(x = pctWhite, y = medInc, fill = fill)) +
  scale_fill_identity() +
  labs(x = expression(paste("Higher % White ", ""%->%"")),
       y = expression(paste("Higher Income ", ""%->%""))) +
  theme_map() +

  # make font small enough
  theme(axis.title = element_text(size = 12)) +

  # quadratic tiles
  coord_fixed()

# combine map with legend
finalPlot <- ggdraw() +
  draw_plot(map, 0, 0, 1, 1) +
  draw_plot(legend, 0.18, .7, 0.2, 0.2)

# save plot
## !! replace with ggsave - the prener package is a set of personal functions for Chris
## ggsave(filename = "plot.png", x, dpi = 1000)

prener::cp_plotSave(filename = "plot.png", finalPlot, preset = "lg")
	# Bivariate Choropleth Map
	## this is based on Timo Grossenbacher and Angelo Zehr's tutorial -
	## https://timogrossenbacher.ch/2019/04/bivariate-maps-with-ggplot2-and-sf/

	# dependencies
	library(dplyr) # data wrangling
	library(ggplot2) # plotting
	library(purrr) # iteration
	library(tidyr) # data wrangling

	library(cowplot) # plot construction

	library(sf) # spatial tools
	library(tidycensus) # demographic data
	library(tigris) # geometric data

	# create map theme
	## I modified Timo and Angelo's theme a bit to suite my taste and design style
	## I also hard-coded the font name and color values in to the theme
	theme_map <- function(...) {
	theme_minimal() +
	theme(

	# text defaults
	text = element_text(family = "sans", color = "#000000"),

	# remove all axes
	axis.line = element_blank(),
	axis.text.x = element_blank(),
	axis.text.y = element_blank(),
	axis.ticks = element_blank(),

	# add a grid that blends into plot background
	panel.grid.major = element_line(color = "#ebebeb", size = 0.2),
	panel.grid.minor = element_blank(),

	# background colors
	plot.background = element_rect(fill = "#ebebeb", color = NA),
	panel.background = element_rect(fill = "#ebebeb", color = NA),
	legend.background = element_rect(fill = "#ebebeb", color = NA),

	# borders and margins
	plot.margin = unit(c(.5, .5, .2, .5), "cm"),
	panel.border = element_blank(),
	panel.spacing = unit(c(-.1, 0.2, .2, 0.2), "cm"),

	# titles
	legend.title = element_text(size = 11),
	legend.text = element_text(size = 9, hjust = 0, color = "#222222"),
	plot.title = element_text(size = 24, hjust = 0.5, color = "#222222", face = "bold"),
	plot.subtitle = element_text(size = 18, hjust = 0.5, color = "#222222",
	margin = margin(b = -0.1, t = -0.1, l = 2, unit = "cm"),
	face = "bold", debug = FALSE),

	# captions
	plot.caption = element_text(size = 10, hjust = .5, margin = margin(t = 0.2, b = 0, unit = "cm"),
	color = "#939184"),
	...
	)
	}

	# download median household income data for St. Louis City and County
	c(189, 510) %>%
	map_df(~get_acs(year = 2017, geography = "tract", variables = "B19019_001", state = 29, county = .x)) %>%
	select(GEOID, estimate) %>%
	rename(medInc = estimate) -> medianInc

	# download race data for St. Louis City and County and calculate percent white
	c(189, 510) %>%
	map_df(~get_acs(year = 2017, geography = "tract", table = "B02001", state = 29, county = .x, output = "wide")) %>%
	select(GEOID, B02001_001E, B02001_002E) %>%
	rename(total = B02001_001E, white = B02001_002E) %>%
	mutate(pctWhite = white/total*100) -> race

	# download tract geometry
	tracts <- tracts(state = 29, class = "sf") %>%
	filter(COUNTYFP %in% c(189, 510)) %>%
	select(GEOID)

	# combine tract data with geometry
	tracts <- tracts %>%
	left_join(., race, by = "GEOID") %>%
	left_join(., medianInc, by = "GEOID")

	# clean-up enviornment
	# rm(race, medianInc)

	# download country geometry
	counties <- counties(state = 29, class = "sf") %>%
	filter(COUNTYFP %in% c(189, 510)) %>%
	select(NAMELSAD)

	# create three bins for percent white
	quantiles_white <- tracts %>%
	pull(pctWhite) %>%
	quantile(probs = seq(0, 1, length.out = 4))

	# create three bins for mean income
	quantiles_medInc <- tracts %>%
	pull(medInc) %>%
	quantile(probs = seq(0, 1, length.out = 4))

	# create color scale that encodes two variables
	# red for percent white and blue for median household income
	bivariate_color_scale <- tibble(
	"3 - 3" = "#3F2949", # high % white, high income
	"2 - 3" = "#435786",
	"1 - 3" = "#4885C1", # low % white, high income
	"3 - 2" = "#77324C",
	"2 - 2" = "#806A8A", # medium % white, medium income
	"1 - 2" = "#89A1C8",
	"3 - 1" = "#AE3A4E", # high % white, low income
	"2 - 1" = "#BC7C8F",
	"1 - 1" = "#CABED0" # low % white, low income
	) %>%
	gather("group", "fill")

	# cut into groups defined above and join fill
	tracts <- tracts %>%
	mutate(pctWhite_quantiles = cut(pctWhite, breaks = quantiles_white, include.lowest = TRUE)) %>%
	mutate(medInc_quantiles = cut(medInc, breaks = quantiles_medInc, include.lowest = TRUE)) %>%
	mutate(group = paste(as.numeric(pctWhite_quantiles), "-", as.numeric(medInc_quantiles))) %>%
	left_join(., bivariate_color_scale, by = "group")

	# create high median income tract object
	highTracts <- filter(tracts, medInc >= 150000) %>%
	select(GEOID)

	# re-project
	## !! annotations will not work with decimal degrees, use UTM coordinates instead
	tracts <- st_transform(tracts, crs = 26915)

	# create annotations
	## coordinates for arrows must be specified in UTM coordinates
	## use https://www.geoplaner.com to find needed coordinate pairs - it is an iterative
	## process of testing coordinate pairs until the desired cartographic effect is achieved
	annotations <- tibble(
	label = c(
	"Grey areas have\nlow % white and\nlow income",
	"Violet areas have\nhigh % white and\nhigh income",
	"Blue areas have\nlow % white and\nhigh income",
	"The only high\n% white and low\nincome tract",
	"Gray-violet areas\nhave both moderate\n% white and income",
	"Red outlined tracts have\na median household income \ngreater than $150,000"
	),
	arrow_from = c(
	"745750, 4291382", # grey
	"713518, 4262733", # violet
	"741149, 4307104", # blue
	"718167, 4297318", # bright red
	"725925, 4302593", # gray violent
	"701042, 4289956" # high tracts
	),
	arrow_to = c(
	"739153, 4286241", # grey
	"711985, 4266273", # violet
	"737122, 4302125", # blue
	"721222, 4294786", # bright red
	"727449, 4298328", # gray violet
	"702816, 4281282" # high tracts
	),
	curvature = c(
	0.2, # grey
	-0.2, # violent
	0.2, # blue
	0.2, # bright red
	-0.2, # gray violet
	0.3 # high tracts
	),
	nudge = c(
	"500,500", # grey
	"500,500", # violent
	"500,500", # blue
	"500,500", # bright red
	"500,500", # gray violet
	"500,500" # high tracts
	),
	just = c(
	"0,1", # grey
	"0,1", # violent
	"0,1", # blue
	"1,0", # bright red
	"1,0", # gray violet
	"0,1" # high tracts
	)
	) %>%
	separate(arrow_from, into = c("x", "y")) %>%
	separate(arrow_to, into = c("xend", "yend")) %>%
	separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>%
	separate(just, into = c("hjust", "vjust"), sep = "\\,")

	# create initial map object
	map <- ggplot() +

	# color municipalities according to their race / income combination, with white boarders
	geom_sf(data = tracts, aes(fill = fill), color = "white", size = 0.1) +

	# as the sf object municipality_prod_geo has a column with name "fill" that
	# contains the literal color as hex code for each municipality, we can use
	# scale_fill_identity here
	scale_fill_identity() +

	# add in high median income tract data that highlight boundaries of those tracts
	geom_sf(data = highTracts, fill = NA, color = "#ff0000", size = .5) +

	# overlay county coundaries
	geom_sf(data = counties, fill = NA, color = "white", size = .75) +

	# labels
	labs(
	title = "Racial and Income Segregation (2017)",
	subtitle = "St. Louis City and County Census Tracts",
	y = "",
	x = "",
	caption = "Map by Christopher Prener, Ph.D.\nData via the U.S. Census Bureau's 2017 ACS 5-year Estimates \nBreaks for percent white are 41.7% and 84.4%. Breaks for \nmedian household income are $41,250 and $62,408."
	) +

	# apply theme created above
	theme_map()

	# add annotations one by one by walking over the annotations data frame
	# this is necessary because we cannot define nudge_x, nudge_y and curvature
	# in the aes in a data-driven way like as with x and y, for example
	annotations %>%
	pwalk(function(...) {

	# collect all values in the row in a one-rowed data frame
	current <- tibble(...)

	# convert all columns from x to vjust to numeric
	# as pwalk apparently turns everything into a character (why???)
	current <- current %>%
	mutate_at(vars(x:vjust), as.numeric)

	# update the plot object with global assignment
	map <<- map +

	# for each annotation, add an arrow
	geom_curve(data = current, aes(x = x, xend = xend, y = y, yend = yend),

	# that's the whole point of doing this loop:
	curvature = current %>% pull(curvature),
	size = 0.2,
	arrow = arrow(length = unit(0.005, "npc"))
	) +

	# for each annotation, add a label
	geom_text(data = current, aes(x = x, y = y, label = label, hjust = hjust, vjust = vjust),

	# that's the whole point of doing this loop:
	nudge_x = current %>% pull(nudge_x),
	nudge_y = current %>% pull(nudge_y),

	# other styles
	family = "sans",
	color = "#000000",
	size = 3.5
	)
	})

	# use if annotations extend of map
	map <- map + coord_sf(clip = 'off')

	# separate the groups used for plotting
	bivariate_color_scale <- bivariate_color_scale %>%
	separate(group, into = c("pctWhite", "medInc"), sep = " - ") %>%
	mutate(pctWhite = as.integer(pctWhite),
	medInc = as.integer(medInc))

	# draw legend
	## the legend syntax with the right arrow on the original tutorial did not work for me
	## the following workaround draws the arrows without using unicode symbols
	legend <- ggplot() +
	geom_tile(data = bivariate_color_scale, mapping = aes(x = pctWhite, y = medInc, fill = fill)) +
	scale_fill_identity() +
	labs(x = expression(paste("Higher % White ", ""%->%"")),
	y = expression(paste("Higher Income ", ""%->%""))) +
	theme_map() +

	# make font small enough
	theme(axis.title = element_text(size = 12)) +

	# quadratic tiles
	coord_fixed()

	# combine map with legend
	finalPlot <- ggdraw() +
	draw_plot(map, 0, 0, 1, 1) +
	draw_plot(legend, 0.18, .7, 0.2, 0.2)

	# save plot
	## !! replace with ggsave - the prener package is a set of personal functions for Chris
	## ggsave(filename = "plot.png", x, dpi = 1000)

	prener::cp_plotSave(filename = "plot.png", finalPlot, preset = "lg")