Skip to content

Instantly share code, notes, and snippets.

@mkiang
Created January 17, 2017 02:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mkiang/fe91ad9f81c33927fc90c4a22cd0b1cf to your computer and use it in GitHub Desktop.
Save mkiang/fe91ad9f81c33927fc90c4a22cd0b1cf to your computer and use it in GitHub Desktop.
Code demonstrating how to use a histogram as a legend on a choropleth
## Code for this blog post: https://mathewkiang.com/2017/01/16/using-histogram-legend-choropleths/
## Download the drug death data from:
## https://blogs.cdc.gov/nchs-data-visualization/drug-poisoning-mortality/
##
## Download the 2013 CB shapefiles (500k will do) for counties and states:
## https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html
## https://www.census.gov/geo/maps-data/data/cbf/cbf_state.html
## Imports ----
library(tidyverse)
library(broom)
library(rgdal)
library(viridis)
library(grid)
## Lower 48 states ----
lower_48 <- c("09", "23", "25", "33", "44", "50", "34", "36", "42", "18",
"17", "26", "39", "55", "19", "20", "27", "29", "31", "38",
"46", "10", "11", "12", "13", "24", "37", "45", "51", "54",
"01", "21", "28", "47", "05", "22", "40", "48", "04", "08",
"16", "35", "30", "49", "32", "56", "06", "41", "53")
## Load drug poisoning data ----
county_df <- read_csv(paste0('./../original_data/NCHS_-_Drug_' ,
'Poisoning_Mortality__County_Trends',
'__United_States__1999_2014.csv'),
col_names = c('fipschar', 'year', 'state', 'st',
'stfips', 'county', 'pop', 'smr'),
skip = 1)
county_df$smr <- factor(county_df$smr,
levels = unique(county_df$smr)[c(10, 1:9, 11)],
ordered = TRUE)
## Import US states and counties from 2013 CB shapefile ----
## We could use map_data() -- but want this to be generalizable to all shp.
allcounties <- readOGR(dsn = "./../shapefiles/",
layer = "cb_2013_us_county_500k")
allstates <- readOGR(dsn = "./../shapefiles/",
layer = "cb_2013_us_state_500k")
## A little munging and subsetting for maps ----
allcounties@data$fipschar <- as.character(allcounties@data$GEOID)
allcounties@data$state <- as.character(allcounties@data$STATEFP)
allstates@data$state <- as.character(allstates@data$STATEFP)
## Only use lower 48 states
subcounties <- subset(allcounties, allcounties@data$state %in% lower_48)
substates <- subset(allstates, allstates@data$state %in% lower_48)
## Fortify into dataframes
subcounties_df <- tidy(subcounties, region = "GEOID")
substates_df <- tidy(substates, region = "GEOID")
## Let's focus on one year and do a practice run ----
df_2014 <- filter(county_df, year == 2014)
map_2014 <- ggplot(data = df_2014) +
geom_map(aes(map_id = fipschar, fill = smr), map = subcounties_df) +
expand_limits(x = subcounties_df$long, y = subcounties_df$lat)
ggsave(map_2014, file = './first_pass.jpg', width = 5, height = 3.5)
## Make better colors ----
map_2014 <- map_2014 +
theme_classic() +
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0))
ggsave(map_2014, file = './better_colors.jpg', width = 5, height = 3.5)
## Add state lines and better labels (remove unnecessary ink) ----
map_2014 <- map_2014 +
geom_path(data = substates_df, aes(long, lat, group = group),
color = "white", size = .7, alpha = .75) +
theme(plot.title = element_text(size = 30, face = "bold"),
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none") +
labs(title = "Drug poisoning deaths (2014)",
caption = paste0('Source: https://blogs.cdc.gov/',
'nchs-data-visualization/',
'drug-poisoning-mortality/')) +
coord_fixed(ratio = 1.35)
ggsave(map_2014, file = './getting_closer.jpg', width = 5, height = 3.5,
scale = 2)
## Make the legend plot ----
## First, aggregate the data
agg_2014 <- df_2014 %>%
group_by(smr) %>%
summarize(n_county = n())
## Legend, first pass ----
legend_plot <- ggplot(data = agg_2014,
aes(y = n_county, x = smr, fill = smr)) +
geom_bar(stat = 'identity') +
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) +
theme_classic()
ggsave(legend_plot, file = './legend_first.jpg',
height = 2, width = 1.75, scale = 3)
legend_plot <- legend_plot +
scale_y_continuous("", expand = c(0, 0), breaks = seq(0, 900, 300),
limits = c(0, 900)) +
scale_x_discrete(limits = rev(levels(df_2014$smr))) +
coord_flip()
ggsave(legend_plot, file = './proper_orientation.jpg',
height = 2, width = 1.75, scale = 3)
legend_plot <- legend_plot +
theme(plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_text(size = 15),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
legend.position = "none",
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)) +
geom_hline(yintercept = seq(0, 900, 150), color = "white") +
labs(title = "Legend", subtitle = " Deaths per 100,000")
ggsave(legend_plot, file = './final_legend.jpg',
height = 2, width = 1.75, scale = 3)
## Now put the legend on the map ----
legend_grob = ggplotGrob(legend_plot)
final_plot <- map_2014 + annotation_custom(grob = legend_grob,
xmin = max(subcounties_df$long) - 11,
xmax = max(subcounties_df$long) - 1,
ymin = min(subcounties_df$lat),
ymax = min(subcounties_df$lat) + 9)
ggsave(final_plot, width = 8, height = 5, scale = 2, limitsize = FALSE,
file = './jpeg_drug_deaths_2014.jpg')
for (y in unique(county_df$year)) {
subdf <- filter(county_df, year == y)
map_y <- ggplot(data = subdf) +
geom_map(aes(map_id = fipschar, fill = smr), map = subcounties_df) +
expand_limits(x = subcounties_df$long, y = subcounties_df$lat) +
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
geom_path(data = substates_df, aes(long, lat, group = group),
color = "white", size = .7, alpha = .75) +
theme_classic() +
theme(plot.title = element_text(size = 30, face = "bold"),
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none") +
labs(title = paste0("Drug poisoning deaths (", y, ")"),
caption = paste0('Source: https://blogs.cdc.gov/',
'nchs-data-visualization/',
'drug-poisoning-mortality/')) +
coord_fixed(ratio = 1.35)
agg_df <- subdf %>%
group_by(smr) %>%
summarize(n_county = n())
legend_plot <- ggplot(data = agg_df,
aes(y = n_county, x = smr, fill = smr)) +
geom_bar(stat = 'identity') +
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) +
scale_y_continuous("", expand = c(0, 0), breaks = seq(0, 900, 300),
limits = c(0, 900)) +
scale_x_discrete(limits = rev(levels(county_2014$smr))) +
coord_flip() +
theme_classic() +
theme(plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_text(size = 15),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
legend.position = "none",
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)) +
geom_hline(yintercept = seq(0, 900, 150), color = "white") +
labs(title = "Legend", subtitle = " Deaths per 100,000")
legend_grob = ggplotGrob(legend_plot)
f_plot <- map_y + annotation_custom(grob = legend_grob,
xmin = max(subcounties_df$long) - 11,
xmax = max(subcounties_df$long) - 1,
ymin = min(subcounties_df$lat),
ymax = min(subcounties_df$lat) + 9)
ggsave(f_plot, width = 8, height = 5, scale = 2,
file = paste0('./drug_deaths_', y, '.pdf'))
ggsave(f_plot, width = 8, height = 5, scale = 2, limitsize = FALSE,
file = paste0('./jpeg_drug_deaths_', y, '.jpg'))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment