Created
January 17, 2017 02:06
-
-
Save mkiang/fe91ad9f81c33927fc90c4a22cd0b1cf to your computer and use it in GitHub Desktop.
Code demonstrating how to use a histogram as a legend on a choropleth
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Code for this blog post: https://mathewkiang.com/2017/01/16/using-histogram-legend-choropleths/ | |
## Download the drug death data from: | |
## https://blogs.cdc.gov/nchs-data-visualization/drug-poisoning-mortality/ | |
## | |
## Download the 2013 CB shapefiles (500k will do) for counties and states: | |
## https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html | |
## https://www.census.gov/geo/maps-data/data/cbf/cbf_state.html | |
## Imports ---- | |
library(tidyverse) | |
library(broom) | |
library(rgdal) | |
library(viridis) | |
library(grid) | |
## Lower 48 states ---- | |
lower_48 <- c("09", "23", "25", "33", "44", "50", "34", "36", "42", "18", | |
"17", "26", "39", "55", "19", "20", "27", "29", "31", "38", | |
"46", "10", "11", "12", "13", "24", "37", "45", "51", "54", | |
"01", "21", "28", "47", "05", "22", "40", "48", "04", "08", | |
"16", "35", "30", "49", "32", "56", "06", "41", "53") | |
## Load drug poisoning data ---- | |
county_df <- read_csv(paste0('./../original_data/NCHS_-_Drug_' , | |
'Poisoning_Mortality__County_Trends', | |
'__United_States__1999_2014.csv'), | |
col_names = c('fipschar', 'year', 'state', 'st', | |
'stfips', 'county', 'pop', 'smr'), | |
skip = 1) | |
county_df$smr <- factor(county_df$smr, | |
levels = unique(county_df$smr)[c(10, 1:9, 11)], | |
ordered = TRUE) | |
## Import US states and counties from 2013 CB shapefile ---- | |
## We could use map_data() -- but want this to be generalizable to all shp. | |
allcounties <- readOGR(dsn = "./../shapefiles/", | |
layer = "cb_2013_us_county_500k") | |
allstates <- readOGR(dsn = "./../shapefiles/", | |
layer = "cb_2013_us_state_500k") | |
## A little munging and subsetting for maps ---- | |
allcounties@data$fipschar <- as.character(allcounties@data$GEOID) | |
allcounties@data$state <- as.character(allcounties@data$STATEFP) | |
allstates@data$state <- as.character(allstates@data$STATEFP) | |
## Only use lower 48 states | |
subcounties <- subset(allcounties, allcounties@data$state %in% lower_48) | |
substates <- subset(allstates, allstates@data$state %in% lower_48) | |
## Fortify into dataframes | |
subcounties_df <- tidy(subcounties, region = "GEOID") | |
substates_df <- tidy(substates, region = "GEOID") | |
## Let's focus on one year and do a practice run ---- | |
df_2014 <- filter(county_df, year == 2014) | |
map_2014 <- ggplot(data = df_2014) + | |
geom_map(aes(map_id = fipschar, fill = smr), map = subcounties_df) + | |
expand_limits(x = subcounties_df$long, y = subcounties_df$lat) | |
ggsave(map_2014, file = './first_pass.jpg', width = 5, height = 3.5) | |
## Make better colors ---- | |
map_2014 <- map_2014 + | |
theme_classic() + | |
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) + | |
scale_x_continuous(expand = c(0, 0)) + | |
scale_y_continuous(expand = c(0, 0)) | |
ggsave(map_2014, file = './better_colors.jpg', width = 5, height = 3.5) | |
## Add state lines and better labels (remove unnecessary ink) ---- | |
map_2014 <- map_2014 + | |
geom_path(data = substates_df, aes(long, lat, group = group), | |
color = "white", size = .7, alpha = .75) + | |
theme(plot.title = element_text(size = 30, face = "bold"), | |
axis.line = element_blank(), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.ticks = element_blank(), | |
axis.title.x = element_blank(), | |
axis.title.y = element_blank(), | |
legend.position = "none") + | |
labs(title = "Drug poisoning deaths (2014)", | |
caption = paste0('Source: https://blogs.cdc.gov/', | |
'nchs-data-visualization/', | |
'drug-poisoning-mortality/')) + | |
coord_fixed(ratio = 1.35) | |
ggsave(map_2014, file = './getting_closer.jpg', width = 5, height = 3.5, | |
scale = 2) | |
## Make the legend plot ---- | |
## First, aggregate the data | |
agg_2014 <- df_2014 %>% | |
group_by(smr) %>% | |
summarize(n_county = n()) | |
## Legend, first pass ---- | |
legend_plot <- ggplot(data = agg_2014, | |
aes(y = n_county, x = smr, fill = smr)) + | |
geom_bar(stat = 'identity') + | |
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) + | |
theme_classic() | |
ggsave(legend_plot, file = './legend_first.jpg', | |
height = 2, width = 1.75, scale = 3) | |
legend_plot <- legend_plot + | |
scale_y_continuous("", expand = c(0, 0), breaks = seq(0, 900, 300), | |
limits = c(0, 900)) + | |
scale_x_discrete(limits = rev(levels(df_2014$smr))) + | |
coord_flip() | |
ggsave(legend_plot, file = './proper_orientation.jpg', | |
height = 2, width = 1.75, scale = 3) | |
legend_plot <- legend_plot + | |
theme(plot.title = element_text(size = 16, face = "bold"), | |
plot.subtitle = element_text(size = 15), | |
axis.text.y = element_text(size = 14), | |
axis.text.x = element_text(size = 12), | |
axis.line = element_blank(), | |
axis.ticks = element_blank(), | |
axis.title.y = element_blank(), | |
legend.position = "none", | |
panel.background = element_rect(fill = "transparent",colour = NA), | |
plot.background = element_rect(fill = "transparent",colour = NA)) + | |
geom_hline(yintercept = seq(0, 900, 150), color = "white") + | |
labs(title = "Legend", subtitle = " Deaths per 100,000") | |
ggsave(legend_plot, file = './final_legend.jpg', | |
height = 2, width = 1.75, scale = 3) | |
## Now put the legend on the map ---- | |
legend_grob = ggplotGrob(legend_plot) | |
final_plot <- map_2014 + annotation_custom(grob = legend_grob, | |
xmin = max(subcounties_df$long) - 11, | |
xmax = max(subcounties_df$long) - 1, | |
ymin = min(subcounties_df$lat), | |
ymax = min(subcounties_df$lat) + 9) | |
ggsave(final_plot, width = 8, height = 5, scale = 2, limitsize = FALSE, | |
file = './jpeg_drug_deaths_2014.jpg') | |
for (y in unique(county_df$year)) { | |
subdf <- filter(county_df, year == y) | |
map_y <- ggplot(data = subdf) + | |
geom_map(aes(map_id = fipschar, fill = smr), map = subcounties_df) + | |
expand_limits(x = subcounties_df$long, y = subcounties_df$lat) + | |
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) + | |
scale_x_continuous(expand = c(0, 0)) + | |
scale_y_continuous(expand = c(0, 0)) + | |
geom_path(data = substates_df, aes(long, lat, group = group), | |
color = "white", size = .7, alpha = .75) + | |
theme_classic() + | |
theme(plot.title = element_text(size = 30, face = "bold"), | |
axis.line = element_blank(), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.ticks = element_blank(), | |
axis.title.x = element_blank(), | |
axis.title.y = element_blank(), | |
legend.position = "none") + | |
labs(title = paste0("Drug poisoning deaths (", y, ")"), | |
caption = paste0('Source: https://blogs.cdc.gov/', | |
'nchs-data-visualization/', | |
'drug-poisoning-mortality/')) + | |
coord_fixed(ratio = 1.35) | |
agg_df <- subdf %>% | |
group_by(smr) %>% | |
summarize(n_county = n()) | |
legend_plot <- ggplot(data = agg_df, | |
aes(y = n_county, x = smr, fill = smr)) + | |
geom_bar(stat = 'identity') + | |
scale_fill_viridis(option = "inferno", discrete = TRUE, direction = -1) + | |
scale_y_continuous("", expand = c(0, 0), breaks = seq(0, 900, 300), | |
limits = c(0, 900)) + | |
scale_x_discrete(limits = rev(levels(county_2014$smr))) + | |
coord_flip() + | |
theme_classic() + | |
theme(plot.title = element_text(size = 16, face = "bold"), | |
plot.subtitle = element_text(size = 15), | |
axis.text.y = element_text(size = 14), | |
axis.text.x = element_text(size = 12), | |
axis.line = element_blank(), | |
axis.ticks = element_blank(), | |
axis.title.y = element_blank(), | |
legend.position = "none", | |
panel.background = element_rect(fill = "transparent",colour = NA), | |
plot.background = element_rect(fill = "transparent",colour = NA)) + | |
geom_hline(yintercept = seq(0, 900, 150), color = "white") + | |
labs(title = "Legend", subtitle = " Deaths per 100,000") | |
legend_grob = ggplotGrob(legend_plot) | |
f_plot <- map_y + annotation_custom(grob = legend_grob, | |
xmin = max(subcounties_df$long) - 11, | |
xmax = max(subcounties_df$long) - 1, | |
ymin = min(subcounties_df$lat), | |
ymax = min(subcounties_df$lat) + 9) | |
ggsave(f_plot, width = 8, height = 5, scale = 2, | |
file = paste0('./drug_deaths_', y, '.pdf')) | |
ggsave(f_plot, width = 8, height = 5, scale = 2, limitsize = FALSE, | |
file = paste0('./jpeg_drug_deaths_', y, '.jpg')) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment