Skip to content

Instantly share code, notes, and snippets.

@walkerke
Created August 15, 2023 15:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walkerke/4df3064f33c31fd333d2ccc103748efc to your computer and use it in GitHub Desktop.
Save walkerke/4df3064f33c31fd333d2ccc103748efc to your computer and use it in GitHub Desktop.
# Install packages (if necessary)
# install.packages(c("tidycensus", "tidyverse", "tmap", "mapview"))
# Load libraries
library(tidycensus)
library(tidyverse)
# Get a Census API key at https://api.census.gov/data/key_signup.html.
# Activate the key from the link in your email.
# Now you can get started!
# Restart R after running this line if you plan to install your key.
# Otherwise, don't worry about it.
install <- FALSE
census_api_key("YOUR KEY HERE", install = install)
# Decennial Census data with `get_decennial()`
# 2020 data are available from the PL-94171 redistricting file
# The `geography` argument controls the level of aggregation;
# many geographies can be optionally subset by state and / or county
md_pop_2020 <- get_decennial(
geography = "county",
variables = "P1_001N",
state = "MD",
year = 2020
)
# Either print out the first few rows in your R console:
md_pop_2020
# Or view the entire dataset in RStudio
View(md_pop_2020)
# ACS data are available with `get_acs()`.
# More variables, but with margins of error.
md_income <- get_acs(
geography = "tract",
variables = "B19013_001",
state = "MD",
year = 2021
)
View(md_income)
# The `variables` argument takes one or more Census variable codes.
# How to identify them?
# `load_variables()` fetches a dataset of Census variables for
# a given year and dataset.
# For the detailed tables:
acs_detailed <- load_variables(2019, "acs5")
# For the Data Profile (pre-computed data):
acs_profile <- load_variables(2019, "acs5/profile")
View(acs_profile)
#### GIS workflows with tidycensus
# The argument `geometry = TRUE` gets you pre-joined geographic and demographic
# data - no need to carry out the steps yourselves!
md_2020_geo <- get_decennial(
geography = "county",
state = "MD",
variables = c(total_pop = "P5_001N",
hispanic = "P5_010N"),
year = 2020,
output = "wide",
sumfile = "dhc",
geometry = TRUE
) %>%
mutate(percent_hispanic = 100 * (hispanic / total_pop))
# To browse your data interactively, use the mapview package
library(mapview)
mapview(md_2020_geo)
# ggplot2 can map this data with `geom_sf()`
# We'll focus here on the tmap package, which offers a familiar
# interface to GIS users.
# We initialize a map with `tm_shape()`, then specify how to draw
# the data with `tm_polygons()`:
library(tmap)
tm_shape(md_2020_geo) +
tm_polygons()
# A choropleth map can be rendered by specifying a column to map:
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic")
# We'll likely want to do some customization. Let's change the colors
# and move the legend outside the map frame:
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic", palette = "Purples",
title = "% Hispanic, 2020") +
tm_layout(legend.outside = TRUE)
# We can also modify map elements for clarity and change
# the breaks from "pretty" (the default) to Jenks natural breaks
# (the default in ArcGIS)
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic", palette = "Purples",
title = "% Hispanic, 2020", style = "jenks") +
tm_layout(legend.outside = TRUE, bg.color = "grey80")
# Alternative map types are also available, like graduated symbols:
tm_shape(md_2020_geo) +
tm_polygons() +
tm_bubbles(size = "hispanic",
col = "navy",
alpha = 0.5,
scale = 3,
title.size = "Hispanic residents, 2020") +
tm_layout(legend.outside = TRUE,
legend.outside.position = "bottom",
legend.outside.size = 0.25)
# To automatically convert to an interactive map, use `tmap_mode("view")`
tmap_mode("view")
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic", palette = "Purples",
title = "% Hispanic, 2020", alpha = 0.5,
id = "NAME")
# Dot-density maps are great for visualizing neighborhood heterogeneity
# Let's make a race / ethnicity dot-density map for PG County
tmap_mode("plot")
pg_race <- get_decennial(
geography = "tract",
state = "MD",
county = "Prince George's",
variables = c(
Hispanic = "P5_010N",
White = "P5_003N",
Black = "P5_004N",
Asian = "P5_006N"
),
year = 2020,
sumfile = "dhc",
geometry = TRUE
)
# We use the `as_dot_density()` function in tidycensus to
# convert to scattered dots for mapping
pg_dots <- as_dot_density(
pg_race,
value = "value",
values_per_dot = 50,
group = "variable"
)
background_tracts <- filter(pg_race, variable == "White")
# Use `tm_dots()` to make the dot map
tm_shape(background_tracts) +
tm_polygons(col = "white",
border.col = "grey") +
tm_shape(pg_dots) +
tm_dots(col = "variable",
palette = "Set1",
alpha = 0.5,
size = 0.01,
title = "1 dot = 50 people") +
tm_layout(legend.outside = TRUE,
title = "Race/ethnicity,\n2020 US Census")
# Interactive dot maps are great for exploring demographic patterns!
tmap_mode("view")
tm_basemap(leaflet::providers$CartoDB.Positron) +
tm_shape(pg_dots) +
tm_dots(col = "variable",
border.lwd = 0,
palette = "Set1",
size = 0.01,
title = "1 dot = 200 people") +
tm_layout(legend.outside = TRUE,
title = "Race/ethnicity,\n2020 US Census")
# GIS workflows
library(sf)
dc_tract_income <- get_acs(
geography = "tract",
variables = "B19013_001",
state = "DC",
year = 2021,
geometry = TRUE
)
dc_banks <- st_read("data/dc_banks.geojson")
mapview(dc_banks)
# Spatial filter: which Census tracts have banks?
tracts_with_banks <- st_filter(dc_tract_income, dc_banks)
# Error message! Layers must share a CRS
dc_tract_income <- st_transform(dc_tract_income, 4326)
tracts_with_banks <- st_filter(dc_tract_income, dc_banks)
mapview(tracts_with_banks)
# Spatial join: what is the income of the area around each bank?
joined_banks <- st_join(dc_banks, dc_tract_income)
View(joined_banks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment