Skip to content

Instantly share code, notes, and snippets.

@jonspring
Last active December 18, 2019 17:51
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonspring/78b2124cf9daa351de98b5e031473585 to your computer and use it in GitHub Desktop.
Save jonspring/78b2124cf9daa351de98b5e031473585 to your computer and use it in GitHub Desktop.
When the lights go down in the city.... Local sunset times
# https://twitter.com/jonlovett/status/1191490424965218304
# What I'm curious about is how we're distributed inside of our time zones, because the further east you are inside of a time zone, the harder standard time hits. Boston's sunset is 4:34pm today. Brutal. Detroit's sunset, on the other side of the same time zone, is 5:22pm.
# I'm a big fan of Jon Lovett's podcasts, and I love trying new things in R, so it was Game On when I saw the tweet above. I googled around and combined a few data sources to get US counties' populations, coordinates, sunsets, and time zones. From those you can see who has it worst in the evenings when standard time kicks in.
# Curiously, big cities in the US tend to be on the east, "darker evening" end of their time zones.
####### Libraries #####
# Functions from these libraries are used, but mostly only once
library(tidyverse) # For data wrangling
# library(lubridate) # To help wrangle dates & times
# library(tidycensus) # To get county populations with GEOID (= FIPS?)
# library(housingData) # For lon/lat by county
# library(suncalc) # To get GMT sunset
# library(countytimezones)# To calculate local time from GMT sunset
library(extrafont) # To get fonts to plot right
extrafont::loadfonts() # ""
library(gganimate) # For animation
####### Load data #####
# First, we'll load population for US counties.
# The "population" product comes with population and density.
# You'll need to get a census api token, instructions in tidycensus.
tidycensus::census_api_key(keyring::key_get("census"))
pop <- tidycensus::get_estimates(geography = "county",
product = "population") %>%
pivot_wider(names_from = variable, values_from = value)
# To plot these, I want lat/long for each. Unfortunately, this source is limited
# to the continental US states, but I'm a mapping noob so that will do for now.
loc <- housingData::geoCounty %>% as_tibble
# The suncalc package takes coordinates and a date and gives sunset time, etc.
sunsets <- suncalc::getSunlightTimes(
data = loc %>% mutate(date = as.Date("2019-11-04")),
tz = "UTC") %>%
as_tibble() %>%
select(lat, lon, sunset)
# Combine 'em
county <- pop %>%
left_join(loc, by = c("GEOID" = "fips")) %>%
left_join(sunsets) %>%
select(NAME, GEOID, lon, lat, POP, DENSITY, sunset) %>%
filter(!is.na(sunset))
# convert sunset to local time
times <- countytimezones::calc_local_time(
county$sunset, county$GEOID, include_tz = TRUE)
county2 <- bind_cols(county, times) %>%
mutate(loc_sunset = lubridate::ymd_hm(local_time))
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ
votes <- read_csv("~/countypres_2000-2016.csv")
votes2 <- votes %>%
filter(party == "republican", year == 2016) %>%
count(FIPS, party, candidatevotes, totalvotes) %>%
mutate(rep_share = candidatevotes / totalvotes)
county3 <- county2 %>%
left_join(votes2 %>%
mutate(GEOID = str_pad(FIPS, 5, "left", "0")) %>%
group_by(GEOID) %>%
summarise(candidatevotes = sum(candidatevotes),
totalvotes = median(totalvotes)) %>%
mutate(rep_share = candidatevotes / totalvotes)) %>%
mutate(sunset_hr_rnd = lubridate::floor_date(loc_sunset, "5 minutes")) %>%
mutate(sunset_hr_dec = lubridate::hour(sunset_hr_rnd) +
lubridate::minute(sunset_hr_rnd)/60)
##### Plot - map w/ sunset shading ######
ggplot(county3, aes(lon, lat, size = POP,
fill = sunset_hr_dec - 12)) +
geom_point(alpha = 0.7, shape = 21, stroke = 0.1) +
scale_size_area(max_size = 12) +
guides(size = F) +
scale_fill_viridis_c(option = "B", breaks = c(4.5, 5, 5.5, 6),
labels = c("4:30", "5pm", "5:30", "6pm")) +
coord_map() +
hrbrthemes::theme_ipsum(grid = F, axis = F) +
theme(axis.text = element_blank(), axis.title = element_blank()) +
labs(title = "Standard Time is brutal on some big counties",
fill = "Sunset time\non Nov. 04")
ggsave("pundit.png", width = 8, height = 4,
dpi = 300, type = "cairo")
##### Plot - bimodal sunset distribution ######
county3 %>%
group_by(sunset_hr_rnd) %>%
arrange(sunset_hr_rnd, POP) %>%
mutate(cuml_pop = cumsum(POP)) %>%
ungroup() %>%
select(NAME, GEOID, lon, lat, POP, cuml_pop,
sunset_hr_rnd, sunset_hr_dec,
rep_share) %>%
ggplot(data = .,
aes(sunset_hr_rnd, POP, size = POP,
fill = sunset_hr_dec - 12)) +
geom_col(color = "white", size = 0.05) +
ggrepel::geom_text_repel(data = . %>%
mutate(NAME = if_else(
POP > 2000000 | GEOID == 25025, NAME, "")),
size = 3, lineheight = 0.8, vjust = 1,
box.padding = 0.3, point.padding = 1,
aes(y = cuml_pop - POP/2, label = str_wrap(NAME, 16))) +
guides(size = F) +
coord_cartesian(clip = "off", ylim = c(0, 4E7)) +
scale_x_datetime(date_labels = "%I:%M", name ="") +
scale_size_area() +
scale_fill_viridis_c(option = "B", breaks = c(4.5, 5, 5.5, 6),
labels = c("4:30", "5pm", "5:30", "6pm")) +
scale_y_continuous(breaks = 1E7*0:4,
labels = scales::comma_format(scale = 1E-6, suffix = "M")) +
hrbrthemes::theme_ipsum(grid = F) +
# theme(axis.text.y = element_blank(), axis.title = element_blank()) +
labs(title = "Standard Time is brutal on some big counties",
fill = "Sunset time\non Nov. 04")
ggsave("pundit2.png", width = 8, height = 4,
scale = 1.5, dpi = 300, type = "cairo")
# Animation: transition from map view to a scatter plot based on vote share.
# The trick here is to use vote share to calc new fake latitudes.
county3 %>%
select(NAME, GEOID, lon, lat, POP,
sunset_hr_dec, sunset, loc_sunset,
rep_share, sunset, candidatevotes) %>%
# Split into time zones
mutate(shift = (sunset - loc_sunset)/lubridate::dhours(1),
tz = round(shift, 0),
local_shift = shift - tz) %>%
# Add overall median lat and lon
mutate(avg_lat = median(lat)) %>%
mutate(avg_lon_all = median(lon)) %>%
# Group by time zone to get middle longitude. This will be handy later
# so that we can put a little separation between the zones.
group_by(tz) %>%
mutate(avg_lon = median(lon)) %>%
ungroup() %>%
# Copy every row, with one copy for each "phase."
# The first phase will look like a map, the 2nd will be a scatter
# graph with 2016 R vote share on the y, by using that value to create
# an imaginary new latitude
uncount(2, .id = "phase") %>%
mutate(phase = phase - 1) %>%
# Longitude will in all cases be 90% based on real longitude and
# 10% based on the time zones' average. This will "sqeeze" the points
# together a bit so there's a bit of space between each zone
mutate(lon1 = 0.9 * lon + (0.1 * avg_lon)) %>%
# When phase is zero, latitude will be original latitude.
# When phase is 1, latitude will shift to reflect 2016 trump share.
# I picked some values to shift GOP counties up toward Canada and
# Dem counties toward Mexico:
# Each 10% higher trump share than 50% will shift the county 3 degrees
# north, and shift more Dem counties south the same amount.
# When phase is 1, the he 76% trump vote share in
# Baldwin County, Alabama will be mapped to avg lat (38N) + 30*0.26
# degrees north, or 46 north, approaching the US-Canada border.
# DC, with a 4% 2016 trump vote share, will be mapped 30*0.46
# or 14 degrees south of median, putting in even with Mexico.
mutate(lat1 = (1-phase) * lat +
(phase * (avg_lat + (rep_share - 0.5) * 30))) -> county_anim
# Here are the axis titles meant to fade in as we shift to the 2nd phase
note <- tibble(
lon1 = -120,
lat1 = c(50,25,50,25),
phase = c(0,0,1,1),
label = c("Higher Trump\nvote share",
"Lower Trump\nvote share",
"Higher Trump\nvote share",
"Lower Trump\nvote share")
)
# The gganimate package will handle the tweening for us, determining
# intermediate positions to slide our counties between the two views.
# To make the "Higher Trump vote share" axes labels show up only for the
# 2nd phase, the text is mapped to alpha, so its transparent initially
# and fully visible in the 2nd phase.
animate(
ggplot(county_anim,
aes(lon1, lat1)) +
# ggplot(aes(sunset_hr, lat, size = pop,
# fill = sunset_hr - 12)) +
geom_point(aes(size = POP, fill = sunset_hr_dec - 12),
alpha = 0.7, shape = 21, stroke = 0.1, color = "gray90") +
geom_text(data = note, size = 6, family = "Arial", hjust = 0,
lineheight = 0.8,
aes(label = label, alpha = phase)) +
scale_size_area(max_size = 12) +
scale_alpha_continuous(range = c(0,1)) +
guides(size = F, alpha = F,
fill = guide_colourbar(barwidth = 2, barheight = 16)) +
scale_fill_viridis_c(option = "B", breaks = c(4.5, 5, 5.5, 6),
labels = c("4:30", "5pm", "5:30", "6pm")) +
coord_map(clip = "off") +
hrbrthemes::theme_ipsum(grid = F, axis = F, base_size = 20) +
theme(axis.text = element_blank(), axis.title = element_blank(),
plot.title = element_text(size = 30),
plot.subtitle = element_text(size = 20, lineheight = 1)) +
labs(title = "Later sunsets correlate with higher 2016 Trump vote share",
subtitle = "(and Pundit is an angel)",
fill = "Sunset time\non Nov. 04") +
transition_states(phase, transition_length = 9, state_length = 1) +
ease_aes("cubic-in-out"),
width = 800, height = 500, fps = 25, duration = 12, type = "cairo")
# .... later explorations around correlations with 2016 vote share, density, etc.
# Curious how there there are no large GOP-voting counties with early sunsets. How much just random chance, vs. due to original 1880's design of time zones (might have purposely put borders in rural areas, having effect of putting cities like Chicago near the eastern edge, or valuing sunset time differently in agricultural areas), vs. potential behavioral response since then (maybe retirees attracted to more sun & longer days in FL and TX).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment