Skip to content

Instantly share code, notes, and snippets.

@bmschmidt
Created November 6, 2020 19:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bmschmidt/eba211034510d3a79c96ed34ff8834a7 to your computer and use it in GitHub Desktop.
Save bmschmidt/eba211034510d3a79c96ed34ff8834a7 to your computer and use it in GitHub Desktop.
Code for mapping the 2020 election in NYC by subway line and by neighborhood ethnic data. Extraordinarily messy and will not run as-is: it contains all sorts of hard-coded file paths to the data, etc. Most of the data was downloaded from the City of New York; this also includes code to live-scrape the 2020 election results by election district (ED).
```{r}
library(tidyverse)
library(sf)
# Election-district shapefile, read from a hard-coded local path.
shapes <- st_read(
  "/drobo/Downloads/Election Districts/geo_export_2ab7b79f-931c-423a-8e71-a78b1e084d86.shp",
  stringsAsFactors = FALSE
)

# `Unit Name` values that read_election() removes with an anti_join.
other_stuff <- tibble(
  `Unit Name` = c(
    "Public Counter",
    "Emergency",
    "Absentee/Military",
    "Federal",
    "Affidavit"
  )
)
# Read one election-results CSV and compute each row's share of the total
# tally in its election district.
#
# fr: path (or anything read_csv() accepts) to a CSV with the columns AD, ED,
#     `Unit Name` and Tally.
# Returns an ungrouped tibble with the added columns:
#   elect_dist - AD and ED pasted together as text and parsed as a number
#   name, party - the pieces of `Unit Name` before / inside the parentheses
#   tot        - sum of Tally within elect_dist
#   share      - Tally / tot
# Depends on the global `other_stuff` table for the rows to discard.
read_election <- function(fr) {
  fr %>%
    read_csv() %>%
    mutate(elect_dist = as.numeric(str_c(AD, ED))) %>%
    # Explicit key: same join as before, without the "Joining with" message.
    anti_join(other_stuff, by = "Unit Name") %>%
    # Splitting on "(" or ")" leaves a trailing empty piece for "Name (Party)"
    # and a single piece when there are no parentheses; `extra`/`fill` keep the
    # previous result (extra dropped, missing party = NA) without warnings.
    separate(
      `Unit Name`, c("name", "party"),
      sep = "[\\(\\)]",
      remove = FALSE,
      extra = "drop",
      fill = "right"
    ) %>%
    group_by(elect_dist) %>%
    mutate(
      tot = sum(Tally),
      share = Tally / tot
    ) %>%
    ungroup()
}
#"~/Downloads/00001100000Citywide Mayor Citywide EDLevel.csv" %>% read_election -> e1
#"~/Downloads/00000500000Citywide Governor Lieutenant Governor Citywide EDLevel.csv" %>% read_election -> e2
#"~/Downloads/01000200000Citywide Democratic President Citywide EDLevel.csv" %>% read_election -> e3
# Raw ED-level President / Vice President CSV; tidied_2016 is built from it.
e2 <- read_csv("/drobo/Downloads/00000100000Citywide President Vice President Citywide EDLevel.csv")

library(rvest)

# Index page of the live results site.
y2020 <- read_html("https://web.enrboenyc.us/CD23464AD0.html")

# Every anchor href on the index page that contains "CD".
links <- y2020 %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  keep(~ str_detect(.x, "CD"))

# "AD" plus the three characters that follow it in each link.
ADs <- str_extract(links, "AD...")

# Download each linked page (one HTTP request per link).
all_pages <- map(
  links,
  ~ read_html(str_glue("https://web.enrboenyc.us/", .x))
)
```
```{r}
library(tidyverse)
# Parse every `table.underline` element of a page without treating any row as
# a header, and return the first parsed table (NULL when there is none).
t_to_rows <- function(page) {
  underlined <- html_nodes(page, "table.underline")
  parsed <- map(underlined, ~ html_table(.x, header = FALSE))
  purrr::pluck(parsed, 1)
}
# Stack the first results table of every scraped page, tagging each row with
# the "AD..." token taken from that page's link.
joint <- map2_dfr(all_pages, ADs, ~ t_to_rows(.x) %>% mutate(ADE = .y))

# Keep the columns of interest and rename them Candidate.Party.
joint <- joint %>%
  select(
    ED = X1,
    p_in = X2,
    Biden.Democratic = X4,
    Trump.Republican = X6,
    Trump.Conservative = X8,
    Biden.Working_Families = X10,
    Hawkins.Green = X12,
    Jorgensen.Libertarian = X14,
    Pierce.Independence = X16,
    write_in = X18,
    ADE
  )

# Long format, then votes / share / total per election district and candidate.
tidied_2020 <- joint %>%
  # Drop rows whose Biden.Democratic cell holds label text rather than a count.
  filter(str_detect(Biden.Democratic, "Democrat|Biden", negate = TRUE)) %>%
  pivot_longer(cols = c(-ED, -p_in, -ADE), values_to = "votes") %>%
  mutate(votes = as.numeric(votes)) %>%
  separate(name, c("cand", "party")) %>%
  mutate(
    ED = as.numeric(str_extract(ED, "[0-9]+")),
    AD = as.numeric(str_extract(ADE, "[0-9]+")),
    # NOTE(review): tidied_2016 builds its key as AD * 1000 + ED. ADE is "AD"
    # plus three characters, so 100 * AD may be intentional here -- confirm
    # against the link format before changing.
    elect_dist = 100 * AD + ED
  ) %>%
  count(elect_dist, cand, wt = votes, name = "votes") %>%
  group_by(elect_dist) %>%
  mutate(share = votes / sum(votes), total = sum(votes)) %>%
  filter(!is.na(elect_dist)) %>%
  ungroup()

# Citywide vote totals per candidate.
count(tidied_2020, cand, wt = votes)
# Votes, share and total per election district and candidate from the raw
# CSV held in e2.
tidied_2016 <- e2 %>%
  mutate(
    ED = as.numeric(ED),
    # Last word before " /" in `Unit Name`.
    cand = str_replace(str_extract(`Unit Name`, "([A-Za-z]+) /"), " /", ""),
    # Text inside the parentheses of `Unit Name`; NA when there are none.
    party = str_replace_all(str_extract(`Unit Name`, "\\(.*\\)"), "[()]", ""),
    votes = Tally,
    elect_dist = AD * 1000 + ED
  ) %>%
  # Rows without a parenthesised party are discarded.
  filter(!is.na(party)) %>%
  count(elect_dist, cand, wt = votes, name = "votes") %>%
  group_by(elect_dist) %>%
  mutate(share = votes / sum(votes), total = sum(votes)) %>%
  ungroup()

# Spot check: ten random rows, then citywide totals per candidate.
tidied_2016 %>%
  ungroup() %>%
  sample_n(10)
count(tidied_2016, cand, wt = votes)
# Reshape a tidied results table to one row per election district.
#
# x:    table with the columns elect_dist, cand, votes and share.
# year: label inserted into the new column names (default 2016).
# Only the candidates Clinton, Biden and Trump are kept. The result has
# elect_dist plus columns named votes_<year><cand> and share_<year><cand>,
# e.g. votes_2016Clinton.
reshaper <- function(x, year = 2016) {
  x %>%
    filter(cand %in% c("Clinton", "Biden", "Trump")) %>%
    select(elect_dist, cand, votes, share) %>%
    pivot_wider(
      # names_prefix is documented as a string; coerce explicitly instead of
      # relying on a numeric year being converted implicitly.
      names_prefix = as.character(year),
      names_from = cand,
      values_from = c(votes, share)
    )
}
# Wide 2016 and 2020 results side by side (natural join on shared columns).
reshaped <- full_join(
  reshaper(tidied_2016, 2016),
  reshaper(tidied_2020, 2020)
)

# Attach the results to the district polygons and lightly simplify them.
jointmap <- shapes %>%
  inner_join(reshaped) %>%
  st_simplify(dTolerance = .0001)

# Map of the 2020 Biden share; values outside 15%-85% are squished to the
# ends of the colour scale.
ggplot(jointmap) +
  geom_sf(aes(fill = share_2020Biden), lwd = .03) +
  scale_fill_distiller(
    type = "div",
    limits = c(.15, .85),
    direction = 1,
    palette = 5,
    labels = scales::percent,
    oob = scales::squish
  ) +
  labs(title = "As of Nov 5, NYC prez results", caption = "Ben Schmidt") +
  theme_bw()
# ACS neighbourhood data, read from a hard-coded local path.
acs <- read_sf("/drobo/Downloads/ACS.geojson")

# Identifier columns plus every column whose name ends in a capital "E",
# reprojected to EPSG:2263.
acs2 <- acs %>%
  select(
    OBJECTID, BoroName, NTACode, geogname,
    ends_with("E", ignore.case = FALSE)
  ) %>%
  st_transform(crs = 2263)

# Put the election map in the same CRS so the two layers can be joined.
jointmap <- jointmap %>% st_transform(crs = 2263)

# Long table of (OBJECTID, name, n), after dropping every column whose name
# contains a digit, "male", "fem", "nhspE", "asnnhE" or "mdageE".
ethnic_groups <- acs2 %>%
  st_set_geometry(NULL) %>%
  select(OBJECTID, ends_with("E", ignore.case = FALSE)) %>%
  pivot_longer(cols = -OBJECTID) %>%
  filter(
    str_detect(name, "[0-9]|male|fem|nhspE|asnnhE|mdageE", negate = TRUE)
  ) %>%
  count(OBJECTID, name, wt = value)

# Largest remaining group per OBJECTID, with its share of the summed counts.
top_groups <- ethnic_groups %>%
  group_by(OBJECTID) %>%
  mutate(share = n / sum(n)) %>%
  arrange(-n) %>%
  slice(1) %>%
  ungroup()

# Fix: the joins below referred to `top_group`, which is never defined; the
# table built above is `top_groups`.
acs3 <- acs2 %>% inner_join(top_groups)

acs3 %>%
  ggplot() +
  geom_sf(aes(fill = name)) +
  labs(title = "Top ethnic group, by neighborhood.") +
  scale_fill_brewer(type = "div", palette = 2)
# Sum the four vote columns over all election districts that spatially join
# to each ACS area (OBJECTID).
combined <- jointmap %>%
  st_join(acs3) %>%
  group_by(OBJECTID) %>%
  summarize(across(
    c(votes_2020Biden, votes_2020Trump, votes_2016Clinton, votes_2016Trump),
    function(x) sum(x, na.rm = TRUE)
  ))

# Simplified geometry plus the Democratic share of the two-party vote.
simp <- combined %>%
  st_simplify(dTolerance = 202) %>%
  mutate(
    biden_share = votes_2020Biden / (votes_2020Biden + votes_2020Trump),
    clinton_share = votes_2016Clinton / (votes_2016Clinton + votes_2016Trump)
  )

simp %>%
  ggplot() +
  geom_sf(
    aes(fill = votes_2016Trump / (votes_2016Trump + votes_2016Clinton)),
    lwd = 0.2
  )

# Readable labels for the ACS column names. Defined here because the joins
# below need the lookup before the chunk's own `names` table (created further
# down) exists; joining against the bare symbol `names` would otherwise pick
# up base::names.
group_labels <- tribble(
  ~name, ~`top group`,
  "asneastE", "East Asian",
  "asnsouthE", "South Asian",
  "blnhE", "Non-hispanic Black",
  "hspdomE", "Dominican/Puerto Rican",
  "hspmeE", "Mexican",
  "hspprE", "Dominican/Puerto Rican",
  "hspsamE", "South American",
  "wtnhE", "White non-hispanic"
)

# Fix: `top_group` was undefined; the table of top groups is `top_groups`.
simp %>%
  inner_join(top_groups) %>%
  inner_join(acs3 %>% st_set_geometry(NULL)) %>%
  inner_join(group_labels) %>%
  ggplot() +
  geom_boxplot() +
  aes(x = `top group`, y = biden_share - clinton_share, color = BoroName) +
  scale_y_continuous(labels = scales::percent, limits = c(-.15, 0)) +
  coord_flip() +
  theme_bw() +
  labs(
    title = "Biden underperformed Clinton in NYC's immigrant neighborhoods.",
    subtitle = "Change from Clinton to Biden share of two-party vote."
  )

simp %>%
  inner_join(top_groups) %>%
  inner_join(acs3 %>% st_set_geometry(NULL))

complicated <- simp %>%
  inner_join(acs3 %>% st_set_geometry(NULL)) %>%
  inner_join(group_labels)

complicated %>%
  ggplot() +
  geom_point(
    aes(
      x = clinton_share,
      y = biden_share - clinton_share,
      color = `top group`
    )
  ) +
  scale_y_continuous(limits = c(-.17, 0), labels = scales::percent) +
  facet_wrap(~BoroName) +
  scale_color_brewer(type = "qual", direction = 1) +
  theme_bw() +
  labs(title = "Fall in Biden support is correlated to ethnicity, not to vote share." )

# Fix: select() cannot compute a new column, so the ratio is created with
# mutate() first and then selected.
g <- complicated %>%
  mutate(delta = biden_share / clinton_share) %>%
  select(OBJECTID, delta) %>%
  inner_join(
    acs %>%
      st_set_geometry(NULL) %>%
      select(OBJECTID, matches("^(asn|hsp|wtnh|blnh).*P$"))
  )

# Plain data frame for modelling: no geometry, no identifier.
d <- g %>%
  st_set_geometry(NULL) %>%
  select(-OBJECTID)

# Log of the Biden/Clinton share ratio regressed on five ACS "P" columns.
model <- lm(log(delta) ~ blnhP + asnnhP + hspmeP + hspsalvP + hspdomP, data = d)
summary(model)
# Subway line geometries, reprojected to EPSG:2263 like the other layers.
subway <- read_sf("/drobo/Downloads/Subway Lines.geojson") %>%
  st_transform(crs = 2263)

# Install lwgeom only when it is missing rather than on every run.
if (!requireNamespace("lwgeom", quietly = TRUE)) {
  install.packages("lwgeom")
}

# NOTE(review): the original statement below referenced an undefined `l` and
# fails when run; disabled until the intended summary is known.
# subway %>% group_by(name) %>% summarize(l)

# `name` holds several services joined by "-"; split it so each service gets
# its own row, then merge per service and total the segment lengths.
subway_lines <- subway %>%
  mutate(line = str_split(name, "-")) %>%
  unnest(line) %>%
  group_by(line) %>%
  summarize(len = sum(as.numeric(shape_len)))

# Election districts within 5280 / 2 CRS units of each line (half a mile,
# since EPSG:2263 is in feet), with vote columns summed per line and the 2020
# Trump share of the two-party vote.
subs <- jointmap %>%
  st_join(
    subway_lines,
    join = st_is_within_distance,
    dist = 5280 / 2,
    left = FALSE
  ) %>%
  group_by(line) %>%
  # Anonymous function instead of passing na.rm through across()'s `...`.
  summarize(across(starts_with("votes"), function(x) sum(x, na.rm = TRUE))) %>%
  mutate(share = votes_2020Trump / (votes_2020Trump + votes_2020Biden))

# Borough outlines without Staten Island, used as the grey backdrop below.
# Fix: moved ahead of its first use; it was previously defined after the plot
# that needs it.
boros <- acs %>%
  group_by(BoroName) %>%
  summarize(codes = n()) %>%
  filter(BoroName != "Staten Island") %>%
  st_transform(crs = 2263) %>%
  st_simplify(dTolerance = 30)
ggplot(boros)

# Small multiples, one panel per line, ordered by share.
# Fix: `share` is the 2020 Trump share computed above, so the legend and
# title no longer say "Clinton".
subs %>%
  ggplot() +
  geom_sf(data = boros, fill = "#BBBBBB", lwd = 0) +
  facet_wrap(~reorder(line, share)) +
  theme_bw() +
  scale_fill_viridis_c("Trump Share") +
  geom_sf(aes(fill = share), lwd = 0) +
  labs(
    title = "Election districts on subway lines, by Trump vote share.",
    caption = "Ben Schmidt--10:00AM 2020-11-06 10:00AM"
  )

subs %>%
  ggplot(aes(x = line, y = votes_2020Biden)) +
  geom_col() +
  coord_flip()

# NOTE(review): a bare `shifts` stood here, but no such object is defined in
# this file; disabled because evaluating it is an error.
# shifts

# Hex colour for each subway service; each row lists the services that share
# a colour, and the two unnest() calls flatten the nested lists to one service
# per row.
colors <- tribble(
  ~line, ~color,
  list("A", "C", "E"), "#0039A6",
  list("1", "2", "3"), "#EE352E",
  list("4", "5", "6"), "#00933C",
  list("7"), "#B933AD",
  list("B", "D", "F", "M"), "#FF6319",
  list("G"), "#6CBE45",
  list("J", "Z"), "#996633",
  list("L"), "#A7A9AC",
  list("N", "Q", "R", "W"), "#FCCC0A",
  list("S"), "#808183"
) %>%
  unnest(line) %>%
  unnest(line)

# NOTE(review): the original pipeline was left unfinished (a line starting
# with `%>%` and an empty `mutate(share = )`), which stopped the whole chunk
# from parsing. It is completed with the same share formula that top_groups
# uses; confirm this is what was intended.
ethnic_groups %>%
  filter(
    str_detect(
      name,
      "hspsamE|asneastE|asnsouthE|asnseastE|othnh",
      negate = TRUE
    )
  ) %>%
  group_by(OBJECTID) %>%
  mutate(share = n / sum(n))

# Readable labels for the ACS column names.
names <- tribble(
  ~name, ~`top group`,
  "asneastE", "East Asian",
  "asnsouthE", "South Asian",
  "blnhE", "Non-hispanic Black",
  "hspdomE", "Dominican/Puerto Rican",
  "hspmeE", "Mexican",
  "hspprE", "Dominican/Puerto Rican",
  "hspsamE", "South American",
  "wtnhE", "White non-hispanic"
)

# Bar chart of the Trump share per line, filled with each line's own colour.
subs %>%
  inner_join(colors) %>%
  ggplot() +
  geom_col() +
  aes(x = reorder(line, share), y = share, fill = color) +
  scale_fill_identity() +
  coord_flip() +
  theme_bw() +
  scale_x_discrete("Line") +
  labs(title = "Trump share in precincts within 1/2 mile of train tracks, by subway line") +
  scale_y_continuous(labels = scales::percent)
```
```{r}
```
```{r}
# Map of the change from the 2016 Clinton share to the 2020 Biden share for
# Brooklyn and Queens, one facet per `name`.
# NOTE(review): `g` as assigned elsewhere in this file only carries OBJECTID,
# delta and ACS percentage columns; this chunk appears to expect a different
# object that has BoroName, name and the share_* columns. Confirm before
# running.
g %>%
  filter(BoroName %in% c("Brooklyn", "Queens")) %>%
  ggplot() +
  geom_sf(aes(fill = share_2020Biden - share_2016Clinton), lwd = .03) +
  scale_fill_distiller(
    type = "div",
    limits = c(-.15, .15),
    direction = 1,
    palette = 5,
    labels = scales::percent,
    oob = scales::squish
  ) +
  labs(title = "As of Nov 5, NYC prez results", caption = "Ben Schmidt") +
  theme_bw() +
  facet_wrap(~name)
```
```{r}
# Scratch chunk: leftover exploratory statements.

# NOTE(review): `joint` is assigned elsewhere in this file as the scraped
# results table, which has neither geometry nor a `share` column; this plot
# presumably targeted an earlier spatial object. Confirm the intended input.
joint %>%
  st_simplify(dTolerance = .0001) %>%
  ggplot() +
  geom_sf(aes(fill = share), lwd = .33) +
  scale_fill_viridis_c()

tidied_2020
tidied_2016 %>% sample_n(10)

# Fix: tidied_2016 has no `party` column once it has been aggregated (rows
# without a party are already removed while it is built), so the extra
# filter on `party` is dropped.
tidied_2016 %>%
  count(cand, wt = votes)

# A bare `t` stood here; it only prints the base function t(), so it is
# disabled.
# t

# Portable replacement for the shell call `mkdir -p NYC_elections`: creates
# the directory and stays silent when it already exists.
dir.create("NYC_elections", showWarnings = FALSE, recursive = TRUE)

head(shapes)
#pg %>% html_nodes("a") %>% html_attr("href") %>% keep(str_detect, ".*EDLevel.csv$")
#year = read_xml("https://www.vote.nyc.ny.us/html/results/2018.shtml", as_html = TRUE)
#xml_find_all(year, "")
#fr = "~/Downloads/01000200000Citywide Democratic President Citywide EDLevel.csv"

# NOTE(review): e1 and e3 are only created by commented-out read_election()
# calls, and the e2 assigned in this file is a raw CSV rather than
# read_election() output, so the two statements below need those inputs
# restored before they can run.
# Democratic share in e1 relative to e2, per election district.
relative <- e1 %>%
  filter(party == "Democratic") %>%
  inner_join(e2, by = c("elect_dist" = "elect_dist", party = "party")) %>%
  mutate(relative = share.x / share.y) %>%
  select(elect_dist, relative, party)

shapes %>%
  inner_join(e3 %>% filter(name == "Hillary Clinton")) %>%
  ggplot() +
  geom_sf(aes(fill = share))
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment