Last active
September 2, 2020 21:50
-
-
Save sysilviakim/846f70acc02ae8b80b00ac6ec4d27cc4 to your computer and use it in GitHub Desktop.
How to Scrape/Calculate State-by-state Population Per Electoral College Elector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rvest) | |
library(jsonlite) | |
library(xtable) | |
## How many Electoral College electors per state? ==============================
## Page as is for Sep 1, 2020
url_270 <- "https://www.270towin.com"
## The elector counts live in a JSON object embedded in one of the page's
## <script> tags (`races = {...};` followed by `map_d3`).
## Class, make sure to check the page source code to see what I am doing!
electors <- local({
  script_text <- read_html(url_270) %>%
    html_nodes("script") %>%
    html_nodes(xpath = '//*[@type="text/javascript"]') %>%
    html_text() %>%
    ## Keep only the script(s) that mention the map URL (literal match)
    .[grepl("https://www.270towin.com/maps/", ., fixed = TRUE)]

  ## String text surrounding the JSON structure; regex after manual inspection.
  ## `\\s*` tolerates any amount of whitespace/newlines before `map_d3`.
  ## Column 2 of the str_match() matrix is the captured JSON text.
  races_json <- str_match(script_text, pattern = "races = (.*?);\\s*map_d3")[, 2]
  races_json <- races_json[!is.na(races_json)]
  if (length(races_json) == 0) {
    stop(
      "Could not find the `races` JSON in the page source of ", url_270,
      "; the page layout may have changed.",
      call. = FALSE
    )
  }

  ## Parse once, keeping the result as a plain list of per-state records so
  ## that each element can be turned into a one-row tibble below.
  fromJSON(races_json[[1]], simplifyVector = FALSE) %>%
    map(~ tibble(state = .x$state_name, electors = as.numeric(.x$e_votes))) %>%
    bind_rows()
})
## How does this compare against population? ===================================
## Census estimate of state-by-state population. read_csv() accepts a URL
## directly, so the file does not need to be saved to the hard drive first.
pop_state <- read_csv(
  paste0(
    "http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/",
    "totals/nst-est2019-alldata.csv"
  )
) %>%
  ## Keep only the name and the 2019 estimate, renamed for the join below
  select(state = NAME, pop = POPESTIMATE2019)
## Join then arrange by population per elector =================================
## Join on the state name explicitly rather than relying on a natural join.
df <- left_join(electors, pop_state, by = "state") %>%
  mutate(
    ## Convert population to million (scientific notation is 1e6)
    pop = pop / 1e6,
    ## `pop` is already in millions here, so this is millions per elector
    pop_per_elector = pop / electors
  ) %>%
  arrange(desc(pop_per_elector))
## Were states matched perfectly by name? ======================================
## Prints FALSE when every row of `electors` found a population (good);
## TRUE means at least one state name failed to match.
anyNA(df$pop_per_elector)
## Export to LaTeX =============================================================
## Build the table (no caption or labels) with presentation column names,
## then print it as booktabs LaTeX without row names.
print(
  df %>%
    rename(
      State = state,
      Electors = electors,
      `Pop. 2019 (million)` = pop,
      `Pop. Per Elector` = pop_per_elector
    ) %>%
    ## One digits entry for the row names plus one per column
    xtable(digits = c(0, 0, 0, 2, 2)),
  include.rownames = FALSE,
  booktabs = TRUE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment