Skip to content

Instantly share code, notes, and snippets.

@reuning
Created November 5, 2020 14:59
Show Gist options
  • Save reuning/9ad061d9b07480a9b4b95a7610c219b4 to your computer and use it in GitHub Desktop.
Save reuning/9ad061d9b07480a9b4b95a7610c219b4 to your computer and use it in GitHub Desktop.
Compare polling and election margins in R. Data scraped from NYTimes results.
library(rvest)
library(data.table)
library(ggplot2)
library(ggrepel)
page <- read_html("https://www.nytimes.com/interactive/2020/11/03/us/elections/results-president.html")
links <- page %>% html_node(".e-state-list") %>% html_nodes("a") %>% html_attr("href")
get_data <- function(link){
page <- read_html(link)
trump <- page %>% html_node("tr.e-donald-j-trump > td.e-percent" ) %>% html_text(trim=T)
biden <- page %>% html_node("tr.e-joseph-r-biden > td.e-percent" ) %>% html_text(trim=T)
pct_all <- page %>% html_node("span.e-est-pct") %>% html_text(trim=T)
state <- page %>% html_node("h1[itemprop='headline']") %>% html_text(trim=T)
m <- regexpr("\\d+", pct_all)
pct <- as.numeric(regmatches(pct_all, m))
trump <- as.numeric(gsub("%", "", trump))
biden <- as.numeric(gsub("%", "", biden))
state <- gsub(" Election Results", "" ,state )
return(data.frame("state"=state, "trump"=trump,
"biden"=biden, "pct_all"=pct_all,
"pct"=pct))
}
tables <- lapply(links, get_data)
table <- do.call(rbind, tables)
poll_data <- fread("https://projects.fivethirtyeight.com/polls-page/president_polls.csv")
poll_data <- poll_data[state!=""]
poll_data[, end_data:=as.Date(end_date, format = "%m/%d/%y")]
poll_data[, start_date:=as.Date(start_date, format = "%m/%d/%y")]
# poll_data <- poll_data[start_date >= as.Date("2020-10-20")]
poll_data$id <- paste(poll_data$question_id, poll_data$poll_id, sep="-")
poll_data <- dcast(poll_data, start_date+ end_date + state+id + pollster + fte_grade +
sample_size + population +methodology~candidate_name, value.var="pct")
poll_data <- merge(poll_data, table, by="state", all.x=T)
ggplot(poll_data[pct>97 & start_date >= as.Date("2020-10-20")],
aes(y=biden-trump,
x=`Joseph R. Biden Jr.` - `Donald Trump`,
color=state)) +
# geom_boxplot(width=4,outlier.size = .5,
# varwidth = F, orientation = "y", position = position_identity()) +
geom_point(size=1.5, alpha=.7) +
geom_abline(slope=1, intercept=0, color='gray') +
theme_minimal() + labs(y="Vote Margin\n(Biden-Trump)",
x="Polling Margin\n(Biden-Trump)",
caption="Only including polls since Oct 20th and states with >97% reporting") +
geom_text(aes(label=state), x=40) +
scale_x_continuous(limits = c(-40,50)) +
scale_y_continuous(limits = c(-40,50)) +
guides(color=F) + geom_smooth(method='lm',
aes(y=biden-trump,
x=`Joseph R. Biden Jr.` - `Donald Trump`),
inherit.aes = F,
se=F,color='black')
setwd("~/Desktop/")
ggsave("polling_results.png", height=5, width=7)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment