Skip to content

Instantly share code, notes, and snippets.

@elliottmorris
Last active October 29, 2020 02:36
Show Gist options
  • Save elliottmorris/b1a368c0eee194764f484267e98efc1f to your computer and use it in GitHub Desktop.
Save elliottmorris/b1a368c0eee194764f484267e98efc1f to your computer and use it in GitHub Desktop.
Charts the poll-level trend in 2020 polls
library(tidyverse)
library(lubridate)
library(pbapply)
url<- 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQ56fySJKLL18Lipu1_i3ID9JE06voJEz2EXm6JW4Vh11zmndyTwejMavuNntzIWLY0RyhA1UsVEen0/pub?gid=0&single=true&output=csv'
all_polls <- read_csv(url)
# remove any polls if biden or trump blank
all_polls <- all_polls %>% filter(!is.na(biden),!is.na(trump))#, include == "TRUE")
poll_shifts <- all_polls %>%
mutate(date = as_date(entry.date.time..et., format = '%m/%d/%Y %H:%M',tz='US/Eastern'),
number.of.observations = ifelse(number.of.observations > 1500,
1500,number.of.observations)) %>%
mutate(sample_size_weight = sqrt(number.of.observations / mean(number.of.observations))) %>%
arrange(date) %>%
group_by(state, pollster) %>%
mutate(shift_biden_margin = biden_margin - lag(biden_margin)) %>%
group_by(state) %>%
mutate(n_polls = n()) %>%
filter(n_polls > 10,!is.na(shift_biden_margin)) %>%
mutate(shift_biden_margin = ifelse(is.nan(shift_biden_margin),NA,shift_biden_margin))
poll_shifts <- expand_grid(date = unique(poll_shifts$date),
state = unique(poll_shifts$state)) %>%
left_join(poll_shifts)
poll_shifts <- pblapply(unique(poll_shifts$state),
cl = 8,
function(x){
print(x)
tmp_state <- poll_shifts %>% filter(state == x)
lapply(as_date(unique(tmp_state$date)),
function(y){
tmp_date <- tmp_state %>%
filter(date <= y) %>%
group_by(date,state) %>%
summarise(shift_biden_margin = weighted.mean(shift_biden_margin, number.of.observations,na.rm=T),
sample_size_weight = sum(sample_size_weight,na.rm=T))
sample_size_weight <- tmp_date %>%
mutate(shift_biden_margin = ifelse(is.nan(shift_biden_margin),NA,shift_biden_margin),
sample_size_weight = ifelse(sample_size_weight == 0,NA,sample_size_weight))
average_shift <- weighted.mean(tmp_date$shift_biden_margin, tmp_date$sample_size_weight *
exp(-0.1 * as.numeric(difftime(y, tmp_date$date,units='days'))),
na.rm=T)
return(
tmp_date %>%
tail(1) %>%
mutate(average_shift = average_shift)
)
}) %>%
bind_rows %>%
return
}) %>%
bind_rows
state_order <- poll_shifts %>%
group_by(state) %>%
summarise(final_shift = last(average_shift)) %>%
arrange(desc(final_shift)) %>% pull(state)
poll_shifts <- poll_shifts %>%
mutate(state = factor(state,levels=state_order))
ggplot(poll_shifts,aes(x=date, y=shift_biden_margin)) +
geom_hline(yintercept = 0,col='gray40') +
geom_point(shape=1,alpha=0.3,aes(size=sample_size_weight),show.legend = F) +
scale_size(range=c(0.5,3)) +
geom_smooth(aes(y=shift_biden_margin),span = 0.2,method='loess',se=F,col='black',size=0.5) +
theme_minimal() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks = seq(-20,20,5)) +
labs(x='Date',y='',title='Change in Biden margin since last poll',
subtitle="Each point is a poll, showing the change in Biden's margin since the last poll from that pollster") +
scale_x_date(limits=c(ymd("2020-09-01"),ymd("2020-11-03")))
ggplot(poll_shifts,aes(x=date, y=shift_biden_margin)) +
geom_hline(yintercept = 0,col='gray40') +
geom_point(shape=1,alpha=0.3,aes(size=sample_size_weight),show.legend = F) +
scale_size(range=c(0.5,3)) +
geom_line(aes(y=average_shift,group=state)) +
#geom_smooth(aes(y=shift_biden_margin,group=state),span = 1,method='loess',se=F,col='black',size=0.5) +
theme_minimal() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks = seq(-20,20,5)) +
facet_wrap(~state) +
labs(x='Date',y='',title='Change in Biden margin since last poll',
subtitle="Each point is a poll, showing the change in Biden's margin since the last poll from that pollster") +
scale_x_date(limits=c(ymd("2020-09-01"),ymd("2020-11-03")))
avg_last_week <- mean(poll_shifts %>% filter(date >= (Sys.Date()-7)) %>% pull(shift_biden_margin),na.rm=T)
poll_shifts %>%
filter(date >= (Sys.Date()-7)) %>%
ggplot(.,aes(x=shift_biden_margin)) +
geom_histogram(binwidth=0.5) +
#geom_density() +
geom_vline(xintercept = 0) +
geom_vline(xintercept = avg_last_week,col='red',linetype=2) +
theme_minimal() +
theme(panel.grid.minor = element_blank()) +
labs(x='Change in Biden margin since last poll',
y='Number of polls') +
scale_x_continuous(breaks = seq(-20,20,1),limits=c(-10,10))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment