Skip to content

Instantly share code, notes, and snippets.

@Deleetdk
Last active June 7, 2016 14:11
Show Gist options
  • Save Deleetdk/880b4b0653361d0b5e6b4c911f5ae2bb to your computer and use it in GitHub Desktop.
Save Deleetdk/880b4b0653361d0b5e6b4c911f5ae2bb to your computer and use it in GitHub Desktop.
### THIS IS NOW MOSTLY REDUNDANT
### SHINY VERSION HERE: https://github.com/Deleetdk/brexit_model
# libs --------------------------------------------------------------------
library(pacman)
p_load(lubridate, rvest, stringr, kirkegaard, ggplot2, scales, boot)
# scrape data -------------------------------------------------------------
#get newest data on website
d_brexit = read_html("https://ig.ft.com/sites/brexit-polling/") %>% html_node("table") %>% html_table()
#rename
colnames(d_brexit) = c("Stay", "Leave", "Undecided", "Date", "Pollster", "N")
# transform ---------------------------------------------------------------
#make gap
d_brexit$Favor_Leave = d_brexit$Leave - d_brexit$Stay
#interpret date
d_brexit$Date = lubridate::mdy(d_brexit$Date, locale = "English_United States.1252")
#this code may not run on your computer. If not, then find out how locales are treated there. You may not need to use a custom locale at all.
#num date
d_brexit$Date_num = as.numeric(d_brexit$Date)
#fix N
d_brexit$N[d_brexit$N == "-"] = NA #recode NA
d_brexit$N = d_brexit["N"] %>% as_num_df() %>% unlist() #convert to num
#impute N with medians
d_brexit$N[is.na(d_brexit$N)] = median(d_brexit$N, na.rm=T)
# model -------------------------------------------------------------------
#predict leave-lead by date
fit = lm("Favor_Leave ~ Date", data = d_brexit, weights = sqrt(d_brexit$N))
predict(object = fit, newdata = data.frame(Date = dmy("23 Jun 2016")))
#loess
fit_loess = loess("Favor_Leave ~ Date_num", data = d_brexit, control=loess.control(surface="direct"))
predict(object = fit_loess, newdata = data.frame(Date_num = dmy("23 Jun 2016") %>% as.numeric()))
d_brexit$loess_predict = fitted(fit_loess) #get values
# bootstrap confidence interval -------------------------------------------
boot_replications = boot(data = d_brexit, statistic = function(data, i) {
#subset data
tmp = data[i, ]
#fit
fit_loess = loess("Favor_Leave ~ Date_num", data = tmp, control=loess.control(surface="direct"))
#get value
predict(object = fit_loess, newdata = data.frame(Date_num = dmy("23 Jun 2016") %>% as.numeric()))
}, R = 1000)
boot.ci(boot_replications)
# plot --------------------------------------------------------------------
ggplot(d_brexit, aes(Date, Favor_Leave)) +
geom_point(aes(size = sqrt(N)), alpha = .3) +
scale_size_continuous(guide = F) +
geom_smooth(method = loess, fullrange = TRUE, method.args = list(control=loess.control(surface="direct"))) +
ylab("Leave advantage (%)") +
scale_x_date(limits = c(d_brexit$Date[which.min(d_brexit$Date)], dmy("23 Jun 2016"))) +
geom_vline(xintercept = dmy("23 Jun 2016") %>% as.numeric(), linetype = "dotted", color = "red")
ggsave("brexit_model.png")
#predictions distribution
GG_denhist(boot_replications$t %>% as.vector()) + xlab("Election day 'leave' advantage (bootstrapped)")
ggsave("brexit_predictions.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment