Last active
June 7, 2016 14:11
-
-
Save Deleetdk/880b4b0653361d0b5e6b4c911f5ae2bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### THIS IS NOW MOSTLY REDUNDANT | |
### SHINY VERSION HERE: https://github.com/Deleetdk/brexit_model | |
# libs -------------------------------------------------------------------- | |
library(pacman) | |
p_load(lubridate, rvest, stringr, kirkegaard, ggplot2, scales, boot) | |
# scrape data ------------------------------------------------------------- | |
#get newest data on website | |
d_brexit = read_html("https://ig.ft.com/sites/brexit-polling/") %>% html_node("table") %>% html_table() | |
#rename | |
colnames(d_brexit) = c("Stay", "Leave", "Undecided", "Date", "Pollster", "N") | |
# transform --------------------------------------------------------------- | |
#make gap | |
d_brexit$Favor_Leave = d_brexit$Leave - d_brexit$Stay | |
#interpret date | |
d_brexit$Date = lubridate::mdy(d_brexit$Date, locale = "English_United States.1252") | |
#this code may not run on your computer. If not, then find out how locales are treated there. You may not need to use a custom locale at all. | |
#num date | |
d_brexit$Date_num = as.numeric(d_brexit$Date) | |
#fix N | |
d_brexit$N[d_brexit$N == "-"] = NA #recode NA | |
d_brexit$N = d_brexit["N"] %>% as_num_df() %>% unlist() #convert to num | |
#impute N with medians | |
d_brexit$N[is.na(d_brexit$N)] = median(d_brexit$N, na.rm=T) | |
# model ------------------------------------------------------------------- | |
#predict leave-lead by date | |
fit = lm("Favor_Leave ~ Date", data = d_brexit, weights = sqrt(d_brexit$N)) | |
predict(object = fit, newdata = data.frame(Date = dmy("23 Jun 2016"))) | |
#loess | |
fit_loess = loess("Favor_Leave ~ Date_num", data = d_brexit, control=loess.control(surface="direct")) | |
predict(object = fit_loess, newdata = data.frame(Date_num = dmy("23 Jun 2016") %>% as.numeric())) | |
d_brexit$loess_predict = fitted(fit_loess) #get values | |
# bootstrap confidence interval ------------------------------------------- | |
boot_replications = boot(data = d_brexit, statistic = function(data, i) { | |
#subset data | |
tmp = data[i, ] | |
#fit | |
fit_loess = loess("Favor_Leave ~ Date_num", data = tmp, control=loess.control(surface="direct")) | |
#get value | |
predict(object = fit_loess, newdata = data.frame(Date_num = dmy("23 Jun 2016") %>% as.numeric())) | |
}, R = 1000) | |
boot.ci(boot_replications) | |
# plot -------------------------------------------------------------------- | |
ggplot(d_brexit, aes(Date, Favor_Leave)) + | |
geom_point(aes(size = sqrt(N)), alpha = .3) + | |
scale_size_continuous(guide = F) + | |
geom_smooth(method = loess, fullrange = TRUE, method.args = list(control=loess.control(surface="direct"))) + | |
ylab("Leave advantage (%)") + | |
scale_x_date(limits = c(d_brexit$Date[which.min(d_brexit$Date)], dmy("23 Jun 2016"))) + | |
geom_vline(xintercept = dmy("23 Jun 2016") %>% as.numeric(), linetype = "dotted", color = "red") | |
ggsave("brexit_model.png") | |
#predictions distribution | |
GG_denhist(boot_replications$t %>% as.vector()) + xlab("Election day 'leave' advantage (bootstrapped)") | |
ggsave("brexit_predictions.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment