Skip to content

Instantly share code, notes, and snippets.

@glamp
Last active June 11, 2021 01:18
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 12 You must be signed in to fork a gist
  • Save glamp/223ee1b5665b8ab8d90fc2786f4b6b44 to your computer and use it in GitHub Desktop.
Save glamp/223ee1b5665b8ab8d90fc2786f4b6b44 to your computer and use it in GitHub Desktop.
library(XML)
library(uuid)
library(stringr)
library(plyr)
library(reshape2)
library(ggplot2)
f <- "https://raw.githubusercontent.com/chris-taylor/USElection/master/data/electoral-college-votes.csv"
electoral.college <- read.csv(f, header=FALSE)
names(electoral.college) <- c("state", "electoral_votes")
head(electoral.college)
# exclude D.C. from the data pull b/c there aren't any polls!. we'll add it in manually
states <- electoral.college$state[c(1:7, 9:51)]
results <- ldply(states, function(state) {
url <- "http://www.electionprojection.com/latest-polls/%s-presidential-polls-trump-vs-clinton-vs-johnson-vs-stein.php"
state.fmt <- gsub(" ", "-", tolower(state))
url.state <- sprintf(url, state.fmt)
print(url.state)
r <- readHTMLTable(url.state, stringsAsFactors=FALSE)[[3]]
r$state <- state
r$id <- 1:nrow(r)
cols <- c("Dates", "Firm", "state", "Clinton", "Trump", "Johnson", "id")
r <- r[2:nrow(r),][,cols]
r <- melt(r, id=c("Dates", "Firm", "state", "id"), variable.name="candidate", value.name="vote")
names(r) <- c("date", "poll", "state", "id", "candidate", "vote")
r$race <- ""
cols <- c("date", "race", "state", "poll", "candidate", "vote", "id")
r <- r[,cols]
r$vote <- as.numeric(r$vote)
r
})
# adding D.C. on manually b/c it's slightly different. it also doesn't produce material changes to the
# results
results <- rbind(results, data.frame(
date='10/20 - 10/28',
race='',
state='District of Columbia',
poll='SurveyMonkey',
candidate=c("Clinton", "Trump", "Johnson"),
vote=c(87, 5, 4),
id=1
))
head(results)
tail(results)
table(results$candidate)
table(results$state)
results <- results[order(results$state, results$id, results$candidate),]
poll.freq <- data.frame(table(results$state))
ggplot(poll.freq, aes(x=Var1, weight=Freq)) +
geom_bar() +
coord_flip() +
scale_y_continuous("# of Polls") +
scale_x_discrete("State", limits=rev(levels(poll.freq$Var1)))
weight <- function(i) {
exp(1)*1 / exp(i)
}
w <- data.frame(poll=1:8, weight=weight(1:8))
ggplot(w, aes(x=poll, weight=weight)) +
geom_bar() +
scale_x_continuous("nth poll", breaks=1:8) +
scale_y_continuous("weight")
election.sim <- function() {
ddply(results, .(state), function(polls.state) {
polls.state$.id <- NULL
polls.state$id <- cumsum(!duplicated(polls.state$id))
polls.state$weight <- weight(polls.state$id)
polls.state$weighted_vote <- polls.state$vote * polls.state$weight
tally <- ddply(polls.state, .(candidate), function(p) {
r <- rnorm(nrow(p), 1, .15)
data.frame(weighted_vote=sum(p$weighted_vote * r))
})
tally <- head(tally, 3)
tally$estimated_popular_vote <- tally$weighted_vote / sum(tally$weighted_vote)
tally
})
}
(election <- election.sim())
colormap <- c(Clinton="#179ee0", Trump="#ff5d40", Johnson="#f6b900")
ggplot(election, aes(x=candidate, weight=estimated_popular_vote, fill=candidate)) +
geom_bar() +
facet_wrap(~state) +
scale_fill_manual(values=colormap) +
scale_y_continuous(labels = scales::percent, breaks=c(0, 0.25, 0.5, 0.75, 1)) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank())
library(doParallel)
doParallel::registerDoParallel(cores=8)
simulated.state.results <- ldply(1:10000, function(i) {
election <- election.sim()
election.results <- dcast(election, state ~ candidate, value.var="estimated_popular_vote")
election.results <- merge(election.results, electoral.college, by.x="state", by.y="state", all.x = TRUE)
candidates <- c("Clinton", "Trump", "Johnson")
election.results$winner <- candidates[max.col(election.results[,candidates])]
election.results$sim_id <- UUIDgenerate()
election.results
}, .progress="text", .parallel=TRUE)
simulated.elections <- ddply(simulated.state.results, .(sim_id), function(simulation) {
clinton <- sum(ifelse(simulation$Clinton > simulation$Trump & simulation$Clinton > simulation$Johnson, simulation$electoral_votes, 0))
trump <- sum(ifelse(simulation$Clinton < simulation$Trump & simulation$Trump > simulation$Johnson, simulation$electoral_votes, 0))
johnson <- sum(ifelse(simulation$Johnson > simulation$Trump & simulation$Johnson > simulation$Trump, simulation$electoral_votes, 0))
data.frame(
clinton=clinton,
trump=trump,
johnson=johnson,
winner=ifelse(clinton > trump, "Clinton", "Trump")
)
}, .progress="text", .parallel=TRUE)
table(simulated.elections$winner)
table(simulated.elections$winner) / nrow(simulated.elections)
summary(simulated.elections$clinton)
summary(simulated.elections$trump)
summary(simulated.elections$johnson)
head(simulated.elections)
ggplot(melt(simulated.elections[,1:4], id.vars = "sim_id"), aes(x=value, fill=variable)) +
geom_histogram(position="identity", alpha=0.7) +
scale_fill_manual(values=c(clinton="#179ee0", trump="#ff5d40", johnson="#f6b900"))
ggplot(melt(simulated.elections[,1:3], id.vars = "sim_id"), aes(x=value, fill=variable)) +
geom_histogram(position="identity", alpha=0.7) +
scale_fill_manual(values=c(clinton="#179ee0", trump="#ff5d40", johnson="#f6b900"))
ggplot(melt(simulated.elections[,1:3], id.vars = "sim_id"), aes(x=value, fill=variable)) +
geom_density(position="identity", alpha=0.7) +
scale_fill_manual(values=c(clinton="#179ee0", trump="#ff5d40", johnson="#f6b900"))
# popular vote
clinton <- sum(simulated.elections$clinton * simulated.elections$electoral_votes)
trump <- sum(simulated.elections$trump * simulated.elections$electoral_votes)
johnson <- sum(simulated.elections$johnson * simulated.elections$electoral_votes)
total <- sum(clinton, trump, johnson)
data.frame(Clinton=clinton/total, Trump=trump/total, Johnson=johnson/total)
simulated.state.results.agg <- ddply(simulated.state.results, .(state), function(state) {
data.frame(
state=state$state[1],
trump=sum(state$winner=="Trump") / nrow(state),
clinton=sum(state$winner=="Clinton") / nrow(state),
johnson=sum(state$winner=="Johnson") / nrow(state),
electoral_votes=state$electoral_votes[1],
n=nrow(state)
)
})
simulated.state.results.agg
data.frame(
Trump=sum(simulated.state.results.agg$electoral_votes * simulated.state.results.agg$trump),
Clinton=sum(simulated.state.results.agg$electoral_votes * simulated.state.results.agg$clinton),
Johnson=sum(simulated.state.results.agg$electoral_votes * simulated.state.results.agg$johnson)
)
us.states <- map_data("state")
simulated.state.results.agg$state.mergecol <- as.character(tolower(simulated.state.results.agg$state))
state.plot <- merge(us.states, simulated.state.results.agg, by.x="region", by.y="state.mergecol")
table(us.states$region)
state.plot$winner <- ifelse(state.plot$trump > state.plot$clinton, "Trump", "Clinton")
# binary outcomes for states
ggplot(state.plot, aes(x=long, y=lat, group=group, fill=winner)) +
geom_polygon(colour="white") +
scale_fill_manual(values=colormap) +
coord_map()
# shaded outcomes for states
ggplot(state.plot, aes(x=long, y=lat)) +
geom_polygon(aes(group=group, fill=trump), colour="grey10") +
scale_fill_gradient2("", low=colormap["Clinton"], mid="white", high=colormap["Trump"], midpoint=0.5,
breaks=c(0, 0.5, 1), labels=c("Clinton", "?", "Trump")) +
theme_minimal() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
legend.text=element_text(size=12, face="bold")) +
coord_map()
us.regions <- read.csv("~/workspace/github.com/yhat/blog/code-exp/election/regions.csv")
state.plot <- merge(state.plot, us.regions, by.x="state", by.y="state")
head(state.plot)
# http://www.census.gov/econ/census/help/geography/regions_and_divisions.html
# http://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf
ddply(state.plot, .(division), function(div) {
p <- ggplot(div, aes(x=long, y=lat)) +
geom_polygon(aes(group=group, fill=trump), colour="grey10") +
scale_fill_gradient2("", low=colormap["Clinton"], mid="white", high=colormap["Trump"], midpoint=0.5,
guide=FALSE) +
theme_minimal() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
legend.text=element_text(size=12, face="bold")) +
coord_map() +
ggtitle(div$division[1])
filename <- paste0("~/workspace/github.com/yhat/blog/static/img/election-region-", tolower(str_replace_all(div$division[1], " ", "-")), ".png")
print(p)
ggsave(filename)
NULL
})
@jbredice
Copy link

jbredice commented Nov 3, 2016

Can you provide the source for this?
~/workspace/github.com/yhat/blog/code-exp/election/regions.csv

@bbroke
Copy link

bbroke commented Nov 4, 2016

@CatzillaOrz
Copy link

Oopus!

@simonisacoder
Copy link

simonisacoder commented Nov 30, 2016

the latest presidential poll in http://www.electionprojection.com has been removed. can you send me your file of the data set of "results"?
Thx a lot. 921181549@qq.com or simonisacoder@gmail.com. or can you upload it to the Github.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment