|
# hawaiikernel.com |
|
# primary election |
|
|
|
# INTRODUCTION ----------------------------------------------------------------- |
|
# primary election chart |
|
|
|
# DEPENDENCIES ----------------------------------------------------------------- |
|
# http://www.politico.com/2016-election/results/map/president/hawaii |
|
|
|
# Append the Rtools 3.3 compiler and utility directories to the search PATH.
# Sys.setenv() stores its value verbatim (a literal "%PATH%" is not expanded),
# so the current PATH is read with Sys.getenv() and extended explicitly.
dir.rtools <- paste(
  Sys.getenv("PATH"),
  "C:/RBuildTools/3.3/gcc-4.6.3/bin",
  "c:/RBuildTools/3.3/bin",
  sep = ";"
)

Sys.setenv(PATH = dir.rtools)
|
|
|
# All relative paths used by this script resolve against the scratch folder.
setwd("D:/scratch")

# Load the packages used by this script through pacman.
pacman::p_load(
  dplyr,
  lubridate,
  rvest,
  stringr,
  tidyr,
  readr,
  purrr,
  jsonlite
)
|
|
|
# DATA ------------------------------------------------------------------------- |
|
# Download the two county-level Hawaii result feeds from CNN. flatten = TRUE is
# passed so nested data frames in the response are flattened by jsonlite.
primary_HI_R <- fromJSON(
  txt = "http://data.cnn.com/ELECTION/2016primary/HI/county/S.json",
  flatten = TRUE
)

primary_HI_D <- fromJSON(
  txt = "http://data.cnn.com/ELECTION/2016primary/HI/county/E.json",
  flatten = TRUE
)

# Top-level expression: shows the county table of the first feed for inspection.
primary_HI_R$counties

# lazyeval is loaded separately, after the packages above.
pacman::p_load(lazyeval)
|
|
|
# Extract one candidate's rows from every county of a parsed results feed.
#
# df        : parsed feed; df$counties must provide `race.candidates` (one
#             candidate table per county), `countycode` and `name`.
# candidate : last name matched against the `lname` column of each table.
#
# Returns the matching rows of all counties bound together, with the county
# code added as FIPS and the county name as COUNTY.
#
# The county identifiers are attached to each county's rows *before* binding,
# so a county with no row (or several rows) for the candidate can no longer
# misalign or break the FIPS/COUNTY columns.
scrapeJSON <- function(df, candidate) {
  counties <- df$counties

  rows_by_county <- Map(
    function(cands, fips, county) {
      if (!is.data.frame(cands)) {
        return(NULL)
      }
      # which() drops NA comparisons, matching filter() semantics.
      hits <- cands[which(cands$lname == candidate), , drop = FALSE]
      hits$FIPS <- rep(fips, nrow(hits))
      hits$COUNTY <- rep(county, nrow(hits))
      hits
    },
    counties$race.candidates,
    counties$countycode,
    counties$name
  )

  bind_rows(unname(rows_by_county))
}
|
|
|
# One table per candidate: the first three come from primary_HI_R, the last
# two from primary_HI_D.
cruz    <- scrapeJSON(df = primary_HI_R, candidate = "Cruz")
kasich  <- scrapeJSON(df = primary_HI_R, candidate = "Kasich")
trump   <- scrapeJSON(df = primary_HI_R, candidate = "Trump")

sanders <- scrapeJSON(df = primary_HI_D, candidate = "Sanders")
clinton <- scrapeJSON(df = primary_HI_D, candidate = "Clinton")
|
|
|
# PREPROCESS ------------------------------------------------------------------- |
|
|
|
# Stack the candidate tables of each party and add, per county (FIPS):
#   N = total votes across the stacked candidates
#   P = the candidate's share of N
# replace() then swaps every NA for 0, which also covers the NaN produced by
# 0 / 0 when a county has no votes. bind_rows() is used in place of the
# deprecated rbind_list().
republican <- bind_rows(cruz, kasich, trump) %>%
  group_by(FIPS) %>%
  mutate(
    N = sum(votes),
    P = votes / sum(votes)
  ) %>%
  replace(is.na(.), 0)

democrat <- bind_rows(sanders, clinton) %>%
  group_by(FIPS) %>%
  mutate(
    N = sum(votes),
    P = votes / sum(votes)
  ) %>%
  replace(is.na(.), 0)

# Both parties in one table (grouping is removed further down the script).
primary_df <- bind_rows(republican, democrat)

# Keep the plotting columns and spell out the party codes; any code other than
# "D" or "R" becomes NA.
primary_df <- primary_df %>%
  select(lname, party, COUNTY, P, votes) %>%
  mutate(
    party = ifelse(
      party == "D", "Democratic Party",
      ifelse(party == "R", "Republican Party", NA)
    )
  )

# Stacking order of the candidates within each county bar.
answers_order <- c("Clinton", "Sanders", "Trump", "Cruz", "Kasich")
|
|
|
# Locate the centre line of a stacked bar on the cumulative (0..1) scale.
#
# middle_pos   : scalar position among the ordered levels. An integer k puts
#                the centre at the END of level k; a fractional value (e.g.
#                2.5) puts it at the MIDPOINT of level ceiling(middle_pos).
# levels       : numeric level code of each segment present in the bar.
# start_values : cumulative start of each segment (parallel to `levels`).
# end_values   : cumulative end of each segment (parallel to `levels`).
#
# If the pivot level is absent, the start of the first listed level above
# middle_pos is used. If no level lies above it either, the whole bar sits
# before the centre, so the end of the last segment is returned (previously
# this case produced NA). Empty input yields NA.
get_middle_position <- function(middle_pos, levels, start_values, end_values) {
  is_fractional <- middle_pos %% 1 != 0
  pivot <- if (is_fractional) ceiling(middle_pos) else middle_pos

  if (pivot %in% levels) {
    at_pivot <- levels == pivot
    if (is_fractional) {
      # Halfway through the pivot segment.
      return((end_values[at_pivot] - start_values[at_pivot]) * .5 +
               start_values[at_pivot])
    }
    return(end_values[at_pivot])
  }

  # Pivot level missing: the centre falls where the next higher level begins.
  starts_above <- start_values[levels > middle_pos]
  if (length(starts_above) > 0) {
    return(starts_above[1])
  }

  # Nothing above the centre: it lies at the end of the stacked segments.
  if (length(end_values) > 0) max(end_values) else NA_real_
}
|
|
|
# Centre line position among the ordered lname levels: with an integer value,
# get_middle_position() returns the end of that level's segment.
middle_pos <- 2

# Ungrouped copy with COUNTY as a factor and lname ordered by answers_order
# (names not listed there become NA), sorted by county then candidate.
primary_df2 <- primary_df %>%
  ungroup() %>%
  mutate(
    COUNTY = as.factor(COUNTY),
    lname = factor(lname, levels = answers_order)
  ) %>%
  arrange(COUNTY, lname)

# Per county: each candidate's share of the county's votes, the cumulative
# start/end of the candidate's segment, and the same bounds shifted so that
# the centre line sits at 0.
primary_df_RP <- primary_df2 %>%
  group_by(COUNTY) %>%
  filter(!is.na(lname)) %>%
  mutate(
    response_relative = votes / sum(votes),
    y_start = dplyr::lag(cumsum(response_relative), default = 0),
    y_end = y_start + response_relative,
    middle = get_middle_position(middle_pos, as.numeric(lname), y_start, y_end)
  ) %>%
  mutate(
    y_start_center = y_start - middle,
    y_end_center = y_end - middle,
    y_max_centered = max(y_end_center)
  ) %>%
  ungroup()

# Per county: row counts with and without a recognised candidate, joined with
# the extent of the bar on either side of the centre line (neg = left of 0,
# pos = right of 0).
primary_df_wide <- primary_df2 %>%
  group_by(COUNTY) %>%
  summarise(
    missing = sum(is.na(lname)),
    total = n(),
    completed = total - missing
  ) %>%
  left_join(
    primary_df_RP %>%
      group_by(COUNTY) %>%
      summarise(
        neg = abs(min(y_start_center)),
        pos = max(y_end_center)
      ) %>%
      ungroup(),
    by = c("COUNTY")
  )

# Long form of the completed/missing counts.
primary_df_long <- primary_df_wide %>%
  gather(type, value, completed, missing)

# Everything the chart needs; primary_df_RP already excludes NA candidates.
final <- list(
  percentages = primary_df_wide,
  main = primary_df_RP,
  counts = primary_df_long
)
|
|
|
# CHART ------------------------------------------------------------------------ |
|
pacman::p_load(ggplot2)

# Register Consolas for the Windows graphics devices. windowsFonts() is only
# available on Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(Consolas = windowsFont("Consolas"))
}

# COUNTY is a factor; bars are drawn at its integer codes and labelled with
# its level names.
county_levels <- levels(final$main$COUNTY)

# Horizontal stacked bars per county, centred on the dashed line at 0, with
# the share on each side printed beyond the bar ends.
primary_plot <- ggplot(final$main) +
  geom_rect(aes(xmin = as.numeric(COUNTY) - .4, xmax = as.numeric(COUNTY) + .4,
                ymin = y_start_center, ymax = y_end_center, fill = lname)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_text(aes(x = as.numeric(COUNTY), y = -1.06,
                label = paste0(round(neg * 100), "%")),
            data = final$percentages, size = 6, fontface = "bold",
            colour = "#535353") +
  geom_text(aes(x = as.numeric(COUNTY), y = 1.06,
                label = paste0(round(pos * 100), "%")),
            data = final$percentages, size = 6, fontface = "bold",
            colour = "#535353") +
  coord_flip() +
  scale_y_continuous("", breaks = seq(-1, 1, .5), limits = c(-1.1, 1.1),
                     labels = c("100%", "50%", "0", "50%", "100%")) +
  xlab("County") + ylab("Votes") +
  scale_x_continuous(breaks = seq_along(county_levels),
                     labels = county_levels) +
  scale_fill_manual("",
                    labels = c("Clinton", "Sanders", "Trump", "Cruz", "Kasich"),
                    values = c("#9ecae1", "#3182bd", "#de2d26",
                               "#fc9272", "#fee0d2")) +
  # atop() stacks two arguments; the inner atop() renders the subtitle line.
  ggtitle(expression(
    atop("State of Hawaii 2016 Primary Election Results",
         atop("Primary election results by County", "")))) +
  theme_bw() +
  theme(panel.background = element_rect(fill = "#F0F0F0"),
        plot.background = element_rect(fill = "#F0F0F0"),
        panel.border = element_rect(colour = "#F0F0F0"),
        panel.grid.major = element_line(colour = "#D0D0D0", size = .75),
        panel.grid.minor = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "right",
        legend.key = element_blank(),
        legend.background = element_blank(),
        strip.text.x = element_text(face = "bold", family = "Consolas",
                                    size = 8, colour = "#3C3C3C"),
        plot.title = element_text(face = "bold", family = "Consolas",
                                  vjust = -1, colour = "#3C3C3C", size = 22),
        axis.text.x = element_text(size = 16, family = "Consolas",
                                   colour = "#535353", face = "bold",
                                   angle = 90),
        axis.text.y = element_text(size = 12, family = "Consolas",
                                   colour = "#535353", face = "bold"),
        axis.title.y = element_text(size = 16, family = "Consolas",
                                    colour = "#535353", face = "bold",
                                    vjust = 0),
        axis.title.x = element_text(size = 16, family = "Consolas",
                                    colour = "#535353", face = "bold",
                                    vjust = 0))

# Render to PNG. The plot is printed explicitly so the file is also drawn when
# this script is run through source(), and the device is closed even if
# rendering fails.
png("primary.png", height = 500, width = 960)
tryCatch(print(primary_plot), finally = dev.off())