Skip to content

Instantly share code, notes, and snippets.

@tyokota tyokota/README.md
Last active May 2, 2016

Embed
What would you like to do?
2016 primary election

###### chart 1. 2016 Hawaii primary election results.

Pairing national election data with maps is a hot mess.

Recently, I have come across a few choropleth maps that use 2016 primary election data to present the relative proportion of voters by county for a single candidate. One example that stands out is a map of Bernie supporters in Alaska. The heavily colored map feels misleading considering that only ~600 Democrats participated. More people could fit in a Walmart on Black Friday.

Sometimes, a less sexy visualization tells an interesting story. My modest stacked bar graph shows the proportion of Republican and Democratic voters (which portrays Hawaii’s huge imbalance toward the Democratic Party) and the relative proportion of votes each candidate garnered.

Choropleth maps can be beautiful and effective, especially when making state-by-state comparisons. Other times, less fancy charts can be just as effective. It’s all about the story.

# hawaiikernel.com
# primary election
# INTRODUCTION -----------------------------------------------------------------
# primary election chart
# DEPENDENCIES -----------------------------------------------------------------
# http://www.politico.com/2016-election/results/map/president/hawaii
# Append the Rtools toolchain directories to the search path.
# R does not expand the Windows "%PATH%" placeholder, so the current value is
# read with Sys.getenv(); writing the literal placeholder would discard every
# directory already on PATH.
dir.rtools <- paste(
  c(Sys.getenv("PATH"),
    "C:/RBuildTools/3.3/gcc-4.6.3/bin",
    "c:/RBuildTools/3.3/bin"),
  collapse = .Platform$path.sep
)
Sys.setenv(PATH = dir.rtools)

# Work from the scratch directory when it exists; otherwise stay in the current
# directory instead of aborting the whole script on another machine.
scratch_dir <- "D:/scratch"
if (dir.exists(scratch_dir)) {
  setwd(scratch_dir)
} else {
  message("Directory ", scratch_dir, " not found; using ", getwd())
}

# Attach (installing if necessary) the packages used by the rest of the script.
pacman::p_load(dplyr, lubridate, rvest, stringr, tidyr, readr, purrr, jsonlite)
# DATA -------------------------------------------------------------------------
# Download the county-level CNN result feeds for Hawaii. Judging by the object
# names and the candidates extracted from them later, the "S" feed is used as
# the Republican contest and the "E" feed as the Democratic one.
primary_HI_R <- fromJSON(
  txt = "http://data.cnn.com/ELECTION/2016primary/HI/county/S.json",
  flatten = TRUE
)
primary_HI_D <- fromJSON(
  txt = "http://data.cnn.com/ELECTION/2016primary/HI/county/E.json",
  flatten = TRUE
)

# Interactive peek at the county table of the Republican feed.
primary_HI_R$counties

# lazyeval supplies interp(), used when building filter expressions.
pacman::p_load(lazyeval)
# Extract one candidate's county-level rows from a parsed results feed.
#
# Arguments:
#   df        - list returned by fromJSON(); must contain a `counties` data
#               frame with a `race.candidates` list-column (one data frame of
#               candidates per county) plus `countycode` and `name` columns.
#   candidate - last name to keep, matched against the `lname` column.
#
# Returns one row per county in which the candidate appears, with the county's
# code and name attached as `FIPS` and `COUNTY`.
scrapeJSON <- function(df, candidate) {
  counties <- df$counties
  map_df(seq_along(counties$race.candidates), function(i) {
    county_rows <- counties$race.candidates[[i]]
    # Plain subsetting replaces the deprecated filter_() + lazyeval::interp();
    # %in% (unlike ==) never yields NA, so rows with a missing lname are dropped.
    hit <- county_rows[county_rows$lname %in% candidate, , drop = FALSE]
    # Label the rows before binding so the county identifiers stay aligned even
    # when a county has no row for this candidate.
    mutate(hit,
           FIPS = counties$countycode[i],
           COUNTY = counties$name[i])
  })
}
# Per-candidate county results, pulled from the feed of each party's contest.
# Republican field:
trump <- scrapeJSON(primary_HI_R, candidate = "Trump")
cruz <- scrapeJSON(primary_HI_R, candidate = "Cruz")
kasich <- scrapeJSON(primary_HI_R, candidate = "Kasich")
# Democratic field:
clinton <- scrapeJSON(primary_HI_D, candidate = "Clinton")
sanders <- scrapeJSON(primary_HI_D, candidate = "Sanders")
# PREPROCESS -------------------------------------------------------------------
# Add, for one party's stacked candidate rows, the total votes cast in each
# county (N) and each candidate's share of that total (P). Missing values
# (including the NaN produced by a 0 / 0 share) are replaced by 0.
add_county_share <- function(results) {
  results %>%
    group_by(FIPS) %>%
    mutate(N = sum(votes),
           P = votes / N) %>%
    replace(is.na(.), 0)
}

# bind_rows() replaces rbind_list(), which is no longer available in dplyr.
republican <- add_county_share(bind_rows(cruz, kasich, trump))
democrat <- add_county_share(bind_rows(sanders, clinton))

# Stack both parties, keep the columns the chart needs and spell out the
# party codes; any code other than "D" or "R" becomes NA.
primary_df <- bind_rows(republican, democrat) %>%
  select(lname, party, COUNTY, P, votes) %>%
  mutate(party = ifelse(party == "D", "Democratic Party",
                        ifelse(party == "R", "Republican Party", NA)))

# Stacking order of the candidates: Democrats first, then Republicans.
answers_order <- c("Clinton", "Sanders", "Trump", "Cruz", "Kasich")
# Locate the value at which a stacked bar should be centred.
#
# Arguments:
#   middle_pos   - position of the centre on the level scale. A whole number k
#                  centres on the upper edge of level k; a fractional value
#                  centres on the midpoint of level ceiling(middle_pos).
#   levels       - numeric level code of each segment.
#   start_values - lower edge of each segment.
#   end_values   - upper edge of each segment.
#
# Returns the centring value. When the targeted level is absent, the lower
# edge of the first segment above `middle_pos` is used. When no segment lies
# above it either, the largest upper edge is used (the original returned NA
# here), and NA is returned only for empty input.
get_middle_position <- function(middle_pos, levels, start_values, end_values) {
  is_fractional <- middle_pos %% 1 != 0
  target <- if (is_fractional) ceiling(middle_pos) else middle_pos

  if (target %in% levels) {
    at_target <- levels == target
    if (is_fractional) {
      # Midpoint of the segment that straddles the centre.
      return((end_values[at_target] - start_values[at_target]) * .5 +
               start_values[at_target])
    }
    # Boundary directly after the last segment on the lower side.
    return(end_values[at_target])
  }

  above <- start_values[levels > middle_pos]
  if (length(above) > 0) {
    above[1]
  } else if (length(end_values) > 0) {
    # Every segment sits below the centre: centre on the end of the stack.
    max(end_values)
  } else {
    NA_real_
  }
}
# Centre each bar on the upper edge of the 2nd candidate in `answers_order`,
# i.e. on the boundary between the Democratic and Republican candidates.
middle_pos <- 2

# Order rows by county and by candidate stacking order. The standard verbs
# replace the deprecated underscore variants (arrange_, group_by_).
primary_df2 <- primary_df %>%
  ungroup() %>%
  mutate(COUNTY = as.factor(COUNTY),
         lname = factor(lname, levels = answers_order)) %>%
  arrange(COUNTY, lname)

# Per county: each candidate's share of all votes, the cumulative start/end of
# its bar segment, and the same coordinates shifted so the centre sits at 0.
primary_df_RP <- primary_df2 %>%
  group_by(COUNTY) %>%
  filter(!is.na(lname)) %>%
  mutate(response_relative = votes / sum(votes),
         y_start = lag(cumsum(response_relative), default = 0),
         y_end = y_start + response_relative,
         middle = get_middle_position(middle_pos, as.numeric(lname),
                                      y_start, y_end)) %>%
  mutate(y_start_center = y_start - middle,
         y_end_center = y_end - middle,
         y_max_centered = max(y_end_center)) %>%
  ungroup()

# Extent of each county's bar on either side of the centre line.
county_extents <- primary_df_RP %>%
  group_by(COUNTY) %>%
  summarise(neg = abs(min(y_start_center)),
            pos = max(y_end_center)) %>%
  ungroup()

# Row counts per county (rows whose candidate is not in `answers_order` count
# as missing), joined with the bar extents used for the percentage labels.
primary_df_wide <- primary_df2 %>%
  group_by(COUNTY) %>%
  summarise(missing = sum(is.na(lname)),
            total = n(),
            completed = total - missing) %>%
  left_join(county_extents, by = c("COUNTY"))

primary_df_long <- primary_df_wide %>%
  gather(type, value, completed, missing)

# Everything the chart needs. Rows with an NA candidate were already removed
# when primary_df_RP was built, so it is used as-is.
final <- list(
  percentages = primary_df_wide,
  main = primary_df_RP,
  counts = primary_df_long)
# CHART ------------------------------------------------------------------------
pacman::p_load(ggplot2)

# windowsFonts() exists only on Windows builds of R; elsewhere the font family
# is left to the graphics device.
if (.Platform$OS.type == "windows") {
  windowsFonts(Consolas = windowsFont("Consolas"))
}

county_levels <- levels(final$main$COUNTY)

# Diverging stacked bars, one per county, centred on the party boundary.
primary_chart <- ggplot(final$main) +
  geom_rect(aes(xmin = as.numeric(COUNTY) - .4, xmax = as.numeric(COUNTY) + .4,
                ymin = y_start_center, ymax = y_end_center, fill = lname)) +
  geom_hline(yintercept = 0, lty = 2) +
  # Share of the votes on each side of the centre line, printed at the ends.
  geom_text(aes(x = as.numeric(COUNTY), y = -1.06,
                label = paste0(round(neg * 100), "%")),
            data = final$percentages, size = 6, fontface = "bold",
            colour = "#535353") +
  geom_text(aes(x = as.numeric(COUNTY), y = 1.06,
                label = paste0(round(pos * 100), "%")),
            data = final$percentages, size = 6, fontface = "bold",
            colour = "#535353") +
  coord_flip() +
  scale_y_continuous("", breaks = seq(-1, 1, .5), limits = c(-1.1, 1.1),
                     labels = c("100%", "50%", "0", "50%", "100%")) +
  xlab("County") + ylab("Votes") +
  # Counties are drawn at their integer factor codes; label them by name.
  scale_x_continuous(breaks = seq_along(county_levels),
                     labels = county_levels) +
  scale_fill_manual("",
                    labels = c("Clinton", "Sanders", "Trump", "Cruz", "Kasich"),
                    values = c("#9ecae1", "#3182bd", "#de2d26",
                               "#fc9272", "#fee0d2")) +
  # atop() takes exactly two arguments: title over (subtitle over nothing).
  ggtitle(expression(
    atop("State of Hawaii 2016 Primary Election Results",
         atop("Primary election results by County", "")))) +
  # theme_bw() resets every theme element, so all customisation follows it.
  theme_bw() +
  theme(panel.background = element_rect(fill = "#F0F0F0"),
        plot.background = element_rect(fill = "#F0F0F0"),
        panel.border = element_rect(colour = "#F0F0F0"),
        panel.grid.major = element_line(colour = "#D0D0D0", size = .75),
        panel.grid.minor = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "right",
        legend.key = element_blank(),
        legend.background = element_blank(),
        strip.text.x = element_text(face = "bold", family = "Consolas",
                                    size = 8, colour = "#3C3C3C"),
        plot.title = element_text(face = "bold", family = "Consolas",
                                  vjust = -1, colour = "#3C3C3C", size = 22),
        axis.text.x = element_text(size = 16, family = "Consolas",
                                   colour = "#535353", face = "bold",
                                   angle = 90),
        axis.text.y = element_text(size = 12, family = "Consolas",
                                   colour = "#535353", face = "bold"),
        axis.title.y = element_text(size = 16, family = "Consolas",
                                    colour = "#535353", face = "bold",
                                    vjust = 0),
        axis.title.x = element_text(size = 16, family = "Consolas",
                                    colour = "#535353", face = "bold",
                                    vjust = 0))

# Render to file. The explicit print() is required because a ggplot object is
# not drawn automatically when the script is run through source().
png("primary.png", height = 500, width = 960)
print(primary_chart)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.