Skip to content

Instantly share code, notes, and snippets.

@rinze
Created November 18, 2016 03:36
Show Gist options
  • Save rinze/781426c433fa1cbbb3b79b7492187d68 to your computer and use it in GitHub Desktop.
Save rinze/781426c433fa1cbbb3b79b7492187d68 to your computer and use it in GitHub Desktop.
library(readr)
library(dplyr)
library(ggplot2)
#' Extract the leading character of each value
#'
#' @param x A vector. Non-character input is coerced to character by
#'   `substr()` before the first character is taken.
#' @return A character vector the same length as `x` holding the first
#'   character of each element's text form.
get_first_digit <- function(x) {
  substr(x, start = 1L, stop = 1L)
}
# Download the 2016 presidential general election results by county.
# NOTE(review): the code below relies on the columns `name` (candidate) and
# `votes` (vote count) being present -- confirm against the CSV header.
votes <- read_csv("https://github.com/Prooffreader/election_2016_data/raw/master/data/presidential_general_election_2016_by_county.csv")
# Not interested in 0 votes (zero has no leading significant digit). Rows with
# a missing count are dropped as well: a bare `!= 0` mask yields NA for them,
# and an NA row index produces all-NA rows instead of removing the row.
votes <- votes[!is.na(votes$votes) & votes$votes != 0, ]
# substr() inside get_first_digit() is vectorised, so the helper is applied to
# the whole column at once. This always yields a character vector, whereas
# sapply() returns an empty list when there are no rows.
votes$first_digit <- get_first_digit(votes$votes)
# Count rows for every (candidate, first digit) pair. summarise() only peels
# off the last grouping variable, so the result would stay grouped by `name`;
# ungroup() makes sure no grouping leaks into the join and later steps.
votes_dist <- votes %>%
  group_by(name, first_digit) %>%
  summarise(n = n()) %>%
  ungroup()
# Total number of counted rows per candidate.
# NOTE(review): this equals the number of counties only if the data holds one
# row per candidate per county -- confirm against the CSV.
votes_candidate <- votes_dist %>%
  group_by(name) %>%
  summarise(n_counties = sum(n))
# Attach each candidate's total to its per-digit rows and turn the counts
# into observed proportions.
votes_dist <- left_join(votes_dist, votes_candidate, by = "name")
votes_dist$observed <- votes_dist$n / votes_dist$n_counties
# Expected probs: Benford's law gives P(d) = log10(1 + 1 / d) for a leading
# digit d in 1..9, so benford[d] is the expected share of digit d.
benford <- log10(1 + (1 / 1:9))
# Get only 4 main candidates (per the original author's note): keep the
# candidates with more than 2500 counted rows.
votes_dist <- votes_dist[votes_dist$n_counties > 2500, ]
# Look the expected share up by each row's own first digit instead of
# recycling the 9-element vector down the column. Recycling only lines up when
# every candidate has exactly one row per digit, in digit order, and tibble
# rejects recycling of vectors whose length is neither 1 nor nrow.
# Rows whose first character is not "1".."9" get NA.
votes_dist$expected <- benford[match(votes_dist$first_digit, as.character(1:9))]
# One panel per candidate: the line shows the Benford expectation and the
# points show the observed share of each first digit.
plt1 <- ggplot(votes_dist) +
  # first_digit is a character (discrete) axis, so a constant group is needed
  # for geom_line() to connect the points into a single line per panel.
  geom_line(aes(x = first_digit, y = expected, group = 1)) +
  geom_point(aes(x = first_digit, y = observed)) +
  facet_wrap(~ name) +
  # Title spelling corrected: the law is named after Frank Benford.
  ggtitle("Benford's law applied to USA 2016 Elections by county") +
  ylab("Observed ratio") +
  xlab("First digit")
# Render the plot explicitly so it also shows when the script is source()d.
print(plt1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment