Skip to content

Instantly share code, notes, and snippets.

@jamesdunham
Created June 17, 2018 17:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamesdunham/9805895741354311f9945366026f1785 to your computer and use it in GitHub Desktop.
Save jamesdunham/9805895741354311f9945366026f1785 to your computer and use it in GitHub Desktop.
Review New York county returns
library(data.table)
library(stringr)
# Base directory under which the 'openelections-data-ny' folder is looked up
OE_PATH = '~/medsl/openelections'
# Infix helper: case-insensitive regex detection.
# Returns, for each element of `string`, whether `pattern` matches it.
# Example: candidate %=% 'total'
`%=%` = function(string, pattern) {
  insensitive = stringr::regex(pattern, ignore_case = TRUE)
  str_detect(string, insensitive)
}
# Infix helper: case-insensitive regex removal.
# Returns `string` with every match of `pattern` deleted.
# Example: votes %-% ','
`%-%` = function(string, pattern) {
  insensitive = regex(pattern, ignore_case = TRUE)
  str_remove_all(string, insensitive)
}
# Read and combine counties
# Locate the 2016 general-election files under the New York data folder.
paths = list.files(
  path = file.path(OE_PATH, 'openelections-data-ny', '2016'),
  pattern = '20161108.*general__',
  full.names = TRUE
)
# Expect exactly 62 files (presumably one per New York county -- confirm).
stopifnot(length(paths) == 62)

# Read each file. If fread() raises a warning, print the offending path so the
# file can be identified; the warning itself still propagates.
ny_counties = lapply(paths, function(p) {
  county = withCallingHandlers(
    fread(p),
    warning = function(w) print(p)
  )
  # Discard columns named like V1, V2, ... (the names fread gives to columns
  # that have no header).
  auto_named = str_detect(names(county), '^V\\d+')
  kept = names(county)[!auto_named]
  county[, c(kept), with = FALSE]
})

# Label each table with the part of its file name that sits between
# "general__" and "__precinct".
names(ny_counties) = str_extract(
  basename(paths),
  '(?<=general__).*(?=__precinct)'
)

# Stack all tables, filling columns absent from a given file with NA; the list
# names end up in the 'path' column.
d = rbindlist(ny_counties, idcol = 'path', fill = TRUE)
# Reshape to long format: every column other than the identifiers listed in
# `id.vars` becomes one row per (mode, votes) pair; missing values are dropped.
d = melt(
  d,
  id.vars = c(
    'county', 'precinct', 'office', 'district', 'candidate', 'party', 'path'
  ),
  variable.name = 'mode',
  value.name = 'votes',
  variable.factor = FALSE,
  na.rm = TRUE
)
# After removing thousands separators, votes can be integer
d[, votes := as.integer(votes %-% ',')]
# Drop Total, Total Outside NYC, Total NYC, Statewide Total, Public Counter
# (The queries in this section only list the affected rows; nothing is removed
# from `d` here.)
# Counties with a "Public Counter" candidate row:
unique(d[candidate %=% 'Public Counter', county])
# [1] "Bronx" "New York" "Queens" "Richmond"
# Distinct county/candidate pairs whose candidate matches "total",
# ignoring case:
unique(d[candidate %=% 'total', c('county', 'candidate'), with = FALSE])
# county candidate
# 1: Cayuga Total Special Votes
# 2: Cayuga Total Votes
# 3: Chemung Total
# 4: Chenango Total
# 5: Delaware Total
# 6: Erie Total
# 7: Franklin Total
# 8: Genesee Total
# 9: Jefferson Total
# 10: Lewis Total
# 11: Livingston Total
# 12: Monroe Total
# 13: Orange Total Votes Cast
# 14: Orleans Total
# 15: Putnam Total
# 16: Schoharie Total Special Votes
# 17: Schoharie Total Votes
# 18: Seneca Total
# 19: Steuben Total Enrolled Voters
# 20: Sullivan Total
# 21: Wyoming Total
# 22: Yates Total
# county candidate
# In Niagara, we have "Wilson/000/1", "Wilson/000/2", ... "Wilson/000"
# List the Niagara precinct names that contain exactly one slash:
unique(d[county == 'Niagara' & str_count(precinct, '\\/') == 1, precinct])
# [1] "Cambria/000" "City of Lockport/001" "City of Lockport/002"
# [4] "City of Lockport/003" "City of Lockport/004" "City of Lockport/005"
# [7] "Hartland/000" "Lewiston/000" "N Tonawanda/001"
# [10] "N Tonawanda/002" "N Tonawanda/003" "Newfane/000"
# [13] "Niagara Falls/003" "Niagara Falls/004" "Niagara Falls/005"
# [16] "Niagara Falls/006" "Pendleton/000" "Porter/000"
# [19] "Royalton/000" "Somerset/000" "Lockport/000"
# [22] "Niagara/000" "Wheatfield/000" "Wilson/000"
# In Columbia, mode "votes" = "AbsenteeAffidavit" + "ElectionDayVotes"
# NOTE(review): the component names quoted for Columbia and Herkimer are not
# among the modes printed below -- presumably they are raw column headers;
# confirm against the source files.
unique(d[county == 'Columbia', mode])
# [1] "votes" "election_day" "absentee"
# Same in Herkimer, but "votes" = "absentee_affidavit" + "polling_place"
unique(d[county == 'Herkimer', mode])
# [1] "votes" "election_day" "absentee"
# In Lewis and Seneca, "absentee" + "election_day" = "votes"
unique(d[county %in% c('Lewis', 'Seneca'), mode])
# [1] "votes" "election_day" "absentee"
# In St. Lawrence, mode "votes" = "machine_votes" + "absentee" + "affidavit" + "absentee_hc"
unique(d[county == 'St. Lawrence', mode])
# [1] "votes" "absentee" "machine_votes" "absentee_hc"
# [5] "affidavit"
# Candidate totals and cumulative precinct rows...
# Distinct county/candidate pairs whose candidate matches "total",
# ignoring case:
unique(d[candidate %=% 'total', c('county', 'candidate'), with = FALSE])
# county candidate
# 1: Cayuga Total Special Votes
# 2: Cayuga Total Votes
# 3: Chemung Total
# 4: Chenango Total
# 5: Delaware Total
# 6: Erie Total
# 7: Franklin Total
# 8: Genesee Total
# 9: Jefferson Total
# 10: Lewis Total
# 11: Livingston Total
# 12: Monroe Total
# 13: Orange Total Votes Cast
# 14: Orleans Total
# 15: Putnam Total
# 16: Schoharie Total Special Votes
# 17: Schoharie Total Votes
# 18: Seneca Total
# 19: Steuben Total Enrolled Voters
# 20: Sullivan Total
# 21: Wyoming Total
# 22: Yates Total
# county candidate
# Distinct county/precinct pairs whose precinct matches "cumulative" or
# "total", ignoring case:
unique(d[precinct %=% 'cumulative|total', c('county', 'precinct'), with = FALSE])
# county precinct
# 1: Lewis TOTALS
# 2: Madison TOTAL
# 3: Montgomery Total
# 4: Nassau Total
# 5: Oneida Total
# 6: Onondaga Syracuse Total
# 7: Onondaga Onondaga Town Total
# 8: Onondaga Total
# 9: Ontario Total
# 10: Orleans TOTAL
# 11: Oswego Total
# 12: Otsego TOTAL
# 13: Rockland Total
# 14: Saratoga Total
# 15: Schuyler Cumulative
# 16: Schuyler Total
# 17: Seneca Cumulative
# 18: Seneca TOTAL
# 19: Tompkins Total
# 20: Ulster Total
# Counties that have rows with an empty precinct name:
unique(d[precinct == '', c('county', 'precinct'), with = FALSE])
# county precinct
# 1: Wayne
# 2: Wyoming
# Ballots cast...
# Counties with a candidate row matching "ballots cast", ignoring case:
unique(d[candidate %=% 'ballots cast', county])
# [1] "Niagara" "Schenectady" "St. Lawrence"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment