Created
June 17, 2018 17:53
-
-
Save jamesdunham/9805895741354311f9945366026f1785 to your computer and use it in GitHub Desktop.
Review New York county returns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies: data.table supplies fread/rbindlist/melt and `:=`; stringr
# supplies the regex helpers used below.
library(data.table)
library(stringr)

# Base directory under which `openelections-data-ny/2016` is expected to exist.
OE_PATH = '~/medsl/openelections'
# Case-insensitive regex test.
# `string %=% pattern` returns, for each element of `string`, whether it
# matches the regular expression `pattern` with case ignored.
`%=%` = function(string, pattern) {
  str_detect(string, stringr::regex(pattern, ignore_case = TRUE))
}
# Case-insensitive regex removal.
# `string %-% pattern` returns `string` with every match of the regular
# expression `pattern` (case ignored) deleted.
`%-%` = function(string, pattern) {
  str_remove_all(string, stringr::regex(pattern, ignore_case = TRUE))
}
# Read and combine counties
# List the 2016-11-08 general-election files; exactly 62 are required (one per
# New York county), otherwise the script stops here.
paths = list.files(
  file.path(OE_PATH, 'openelections-data-ny', '2016'),
  pattern = '20161108.*general__',
  full.names = TRUE
)
stopifnot(length(paths) == 62)
ny_counties = lapply(paths, function(p) {
  # Print the path whenever fread() warns, so the offending file is
  # identifiable; the warning itself is not suppressed.
  d = withCallingHandlers(fread(p), warning = function(w) print(p))
  # Discard columns named V<digits>..., the names fread gives to columns that
  # have no header.
  keep = names(d)[!str_detect(names(d), '^V\\d+')]
  d[, c(keep), with = FALSE]
})
# Name each table by the part of its file name between "general__" and
# "__precinct".
ny_counties = setNames(
  ny_counties,
  str_extract(basename(paths), '(?<=general__).*(?=__precinct)')
)
# Stack the per-county tables. `path` records the list name of the table each
# row came from; fill = TRUE sets columns missing from a county to NA.
d = rbindlist(ny_counties, fill = TRUE, idcol = 'path')
# Reshape to long form: every column not listed in id.vars becomes one
# (mode, votes) row per original row, and rows whose votes are NA are dropped.
d = melt(
  d,
  id.vars = c('county', 'precinct', 'office', 'district', 'candidate',
    'party', 'path'),
  variable.name = 'mode',
  value.name = 'votes',
  variable.factor = FALSE,
  na.rm = TRUE
)
# After removing thousands separators, votes can be integer.
# Values that are still non-numeric after the commas are stripped become NA
# (as.integer() warns when that happens).
d[, votes := as.integer(votes %-% ',')]
# Non-candidate rows to drop: Total, Total Outside NYC, Total NYC, Statewide
# Total, Public Counter. Nothing is removed here; the queries below only list
# where such rows occur.
# Counties with a "Public Counter" pseudo-candidate:
d[candidate %=% 'Public Counter', unique(county)]
# [1] "Bronx" "New York" "Queens" "Richmond"
# Distinct (county, candidate) pairs whose candidate contains "total":
unique(d[candidate %=% 'total', .(county, candidate)])
# county candidate
# 1: Cayuga Total Special Votes
# 2: Cayuga Total Votes
# 3: Chemung Total
# 4: Chenango Total
# 5: Delaware Total
# 6: Erie Total
# 7: Franklin Total
# 8: Genesee Total
# 9: Jefferson Total
# 10: Lewis Total
# 11: Livingston Total
# 12: Monroe Total
# 13: Orange Total Votes Cast
# 14: Orleans Total
# 15: Putnam Total
# 16: Schoharie Total Special Votes
# 17: Schoharie Total Votes
# 18: Seneca Total
# 19: Steuben Total Enrolled Voters
# 20: Sullivan Total
# 21: Wyoming Total
# 22: Yates Total
# county candidate
# In Niagara, we have "Wilson/000/1", "Wilson/000/2", ... "Wilson/000"
# List the Niagara precinct names that contain exactly one "/".
d[county == 'Niagara' & str_count(precinct, '/') == 1, unique(precinct)]
# [1] "Cambria/000" "City of Lockport/001" "City of Lockport/002"
# [4] "City of Lockport/003" "City of Lockport/004" "City of Lockport/005"
# [7] "Hartland/000" "Lewiston/000" "N Tonawanda/001"
# [10] "N Tonawanda/002" "N Tonawanda/003" "Newfane/000"
# [13] "Niagara Falls/003" "Niagara Falls/004" "Niagara Falls/005"
# [16] "Niagara Falls/006" "Pendleton/000" "Porter/000"
# [19] "Royalton/000" "Somerset/000" "Lockport/000"
# [22] "Niagara/000" "Wheatfield/000" "Wilson/000"
# Vote modes present per county. Each query lists the distinct `mode` values
# that remain for the county after the reshape above.
# In Columbia, mode "votes" = "AbsenteeAffidavit" + "ElectionDayVotes"
d[county == 'Columbia', unique(mode)]
# [1] "votes" "election_day" "absentee"
# Same in Herkimer, but "votes" = "absentee_affidavit" + "polling_place"
d[county == 'Herkimer', unique(mode)]
# [1] "votes" "election_day" "absentee"
# In Lewis and Seneca, "absentee" + "election_day" = "votes"
d[county %in% c('Lewis', 'Seneca'), unique(mode)]
# [1] "votes" "election_day" "absentee"
# In St. Lawrence, mode "votes" = "machine_votes" + "absentee" + "affidavit" + "absentee_hc"
d[county == 'St. Lawrence', unique(mode)]
# [1] "votes" "absentee" "machine_votes" "absentee_hc"
# [5] "affidavit"
# Candidate totals and cumulative precinct rows...
# Candidate-level totals (this repeats the "total" candidate query run earlier
# in this script):
unique(d[candidate %=% 'total', .(county, candidate)])
# county candidate
# 1: Cayuga Total Special Votes
# 2: Cayuga Total Votes
# 3: Chemung Total
# 4: Chenango Total
# 5: Delaware Total
# 6: Erie Total
# 7: Franklin Total
# 8: Genesee Total
# 9: Jefferson Total
# 10: Lewis Total
# 11: Livingston Total
# 12: Monroe Total
# 13: Orange Total Votes Cast
# 14: Orleans Total
# 15: Putnam Total
# 16: Schoharie Total Special Votes
# 17: Schoharie Total Votes
# 18: Seneca Total
# 19: Steuben Total Enrolled Voters
# 20: Sullivan Total
# 21: Wyoming Total
# 22: Yates Total
# county candidate
# Precinct names containing "cumulative" or "total" (case-insensitive):
unique(d[precinct %=% 'cumulative|total', .(county, precinct)])
# county precinct
# 1: Lewis TOTALS
# 2: Madison TOTAL
# 3: Montgomery Total
# 4: Nassau Total
# 5: Oneida Total
# 6: Onondaga Syracuse Total
# 7: Onondaga Onondaga Town Total
# 8: Onondaga Total
# 9: Ontario Total
# 10: Orleans TOTAL
# 11: Oswego Total
# 12: Otsego TOTAL
# 13: Rockland Total
# 14: Saratoga Total
# 15: Schuyler Cumulative
# 16: Schuyler Total
# 17: Seneca Cumulative
# 18: Seneca TOTAL
# 19: Tompkins Total
# 20: Ulster Total
# Counties that have rows with an empty-string precinct (NA precincts are not
# matched by this comparison):
unique(d[precinct == '', .(county, precinct)])
# county precinct
# 1: Wayne
# 2: Wyoming
# Ballots cast...
# Counties that report a "ballots cast" pseudo-candidate (case-insensitive):
d[candidate %=% 'ballots cast', unique(county)]
# [1] "Niagara" "Schenectady" "St. Lawrence"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment