Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Campaign Finance Explorer
library(tidyverse)
library(rvest)
# Election cycles to pull: every even year from 1996 through 2020.
# Stored as character strings because they are pasted into URLs and file names.
years <- as.character(seq(1996, 2020, by = 2))
# Two-digit suffix used in the bulk file names ('96', '98', '00', ...).
# Derived from `years` so the two vectors can never fall out of step.
years_abbr <- substr(years, 3, 4)
# Download the per-cycle bulk file (weblYY.zip) for every entry in `years`,
# read each extracted text file, and stack the results into one table, `raw`.
# source: https://www.fec.gov/data/browse-data/?tab=bulk-data
# NOTE(review): setwd() ties the script to one machine's directory layout. It
# is kept because the downloads and extracted files are written relative to it.
setwd("~/Projects/fec")
# Preallocate one slot per cycle instead of growing the list.
datalist <- vector('list', length(years))
# seq_along() is safe when `years` is empty; 1:length() would yield c(1, 0).
for (i in seq_along(years)) {
  url <- paste0('https://www.fec.gov/files/bulk-downloads/', years[i], '/webl', years_abbr[i], '.zip')
  print(url)
  # Zip archives are binary, so request a binary transfer explicitly.
  status <- download.file(url, 'temp.zip', mode = 'wb')
  # download.file() reports failure through a non-zero return code.
  if (status != 0) {
    stop('Download failed for ', url, ' (status ', status, ')', call. = FALSE)
  }
  file_name <- paste0('webl', years_abbr[i], '.txt')
  # Extract only the one text file that is needed from the archive.
  unzip('temp.zip', files = file_name)
  # The file is read without a header row, so columns get placeholder names;
  # the cycle year is stored in X31 so every row records which file it came from.
  datalist[[i]] <- read_delim(file_name, delim = '|', col_names = FALSE) %>%
    mutate(X31 = years[i])
}
# Stack all cycles into a single table.
raw <- do.call(rbind, datalist)
# add column names
# The field names are scraped from the first table on the FEC file-description
# page; the first column of that table holds the names.
url <- 'https://www.fec.gov/campaign-finance-data/current-campaigns-house-and-senate-file-description/'
page <- read_html(url)
column_names <- page %>% html_table() %>% first() %>% pull(X1)
# Drop the table's own header cell ("Column name"). Negative indexing stays
# correct even if only one element was scraped, unlike 2:length().
field_names <- column_names[-1]
# Fail with a clear message if the scraped names no longer line up with the
# data (for example after a page redesign) instead of an opaque rename error.
if (length(field_names) + 1L != ncol(raw)) {
  stop(
    'Scraped ', length(field_names), ' column names (plus YEAR) but `raw` has ',
    ncol(raw), ' columns',
    call. = FALSE
  )
}
colnames(raw) <- c(field_names, 'YEAR') # scraped names, plus the added year column
glimpse(raw)
# Drop the election-result variables, which are only included in the
# 1996-2006 files.
campaigns <- select(
  raw,
  -c(SPEC_ELECTION, PRIM_ELECTION, RUN_ELECTION, GEN_ELECTION, GEN_ELECTION_PRECENT)
)
glimpse(campaigns)
# Tally rows for each party code / party affiliation pair, largest first.
count(group_by(campaigns, PTY_CD, CAND_PTY_AFFILIATION), sort = TRUE)
# export
# Writes the combined table (election-result columns removed) to a CSV file;
# the path is relative, so it lands in the current working directory.
write_csv(campaigns, 'fec-campaign-finance.csv')
# Code book for two of the exported columns:
# PTY_CD: 2 = Republican, 1 = Democrat, 3 = Other
# CAND_ICI (written "CAND ICI" originally; presumably this column, confirm
# against the column list): C = Challenger, I = Incumbent, O = Open Seat, NA = Other
@erikgregorywebb

This comment has been minimized.

Copy link
Owner Author

@erikgregorywebb erikgregorywebb commented Apr 3, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment