Skip to content

Instantly share code, notes, and snippets.

@wboykinm
Created June 27, 2016 19:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wboykinm/2d3782a5c90de5ef4bece2971e69c710 to your computer and use it in GitHub Desktop.
Save wboykinm/2d3782a5c90de5ef4bece2971e69c710 to your computer and use it in GitHub Desktop.
A scratchpad for dealing with the FEC whole-cycle data dumps
# look at the top 20 rows of the data
head -n 20 itcont.txt | csvlook -d '|'
# combine a header file with a csv-formatted ful table
cp indiv_header_file.csv individual_2016.csv
csvformat -d '|' itcont.txt >> individual_2016.csv
# pull out all the records where the donor is from Vermont
csvgrep -c 10 -m VT individual_2016.csv > individual_2016_vt.csv
# join committee information to donors
csvjoin -c "CMTE_ID,CMTE_ID" individual_2016.csv committees_2016.csv --left > individual_2016_joined.csv
-- build the shell of the table with the right schema for both individual contributions and committee info
DROP TABLE IF EXISTS individual_2016;
CREATE TABLE individual_2016 (
cmte_id text,
amndt_ind text,
rpt_tp text,
transaction_pgi text,
image_num text,
transaction_tp text,
entity_tp text,
name text,
city text,
state text,
zip_code text,
employer text,
occupation text,
transaction_dt text,
transaction_amt int,
other_id text,
tran_id text,
file_num text,
memo_cd text,
memo_text text,
sub_id text,
cmte_nm text,
tres_nm text,
cmte_st1 text,
cmte_st2 text,
cmte_city text,
cmte_st text,
cmte_zip text,
cmte_dsgn text,
cmte_tp text,
cmte_pty_affiliation text,
cmte_filing_freq text,
org_tp text,
connected_org_nm text,
cand_id text
);
-- pull in TEH DATAZ
\COPY individual_2016 FROM 'usa_2016.csv' DELIMITER ',' CSV HEADER;
-- add a few indexes to speed things up
CREATE INDEX IF NOT EXISTS ind_name_idx ON individual_2016 USING GIN(to_tsvector('english', name));
CREATE INDEX IF NOT EXISTS state_idx ON individual_2016 (state);
CREATE INDEX IF NOT EXISTS zip_code_idx ON individual_2016 (zip_code);
CREATE INDEX IF NOT EXISTS occupation_idx ON individual_2016 USING GIN(to_tsvector('english', occupation));
CREATE INDEX IF NOT EXISTS employer_idx ON individual_2016 USING GIN(to_tsvector('english', employer));
CREATE INDEX IF NOT EXISTS cmte_nm_idx ON individual_2016 USING GIN(to_tsvector('english', cmte_nm));
CREATE INDEX IF NOT EXISTS cmte_tp_idx ON individual_2016 (cmte_tp);
CREATE INDEX IF NOT EXISTS transaction_amt_idx ON individual_2016 (transaction_amt);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment