Skip to content

Instantly share code, notes, and snippets.

@dannguyen
Last active June 16, 2018 09:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dannguyen/902ffeb1a9b0943156f6f975460deea9 to your computer and use it in GitHub Desktop.
Save dannguyen/902ffeb1a9b0943156f6f975460deea9 to your computer and use it in GitHub Desktop.
Python 3.6 script for downloading House disbursement data (~10 years' worth) from ProPublica: https://projects.propublica.org/represent/expenditures
"""
Fetches House disbursement CSV files from
https://projects.propublica.org/represent/expenditures
Saves them to:
data/raw/{year}Q{q}.csv
"""
import requests
from pathlib import Path
# Destination directory for downloaded CSVs; created (with parents) at import
# time if it does not already exist.
DATADIR = Path('data') / 'raw'
DATADIR.mkdir(parents=True, exist_ok=True)

# (year, quarter) bounds that main() passes to get_qtrs().
Q_START = (2009, 3)
Q_END = (2018, 1)

# URL template; filled in with `year` and `qtr` via str.format().
SRC_BASEURL = (
    'https://projects.propublica.org/congress/assets/staffers'
    '/{year}Q{qtr}-house-disburse-detail.csv'
)
def get_qtrs(startq, endq):
    """Return every (year, quarter) tuple from startq through endq, inclusive.

    Both arguments are (year, quarter) tuples and are compared with ordinary
    tuple ordering. The result is empty when startq sorts after endq.
    Example: get_qtrs((2017, 4), (2018, 2)) -> [(2017, 4), (2018, 1), (2018, 2)]
    """
    def _walk():
        current = startq
        while current <= endq:
            yield current
            year, quarter = current
            # Step to the next quarter, rolling over into Q1 of the next year.
            if quarter < 4:
                current = (year, quarter + 1)
            else:
                current = (year + 1, 1)

    return list(_walk())
def fetch_and_save(url, destpath, timeout=60):
    """Download `url` with an HTTP GET and write the response body to `destpath`.

    Args:
        url: Address to request.
        destpath: Object providing write_bytes() (main() passes a
            pathlib.Path); it receives the raw response content.
        timeout: Seconds handed to requests.get() so a stalled connection
            raises instead of blocking the whole run indefinitely.

    Returns:
        The value returned by destpath.write_bytes() (the number of bytes
        written, for a pathlib.Path) when the status code is 200; otherwise
        None, after printing the status code received.

    Raises:
        Whatever requests.get() raises (e.g. on connection failure or
        timeout); these are not caught here.
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200:
        print("Received status of:", resp.status_code)
        return None
    return destpath.write_bytes(resp.content)
def main():
    """Download one CSV per quarter from Q_START through Q_END into DATADIR.

    For each (year, quarter) produced by get_qtrs(), the source URL is built
    from SRC_BASEURL and the file is saved as DATADIR/{year}Q{quarter}.csv.
    Progress is printed to stdout for every quarter.
    """
    for yr, q in get_qtrs(Q_START, Q_END):
        url = SRC_BASEURL.format(year=yr, qtr=q)
        dest = DATADIR / '{}Q{}.csv'.format(yr, q)
        print("Downloading:\n", url)
        nbytes = fetch_and_save(url, dest)
        if nbytes is None:
            # fetch_and_save() returns None on a non-200 status and writes no
            # file, so don't claim anything was saved.
            print("Skipped; nothing saved to:", dest, "\n")
        else:
            print("Saved", nbytes, "bytes to:", dest, "\n")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment