Skip to content

Instantly share code, notes, and snippets.

@konklone
Created January 14, 2017 20:21
Show Gist options
  • Save konklone/70829e8cfdd2cd01f3b57f4bd00937d0 to your computer and use it in GitHub Desktop.
Save konklone/70829e8cfdd2cd01f3b57f4bd00937d0 to your computer and use it in GitHub Desktop.
House and Senate URL extractor. This script must be placed in the scripts/ directory of the unitedstates/congress-legislators repository.
#!/usr/bin/env python
import csv
import io
import urllib
import urllib.parse

import utils
from utils import load_data, save_data
def save_csv(header, rows, path):
    """Write a header row followed by data rows to a CSV file.

    Every non-numeric field is quoted (``csv.QUOTE_NONNUMERIC``) and the
    file is written as UTF-8.

    Args:
        header: Sequence of column names, written as the first row.
        rows: Iterable of row sequences, written in order after the header.
        path: Destination file path; an existing file is overwritten.
    """
    # newline='' leaves line endings entirely to the csv writer. Without
    # it, platforms that translate "\n" on write would turn the writer's
    # "\r\n" row terminator into "\r\r\n".
    with open(path, 'w', newline='', encoding='utf-8') as output:
        writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        writer.writerows(rows)
def write(content, destination):
    """Write ``content`` to the file at ``destination`` as UTF-8 text.

    Args:
        content: The string to write.
        destination: Path of the file to create or overwrite.
    """
    # The context manager closes the file even if the write raises.
    with open(destination, 'w', encoding='utf-8') as f:
        f.write(content)
# Collect the website host of every legislator in legislators-current.yaml,
# split by chamber, and write one single-column CSV per chamber.
current = load_data("legislators-current.yaml")
house_urls = []
senate_urls = []
for member in current:
    # Only the last entry in the member's term list is consulted.
    last_term = member['terms'][-1]

    # Used only in diagnostics; fall back rather than abort the whole run
    # with a KeyError if a record lacks 'official_full'.
    display_name = member.get('name', {}).get('official_full', 'unknown')

    url = last_term.get('url')
    if url is None:
        print("[%s] No URL known." % display_name)
        continue

    # Drop a leading "www." so each host is recorded in its bare form.
    url = url.replace("://www.", "://")
    stripped = urllib.parse.urlparse(url).netloc
    if not stripped:
        # e.g. a URL with no scheme: urlparse yields an empty netloc, which
        # would otherwise become a blank row in the output.
        print("[%s] Could not extract a domain from URL: %s" % (display_name, url))
        continue

    # Terms typed 'sen' go to the Senate list; every other type is treated
    # as House.
    if last_term['type'] == 'sen':
        senate_urls.append([stripped])
    else:
        house_urls.append([stripped])

# Output paths are relative to the current working directory.
save_csv(["Domain"], senate_urls, "../../senate.csv")
save_csv(["Domain"], house_urls, "../../house.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment