Skip to content

Instantly share code, notes, and snippets.

@citizenrich
Created April 24, 2017 20:11
Show Gist options
  • Save citizenrich/75ea8e03bd61725a9219ddc2341a6a0b to your computer and use it in GitHub Desktop.
Save citizenrich/75ea8e03bd61725a9219ddc2341a6a0b to your computer and use it in GitHub Desktop.
Work in progress on the Census API (2015 ACS 5-year data for Washington, DC)
#!/usr/bin/env python3
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
from itertools import zip_longest
# Exploratory queries against the 2015 ACS 5-year endpoint. Each request's
# final URL and raw response body are printed for inspection.

# The API key is read from the environment. If CENSUS_KEY is unset this is
# None, and requests leaves a None-valued parameter out of the query string.
census_key = os.environ.get("CENSUS_KEY")
# Alternative dataset year: 'http://api.census.gov/data/2011/acs5'
base = 'http://api.census.gov/data/2015/acs5'

# washington dc is 11
params = {'key': census_key, 'get': 'B00001_001E', 'for': 'state:11'}
# The two `in` clauses are separated by a space, not a literal '+': requests
# percent-encodes '+' as %2B, whereas a space is sent as '+' in the URL.
# NOTE(review): confirm the API accepts this form for state + county.
params2 = {'key': census_key, 'get': 'B00001_001E', 'for': 'tract:*', 'in': 'state:11 county:*'}
y = requests.get(base, params=params, timeout=60)
print(y.url, y.text)
z = requests.get(base, params=params2, timeout=60)
print(z.url, z.text)

# Population and rent-related variables for every tract in state 11.
rent_base = 'http://api.census.gov/data/2015/acs5'
params3 = {'key': census_key, 'get': 'NAME,B01003_001E,B25057_001E,B25058_001E,B25059_001E', 'for': 'tract:*', 'in': 'state:11'}
w = requests.get(rent_base, params=params3, timeout=60)
print(w.url, w.text)

# Attempt to request every variable at once.
# NOTE(review): '*' may not be an accepted `get` value — confirm against the
# API documentation; the printed body will show the server's answer.
all_base = 'http://api.census.gov/data/2015/acs5'
params4 = {'key': census_key, 'get': '*', 'for': 'tract:*', 'in': 'state:11'}
a = requests.get(all_base, params=params4, timeout=60)
print(a.url, a.text)
# Scrape the dataset's variable listing page and collect the `name` attribute
# of every anchor in its first table. Presumably each anchor names one
# variable — verify against the page.
bs_url = 'http://api.census.gov/data/2015/acs5/variables.html'
y = requests.get(bs_url, timeout=60)
soup = BeautifulSoup(y.text, 'lxml')
tables = soup.find_all('table')
if not tables:
    # Fail with a readable message instead of a bare IndexError.
    raise RuntimeError('no <table> found at {}'.format(bs_url))
table = tables[0]
# Anchors without a `name` attribute give None from .get(); skip them so
# every entry in `names` is a non-empty string that can be comma-joined.
names = [link.get('name') for link in table.find_all('a') if link.get('name')]
# print(names)
def grouper(iterable, n, fillvalue=''):
    """Collect items from *iterable* into consecutive tuples of length *n*.

    The last tuple is padded with *fillvalue* when the number of items is
    not a multiple of *n*.

    Args:
        iterable: Any iterable of items to group.
        n: Size of each group; must be at least 1.
        fillvalue: Padding used to complete the final group.

    Returns:
        An iterator of *n*-tuples.

    Raises:
        ValueError: If *n* is less than 1 (which would otherwise silently
            produce no groups at all).
    """
    if n < 1:
        raise ValueError('n must be at least 1')
    # The *same* iterator repeated n times: zip_longest draws n successive
    # items from it to build each tuple.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
# Download every scraped variable for all tracts in state 11, 30 variables
# per request, then join the batches side by side into one CSV.

# Geography columns used as the join key between batches.
# NOTE(review): assumes each response carries these columns — confirm; only
# the ones actually present in a response header are used.
geo_cols = ['state', 'county', 'tract']

# One parsed response (a list of rows, header row first) per successful batch.
list_of_lists_of_lists = []
for g in grouper(names, 30):
    # grouper pads the final group with ''; drop padding and any missing
    # names so the `get` list has no empty entries.
    gstring = ','.join(v for v in g if v)
    if not gstring:
        continue
    params_template = {'key': census_key, 'get': gstring, 'for': 'tract:*', 'in': 'state:11'}
    b = requests.get(all_base, params=params_template, timeout=120)
    if b.status_code != 200:
        # Report and skip the batch rather than aborting the whole download.
        print('skipping batch (HTTP {}): {}'.format(b.status_code, b.text[:200]))
        continue
    # Parse the JSON body; storing the raw text is what broke the DataFrame
    # construction (indexing a string yields single characters).
    list_of_lists_of_lists.append(b.json())

frames = []
for lol in list_of_lists_of_lists:
    header, rows = lol[0], lol[1:]
    frame = pd.DataFrame(rows, columns=header)
    key = [c for c in geo_cols if c in header]
    if key:
        # Index by geography so batches align on tract and the repeated
        # geography columns are not duplicated in the output.
        frame = frame.set_index(key)
    frames.append(frame)

# pd.concat takes a list of frames; axis=1 places the batches side by side.
df = pd.concat(frames, axis=1) if frames else pd.DataFrame()
df.to_csv('out.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment