Skip to content

Instantly share code, notes, and snippets.

@JoeGermuska
Last active April 25, 2016 15:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoeGermuska/1ed425c068d540326854 to your computer and use it in GitHub Desktop.
Save JoeGermuska/1ed425c068d540326854 to your computer and use it in GitHub Desktop.
Using Census Reporter API with Pandas
import pandas as pd
import requests
# URL template for the Census Reporter "data/show" endpoint. get_data() fills
# in the {release}, {table_ids} and {geoids} placeholders via str.format.
API_URL="http://api.censusreporter.org/1.0/data/show/{release}?table_ids={table_ids}&geo_ids={geoids}"
def get_data(tables=None, geoids=None, release='latest', timeout=30):
    """Fetch table data from the Census Reporter ``data/show`` endpoint.

    Args:
        tables: Iterable of table IDs, or a single table ID string.
            Defaults to ``['B01001']``. The joined IDs are upper-cased
            before being placed in the URL.
        geoids: Iterable of geography IDs, or a single geoid string.
            Defaults to ``['040|01000US']``.
        release: Release name substituted into the URL path.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    if geoids is None:
        geoids = ['040|01000US']
    if tables is None:
        tables = ['B01001']

    # A bare string would otherwise be joined character by character
    # ("B01001" -> "B,0,1,0,0,1"), so treat it as a one-item list.
    string_types = (str, type(u''))  # also covers unicode on Python 2
    if isinstance(tables, string_types):
        tables = [tables]
    if isinstance(geoids, string_types):
        geoids = [geoids]

    url = API_URL.format(table_ids=','.join(tables).upper(),
                         geoids=','.join(geoids),
                         release=release)
    # Without a timeout, requests waits indefinitely on a silent server.
    response = requests.get(url, timeout=timeout)
    return response.json()
def prep_for_pandas(json_data, include_moe=False):
    """Flatten Census Reporter API data for ``pandas.DataFrame.from_dict``.

    Args:
        json_data: Mapping of geoid -> table ID -> dict whose ``'estimate'``
            and ``'error'`` entries are themselves mappings of column ID to
            value. Entries under any other key are ignored.
        include_moe: When true, the ``'error'`` values are included as well,
            each under its column ID with ``"_moe"`` appended.

    Returns:
        A dict mapping each geoid to one flat dict of column ID -> value,
        merged across all of that geoid's tables.
    """
    result = {}
    for geoid, tables in json_data.items():
        flat = {}
        # Table IDs are not needed: column IDs are merged into one flat row.
        for values in tables.values():
            for kind, columns in values.items():
                if kind == 'estimate':
                    flat.update(columns)
                elif kind == 'error' and include_moe:
                    flat.update({k + "_moe": v for k, v in columns.items()})
        result[geoid] = flat
    return result
if __name__ == '__main__':
    # Demo: fetch the default table and geographies, then rank the rows by
    # the B01001001 column.
    response = get_data()
    df = pd.DataFrame.from_dict(prep_for_pandas(response['data']), orient='index')
    # print() with a single argument behaves the same on Python 2 and 3.
    print("Top 10 most populous states")
    # DataFrame.sort was removed from pandas; sort_values replaces it.
    print(df.sort_values('B01001001', ascending=False)['B01001001'].head(10))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment