Skip to content

Instantly share code, notes, and snippets.

@migurski
Created April 28, 2017 21:52
Show Gist options
  • Save migurski/73f3e49267b298e406a6b24746548033 to your computer and use it in GitHub Desktop.
Save migurski/73f3e49267b298e406a6b24746548033 to your computer and use it in GitHub Desktop.
Script to bake gridded U.S. population based on ACS 5-year data
from osgeo import ogr
import requests, zipfile, io, tempfile, shutil, os, sys, csv, math, collections
state_codes = {
'01': 'AL', '31': 'NE', '02': 'AK', '32': 'NV', '04': 'AZ', '33': 'NH',
'05': 'AR', '34': 'NJ', '06': 'CA', '35': 'NM', '08': 'CO', '36': 'NY',
'09': 'CT', '37': 'NC', '10': 'DE', '38': 'ND', '11': 'DC', '39': 'OH',
'12': 'FL', '40': 'OK', '13': 'GA', '41': 'OR', '15': 'HI', '42': 'PA',
'16': 'ID', '72': 'PR', '17': 'IL', '44': 'RI', '18': 'IN', '45': 'SC',
'19': 'IA', '46': 'SD', '20': 'KS', '47': 'TN', '21': 'KY', '48': 'TX',
'22': 'LA', '49': 'UT', '23': 'ME', '50': 'VT', '24': 'MD', '51': 'VA',
'25': 'MA', '78': 'VI', '26': 'MI', '53': 'WA', '27': 'MN', '54': 'WV',
'28': 'MS', '55': 'WI', '29': 'MO', '56': 'WY', '30': 'MT'
}
grid = collections.defaultdict(lambda: dict(population=0, area=0))
for (fips, usps_code) in sorted(state_codes.items()):
tracts_url = 'https://www2.census.gov/geo/tiger/TIGER2015/TRACT/tl_2015_{}_tract.zip'.format(fips)
counts_url = 'http://api.census.gov/data/2015/acs5?get=NAME,B01001_001E&for=tract:*&in=state:{}'.format(fips)
try:
counts_list = requests.get(counts_url).json()
counts_ = [dict(zip(counts_list[0], values)) for values in counts_list[1:]]
counts = {(c['state'] + c['county'] + c['tract']): int(c['B01001_001E']) for c in counts_}
print(len(counts), 'tracts in', counts_url, file=sys.stderr)
except Exception as e:
print(e, file=sys.stderr)
continue
try:
tracts_zipdata = io.BytesIO(requests.get(tracts_url).content)
tracts_zipfile = zipfile.ZipFile(tracts_zipdata)
except Exception as e:
print(e, file=sys.stderr)
continue
tempdir = tempfile.mkdtemp(prefix='gpwv4-{}-'.format(fips), dir='/tmp')
for name in tracts_zipfile.namelist():
if name[-4:] in ('.shp', '.shx', '.prj', '.dbf'):
path = tracts_zipfile.extract(name, tempdir)
if path.endswith('.shp'):
tracts_path = path
tracts_ds = ogr.Open(tracts_path)
tracts = list(tracts_ds.GetLayer(0))
print(len(tracts), 'tracts in', tracts_url, file=sys.stderr)
for tract in tracts:
center = tract.GetGeometryRef().Centroid()
x, y = math.floor(center.GetX()), math.floor(center.GetY())
grid_key = (usps_code, x, y)
grid[grid_key]['population'] += counts[tract.GetField('GEOID')]
grid[grid_key]['area'] += (tract.GetField('ALAND') / 1000000)
tracts_ds.Destroy()
shutil.rmtree(tempdir)
out = csv.DictWriter(sys.stdout, ('usps_code', 'lon', 'lat', 'size', 'population', 'area'))
out.writeheader()
for ((usps_code, x, y), value) in sorted(grid.items()):
out.writerow(dict(usps_code=usps_code, lon=x, lat=y, size=1,
population=value['population'], area=value['area']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment