migurski/make-gpwv4-us.py

## make-gpwv4-us.py
from osgeo import ogr
import requests, zipfile, io, tempfile, shutil, os, sys, csv, math, collections

state_codes = {
    '01': 'AL', '31': 'NE', '02': 'AK', '32': 'NV', '04': 'AZ', '33': 'NH',
    '05': 'AR', '34': 'NJ', '06': 'CA', '35': 'NM', '08': 'CO', '36': 'NY',
    '09': 'CT', '37': 'NC', '10': 'DE', '38': 'ND', '11': 'DC', '39': 'OH',
    '12': 'FL', '40': 'OK', '13': 'GA', '41': 'OR', '15': 'HI', '42': 'PA',
    '16': 'ID', '72': 'PR', '17': 'IL', '44': 'RI', '18': 'IN', '45': 'SC',
    '19': 'IA', '46': 'SD', '20': 'KS', '47': 'TN', '21': 'KY', '48': 'TX',
    '22': 'LA', '49': 'UT', '23': 'ME', '50': 'VT', '24': 'MD', '51': 'VA',
    '25': 'MA', '78': 'VI', '26': 'MI', '53': 'WA', '27': 'MN', '54': 'WV',
    '28': 'MS', '55': 'WI', '29': 'MO', '56': 'WY', '30': 'MT'
    }

grid = collections.defaultdict(lambda: dict(population=0, area=0))

for (fips, usps_code) in sorted(state_codes.items()):
    tracts_url = 'https://www2.census.gov/geo/tiger/TIGER2015/TRACT/tl_2015_{}_tract.zip'.format(fips)
    counts_url = 'http://api.census.gov/data/2015/acs5?get=NAME,B01001_001E&for=tract:*&in=state:{}'.format(fips)

    try:
        counts_list = requests.get(counts_url).json()
        counts_ = [dict(zip(counts_list[0], values)) for values in counts_list[1:]]
        counts = {(c['state'] + c['county'] + c['tract']): int(c['B01001_001E']) for c in counts_}
        print(len(counts), 'tracts in', counts_url, file=sys.stderr)
    except Exception as e:
        print(e, file=sys.stderr)
        continue

    try:
        tracts_zipdata = io.BytesIO(requests.get(tracts_url).content)
        tracts_zipfile = zipfile.ZipFile(tracts_zipdata)
    except Exception as e:
        print(e, file=sys.stderr)
        continue

    tempdir = tempfile.mkdtemp(prefix='gpwv4-{}-'.format(fips), dir='/tmp')

    for name in tracts_zipfile.namelist():
        if name[-4:] in ('.shp', '.shx', '.prj', '.dbf'):
            path = tracts_zipfile.extract(name, tempdir)
            if path.endswith('.shp'):
                tracts_path = path

    tracts_ds = ogr.Open(tracts_path)
    tracts = list(tracts_ds.GetLayer(0))

    print(len(tracts), 'tracts in', tracts_url, file=sys.stderr)

    for tract in tracts:
        center = tract.GetGeometryRef().Centroid()
        x, y = math.floor(center.GetX()), math.floor(center.GetY())
        grid_key = (usps_code, x, y)

        grid[grid_key]['population'] += counts[tract.GetField('GEOID')]
        grid[grid_key]['area'] += (tract.GetField('ALAND') / 1000000)

    tracts_ds.Destroy()
    shutil.rmtree(tempdir)

out = csv.DictWriter(sys.stdout, ('usps_code', 'lon', 'lat', 'size', 'population', 'area'))
out.writeheader()

for ((usps_code, x, y), value) in sorted(grid.items()):
    out.writerow(dict(usps_code=usps_code, lon=x, lat=y, size=1,
        population=value['population'], area=value['area']))
	from osgeo import ogr
	import requests, zipfile, io, tempfile, shutil, os, sys, csv, math, collections

	state_codes = {
	'01': 'AL', '31': 'NE', '02': 'AK', '32': 'NV', '04': 'AZ', '33': 'NH',
	'05': 'AR', '34': 'NJ', '06': 'CA', '35': 'NM', '08': 'CO', '36': 'NY',
	'09': 'CT', '37': 'NC', '10': 'DE', '38': 'ND', '11': 'DC', '39': 'OH',
	'12': 'FL', '40': 'OK', '13': 'GA', '41': 'OR', '15': 'HI', '42': 'PA',
	'16': 'ID', '72': 'PR', '17': 'IL', '44': 'RI', '18': 'IN', '45': 'SC',
	'19': 'IA', '46': 'SD', '20': 'KS', '47': 'TN', '21': 'KY', '48': 'TX',
	'22': 'LA', '49': 'UT', '23': 'ME', '50': 'VT', '24': 'MD', '51': 'VA',
	'25': 'MA', '78': 'VI', '26': 'MI', '53': 'WA', '27': 'MN', '54': 'WV',
	'28': 'MS', '55': 'WI', '29': 'MO', '56': 'WY', '30': 'MT'
	}

	grid = collections.defaultdict(lambda: dict(population=0, area=0))

	for (fips, usps_code) in sorted(state_codes.items()):
	tracts_url = 'https://www2.census.gov/geo/tiger/TIGER2015/TRACT/tl_2015_{}_tract.zip'.format(fips)
	counts_url = 'http://api.census.gov/data/2015/acs5?get=NAME,B01001_001E&for=tract:*&in=state:{}'.format(fips)

	try:
	counts_list = requests.get(counts_url).json()
	counts_ = [dict(zip(counts_list[0], values)) for values in counts_list[1:]]
	counts = {(c['state'] + c['county'] + c['tract']): int(c['B01001_001E']) for c in counts_}
	print(len(counts), 'tracts in', counts_url, file=sys.stderr)
	except Exception as e:
	print(e, file=sys.stderr)
	continue

	try:
	tracts_zipdata = io.BytesIO(requests.get(tracts_url).content)
	tracts_zipfile = zipfile.ZipFile(tracts_zipdata)
	except Exception as e:
	print(e, file=sys.stderr)
	continue

	tempdir = tempfile.mkdtemp(prefix='gpwv4-{}-'.format(fips), dir='/tmp')

	for name in tracts_zipfile.namelist():
	if name[-4:] in ('.shp', '.shx', '.prj', '.dbf'):
	path = tracts_zipfile.extract(name, tempdir)
	if path.endswith('.shp'):
	tracts_path = path

	tracts_ds = ogr.Open(tracts_path)
	tracts = list(tracts_ds.GetLayer(0))

	print(len(tracts), 'tracts in', tracts_url, file=sys.stderr)

	for tract in tracts:
	center = tract.GetGeometryRef().Centroid()
	x, y = math.floor(center.GetX()), math.floor(center.GetY())
	grid_key = (usps_code, x, y)

	grid[grid_key]['population'] += counts[tract.GetField('GEOID')]
	grid[grid_key]['area'] += (tract.GetField('ALAND') / 1000000)

	tracts_ds.Destroy()
	shutil.rmtree(tempdir)

	out = csv.DictWriter(sys.stdout, ('usps_code', 'lon', 'lat', 'size', 'population', 'area'))
	out.writeheader()

	for ((usps_code, x, y), value) in sorted(grid.items()):
	out.writerow(dict(usps_code=usps_code, lon=x, lat=y, size=1,
	population=value['population'], area=value['area']))