Skip to content

Instantly share code, notes, and snippets.

@migurski
Created October 29, 2017 17:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save migurski/d9a91e2f243a45666827e2b1216d15a2 to your computer and use it in GitHub Desktop.
Save migurski/d9a91e2f243a45666827e2b1216d15a2 to your computer and use it in GitHub Desktop.
Intersect North Carolina voting precincts with Census tracts to get selected area-weighted race, income, and education statistics
import psycopg2, csv, math
with psycopg2.connect('postgres:///redistrict_nc') as conn:
with conn.cursor() as db:
for year in ['2016', '2014', '2012']:
with open(f'{year}-4.txt') as in_file, open(f'{year}-5.txt', 'w') as out_file:
in_rows = csv.DictReader(in_file, dialect='excel-tab')
out_fields = in_rows.fieldnames + ['Non-Hispanic White',
'Non-Hispanic White, Error', 'College', 'College, Error',
'Median Household Income', 'Median Household Income, Error']
out_csv = csv.DictWriter(out_file, out_fields, dialect='excel-tab')
out_csv.writeheader()
for in_row in in_rows:
out_row = {k: in_row.get(k) for k in out_fields}
PSID = in_row['PSID']
if PSID and in_row['Type'] == 'county':
db.execute('''
SELECT a.pct_nh_white, a.pct_nh_white_err, a.pct_college,
a.pct_college_err, a.income, a.income_err
FROM nc_counties AS c, acs_areas AS a
WHERE 'PSID:'||c.psid = %s
AND a.geoid = '05000US37'||c.county_fips
''', (PSID, ))
(pct_nh_white, pct_nh_white_err, pct_college,
pct_college_err, income, income_err) = next(db)
numbers = {
'Non-Hispanic White': f'{pct_nh_white:.4f}',
'Non-Hispanic White, Error': f'{pct_nh_white_err:.4f}',
'College': f'{pct_college:.4f}',
'College, Error': f'{pct_college_err:.4f}',
'Median Household Income': f'{income:.0f}',
'Median Household Income, Error': f'{income_err:.0f}'
}
elif PSID and in_row['Type'] == 'precinct':
db.execute('''
SELECT a.pct_nh_white, a.pct_nh_white_err, a.pct_college,
a.pct_college_err, a.income, a.income_err,
ST_Area(ST_Intersection(a.geom, p.geom)) / ST_Area(a.geom),
ST_Area(ST_Intersection(a.geom, p.geom)) / ST_Area(p.geom),
a.geoid
FROM nc121416 AS p, acs_areas AS a, tl_2016_37_tract AS t
WHERE 'PSID:'||p.psid = %s
AND p.year = %s
AND a.geoid LIKE '14000US%%'
AND a.geom && p.geom
AND ST_Relate(a.geom, p.geom, '2********') -- 2d overlap
AND '14000US'||t."GEOID" = a.geoid
''', (PSID, year))
_numbers = {
'Non-Hispanic White': 0,
'Non-Hispanic White, Error': 0,
'College': 0,
'College, Error': 0,
'Median Household Income': 0,
'Median Household Income, Error': 0
}
covered_portion = 0
for (pct_nh_white, pct_nh_white_err, pct_college,
pct_college_err, income, income_err,
tract_portion, precinct_portion, geoid) in db:
_numbers['Non-Hispanic White'] += precinct_portion * pct_nh_white
_numbers['Non-Hispanic White, Error'] += precinct_portion * pct_nh_white_err
_numbers['College'] += precinct_portion * pct_college
_numbers['College, Error'] += precinct_portion * pct_college_err
_numbers['Median Household Income'] += precinct_portion * income
_numbers['Median Household Income, Error'] += precinct_portion * income_err
covered_portion += precinct_portion
print('-', geoid, precinct_portion, ':', pct_nh_white, pct_nh_white_err, pct_college,
pct_college_err, income, income_err)
print(_numbers)
# Account for uncovered portion of precinct
numbers = {k: '{:.04f}'.format(v/covered_portion)
for (k, v) in _numbers.items()}
print(covered_portion)
print(numbers)
assert .01 < covered_portion and covered_portion < 1.01
else:
numbers = {
'Non-Hispanic White': None,
'Non-Hispanic White, Error': None,
'College': None,
'College, Error': None,
'Median Household Income': None,
'Median Household Income, Error': None
}
out_row.update(numbers)
out_csv.writerow(out_row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment