Created
April 4, 2012 20:47
-
-
Save evz/2305477 to your computer and use it in GitHub Desktop.
2010 Census: A pain in my block
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# 0.Batch.sh -- end-to-end loader for one state's 2010 Census block data.
#
# Usage: ./0.Batch.sh "State Name"     (proper case, e.g. ./0.Batch.sh Delaware)
#
# Pipeline: fetch the TIGER block shapefile, load geometry, fetch SF1 CSVs,
# load the geography cross-reference, load selected data tables, then relate
# blocks to localities.

# BUG FIX: was `[ $# \< 1 ]`, an escaped lexicographic STRING comparison that
# only worked by accident; -lt is the numeric operator. Also exit nonzero so
# callers can detect the usage error.
if [ "$#" -lt 1 ]
then
    echo "You must specify exactly one argument: the proper-case name of a state '0.Batch.sh Delaware'." >&2
    exit 1
fi

STATE_NAME="$1"
# "New Hampshire" -> "New_Hampshire" (used in fetched data file names)
STATE_NAME_SPACE_FIXED=$(echo "${STATE_NAME}" | tr '[ ]' '[_]')
# "New Hampshire" -> "new_hampshire"
STATE_NAME_LOWER=$(echo "${STATE_NAME}" | tr '[A-Z ]' '[a-z_]')
# Postal abbreviation and FIPS code come from helper scripts; abort the whole
# batch if either lookup fails.
STATE_NAME_ABBR=$(python get_state_abbr.py "${STATE_NAME}") || exit $?
STATE_FIPS=$(python get_state_fips.py "${STATE_NAME}") || exit $?

echo "Begin ${STATE_NAME} at $(date)"

echo 'Fetching Geo Data'
mkdir -p "data/tl_2010_${STATE_FIPS}_tabblock10"
wget -O "data/tl_2010_${STATE_FIPS}_tabblock10/tl_2010_${STATE_FIPS}_tabblock10.zip" "http://www2.census.gov/geo/tiger/TIGER2010/TABBLOCK/2010/tl_2010_${STATE_FIPS}_tabblock10.zip"
unzip "data/tl_2010_${STATE_FIPS}_tabblock10/tl_2010_${STATE_FIPS}_tabblock10.zip" -d "data/tl_2010_${STATE_FIPS}_tabblock10/"

echo 'Loading Geo Data'
python 1.LoadBlockGeo.py "${STATE_FIPS}" || exit $?

echo 'Fetching Census Data'
./2.FetchCensusData.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR"

echo 'Loading Cross reference table'
python 3.LoadXref.py "data/${STATE_NAME_ABBR}geo2010.csv" || exit $?

echo 'Loading 2010 data'
# Only loading certain csvs cause I'm only building some census tables
# for i in {1..47} to load all data
for i in 1 3 4 5 6 44 47
do
    python 4.LoadBlockData.py "data/sf_data_2010_${STATE_NAME_LOWER}_${i}.csv" || exit $?
done

echo 'Relating Blocks to Localities'
python 5.MakeLocalityRel.py "${STATE_NAME_ABBR}" || exit $?

echo "Complete ${STATE_NAME} at $(date)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# BUG FIX: shebang was `#!/bin/env python`; env lives in /usr/bin on most
# systems (the sibling 3.LoadXref.py already uses the correct path).
"""Load a state's 2010 TIGER census-block shapefile into the CensusBlock model.

Usage: 1.LoadBlockGeo.py <state FIPS code>

Expects the shapefile fetched/unzipped by 0.Batch.sh under
<PROJECT_PATH>/census/scripts/data/tl_2010_<FIPS>_tabblock10/.
"""
# All this stuff is just the normal dance you do to get the environment
# setup for Django to work.
import sys
import site
import os

vepath = '/home/wdo/sites/my.bahai.us/lib/python2.6/site-packages'
prev_sys_path = list(sys.path)
site.addsitedir(vepath)
sys.path.append('/home/wdo/sites/my.bahai.us/checkouts/mybahai')
sys.path.append('/home/wdo/sites/my.bahai.us/checkouts')
# Move the entries just added to the FRONT of sys.path so the virtualenv's
# packages win over any system-wide installs.
new_sys_path = [p for p in sys.path if p not in prev_sys_path]
for item in new_sys_path:
    sys.path.remove(item)
sys.path[:0] = new_sys_path
os.environ['DJANGO_SETTINGS_MODULE'] = 'mybahai.settings'

from psycopg2 import IntegrityError
from django.contrib.gis.utils import mapping, LayerMapping, add_postgis_srs
from mybahai.census.models import CensusBlock
from django.conf import settings

# NOTE(review): the message says "state name" but 0.Batch.sh passes the state
# FIPS code -- the code below builds the path from a FIPS code. Confirm and
# reword the message in a follow-up.
if len(sys.argv) < 2:
    sys.exit('You must provide the state name of the block data you want to load an argument to this script.')
STATE = sys.argv[1]

# Register the Google Spherical Mercator projection (srid 900913). It is a
# one-time insert, so an IntegrityError just means it already exists.
try:
    add_postgis_srs(900913)
except IntegrityError:
    # Parenthesized print works identically on Python 2 (single argument)
    # and keeps the file parseable by Python 3 tools.
    print("The Google Spherical Mercator projection, or a projection with srid 900913, already exists, skipping insert")

census_shp = os.path.join(settings.PROJECT_PATH, 'census/scripts/data/tl_2010_' + STATE + '_tabblock10/tl_2010_' + STATE + '_tabblock10.shp')

# Shapefile attribute name -> CensusBlock field name, consumed by LayerMapping.
census_mapping = {
    'state_fips': 'STATEFP10',
    'county_fips': 'COUNTYFP10',
    'tract_code': 'TRACTCE10',
    'block_number': 'BLOCKCE10',
    'geo_id': 'GEOID10',
    'name': 'NAME10',
    'feat_code': 'MTFCC10',
    'land_area': 'ALAND10',
    'water_area': 'AWATER10',
    'internal_lat': 'INTPTLAT10',
    'internal_lon': 'INTPTLON10',
    'mpoly': 'MULTIPOLYGON',
}

census_layer = LayerMapping(CensusBlock,
                            census_shp,
                            census_mapping,
                            transform=False,        # load geometries as-is, no reprojection
                            encoding='iso-8859-1')  # TIGER DBF attribute encoding
census_layer.save(verbose=False, strict=True, progress=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# BUG FIX: shebang was `#!/bin/env python`; env lives in /usr/bin.
"""Load one SF1 block-data CSV into its per-table census models.

Usage: 4.LoadBlockData.py <path to sf_data_2010_<state>_<n>.csv>

Each CSV row is matched to a CensusBlock through the XRef records created by
3.LoadXref.py; rows with no cross-reference are skipped. The row's columns
are grouped by census table (P1, P3, ...) and saved as one model instance
per table.
"""
import sys
import site
import os
import json

# ... Normal Django environment setup (see 1.LoadBlockGeo.py) ...

from mybahai.census.models import *
from csvkit.unicsv import UnicodeCSVReader
import utils
import config

if len(sys.argv) < 2:
    sys.exit('You must provide the filename of a CSV as an argument to this script.')
FILENAME = sys.argv[1]

# Census table name -> Django model class, for the tables being built.
GET_CENSUS_TABLE = {
    'P1': P1,
    'P3': P3,
    # ... bunch more keys related to Django models for census tables ...
}

with open(FILENAME) as f:
    rows = UnicodeCSVReader(f)
    headers = rows.next()
    for row in rows:
        row_dict = dict(zip(headers, row))
        # Find the geography cross-reference for this logical record; skip
        # rows that were never cross-referenced by 3.LoadXref.py.
        try:
            x = XRef.objects.get(fileid=row_dict['FILEID'], stusab=row_dict['STUSAB'], logrecno=row_dict['LOGRECNO'])
        except XRef.DoesNotExist:
            continue
        block = CensusBlock.objects.get(xref=x)
        geo_id = block.geo_id
        # Group columns by the census table they belong to, e.g.
        # {'P1': {'P001001': ...}, 'P3': {'P003001': ..., ...}}.
        tables = {}
        for key, value in row_dict.items():
            table = utils.parse_table_from_key(key)
            if table:
                tables.setdefault(table, {})[key] = value
        for table, columns in tables.items():
            # Membership test directly on the dict (was `.keys()`).
            if table not in GET_CENSUS_TABLE:
                continue
            model = GET_CENSUS_TABLE[table]
            # Model field names are the lower-cased CSV column names.
            fields = dict((col.lower(), val) for col, val in columns.iteritems())
            fields['geo_id'] = geo_id
            record = model(**fields)
            record.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CensusBlock(models.Model):
    """One 2010 census tabulation block, loaded from the TIGER
    tl_2010_<fips>_tabblock10 shapefile by 1.LoadBlockGeo.py.

    The CharField max_lengths mirror the widths of the TIGER attributes
    they are mapped from (see census_mapping in 1.LoadBlockGeo.py).
    """
    # Null until assigned by 5.MakeLocalityRel.py.
    locality = models.ForeignKey(Locality, null=True)
    # Null until linked to an SF1 logical record by 3.LoadXref.py.
    xref = models.ForeignKey(XRef, null=True)
    state_fips = models.CharField(max_length=2)     # STATEFP10
    county_fips = models.CharField(max_length=3)    # COUNTYFP10
    tract_code = models.CharField(max_length=6)     # TRACTCE10
    block_number = models.CharField(max_length=4)   # BLOCKCE10
    geo_id = models.CharField(max_length=15)        # GEOID10
    name = models.CharField(max_length=10)          # NAME10
    feat_code = models.CharField(max_length=5)      # MTFCC10
    land_area = models.IntegerField()               # ALAND10
    water_area = models.IntegerField()              # AWATER10
    internal_lat = models.CharField(max_length=11)  # INTPTLAT10
    internal_lon = models.CharField(max_length=12)  # INTPTLON10
    mpoly = models.MultiPolygonField('Block area', srid=4326)
    # GeoManager enables the spatial lookups used by 5.MakeLocalityRel.py
    # (coveredby, overlaps, bboverlaps).
    objects = models.GeoManager()

    def __unicode__(self):
        return u'%s, (%s)' % (self.name, self.geo_id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class P1(models.Model):
    """Census SF1 table P1 (total population), one row per census block."""
    geo_id = models.CharField(max_length=15, primary_key=True)
    p001001 = models.IntegerField()  # summed per locality by 5.MakeLocalityRel.py

    # BUG FIX: was misspelled `__unicode_` (single trailing underscore), so
    # Django never used it and instances printed with the default repr.
    def __unicode__(self):
        return 'Total Population'
class P3(models.Model):
    """Census SF1 table P3 (race), one row per census block.

    Field names are the lower-cased SF1 cell identifiers; values are loaded
    from the CSV columns of the same name by 4.LoadBlockData.py.
    """
    geo_id = models.CharField(max_length=15, primary_key=True)
    p003001 = models.IntegerField()
    p003002 = models.IntegerField()
    p003003 = models.IntegerField()
    p003004 = models.IntegerField()
    p003005 = models.IntegerField()
    p003006 = models.IntegerField()
    p003007 = models.IntegerField()
    p003008 = models.IntegerField()

    # BUG FIX: was misspelled `__unicode_` (single trailing underscore), so
    # Django never used it.
    def __unicode__(self):
        return 'Race %s' % self.geo_id
... etc ... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python
"""Relate census blocks to localities and total each locality's population.

Usage: 5.MakeLocalityRel.py <state abbreviation>

For every locality in the given state, claims CensusBlocks in three spatial
passes of decreasing strictness (coveredby, overlaps, bboverlaps), sums the
P1 total population of the blocks claimed, and stores the sum on the
locality.
"""
import sys
import site
import os
import json
from csvkit.unicsv import UnicodeCSVReader
import config
import utils
# ... LA LA LA Django env setup LA LA LA ...
from mybahai.census.models import *
from mybahai.natgeo.models import Locality

if len(sys.argv) < 2:
    sys.exit('You must provide a state abbreviation as an argument for this script.')
STATE = sys.argv[1]

locs = Locality.objects.filter(locality_state__state_abrv=STATE.upper())
for loc in locs:
    # Pass 1: blocks wholly inside the locality always belong to it, so
    # they are assigned and counted unconditionally.
    first_pass = CensusBlock.objects.filter(mpoly__coveredby=loc.mpoly)
    total_pop = 0
    for block in first_pass:
        block.locality = loc
        block.save()
        # NOTE(review): p1[0] raises IndexError if no P1 row exists for this
        # block -- presumably 4.LoadBlockData.py has always run first; verify.
        p1 = P1.objects.filter(geo_id=block.geo_id)
        total_pop += p1[0].p001001
    # Pass 2: boundary-straddling blocks are claimed only if no earlier
    # locality (or pass) already took them.
    second_pass = CensusBlock.objects.filter(mpoly__overlaps=loc.mpoly)
    for block in second_pass:
        if not block.locality:
            # NOTE(review): indentation was lost in the paste; this
            # restoration counts the population only for newly claimed
            # blocks, which avoids double counting -- confirm against the
            # original file.
            block.locality = loc
            block.save()
            p1 = P1.objects.filter(geo_id=block.geo_id)
            total_pop += p1[0].p001001
    # Pass 3: loosest match -- bounding-box overlap -- again only for blocks
    # still unclaimed.
    third_pass = CensusBlock.objects.filter(mpoly__bboverlaps=loc.mpoly)
    for block in third_pass:
        if not block.locality:
            block.locality = loc
            block.save()
            p1 = P1.objects.filter(geo_id=block.geo_id)
            total_pop += p1[0].p001001
    loc.locality_census_pop = total_pop
    loc.save()
    print 'Saved %s' % loc.locality_name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Load the SF1 geography file and cross-reference blocks to logical records.

Usage: 3.LoadXref.py <path to <st>geo2010.csv>

For each row at a configured summary level describing a complete geography,
creates (or reuses) an XRef keyed on (FILEID, STUSAB, LOGRECNO) and attaches
it to the matching CensusBlock, so that 4.LoadBlockData.py can later find
blocks by logical record number.
"""
import sys
import site
import os
import json
from csvkit.unicsv import UnicodeCSVReader
import config
import utils

# ... Normal Django environment setup goes here ...

from mybahai.census.models import CensusBlock, XRef

if len(sys.argv) < 2:
    sys.exit('You must provide the filename of a CSV as an argument to this script.')
FILENAME = sys.argv[1]


def make_xref(d):
    """Create (or fetch) the XRef for one geography row.

    Mutates d in place: pops the keys the XRef consumes plus a couple of
    unneeded attributes.
    """
    # Strip off unnecessary attrs
    d.pop('CHARITER')
    d.pop('CIFSN')
    x, created = XRef.objects.get_or_create(fileid=d.pop('FILEID'), stusab=d.pop('STUSAB'), logrecno=d.pop('LOGRECNO'))
    return x


with open(FILENAME) as f:
    rows = UnicodeCSVReader(f)
    headers = rows.next()
    for row in rows:
        geography = {}
        row_dict = dict(zip(headers, row))
        # Only the summary levels we are building tables for.
        if row_dict['SUMLEV'] not in config.SUMLEVS:
            continue
        # Ignore rows that are not for complete geographies
        if row_dict['GEOCOMP'] != config.GEOCOMP_COMPLETE:
            continue
        geography['sumlev'] = row_dict.pop('SUMLEV')
        # Each summary level computes its GEOID from different columns.
        geography['geoid'] = utils.GEOID_COMPUTERS[geography['sumlev']](row_dict)
        xref = make_xref(row_dict)
        # NOTE(review): CensusBlock.objects.get will raise DoesNotExist if the
        # geography file references a block that 1.LoadBlockGeo.py did not
        # load -- consider catching and skipping, as 4.LoadBlockData.py does
        # for missing XRefs.
        block = CensusBlock.objects.get(geo_id=geography['geoid'])
        block.xref = xref
        block.save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment