Skip to content

Instantly share code, notes, and snippets.

@evz
Created April 4, 2012 20:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evz/2305477 to your computer and use it in GitHub Desktop.
2010 Census: A pain in my block
#!/bin/bash
# 0.Batch.sh -- end-to-end loader for one state's 2010 census block data:
# fetch the TIGER block shapefile, load geometry, fetch census CSVs, load
# the cross-reference table, load block data, and relate blocks to localities.
#
# Usage: ./0.Batch.sh "State Name"   (proper case, e.g. "New York")
if [ $# -lt 1 ]
then
    echo "You must specify exactly one argument: the proper-case name of a state '0.Batch.sh Delaware'."
    exit 1
fi
STATE_NAME="$1"
# "New York" -> "New_York" (used by the census-data fetch script)
STATE_NAME_SPACE_FIXED=$(echo "${STATE_NAME}" | tr '[ ]' '[_]')
# "New York" -> "new_york" (matches the downloaded CSV file names)
STATE_NAME_LOWER=$(echo "${STATE_NAME}" | tr '[A-Z ]' '[a-z_]')
STATE_NAME_ABBR=$(python get_state_abbr.py "${STATE_NAME}") || exit $?
STATE_FIPS=$(python get_state_fips.py "${STATE_NAME}") || exit $?
echo "Begin ${STATE_NAME} at $(date)"
echo 'Fetching Geo Data'
mkdir -p "data/tl_2010_${STATE_FIPS}_tabblock10"
wget -O "data/tl_2010_${STATE_FIPS}_tabblock10/tl_2010_${STATE_FIPS}_tabblock10.zip" "http://www2.census.gov/geo/tiger/TIGER2010/TABBLOCK/2010/tl_2010_${STATE_FIPS}_tabblock10.zip" || exit $?
unzip "data/tl_2010_${STATE_FIPS}_tabblock10/tl_2010_${STATE_FIPS}_tabblock10.zip" -d "data/tl_2010_${STATE_FIPS}_tabblock10/" || exit $?
echo 'Loading Geo Data'
python 1.LoadBlockGeo.py "${STATE_FIPS}" || exit $?
echo 'Fetching Census Data'
./2.FetchCensusData.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR" || exit $?
echo 'Loading Cross reference table'
python 3.LoadXref.py "data/${STATE_NAME_ABBR}geo2010.csv" || exit $?
echo 'Loading 2010 data'
# Only loading certain csvs cause I'm only building some census tables
# for i in {1..47} to load all data
for i in 1 3 4 5 6 44 47
do
    python 4.LoadBlockData.py "data/sf_data_2010_${STATE_NAME_LOWER}_$i.csv" || exit $?
done
echo 'Relating Blocks to Localities'
python 5.MakeLocalityRel.py "${STATE_NAME_ABBR}" || exit $?
echo "Complete ${STATE_NAME} at $(date)"
#!/usr/bin/env python
# 1.LoadBlockGeo.py -- load a state's TIGER 2010 block shapefile into the
# CensusBlock model via GeoDjango's LayerMapping.
#
# All this stuff is just the normal dance you do to get the environment
# set up for Django to work outside a web request.
import sys
import site
import os

vepath = '/home/wdo/sites/my.bahai.us/lib/python2.6/site-packages'
prev_sys_path = list(sys.path)
site.addsitedir(vepath)
sys.path.append('/home/wdo/sites/my.bahai.us/checkouts/mybahai')
sys.path.append('/home/wdo/sites/my.bahai.us/checkouts')
# Move the freshly-added virtualenv paths to the front of sys.path so they
# win over any system-wide packages.
new_sys_path = [p for p in sys.path if p not in prev_sys_path]
for item in new_sys_path:
    sys.path.remove(item)
sys.path[:0] = new_sys_path
os.environ['DJANGO_SETTINGS_MODULE'] = 'mybahai.settings'

from psycopg2 import IntegrityError
from django.contrib.gis.utils import mapping, LayerMapping, add_postgis_srs
from mybahai.census.models import CensusBlock
from django.conf import settings

# 0.Batch.sh invokes this script with ${STATE_FIPS}, so the argument is the
# two-digit state FIPS code used in the shapefile's directory/file names.
if len(sys.argv) < 2:
    sys.exit('You must provide the state FIPS code of the block data you want to load as an argument to this script.')
STATE = sys.argv[1]
try:
    add_postgis_srs(900913)
except IntegrityError:
    print("The Google Spherical Mercator projection, or a projection with srid 900913, already exists, skipping insert")

census_shp = os.path.join(settings.PROJECT_PATH, 'census/scripts/data/tl_2010_' + STATE + '_tabblock10/tl_2010_' + STATE + '_tabblock10.shp')
# Shapefile attribute -> CensusBlock field mapping for LayerMapping.
census_mapping = {
    'state_fips': 'STATEFP10',
    'county_fips': 'COUNTYFP10',
    'tract_code': 'TRACTCE10',
    'block_number': 'BLOCKCE10',
    'geo_id': 'GEOID10',
    'name': 'NAME10',
    'feat_code': 'MTFCC10',
    'land_area': 'ALAND10',
    'water_area': 'AWATER10',
    'internal_lat': 'INTPTLAT10',
    'internal_lon': 'INTPTLON10',
    'mpoly': 'MULTIPOLYGON',
}
census_layer = LayerMapping(CensusBlock,
                            census_shp,
                            census_mapping,
                            transform=False,
                            encoding='iso-8859-1')
census_layer.save(verbose=False, strict=True, progress=True)
#!/usr/bin/env python
# 4.LoadBlockData.py -- load one census summary-file data CSV, writing each
# block's values into the per-table Django models (P1, P3, ...), keyed by the
# XRef rows created earlier by 3.LoadXref.py.
import sys
import site
import os
import json

# ... Normal Django environment setup (the sys.path / DJANGO_SETTINGS_MODULE
# dance) goes here in the original gist ...

from mybahai.census.models import *
from csvkit.unicsv import UnicodeCSVReader
import utils
import config

if len(sys.argv) < 2:
    sys.exit('You must provide the filename of a CSV as an argument to this script.')
FILENAME = sys.argv[1]

# Census table name -> Django model that stores that table's columns.
GET_CENSUS_TABLE = {
    'P1': P1,
    'P3': P3,
    # ... bunch more keys related to Django models for census tables ...
}

with open(FILENAME) as f:
    rows = UnicodeCSVReader(f)
    # First row is the header (Python 2 iterator protocol: .next()).
    headers = rows.next()
    for row in rows:
        row_dict = dict(zip(headers, row))
        # Skip records whose geography was never cross-referenced.
        try:
            x = XRef.objects.get(fileid=row_dict['FILEID'], stusab=row_dict['STUSAB'], logrecno=row_dict['LOGRECNO'])
        except XRef.DoesNotExist:
            continue
        block = CensusBlock.objects.get(xref=x)
        geo_id = block.geo_id
        # Bucket the row's columns by the census table they belong to
        # (utils.parse_table_from_key returns e.g. 'P1', or falsy to skip).
        tables = {}
        for key, value in row_dict.items():
            table_name = utils.parse_table_from_key(key)
            if table_name:
                tables.setdefault(table_name, {})[key] = value
        # Persist one model instance per table we actually have a model for.
        for table_name, columns in tables.items():
            if table_name not in GET_CENSUS_TABLE:
                continue
            model = GET_CENSUS_TABLE[table_name]
            fields = dict((k.lower(), v) for k, v in columns.iteritems())
            fields['geo_id'] = geo_id
            model(**fields).save()
class CensusBlock(models.Model):
    """One 2010 census tabulation block (a TIGER tabblock10 record).

    Populated from the shapefile by 1.LoadBlockGeo.py; ``xref`` and
    ``locality`` are filled in later by 3.LoadXref.py and
    5.MakeLocalityRel.py respectively.
    """
    locality = models.ForeignKey(Locality, null=True)
    locality = locality  # noqa -- placeholder removed; see fields below
class P1(models.Model):
    """Census table P1 (total population) for one block."""
    geo_id = models.CharField(max_length=15, primary_key=True)
    # p001001: total population count for the block.
    p001001 = models.IntegerField()

    # BUG FIX: was misspelled __unicode_ (single trailing underscore), so
    # Django never picked it up as the unicode representation.
    def __unicode__(self):
        return 'Total Population'
class P3(models.Model):
    """Census table P3 (race) for one block.

    Columns p003001-p003008 are the raw P3 counts from the 2010 summary
    file, loaded by 4.LoadBlockData.py.
    """
    geo_id = models.CharField(max_length=15, primary_key=True)
    p003001 = models.IntegerField()
    p003002 = models.IntegerField()
    p003003 = models.IntegerField()
    p003004 = models.IntegerField()
    p003005 = models.IntegerField()
    p003006 = models.IntegerField()
    p003007 = models.IntegerField()
    p003008 = models.IntegerField()

    # BUG FIX: was misspelled __unicode_ (single trailing underscore), so
    # Django never picked it up as the unicode representation.
    def __unicode__(self):
        return 'Race %s' % self.geo_id
... etc ...
#!/usr/bin/env python
# 5.MakeLocalityRel.py -- attach CensusBlocks to each Locality in a state and
# total up each locality's census population from table P1.
import sys
import site
import os
import json
from csvkit.unicsv import UnicodeCSVReader
import config
import utils

# ... LA LA LA Django env setup LA LA LA ...

from mybahai.census.models import *
from mybahai.natgeo.models import Locality

if len(sys.argv) < 2:
    sys.exit('You must provide a state abbreviation as an argument for this script.')
STATE = sys.argv[1]


def _block_population(block):
    """Population (P1.p001001) for a block; 0 when no P1 row was loaded.

    ROBUSTNESS FIX: the original indexed p1[0] unconditionally, which
    raises IndexError for any block missing from table P1.
    """
    p1 = P1.objects.filter(geo_id=block.geo_id)
    return p1[0].p001001 if p1 else 0


# NOTE(review): the gist paste lost all indentation; this structure is
# reconstructed so population is only counted for blocks claimed in each
# pass (otherwise passes 2/3 would double-count pass-1 blocks) -- confirm
# against the original source.
locs = Locality.objects.filter(locality_state__state_abrv=STATE.upper())
for loc in locs:
    total_pop = 0
    # Pass 1: blocks fully covered by the locality always belong to it.
    for block in CensusBlock.objects.filter(mpoly__coveredby=loc.mpoly):
        block.locality = loc
        block.save()
        total_pop += _block_population(block)
    # Pass 2: partially overlapping blocks, only if not already claimed.
    for block in CensusBlock.objects.filter(mpoly__overlaps=loc.mpoly):
        if not block.locality:
            block.locality = loc
            block.save()
            total_pop += _block_population(block)
    # Pass 3: bounding-box overlap as a looser catch-all, again unclaimed only.
    for block in CensusBlock.objects.filter(mpoly__bboverlaps=loc.mpoly):
        if not block.locality:
            block.locality = loc
            block.save()
            total_pop += _block_population(block)
    loc.locality_census_pop = total_pop
    loc.save()
    print('Saved %s' % loc.locality_name)
#!/usr/bin/env python
# 3.LoadXref.py -- read a census "geo" header CSV and attach an XRef
# (fileid/stusab/logrecno cross-reference) to each matching CensusBlock, so
# later data loads can find the block for a given logical record number.
import sys
import site
import os
import json
from csvkit.unicsv import UnicodeCSVReader
import config
import utils

# ... Normal Django environment setup goes here ...

from mybahai.census.models import CensusBlock, XRef

if len(sys.argv) < 2:
    sys.exit('You must provide the filename of a CSV as an argument to this script.')
FILENAME = sys.argv[1]


def make_xref(d):
    """Get or create the XRef row for one geo-header record.

    Pops the identifying keys out of ``d`` as a side effect, so the caller's
    dict no longer carries them afterwards.
    """
    # Strip off unnecessary attrs
    d.pop('CHARITER')
    d.pop('CIFSN')
    x, created = XRef.objects.get_or_create(fileid=d.pop('FILEID'), stusab=d.pop('STUSAB'), logrecno=d.pop('LOGRECNO'))
    return x


with open(FILENAME) as f:
    rows = UnicodeCSVReader(f)
    # First row is the header (Python 2 iterator protocol: .next()).
    headers = rows.next()
    for row in rows:
        geography = {}
        row_dict = dict(zip(headers, row))
        # Only keep the summary levels this project cares about.
        if row_dict['SUMLEV'] not in config.SUMLEVS:
            continue
        # Ignore rows that are not for complete geographies
        if row_dict['GEOCOMP'] != config.GEOCOMP_COMPLETE:
            continue
        geography['sumlev'] = row_dict.pop('SUMLEV')
        # GEOID_COMPUTERS maps a summary level to a function that builds the
        # geoid from the remaining row fields.
        geography['geoid'] = utils.GEOID_COMPUTERS[geography['sumlev']](row_dict)
        xref = make_xref(row_dict)
        block = CensusBlock.objects.get(geo_id=geography['geoid'])
        block.xref = xref
        block.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment