Skip to content

Instantly share code, notes, and snippets.

@evz
Created May 3, 2012 21:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evz/2589579 to your computer and use it in GitHub Desktop.
Save evz/2589579 to your computer and use it in GitHub Desktop.
Script that downloads Census TIGER/Line data to populate the tables used by the OpenBlock geocoder
import urllib
import os
import sys
import requests
import json
import zipfile
import shelve
from get_state_abbr import STATE_ABBRS # this is just a big 'ol dict
from get_state_fips import STATE_FIPS # so is this
from subprocess import call
# Location of the shelve database used to record, per state and county, which
# download/import steps have already been completed.
SHELF_PATH = 'data/processed.db'
# Opened once at import time and shared by every function below.
# writeback=True keeps accessed entries cached in memory so that in-place
# changes to the nested dicts (SHELF[state][...] = ...) are written back when
# the shelf is synced or closed.
# NOTE(review): presumably the data/ directory must already exist for this
# open to succeed -- confirm.
SHELF = shelve.open(SHELF_PATH, writeback=True)
def fetch_unzip(url, fn):
    """Download the zip archive at ``url`` into ``data/`` and extract it.

    Args:
        url: Location of the zip archive to download.
        fn: File name to store the archive under inside ``data/``.

    Returns:
        The directory the archive was extracted into, i.e. ``data/<fn>`` with
        a trailing ``.zip`` suffix removed.
    """
    # urlretrieve lives on ``urllib`` in Python 2 and on ``urllib.request``
    # in Python 3; import locally so the function works on either.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if not os.path.isdir('data'):
        os.makedirs('data')

    fn_dir = 'data/' + fn
    urlretrieve(url, fn_dir)

    # str.rstrip('.zip') removes any run of trailing '.', 'z', 'i' and 'p'
    # characters rather than the literal suffix, so strip the suffix by hand.
    if fn_dir.endswith('.zip'):
        unzipped_dir = fn_dir[:-len('.zip')]
    else:
        unzipped_dir = fn_dir

    # Context manager guarantees the archive handle is closed after extraction.
    with zipfile.ZipFile(fn_dir, 'r') as archive:
        archive.extractall(unzipped_dir)
    return unzipped_dir
def get_counties(state):
    """Fetch and import the TIGER 2009 county files for ``state``.

    Looks up the state's counties through the SBA geodata API, downloads the
    ``edges``, ``faces`` and ``featnames`` archives for each county, runs
    ``import_blocks_tiger`` on them and finally runs the three
    ``populate_streets`` steps. Progress is recorded in ``SHELF[state]`` so
    steps already flagged as done are skipped.

    Args:
        state: Key into ``STATE_ABBRS``, ``STATE_FIPS`` and ``SHELF``; it is
            upper-cased to build the TIGER directory name.

    Raises:
        KeyError: If ``state`` is unknown to the lookup tables or ``SHELF``,
            or if ``SHELF[state]['place_path']`` has not been recorded yet.
        requests.HTTPError: If the county lookup returns an error status.
    """
    abbr = STATE_ABBRS[state]
    r = requests.get('http://api.sba.gov/geodata/county_data_for_state_of/' + abbr.lower() + '.json')
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    counties = json.loads(r.content)

    sf = STATE_FIPS[state]
    # NOTE(review): TIGER directory names appear to use underscores where the
    # state name has spaces (e.g. 36_NEW_YORK) -- confirm. The replace is a
    # no-op for single-word names.
    state_dir = state.upper().replace(' ', '_')
    base_url = 'http://www2.census.gov/geo/tiger/TIGER2009/{0}_{1}/{2}'.format(sf, state_dir, sf)

    state_shelf = SHELF[state]
    for county in counties:
        county_name = county['name'].replace(' ', '_')
        county_fips = county['fips_county_cd'].rjust(3, '0')
        if not state_shelf.get(county_fips, False):
            state_shelf[county_fips] = {}
        county_shelf = state_shelf[county_fips]

        paths = {}
        for kind in ('edges', 'faces', 'featnames'):
            fn = 'tl_2009_{0}{1}_{2}.zip'.format(sf, county_fips, kind)
            if county_shelf.get(kind, False):
                # Already downloaded on an earlier run: rebuild the directory
                # fetch_unzip extracted to ('data/' + archive name without
                # '.zip') so the import step below can still locate the files.
                paths[kind] = 'data/' + fn[:-len('.zip')]
                continue
            county_shelf[kind] = False
            url = '{0}{1}_{2}/{3}'.format(base_url, county_fips, county_name, fn)
            paths[kind] = fetch_unzip(url, fn)
            county_shelf[kind] = True

        if not county_shelf.get('processed', False):
            county_shelf['processed'] = False
            edges = '{0}/tl_2009_{1}{2}_edges.shp'.format(paths['edges'], sf, county_fips)
            featnames = '{0}/tl_2009_{1}{2}_featnames.dbf'.format(paths['featnames'], sf, county_fips)
            faces = '{0}/tl_2009_{1}{2}_faces.dbf'.format(paths['faces'], sf, county_fips)
            place = '{0}/tl_2009_{1}_place.shp'.format(state_shelf['place_path'], sf)
            # NOTE: the command's exit status is not inspected; the county is
            # flagged as processed regardless of the outcome.
            call(['import_blocks_tiger', edges, featnames, faces, place])
            county_shelf['processed'] = True

    if not state_shelf.get('streets', False):
        state_shelf['streets'] = False
        for step in ('streets', 'block_intersections', 'intersections'):
            call(['populate_streets', '-v', '-v', '-v', '-v', step])
        state_shelf['streets'] = True
def get_state_zip(state):
    """Download the state's TIGER 2009 ZCTA5 archive and import it.

    Does nothing when ``SHELF[state]['zip']`` is already truthy; otherwise the
    archive is fetched, passed to ``import_zips_tiger`` and the flag is set.

    Args:
        state: Key into ``STATE_FIPS`` and ``SHELF``; it is upper-cased to
            build the TIGER directory name.

    Raises:
        KeyError: If ``state`` is missing from ``SHELF`` or ``STATE_FIPS``.
    """
    state_shelf = SHELF[state]
    if state_shelf.get('zip', False):
        return

    state_shelf['zip'] = False
    sf = STATE_FIPS[state]
    fn = 'tl_2009_{0}_zcta5.zip'.format(sf)
    # NOTE(review): TIGER directory names appear to use underscores where the
    # state name has spaces (e.g. 36_NEW_YORK) -- confirm. The replace is a
    # no-op for single-word names.
    state_dir = state.upper().replace(' ', '_')
    url = 'http://www2.census.gov/geo/tiger/TIGER2009/{0}_{1}/{2}'.format(sf, state_dir, fn)
    path = fetch_unzip(url, fn)
    call(['import_zips_tiger', path])
    state_shelf['zip'] = True
def get_state_place(state):
    """Download the state's TIGER 2009 place archive and record where it is.

    Does nothing when ``SHELF[state]['place']`` is already truthy; otherwise
    the archive is fetched and its extraction directory is stored under
    ``SHELF[state]['place_path']``.

    Args:
        state: Key into ``STATE_FIPS`` and ``SHELF``; it is upper-cased to
            build the TIGER directory name.

    Raises:
        KeyError: If ``state`` is missing from ``SHELF`` or ``STATE_FIPS``.
    """
    state_shelf = SHELF[state]
    if state_shelf.get('place', False):
        return

    state_shelf['place'] = False
    sf = STATE_FIPS[state]
    fn = 'tl_2009_{0}_place.zip'.format(sf)
    # NOTE(review): TIGER directory names appear to use underscores where the
    # state name has spaces (e.g. 36_NEW_YORK) -- confirm. The replace is a
    # no-op for single-word names.
    state_dir = state.upper().replace(' ', '_')
    url = 'http://www2.census.gov/geo/tiger/TIGER2009/{0}_{1}/{2}'.format(sf, state_dir, fn)
    path = fetch_unzip(url, fn)
    state_shelf['place'] = True
    state_shelf['place_path'] = path
if __name__ == '__main__':
    # Usage: <script> <state>
    # The shelf records what has been processed so finished states are skipped.
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <state>'.format(sys.argv[0]))

    # Assigning through os.environ exports the variable to the child commands
    # and also keeps Python's own view of the environment in step.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'geocoder.settings'
    state = sys.argv[1]
    try:
        entry = SHELF.get(state, False)
        # A state whose 'streets' flag is not set is treated as unfinished:
        # its record is reset and every step is run again from scratch.
        if not (entry and entry.get('streets', False)):
            SHELF[state] = {}
            get_state_zip(state)
            get_state_place(state)
            get_counties(state)
    finally:
        # With writeback=True, changes to cached entries only reach disk on
        # sync()/close(), so close explicitly even if a step raised.
        SHELF.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment