Skip to content

Instantly share code, notes, and snippets.

@mattdennewitz
Created August 13, 2009 19:43
Show Gist options
  • Save mattdennewitz/167412 to your computer and use it in GitHub Desktop.
# -*- coding: utf8 -*-
"""
Download and import US census data from
`http://www.census.gov/geo/www/cob/z52000.html`
# shapefile mapping:
zipcode_mapping = {
'area' : 'AREA',
'perimeter' : 'PERIMETER',
'name' : 'NAME',
'lsad' : 'LSAD',
'lsad_trans' : 'LSAD_TRANS',
'geom' : 'MULTIPOLYGON',
}
# places.Zipcode
class Zipcode(models.Model):
area = models.FloatField()
perimeter = models.FloatField()
name = models.CharField(max_length=90)
lsad = models.CharField(max_length=2)
lsad_trans = models.CharField(max_length=50)
geom = models.MultiPolygonField(srid=4326)
objects = models.GeoManager()
def __unicode__(self):
return self.name
"""
from BeautifulSoup import BeautifulSoup as BS, SoupStrainer as SS
import glob
import logging
import os
import re
import sys
import urllib2
import urlparse
import zipfile
from django.core.management import setup_environ
import settings
setup_environ(settings)
from django.db import transaction
from django.contrib.gis.utils import LayerMapping
from places.mappings import zipcode_mapping
from places.models import Zipcode
#
# config
########

# Census page listing the per-state zip-code shapefile archives.
CENSUS_DATA_URL = "http://www.census.gov/geo/www/cob/z52000.html"
# Delete downloaded/extracted files once the import finishes.
REMOVE_SHAPEFILES = True
# Wipe all existing Zipcode rows before importing fresh data.
CLEAR_ZIPCODES_AT_STARTUP = True

# configure simple logging
logging.basicConfig(level=logging.DEBUG)

if CLEAR_ZIPCODES_AT_STARTUP:
    try:
        # .all() is the conventional spelling of an argument-less .filter();
        # both delete every row.
        Zipcode.objects.all().delete()
        logging.debug("Removed all Zipcode objects")
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit still
        # propagate; roll the failed delete back before exiting.
        transaction.rollback_unless_managed()
        logging.error("Couldn't remove Zipcode objects: %s. Exiting."
                      % sys.exc_info()[1])
        sys.exit()
# create SoupStrainer for *_shp.zip files
# The dot is escaped so the pattern matches a literal ".zip" suffix rather
# than any character (the original "_shp.zip$" also matched e.g. "_shpXzip").
zip_strainer = SS('a', attrs={'href': re.compile(r"_shp\.zip$")})

# get census data, parse
# urlopen handles are not closed automatically in py2 — close explicitly.
response = urllib2.urlopen(CENSUS_DATA_URL)
try:
    page_html = response.read()
finally:
    response.close()

# get shapefile archive links (SoupStrainer limits parsing to matching <a>)
doc = BS(page_html, parseOnlyThese=zip_strainer)
shapefile_urls = [el.get('href') for el in doc]
#
# path setup
############

# all working data lives under ~/census_zipcode_data
user_home_path = os.path.expanduser("~")
data_dir = os.path.join(user_home_path, "census_zipcode_data")
shapefiles_dir = os.path.join(data_dir, "shapefiles")
dl_dir = os.path.join(data_dir, "downloads")

# Create each directory only when missing. The parent (data_dir) is listed
# first, so plain os.mkdir is sufficient for the children.
for dir_path, dir_label in ((data_dir, "data"),
                            (shapefiles_dir, "shapefiles"),
                            (dl_dir, "dl")):
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
        logging.debug("Created %s dir: %s" % (dir_label, dir_path))
#
# data file preparation
#######################

# download each archive, then unpack its members into shapefiles_dir
for shapefile_archive_url in shapefile_urls:
    # convert relative download url into absolute url
    abs_url = urlparse.urljoin(CENSUS_DATA_URL, shapefile_archive_url)

    # the archive's filename is the last path component of the url
    archive_name = os.path.split(shapefile_archive_url)[-1]
    dl_path = os.path.join(dl_dir, archive_name)

    if os.path.exists(dl_path):
        # fetched on a previous run; don't re-download
        logging.debug("Skipping existing archive: %s" % dl_path)
        continue

    logging.debug("Downloading %s" % abs_url)
    try:
        # Close both the network handle and the output file deterministically
        # (the original leaked them until GC).
        response = urllib2.urlopen(abs_url)
        try:
            archive_data = response.read()
        finally:
            response.close()
        dl_file = open(dl_path, "wb")
        try:
            dl_file.write(archive_data)
        finally:
            dl_file.close()
        logging.debug("Downloaded %s to %s" % (abs_url, dl_path))
    except Exception:
        # Narrowed from a bare except so Ctrl-C still aborts the run.
        logging.error("Could not download %s: %s" % (
            abs_url, sys.exc_info()[1]))
        continue

    # extract every member of the shapefile archive
    # (member_name instead of re-using "filename", which shadowed the
    # archive filename computed above)
    logging.debug("Attempting to decompress shapefile archive: %s"
                  % archive_name)
    shapefile_archive = zipfile.ZipFile(dl_path)
    try:
        for member_name in shapefile_archive.namelist():
            member_data = shapefile_archive.read(member_name)
            file_path = os.path.join(shapefiles_dir, member_name)
            out_file = open(file_path, "wb")
            try:
                out_file.write(member_data)
            finally:
                out_file.close()
            logging.info("Extracted %s to %s" % (member_name, file_path))
    finally:
        shapefile_archive.close()

logging.info("Shapefile download complete")
#
# import data via LayerMapping -> places.Zipcode model
######################################################

logging.debug("Preparing to import shapefiles")

# feed every extracted .shp through LayerMapping into the Zipcode model;
# transform=False because the data is already in the model's SRID (4326)
for shp_path in glob.glob(os.path.join(shapefiles_dir, "*.shp")):
    logging.debug("Mapping %s" % shp_path)
    mapper = LayerMapping(Zipcode, shp_path, zipcode_mapping,
                          transform=False)
    mapper.save(strict=True, verbose=False)

logging.info("Shapefile set import complete")
#
# cleanup
#########

if REMOVE_SHAPEFILES:
    # walk bottom-up (topdown=False) so files are removed before the
    # directories that contain them
    for root, dirs, files in os.walk(data_dir, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
            logging.debug("Removed file: %s" % name)
        for name in dirs:
            os.rmdir(os.path.join(root, name))
            # log message fixed: was "Remove path"
            logging.debug("Removed path: %s" % name)
    # os.walk leaves data_dir itself in place; drop it last
    os.rmdir(data_dir)
    logging.info("Cleanup removed all data files and folders")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment