Created
August 13, 2009 19:43
-
-
Save mattdennewitz/167412 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf8 -*- | |
""" | |
Download and import US census data from | |
`http://www.census.gov/geo/www/cob/z52000.html` | |
# shapefile mapping: | |
zipcode_mapping = { | |
'area' : 'AREA', | |
'perimeter' : 'PERIMETER', | |
'name' : 'NAME', | |
'lsad' : 'LSAD', | |
'lsad_trans' : 'LSAD_TRANS', | |
'geom' : 'MULTIPOLYGON', | |
} | |
# places.Zipcode | |
class Zipcode(models.Model): | |
area = models.FloatField() | |
perimeter = models.FloatField() | |
name = models.CharField(max_length=90) | |
lsad = models.CharField(max_length=2) | |
lsad_trans = models.CharField(max_length=50) | |
geom = models.MultiPolygonField(srid=4326) | |
objects = models.GeoManager() | |
def __unicode__(self): | |
return self.name | |
""" | |
from BeautifulSoup import BeautifulSoup as BS, SoupStrainer as SS | |
import glob | |
import logging | |
import os | |
import re | |
import sys | |
import urllib2 | |
import urlparse | |
import zipfile | |
from django.core.management import setup_environ | |
import settings | |
setup_environ(settings) | |
from django.db import transaction | |
from django.contrib.gis.utils import LayerMapping | |
from places.mappings import zipcode_mapping | |
from places.models import Zipcode | |
#
# config
########

# census page listing the 5-digit ZCTA shapefile archives
CENSUS_DATA_URL = "http://www.census.gov/geo/www/cob/z52000.html"

# remove all downloaded/extracted data when the import finishes
REMOVE_SHAPEFILES = True
# wipe the Zipcode table before importing, so reruns don't duplicate rows
CLEAR_ZIPCODES_AT_STARTUP = True

# configure simple logging
logging.basicConfig(level=logging.DEBUG)

if CLEAR_ZIPCODES_AT_STARTUP:
    try:
        Zipcode.objects.filter().delete()
        logging.debug("Removed all Zipcode objects")
    except Exception:
        # roll the failed delete back so the DB connection is usable again,
        # then bail out with a non-zero status so callers see the failure
        transaction.rollback_unless_managed()
        logging.error("Couldn't remove Zipcode objects: %s. Exiting." % sys.exc_info()[1])
        sys.exit(1)
# create SoupStrainer that keeps only <a> tags linking to *_shp.zip files.
# NOTE: the dot must be escaped — the original "_shp.zip$" pattern would
# also match e.g. "_shpXzip" because "." matches any character.
zip_strainer = SS('a', attrs={'href': re.compile(r"_shp\.zip$")})

# get census data, parse
request = urllib2.urlopen(CENSUS_DATA_URL)
response = request.read()

# get shapefile archive links (hrefs may be relative to CENSUS_DATA_URL)
doc = BS(response, parseOnlyThese=zip_strainer)
shapefile_urls = [el.get('href') for el in doc]
#
# path setup
############

# all working data lives under ~/census_zipcode_data:
#   downloads/   raw *_shp.zip archives
#   shapefiles/  extracted .shp/.shx/.dbf members
user_home_path = os.path.expanduser("~")
data_dir = os.path.join(user_home_path, "census_zipcode_data")
shapefiles_dir = os.path.join(data_dir, "shapefiles")
dl_dir = os.path.join(data_dir, "downloads")

# create any missing directory; parents listed first so plain mkdir suffices
for _dir in (data_dir, shapefiles_dir, dl_dir):
    if not os.path.exists(_dir):
        os.mkdir(_dir)
        logging.debug("Created dir: %s" % _dir)
#
# data file preparation
#######################

# download each archive, then unpack its members into shapefiles_dir
for shapefile_archive_url in shapefile_urls:
    # convert relative download url into absolute url
    abs_url = urlparse.urljoin(CENSUS_DATA_URL, shapefile_archive_url)

    # derive the local download path from the archive's filename.
    # (named archive_name, not filename — the extraction loop below used
    # to shadow and clobber this variable.)
    archive_name = os.path.split(shapefile_archive_url)[-1]
    dl_path = os.path.join(dl_dir, archive_name)

    if os.path.exists(dl_path):
        # archive already downloaded on a previous run; reuse it
        logging.debug("Skipping existing archive: %s" % dl_path)
        continue

    logging.debug("Downloading %s" % abs_url)
    try:
        response = urllib2.urlopen(abs_url)
        archive_data = response.read()
        # "with" guarantees the handle is closed even if write() fails
        with open(dl_path, "wb") as archive_file:
            archive_file.write(archive_data)
        logging.debug("Downloaded %s to %s" % (abs_url, dl_path))
    except Exception:
        # best-effort: log the failure and move on to the next archive
        logging.error("Could not download %s: %s" % (
            abs_url, sys.exc_info()[1]))
        continue

    # extract every member of the shapefile archive.
    # NOTE(review): member names come from census.gov and are trusted here;
    # a hostile zip could contain "../" paths (zip-slip) — confirm acceptable.
    logging.debug("Attempting to decompress shapefile archive: %s" % archive_name)
    shapefile_archive = zipfile.ZipFile(dl_path)
    for member_name in shapefile_archive.namelist():
        member_data = shapefile_archive.read(member_name)
        file_path = os.path.join(shapefiles_dir, member_name)
        with open(file_path, "wb") as member_file:
            member_file.write(member_data)
        logging.info("Extracted %s to %s" % (member_name, file_path))
    shapefile_archive.close()

logging.info("Shapefile download complete")
#
# import data via LayerMapping -> places.Zipcode model
######################################################

logging.debug("Preparing to import shapefiles")

# map every extracted .shp file onto the Zipcode model; transform=False
# because the shapefiles are already in the model's SRID
shapefiles = glob.glob(os.path.join(shapefiles_dir, "*.shp"))
for shp_path in shapefiles:
    logging.debug("Mapping %s" % shp_path)
    layer_mapping = LayerMapping(Zipcode, shp_path, zipcode_mapping, transform=False)
    layer_mapping.save(strict=True, verbose=False)

logging.info("Shapefile set import complete")
#
# cleanup
#########

if REMOVE_SHAPEFILES:
    # walk bottom-up so files are removed before their parent directories
    for root, dirs, files in os.walk(data_dir, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
            logging.debug("Removed file: %s" % name)
        for name in dirs:
            os.rmdir(os.path.join(root, name))
            # fixed log typo: "Remove path" -> "Removed path"
            logging.debug("Removed path: %s" % name)
    # the walk never yields data_dir itself; drop it last
    os.rmdir(data_dir)
    logging.info("Cleanup removed all data files and folders")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment