Skip to content

Instantly share code, notes, and snippets.

@brianboyer
Created February 16, 2011 16:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save brianboyer/829700 to your computer and use it in GitHub Desktop.
Save brianboyer/829700 to your computer and use it in GitHub Desktop.
PostgreSQL importer for the pre-processed census files provided to IRE members by Paul Overberg and Anthony DeBarros from USA Today.
#! /bin/sh
#
# usat_import
# Created by Ryan Nagle, Chris Groskopf and Brian Boyer
#
# PostgreSQL importer for the pre-processed census files provided to IRE
# members by Paul Overberg and Anthony DeBarros from USA Today.
#
# To get the source files, join IRE (it's cheap!):
# http://www.ire.org/getcensus/
#
# Requires a GIS stack.
# Build your own with Homebrew on OSX (what I do):
# http://blog.apps.chicagotribune.com/2010/02/17/quick-install-pythonpostgis-geo-stack-on-snow-leopard/
#
# You must also get usat_table.py, which generates the create statement
# for those extra-wide tables.
#
# (Also requires Python and PostgreSQL.)
#
# Running the script will create a new database, create shape tables, create
# data tables, and then create tables of the data joined to the shapes. Once
# that's complete, pop open QGIS and look at your work!
#
# Usage:
# Download your state from IRE and unzip.
# Download place and tract shapefiles from the US Census and unzip:
# http://www.census.gov/cgi-bin/geo/shapefiles2010/main (TRACTS and PLACES)
#
# This script works out of the box for Illinois. To make it work for your
# state, search and replace these strings:
# tl_2010_17_tract10
# tl_2010_17_place10
# ILtracts
# IL/TractIL.csv
# IL/PlacesIL.csv
#
# Fork requests:
# * Parameterize for any state.
# * Export from the database back to shapefiles so you can view w/ Arc.
dropdb census2010geo
createdb census2010geo -T template_postgis
ogr2ogr -f PostgreSQL PG:dbname=census2010geo tl_2010_17_tract10/tl_2010_17_tract10.shp -s_srs EPSG:4326 -nlt multipolygon -nln tl_2010_17_tract10
ogr2ogr -f PostgreSQL PG:dbname=census2010geo tl_2010_17_place10/tl_2010_17_place10.shp -s_srs EPSG:4326 -nlt multipolygon -nln tl_2010_17_place10
python usat_table.py ILtracts IL/TractIL.csv | psql -d census2010geo
python usat_table.py ILplaces IL/PlacesIL.csv | psql -d census2010geo
psql -d census2010geo -c "copy ILtracts from '/Volumes/Home/bboyer/src/census2010/usat/IL/TractIL.csv' delimiters ',' CSV HEADER;"
psql -d census2010geo -c "copy ILplaces from '/Volumes/Home/bboyer/src/census2010/usat/IL/PlacesIL.csv' delimiters ',' CSV HEADER;"
#tracts, all fields -- column names too long for shapefile export
psql -d census2010geo -c "select * into merged from ILtracts inner join tl_2010_17_tract10 on ILtracts.tract = cast(tl_2010_17_tract10.tractce10 as int) and ILtracts.county = cast(tl_2010_17_tract10.countyfp10 as int);"
#tracts, population -- fewer columns, shortened names
psql -d census2010geo -c "select name, _2000Pop as Pop2000, P0010001 as Pop2010, TotPopChangeRaw as PopChange, PctChgTotPop2010_2000 as PopChgPct, wkb_geometry into merged_population from ILtracts inner join tl_2010_17_tract10 on ILtracts.tract = cast(tl_2010_17_tract10.tractce10 as int) and ILtracts.county = cast(tl_2010_17_tract10.countyfp10 as int);"
#places, population -- fewer columns, shortened names
psql -d census2010geo -c "select name, _2000Pop as Pop2000, P0010001 as Pop2010, TotPopChangeRaw as PopChange, PctChgTotPop2010_2000 as PopChgPct, wkb_geometry into merged_places_population from ILplaces inner join tl_2010_17_place10 on ILplaces.place = cast(tl_2010_17_place10.placefp10 as int);"
# create_table.py
#
# Python helper for PosegreSQL importer of pre-processed census files
# provided to IRE members by Paul Overberg and Anthony DeBarros from USA Today.
#
# See usat_import for usage
#
import csv
import sys
table_name = sys.argv[1]
file_name = sys.argv[2]
r = csv.reader(open(file_name,'rb'))
headers = r.next()
values = r.next()
s = "create table %s (\n" % (table_name)
for i,val in enumerate(headers):
col = headers[i]
value = values[i]
if value == '': #blank, skip it!
col += " numeric"
else:
try:
float(value)
col += " numeric"
except ValueError:
col += " varchar(50)"
if len(headers) - 1 != i:
col += ",\n"
s += col
s += "\n);"
print s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment