brianboyer/import_usat

## import_usat
#! /bin/sh
#
# usat_import
# Created by Ryan Nagle, Chris Groskopf and Brian Boyer
#
# PostgreSQL importer for the pre-processed census files provided to IRE
# members by Paul Overberg and Anthony DeBarros from USA Today.
#
# To get the source files, join IRE (it's cheap!):
# http://www.ire.org/getcensus/
#
# Requires a GIS stack.
# Build your own with Homebrew on OSX (what I do):
# http://blog.apps.chicagotribune.com/2010/02/17/quick-install-pythonpostgis-geo-stack-on-snow-leopard/
#
# You must also get usat_table.py, which generates the create statement
# for those extra-wide tables.
#
# (Also requires Python and PostgreSQL.)
#
# Running the script will create a new database, create shape tables, create
# data tables, and then create tables of the data joined to the shapes. Once
# that's complete, pop open QGIS and look at your work!
#
# Usage:
# Download your state from IRE and unzip.
# Download place and tract shapefiles from the US Census and unzip:
# http://www.census.gov/cgi-bin/geo/shapefiles2010/main (TRACTS and PLACES)
#
# This script works out of the box for Illinois. To make it work for your
# state, search and replace these strings:
# tl_2010_17_tract10
# tl_2010_17_place10
# ILtracts
# IL/TractIL.csv
# IL/PlacesIL.csv
#
# Fork requests:
# * Parameterize for any state.
# * Export from the database back to shapefiles so you can view w/ Arc.

dropdb census2010geo
createdb census2010geo -T template_postgis
ogr2ogr -f PostgreSQL PG:dbname=census2010geo tl_2010_17_tract10/tl_2010_17_tract10.shp -s_srs EPSG:4326 -nlt multipolygon -nln tl_2010_17_tract10
ogr2ogr -f PostgreSQL PG:dbname=census2010geo tl_2010_17_place10/tl_2010_17_place10.shp -s_srs EPSG:4326 -nlt multipolygon -nln tl_2010_17_place10
python usat_table.py ILtracts IL/TractIL.csv | psql -d census2010geo
python usat_table.py ILplaces IL/PlacesIL.csv | psql -d census2010geo
psql -d census2010geo -c "copy ILtracts from '/Volumes/Home/bboyer/src/census2010/usat/IL/TractIL.csv' delimiters ',' CSV HEADER;"
psql -d census2010geo -c "copy ILplaces from '/Volumes/Home/bboyer/src/census2010/usat/IL/PlacesIL.csv' delimiters ',' CSV HEADER;"
#tracts, all fields -- column names too long for shapefile export
psql -d census2010geo -c "select * into merged from ILtracts inner join tl_2010_17_tract10 on ILtracts.tract = cast(tl_2010_17_tract10.tractce10 as int) and ILtracts.county = cast(tl_2010_17_tract10.countyfp10 as int);"
#tracts, population -- fewer columns, shortened names
psql -d census2010geo -c "select name, _2000Pop as Pop2000, P0010001 as Pop2010, TotPopChangeRaw as PopChange, PctChgTotPop2010_2000 as PopChgPct, wkb_geometry into merged_population from ILtracts inner join tl_2010_17_tract10 on ILtracts.tract = cast(tl_2010_17_tract10.tractce10 as int) and ILtracts.county = cast(tl_2010_17_tract10.countyfp10 as int);"
#places, population -- fewer columns, shortened names
psql -d census2010geo -c "select name, _2000Pop as Pop2000, P0010001 as Pop2010, TotPopChangeRaw as PopChange, PctChgTotPop2010_2000 as PopChgPct, wkb_geometry into merged_places_population from ILplaces inner join tl_2010_17_place10 on ILplaces.place = cast(tl_2010_17_place10.placefp10 as int);"

## usat_table.py
# create_table.py
#
# Python helper for PosegreSQL importer of pre-processed census files
# provided to IRE members by Paul Overberg and Anthony DeBarros from USA Today.
#
# See usat_import for usage
#

import csv
import sys

table_name = sys.argv[1]
file_name = sys.argv[2]

r = csv.reader(open(file_name,'rb'))
headers = r.next()
values = r.next()
s = "create table %s (\n" % (table_name)
for i,val in enumerate(headers):
    col = headers[i]
    value = values[i]

    if value == '': #blank, skip it!
        col += " numeric"
    else:
        try:
            float(value)
            col += " numeric"
        except ValueError:
            col += " varchar(50)"

    if len(headers) - 1 != i:
        col += ",\n"
    s += col
s += "\n);"
print s
	#! /bin/sh
	#
	# usat_import
	# Created by Ryan Nagle, Chris Groskopf and Brian Boyer
	#
	# PostgreSQL importer for the pre-processed census files provided to IRE
	# members by Paul Overberg and Anthony DeBarros from USA Today.
	#
	# To get the source files, join IRE (it's cheap!):
	# http://www.ire.org/getcensus/
	#
	# Requires a GIS stack.
	# Build your own with Homebrew on OSX (what I do):
	# http://blog.apps.chicagotribune.com/2010/02/17/quick-install-pythonpostgis-geo-stack-on-snow-leopard/
	#
	# You must also get usat_table.py, which generates the create statement
	# for those extra-wide tables.
	#
	# (Also requires Python and PostgreSQL.)
	#
	# Running the script will create a new database, create shape tables, create
	# data tables, and then create tables of the data joined to the shapes. Once
	# that's complete, pop open QGIS and look at your work!
	#
	# Usage:
	# Download your state from IRE and unzip.
	# Download place and tract shapefiles from the US Census and unzip:
	# http://www.census.gov/cgi-bin/geo/shapefiles2010/main (TRACTS and PLACES)
	#
	# This script works out of the box for Illinois. To make it work for your
	# state, search and replace these strings:
	# tl_2010_17_tract10
	# tl_2010_17_place10
	# ILtracts
	# IL/TractIL.csv
	# IL/PlacesIL.csv
	#
	# Fork requests:
	# * Parameterize for any state.
	# * Export from the database back to shapefiles so you can view w/ Arc.

	dropdb census2010geo
	createdb census2010geo -T template_postgis
	ogr2ogr -f PostgreSQL PG:dbname=census2010geo tl_2010_17_tract10/tl_2010_17_tract10.shp -s_srs EPSG:4326 -nlt multipolygon -nln tl_2010_17_tract10
	ogr2ogr -f PostgreSQL PG:dbname=census2010geo tl_2010_17_place10/tl_2010_17_place10.shp -s_srs EPSG:4326 -nlt multipolygon -nln tl_2010_17_place10
	python usat_table.py ILtracts IL/TractIL.csv \| psql -d census2010geo
	python usat_table.py ILplaces IL/PlacesIL.csv \| psql -d census2010geo
	psql -d census2010geo -c "copy ILtracts from '/Volumes/Home/bboyer/src/census2010/usat/IL/TractIL.csv' delimiters ',' CSV HEADER;"
	psql -d census2010geo -c "copy ILplaces from '/Volumes/Home/bboyer/src/census2010/usat/IL/PlacesIL.csv' delimiters ',' CSV HEADER;"
	#tracts, all fields -- column names too long for shapefile export
	psql -d census2010geo -c "select * into merged from ILtracts inner join tl_2010_17_tract10 on ILtracts.tract = cast(tl_2010_17_tract10.tractce10 as int) and ILtracts.county = cast(tl_2010_17_tract10.countyfp10 as int);"
	#tracts, population -- fewer columns, shortened names
	psql -d census2010geo -c "select name, _2000Pop as Pop2000, P0010001 as Pop2010, TotPopChangeRaw as PopChange, PctChgTotPop2010_2000 as PopChgPct, wkb_geometry into merged_population from ILtracts inner join tl_2010_17_tract10 on ILtracts.tract = cast(tl_2010_17_tract10.tractce10 as int) and ILtracts.county = cast(tl_2010_17_tract10.countyfp10 as int);"
	#places, population -- fewer columns, shortened names
	psql -d census2010geo -c "select name, _2000Pop as Pop2000, P0010001 as Pop2010, TotPopChangeRaw as PopChange, PctChgTotPop2010_2000 as PopChgPct, wkb_geometry into merged_places_population from ILplaces inner join tl_2010_17_place10 on ILplaces.place = cast(tl_2010_17_place10.placefp10 as int);"
	# create_table.py
	#
	# Python helper for PosegreSQL importer of pre-processed census files
	# provided to IRE members by Paul Overberg and Anthony DeBarros from USA Today.
	#
	# See usat_import for usage
	#

	import csv
	import sys

	table_name = sys.argv[1]
	file_name = sys.argv[2]

	r = csv.reader(open(file_name,'rb'))
	headers = r.next()
	values = r.next()
	s = "create table %s (\n" % (table_name)
	for i,val in enumerate(headers):
	col = headers[i]
	value = values[i]

	if value == '': #blank, skip it!
	col += " numeric"
	else:
	try:
	float(value)
	col += " numeric"
	except ValueError:
	col += " varchar(50)"

	if len(headers) - 1 != i:
	col += ",\n"
	s += col
	s += "\n);"
	print s