Skip to content

Instantly share code, notes, and snippets.

@caged
Last active March 1, 2023 06:15
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save caged/6943733 to your computer and use it in GitHub Desktop.
Save caged/6943733 to your computer and use it in GitHub Desktop.
Shell script to import data into Postgres from https://www.nhgis.org. If you use this, you'll need to edit line 14 to include the census data you're importing.
#!/bin/sh
# usage ./create-psql-database DATABASE
#
DB_NAME=$1
FOLDER_NAME="nhgis0004"
# Drop and recreate database
dropdb --interactive "${DB_NAME}"
createdb --encoding UTF8 $DB_NAME
# Create the POSTGIS tables and data table
psql -d $DB_NAME -c 'CREATE EXTENSION postgis'
psql -d $DB_NAME -c "CREATE TABLE ${DB_NAME}_data (GISJOIN varchar(15),YEAR varchar(9), REGION varchar(30),REGIONA integer,DIVISION varchar(30),DIVISIONA integer,STATE varchar(30),STATEA integer,COUNTY text,COUNTYA integer,COUSUB varchar(30),COUSUBA integer,PLACE varchar(30),PLACEA integer,TRACTA integer,BLKGRPA integer,CONCITA integer,RES_ONLYA integer,AIANHH integer,AIANHHA integer,TRUSTA integer,AITSCEA integer,ANRCA integer,CBSA integer,CBSAA integer,CSA integer,CSAA integer,METDIV integer,METDIVA integer,NECTA integer,NECTAA integer,CNECTA integer,CNECTAA integer,NECTADIV integer,NECTADIVA integer,UA integer,UAA integer,CDCURRA integer,SLDUA integer,SLDLA integer,ZCTA5A integer,SUBMCDA integer,SDELM integer,SDELMA integer,SDSEC integer,SDSECA integer,SDUNI integer,SDUNIA integer,PUMA5A integer,NAME text,BTTRA integer,BTBGA integer,MNUE001 integer,MNUE002 integer,MNUE003 integer,MNUE004 integer,MNUE005 integer,MNUE006 integer,MNUE007 integer,MNUE008 integer,MNUE009 integer,MNUE010 integer,MN3E001 integer,MN3E002 integer,MN3E003 integer,MNUM001 integer,MNUM002 integer,MNUM003 integer,MNUM004 integer,MNUM005 integer,MNUM006 integer,MNUM007 integer,MNUM008 integer,MNUM009 integer,MNUM010 integer,MN3M001 integer,MN3M002 integer,MN3M003 integer)"
tar -xzm --totals -C /tmp -f data/us-blockgroup-race.zip
tar -xzm --totals -C /tmp -f data/us-blockgroup-shape.zip
# Create some temporary directories for reprojecting shape files
mkdir /tmp/census-shapefiles
mkdir /tmp/reprojected-census-shapefiles
# Extract individual state shapefiles to the same directory
for file in /tmp/nhgis0004_shape/*.zip; do \
tar -xzm -C /tmp/census-shapefiles -f $file
done
# Iterate through all the shapefiles and append them to the table
for file in /tmp/census-shapefiles/*.shp; do \
file_name=$(basename $file)
ogr2ogr -f 'ESRI Shapefile' -t_srs EPSG:4326 "/tmp/reprojected-census-shapefiles/${file_name}" $file
#shp2pgsql -a -s 4326 -i -D $file us_census_blockgroups | psql $DB_NAME
done
# We need to ensure content is UTF-8
iconv -c -f UTF-8 -t UTF-8 < "/tmp/nhgis0004_csv/nhgis0004_ds184_20115_2011_blck_grp.csv" > "/tmp/nhgis0004_csv/nhgis0004_ds184_20115_2011_blck_grp-utf8.csv"
# Import UTF-8 CSV race data
psql -d $DB_NAME -c "COPY ${DB_NAME}_data FROM '/tmp/nhgis0004_csv/nhgis0004_ds184_20115_2011_blck_grp-utf8.csv' WITH CSV HEADER"
# Create initial table schema and then delete the data so we can iterate through
# all the files later to append them to the table
first_file=$(find /tmp/reprojected-census-shapefiles -name "*.shp" | head -n 1)
shp2pgsql -s 4326 -i -D $first_file "${DB_NAME}_blockgroups" | psql $DB_NAME
psql -c "delete from ${DB_NAME}_blockgroups" $DB_NAME
# Iterate through all the shapefiles and append them to the table
for file in /tmp/reprojected-census-shapefiles/*.shp; do \
file_name=$(basename $file)
echo $file_name
shp2pgsql -a -s 4326 -i -D $file us_census_blockgroups | psql $DB_NAME
done
# Create the index
psql -c "create index ${DB_NAME}_blockgroups_gix on ${DB_NAME}_blockgroups using gist (geom)" $DB_NAME
rm -rf /tmp/census-shapefiles
rm -rf /tmp/reprojected-census-shapefiles
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment