Skip to content

Instantly share code, notes, and snippets.

@andrewbt
Last active March 3, 2016 01:01
Show Gist options
  • Save andrewbt/da97eedfd6428d2ef5f1 to your computer and use it in GitHub Desktop.
Save andrewbt/da97eedfd6428d2ef5f1 to your computer and use it in GitHub Desktop.
Downloads a double-zipped shapefile from NYC Socrata, separates it, and uploads to CartoDB
#!/usr/bin/env bash
#
# Downloads the double-zipped NYC building shapefile archive, re-zips each
# nested shapefile on its own, and imports both into CartoDB, replacing any
# table of the same name.
#
# Usage: ./building_downloader.sh <cdb_username> <api_key>
#
# Environment (optional):
#   MAX_IMPORT_ATTEMPTS  uploads to try per dataset before giving up (default 5)
#   POLL_INTERVAL        seconds between import status checks (default 2)
#
# Requires curl, unzip, zip and sed. Works in the current directory and
# removes earlier copies of the files it manages before downloading.
#
# Written for Bash (the script relies on [[ ]] and arrays) and kept
# compatible with Bash 3.2 so it still runs on a stock macOS.

set -euo pipefail

readonly SOURCE_URL="http://www.opendatacache.com/data.cityofnewyork.us/download/tb92-6tj8/application/zip"
# Local name of the downloaded outer archive (the last URL path segment).
readonly ARCHIVE="zip"
# Import order matters only for log readability: historic first, as before.
readonly -a DATASETS=(building_historic_1015 building_1015)
readonly -a SHAPEFILE_EXTENSIONS=(dbf prj shp shx shp.xml)
readonly MAX_IMPORT_ATTEMPTS="${MAX_IMPORT_ATTEMPTS:-5}"
readonly POLL_INTERVAL="${POLL_INTERVAL:-2}"

# Set by main() from the positional arguments.
CDB_USER=""
API_KEY=""

# Prints an error message to stderr and aborts the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Prints the command-line synopsis to stderr and exits with status 2.
usage() {
  printf 'Usage: %s <cdb_username> <api_key>\n' "${0##*/}" >&2
  exit 2
}

# Removes every file a previous run may have left in the current directory:
# the outer archive, the metadata page, and each dataset's parts and zip.
clean_workspace() {
  local stale=("$ARCHIVE" building_1015_metadata.htm)
  local dataset ext
  for dataset in "${DATASETS[@]}"; do
    stale+=("${dataset}.zip")
    for ext in "${SHAPEFILE_EXTENSIONS[@]}"; do
      stale+=("${dataset}.${ext}")
    done
  done
  rm -f -- "${stale[@]}"
}

# Downloads the outer archive.
# --fail makes curl return non-zero on an HTTP error instead of saving the
# error page as the archive.
download_archive() {
  curl --fail -o "$ARCHIVE" "$SOURCE_URL" \
    || die "download failed: ${SOURCE_URL}"
}

# Zips the parts of one nested shapefile into <dataset>.zip for upload.
# Arguments: $1 - dataset (shapefile base) name
package_dataset() {
  local dataset=$1
  local parts=()
  local ext
  for ext in "${SHAPEFILE_EXTENSIONS[@]}"; do
    parts+=("${dataset}.${ext}")
  done
  zip "${dataset}.zip" "${parts[@]}" || die "could not build ${dataset}.zip"
}

# Drops a table through the SQL API. Best effort: the API's response (including
# any error it reports) is echoed to stdout and never aborts the run.
# Arguments: $1 - table name
drop_table() {
  local table=$1
  curl "https://${CDB_USER}.cartodb.com/api/v2/sql?q=DROP%20TABLE%20${table}&api_key=${API_KEY}" \
    || printf 'warning: DROP TABLE request for %s failed\n' "$table" >&2
}

# Uploads an archive to the Import API and prints the queued job id.
# Arguments: $1 - path of the zip file to upload
# Returns:   non-zero if the request fails or the response carries no job id
start_import() {
  local archive=$1
  local response job_id
  response=$(curl -sS -F "file=@${archive}" \
    "https://${CDB_USER}.cartodb.com/api/v1/imports/?api_key=${API_KEY}") || return 1
  # -n with the p flag prints only on a match, so an unexpected response
  # yields an empty id rather than the raw response body.
  job_id=$(printf '%s\n' "$response" \
    | sed -n -E 's/.*"item_queue_id":"([^"]+)".*/\1/p')
  if [[ -z "$job_id" ]]; then
    printf 'unexpected import response: %s\n' "$response" >&2
    return 1
  fi
  printf '%s\n' "$job_id"
}

# Prints the current state of an import job, or nothing if the response
# contains no state field.
# Arguments: $1 - job id returned by start_import
# Returns:   non-zero if the status request itself fails
import_state() {
  local job_id=$1
  local response
  response=$(curl -sS \
    "https://${CDB_USER}.cartodb.com/api/v1/imports/${job_id}?api_key=${API_KEY}") || return 1
  # sed -E is accepted by both GNU and BSD sed, so no per-OS flag is needed.
  printf '%s\n' "$response" | sed -n -E 's/.*"state":"([a-z]+)".*/\1/p'
}

# Replaces a CartoDB table with the contents of <dataset>.zip.
# Drops the table, uploads the archive and polls until the job completes.
# On a "failure" state the whole sequence is retried, up to
# MAX_IMPORT_ATTEMPTS uploads in total. Any other state keeps polling.
# Arguments: $1 - dataset name (also the table name)
import_dataset() {
  local dataset=$1
  local archive="${dataset}.zip"
  local attempt job_id status

  for (( attempt = 1; attempt <= MAX_IMPORT_ATTEMPTS; attempt++ )); do
    if (( attempt > 1 )); then
      echo "Import failed. Re-sending ${archive} file..."
    fi
    echo "drop ${dataset} table"
    drop_table "$dataset"
    echo "Sending ${archive} file..."
    job_id=$(start_import "$archive") \
      || die "could not start import of ${archive}"
    echo "Waiting for job ${job_id} to be completed..."

    while true; do
      # A failed status request is treated as "state unknown" and retried on
      # the next tick rather than aborting a possibly healthy import.
      status=$(import_state "$job_id") || status=""
      echo "STATE: ${status}"
      case "$status" in
        complete)
          echo "Import ${archive} successful"
          return 0
          ;;
        failure)
          break
          ;;
      esac
      sleep "$POLL_INTERVAL"
    done
  done

  die "import of ${archive} failed after ${MAX_IMPORT_ATTEMPTS} attempts"
}

# Entry point: validates arguments, refreshes the local files, then imports
# each dataset in turn.
main() {
  CDB_USER=${1:-}
  API_KEY=${2:-}
  if [[ -z "$CDB_USER" ]]; then
    echo "Missing user" >&2
    usage
  fi
  if [[ -z "$API_KEY" ]]; then
    echo "Missing api key" >&2
    usage
  fi
  [[ "$MAX_IMPORT_ATTEMPTS" =~ ^[1-9][0-9]*$ ]] \
    || die "MAX_IMPORT_ATTEMPTS must be a positive integer"

  echo "rm local files"
  clean_workspace

  echo "download latest"
  download_archive

  echo "unzipping"
  unzip "$ARCHIVE" || die "could not unzip ${ARCHIVE}"

  local dataset
  for dataset in "${DATASETS[@]}"; do
    echo "zip ${dataset}.zip"
    package_dataset "$dataset"
  done

  for dataset in "${DATASETS[@]}"; do
    import_dataset "$dataset"
  done
}

main "$@"
Usage: ./building_downloader.sh <cdb_username> <api_key>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment