Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Download shapefiles at the census tract and block level for all of urban Mexico—more than 5 GB of data!
#!/bin/bash
# Author: Diego Valle-Jones
# Web: http://www.diegovalle.net
# Purpose: Download shapefiles of manzanas (blocks), agebs (census areas), ejes
# viales (streets), interesting areas and a whole bunch of other stuff
# Note that you'll need a recent version of innoextract
# http://constexpr.org/innoextract/, one that can work with files
# created by version 5.5 of Inno Setup, the one in Ubuntu universe is
# not recent enough. The version available at
# https://launchpad.net/~arx/+archive/release is good enough.
# sudo add-apt-repository ppa:arx/release
# sudo apt-get update
# sudo apt-get install innoextract
# chmod +x download-census-shp.sh
# As of now, this script has not been tested on Windows and Mac
# systems, only on Ubuntu. The script will create a directory
# called 'shps' where all the shapefiles are located. If something
# goes wrong when downloading, be sure to delete it and try again.
# Stop at the first command that exits with a non-zero status
set -e

# Target spatial reference handed to ogr2ogr: plain longitude/latitude on
# the WGS84 ellipsoid, which is compatible with Google Maps
PROJECTION="+proj=longlat +ellps=WGS84 +no_defs +towgs84=0,0,0"

# Download command (curl) with its options; it is expanded unquoted
# further down so that the options are split into separate words
CURL="curl -fsS --retry 3 "

# Layers shipped with every state: manzanas, agebs, streets, etc
files=(
  "ageb_urb"
  "eje_vial"
  "estatal"
  "loc_rur"
  "loc_urb"
  "manzanas"
  "municipal"
  "servicios_a"
  "servicios_l"
  "servicios_p"
)

# Layers shipped with the national (not state level) data
national_files=(
  "estatal"
  "loc_urb"
  "nacional"
  "loc_rur"
  "municipal"
  "zonas_metro"
)

# State abbreviations, indexed by the INEGI file number
# (index 0 is the national dataset)
states=(
  "national"
  "ags" "bc" "bcs" "camp" "coah" "col" "chis" "chih"
  "df" "dgo" "gto" "gro" "hgo" "jal" "mex" "mich"
  "mor" "nay" "nl" "oax" "pue" "qro" "qroo" "slp"
  "sin" "son" "tab" "tamps" "tlax" "ver" "yuc" "zac"
)
# Use gdal (ogr2ogr) to reproject a set of shapefiles, and rename them to
# include a user friendly abbreviation instead of a number
# Globals:
#   PROJECTION (read): spatial reference passed to ogr2ogr -t_srs
# Arguments:
#   $1: directory of shapefiles, e.g. shps/state_abbreviation; it must
#       contain a 'tablas' subdirectory with the extra census data
#   $2: the state abbreviation, used as a prefix for the new file names
#   $3: NAME (not the contents) of the array variable that lists the
#       layers to reproject; it must not be one of the local variable
#       names used below (shp_dir, abbrev, array_ref, layer_names)
# Returns:
#   non-zero if a reprojection, a removal or a rename fails
# TODO: convert the encoding from windows-1252 to utf-8
function reproject {
  # Everything is local so the caller's variables (notably a loop
  # counter called 'i') are never overwritten
  local shp_dir=$1 abbrev=$2
  local array_ref="$3[@]"
  # Indirect expansion: copy the elements of the array named by $3
  local -a layer_names=("${!array_ref}")
  local layer table

  for layer in "${layer_names[@]}"; do
    # Bail out before the rm below if the reprojection failed, so the
    # source files are never deleted without a reprojected copy
    ogr2ogr "$shp_dir/${abbrev}_$layer.shp" "$shp_dir/$layer.shp" \
      -t_srs "$PROJECTION" || return
    # Remove the original layer and its sidecar files (.dbf, .shx, ...);
    # the new files start with the abbreviation, so they do not match
    rm -- "$shp_dir/$layer"* || return
  done

  # Rename the extra census data that comes with the shapefiles. This runs
  # in a subshell so the working directory of the caller is left untouched
  # no matter how deep (or absolute) $1 is. A plain mv loop is used instead
  # of the 'rename' utility, whose syntax differs between the Perl and the
  # util-linux implementations.
  (
    cd "$shp_dir/tablas" || exit
    for table in cpv2010*; do
      # An unmatched glob stays literal: nothing to rename in that case
      [[ -e "$table" ]] || continue
      mv -- "$table" "${abbrev}_$table" || exit
    done
  )
}
# Download, extract and reproject the national data (file number 00)
# followed by each of the 32 states (file numbers 01 to 32)
for i in {0..32}; do
  # The INEGI file numbers are always two digits, zero padded
  printf -v FILENUM '%02d' "$i"

  # Resolve the abbreviation and the installer name once, up front
  state=${states[$i]}
  installer="${state}_scince.exe"

  # Fetch the installer from the INEGI server. 'idusr' is the id you get
  # when you register at the INEGI
  $CURL "http://www.inegi.org.mx/est/scince/scince2010.aspx?_file=/est/scince/scince2010/Scince2010_$FILENUM.exe&idusr=80085" -o "$installer"

  # Unpack the Inno Setup windows executable; innoextract does not let
  # you choose the output directory, so the files land in ./app
  innoextract --lowercase --silent "$installer"

  # Move the extracted shapefiles into shps/<state abbreviation>
  mkdir -p "shps/$state"
  cp -r app/"$FILENUM"/* "shps/$state"

  # Clean up what innoextract left behind, plus the installer itself
  rm -rf app tmp "$installer"

  # The national dataset ships a different set of layers than the states
  case "$i" in
    0) layer_list=national_files ;;
    *) layer_list=files ;;
  esac
  reproject "shps/$state" "$state" "$layer_list"

  # Give the server a rest before downloading the next file
  sleep 20
done
# You could use the code below to merge all the states into one giant
# shapefile of Mexico. Change '_manzanas' to '_agebs' or '_eje_vial' or whatever
#for file in $(find shps -maxdepth 2 -name "*_manzanas.shp" )
#do
# ogr2ogr -update -append mexico_manzanas.shp $file -f "esri shapefile" -nln merge
#done
# Filter attributes (e.g. only include total population in the dbf)
#ogr2ogr -select POB1 mexico_manazanas.shp merge.shp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment