Last active
May 20, 2019 19:20
-
-
Save diegovalle/5843688 to your computer and use it in GitHub Desktop.
Download shapefiles at the census tract and block level for all of urban Mexico—more than 5 GB of data!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Author: Diego Valle-Jones
# Web: http://www.diegovalle.net
# Purpose: Download shapefiles of manzanas (blocks), agebs (census areas), ejes
# viales (streets), interesting areas and a whole bunch of other stuff
# Note that you'll need a recent version of innoextract
# http://constexpr.org/innoextract/, one that can work with files
# created by version 5.5 of Inno Setup, the one in Ubuntu universe is
# not recent enough. The version available at
# https://launchpad.net/~arx/+archive/release is good enough.
# sudo add-apt-repository ppa:arx/release
# sudo apt-get update
# sudo apt-get install innoextract
# chmod +x download-census-shp.sh
# As of now, this script has not been tested on Windows and Mac
# systems, only on Ubuntu. The script will create a directory
# called 'shps' where all the shapefiles are located. If something
# goes wrong when downloading, be sure to delete it and try again.
# Abort the whole run as soon as any command fails
set -e

# Target projection handed to ogr2ogr: WGS84 longitude/latitude,
# compatible with Google Maps
PROJECTION="+proj=longlat +ellps=WGS84 +no_defs +towgs84=0,0,0"

# curl command used for the downloads. It is expanded unquoted at the
# call site so the flags split into separate words.
CURL="curl -fsS --retry 3 "

# Shapefiles (manzanas, agebs, etc) shipped with every state download
files=(
  "ageb_urb"
  "eje_vial"
  "estatal"
  "loc_rur"
  "loc_urb"
  "manzanas"
  "municipal"
  "servicios_a"
  "servicios_l"
  "servicios_p"
)

# Shapefiles shipped with the national (not state level) download
national_files=(
  "estatal"
  "loc_urb"
  "nacional"
  "loc_rur"
  "municipal"
  "zonas_metro"
)

# State abbreviations, indexed by the INEGI download number
# (index 0 is the national data set)
states=(
  "national"
  "ags" "bc" "bcs" "camp" "coah" "col" "chis" "chih"
  "df" "dgo" "gto" "gro" "hgo" "jal" "mex" "mich"
  "mor" "nay" "nl" "oax" "pue" "qro" "qroo" "slp"
  "sin" "son" "tab" "tamps" "tlax" "ver" "yuc" "zac"
)
# Use gdal (ogr2ogr) to reproject a list of shapefiles, writing each one
# under a name that carries a user friendly state abbreviation instead of
# a number, then delete the original files of that layer. Afterwards the
# cpv2010* census tables in the "tablas" subdirectory get the same prefix.
# Globals:
#   PROJECTION (read): target projection passed to ogr2ogr -t_srs
# Arguments:
#   $1: directory of shapefiles, e.g. shps/state_abbreviation
#   $2: the state abbreviation, used as the file name prefix
#   $3: NAME of the array holding the layer names to process (the array
#       is passed by name; it must not be called "layers" or "layer_ref")
# Returns:
#   0 on success; non-zero as soon as ogr2ogr, rm, cd or mv fails
# The caller's working directory and variables are left untouched.
# TODO: convert the encoding from windows-1252 to utf-8
reproject() {
  # Indirect expansion: turn the array name in $3 into its elements
  local layer_ref="$3[@]"
  local -a layers
  layers=("${!layer_ref}")
  local layer table

  for layer in "${layers[@]}"; do
    # Only remove the source layer once its reprojected copy was written
    ogr2ogr "$1/${2}_${layer}.shp" "$1/${layer}.shp" -t_srs "$PROJECTION" \
      || return
    # Drops every sidecar file of the layer (.shp, .dbf, .shx, ...)
    rm -- "$1/${layer}"* || return
  done

  # Prefix the extra census data that comes with the shapefiles. The
  # subshell keeps the cd local, whatever the depth of $1, and plain mv
  # avoids depending on a particular flavour of the `rename` utility.
  (
    cd "$1/tablas" || exit
    for table in cpv2010*; do
      # An unmatched glob stays literal; skip it
      [ -e "$table" ] || continue
      mv -- "$table" "${2}_${table}" || exit
    done
  )
}
# Walk the download numbers 00 (national data) through 32 (one per
# state): fetch the installer, unpack it, move the shapefiles under
# shps/<abbreviation> and reproject them
for state_num in {0..32}; do
  # The INEGI uses a leading zero for all one digit numbers
  printf -v FILENUM '%02d' "$state_num"
  abbrev=${states[$state_num]}
  installer="${abbrev}_scince.exe"
  target_dir="shps/$abbrev"

  # Fetch the installer from the inegi server. 'idusr' is the id you
  # get when you register at the INEGI.
  $CURL "http://www.inegi.org.mx/est/scince/scince2010.aspx?_file=/est/scince/scince2010/Scince2010_$FILENUM.exe&idusr=80085" -o "$installer"

  # Unpack the inno setup windows executable. The output directory is
  # not chosen here; the payload is expected under ./app/<number>
  innoextract --lowercase --silent "$installer"

  # Collect the shapefiles under shps/<abbreviation>
  mkdir -p "$target_dir"
  cp -r app/"$FILENUM"/* "$target_dir"

  # Remove the innoextract leftovers and the downloaded installer
  rm -rf app tmp "$installer"

  # Number 00 is the national package, which ships a different set of
  # layers than the per-state packages
  if [ "$state_num" -ne 0 ]; then
    layer_list=files
  else
    layer_list=national_files
  fi
  reproject "$target_dir" "$abbrev" "$layer_list"

  # give the server a rest before downloading the next file
  sleep 20
done
# You could use the code below to merge all the states into one giant | |
# shapefile of Mexico. Change '_manzanas' to '_agebs' or '_eje_vial' or whatever | |
#for file in $(find shps -maxdepth 2 -name "*_manzanas.shp" ) | |
#do | |
# ogr2ogr -update -append mexico_manzanas.shp $file -f "esri shapefile" -nln merge | |
#done | |
# Filter attributes (e.g. only include total population in the dbf) | |
#ogr2ogr -select POB1 mexico_manazanas.shp merge.shp |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment