Skip to content

Instantly share code, notes, and snippets.

@aborruso
Last active July 31, 2020 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aborruso/45b61dd71b3621b7d5f43ef789733805 to your computer and use it in GitHub Desktop.
Save aborruso/45b61dd71b3621b7d5f43ef789733805 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Downloads the XLS files linked from a politicheagricole.it page, converts
# and merges them into "alberi.csv" next to this script, and adds coordinates
# in decimal degrees. Working files go in ./rawdata and ./processing, both
# located beside the script.
#
### requirements ###
# miller https://github.com/johnkerl/miller
# scrape https://github.com/aborruso/scrape-cli
# pyexcel-cli https://github.com/pyexcel/pyexcel-cli
# pyexcel-xls https://github.com/pyexcel/pyexcel-xls
# yq https://github.com/kislyuk/yq
# parallel https://www.gnu.org/software/parallel/
# mapshaper https://github.com/mbloch/mapshaper
# cs2cs (PROJ) https://proj.org/
# curl, wget, unzip
### requirements ###

# Trace every command as it runs.
set -x

# Absolute path of the directory that contains this script.
folder="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Work from the script's own directory so that any relative path used later
# resolves inside it, regardless of where the script was invoked from.
cd "${folder:?}" || exit 1

# Create the working directories next to the script (not in the caller's cwd).
mkdir -p "$folder"/rawdata "$folder"/processing

# Remove leftovers from previous runs. ${folder:?} aborts instead of expanding
# to "/rawdata/*" if the variable is ever empty; -f keeps the first run (empty
# directories, unmatched glob) from printing an error.
rm -rf -- "${folder:?}"/rawdata/*
rm -rf -- "${folder:?}"/processing/*
# Download the XLS files: fetch the page that lists them...
curl "https://www.politicheagricole.it/flex/cm/pages/ServeBLOB.php/L/IT/IDPagina/11260" |
  # ...keep only the download links of the XLS files...
  scrape -be ".blob-element-download .viewLinkIMG" |
  # ...turn each link into a TSV row: href (URL) and text (file name)...
  xq -r '.html.body.a[]|[."@href",."#text"]|@tsv' |
  # ...and download them in parallel. The downloads run from inside
  # "$folder"/rawdata (in a subshell, so the script's cwd is unchanged) so the
  # files land where the conversion loop looks for them, independently of the
  # directory the script was started from.
  (
    mkdir -p "${folder:?}"/rawdata
    cd "$folder"/rawdata || exit 1
    parallel --colsep '\t' wget {1} -O ./{2}.xls
  )
# Convert the first sheet of every downloaded XLS file to a CSV with the same
# base name inside the processing directory.
for i in "$folder"/rawdata/*.xls; do
  # When nothing matches, the glob stays as the literal pattern: skip it
  # instead of handing a non-existent file to pyexcel.
  [[ -e "$i" ]] || continue
  # File name without directory...
  filename=$(basename "$i")
  # ...and without extension.
  filename="${filename%.*}"
  # Convert the XLS file to CSV.
  pyexcel transcode --sheet-index 0 "$i" "$folder"/processing/"$filename".csv
done
# Merge the per-file CSVs into a single one, allowing for files that carry
# extra fields (unsparsify) and tidying up redundant whitespace.
mlr --csv unsparsify then clean-whitespace "$folder"/processing/*.csv >"$folder"/alberi.csv
# In both coordinate columns, edited in place: drop every space and replace
# "," with ".".
mlr -I --csv put '
  ${LATITUDINE SU GIS} = gsub(${LATITUDINE SU GIS}, " ", "");
  ${LATITUDINE SU GIS} = gsub(${LATITUDINE SU GIS}, ",", ".");
  ${LONGITUDINE SU GIS} = gsub(${LONGITUDINE SU GIS}, " ", "");
  ${LONGITUDINE SU GIS} = gsub(${LONGITUDINE SU GIS}, ",", ".")
' "$folder"/alberi.csv
# Download the municipal boundaries archive into rawdata.
# NOTE(review): -k turns off TLS certificate verification on the redirected
# request; confirm it is still needed.
curl -Lk "http://www.istat.it/storage/cartografia/confini_amministrativi/non_generalizzati/Limiti01012020.zip" >"$folder"/rawdata/Limiti01012020.zip
# Unzip the boundaries inside rawdata. The archive path must be the one the
# download above wrote to ("$folder"/rawdata/...), and a failed cd must stop
# the script rather than extract into whatever the current directory is.
cd "$folder"/rawdata || exit 1
unzip "$folder"/rawdata/Limiti01012020.zip
# Reproject the boundaries from EPSG 32632 to EPSG 4326.
mapshaper "$folder"/rawdata/Limiti01012020/Com01012020/Com01012020_WGS84.shp -proj wgs84 -o "$folder"/processing/comuni.shp
# Add two columns, lat_dd and lon_dd, holding the coordinates rewritten in the
# form 13d25'25: the three numeric groups (degrees, minutes, seconds) are kept
# and whatever separates them is replaced by "d" and "'". The result goes to
# processing/alberi.csv; "$folder"/alberi.csv is only read here.
mlr --csv put '$lat_dd=sub(${LATITUDINE SU GIS},"^([0-9]+)[^0-9]+([0-9]+)[^0-9]+([0-9]+\.?[0-9]*)[^0-9]*","\1d\2'\''\3");$lon_dd=sub(${LONGITUDINE SU GIS},"^([0-9]+)[^0-9]+([0-9]+)[^0-9]+([0-9]+\.?[0-9]*)[^0-9]*","\1d\2'\''\3")' "$folder"/alberi.csv >"$folder"/processing/alberi.csv
# Extract a file containing only the two columns created above.
mlr --csv cut -f "lat_dd","lon_dd" "$folder"/processing/alberi.csv >"$folder"/processing/albLL.csv
# Convert that file to a space-separated file without a header line.
mlr --c2n cat "$folder"/processing/albLL.csv >"$folder"/processing/albLL.txt
# Convert the coordinates in albLL.txt from DDMMSS to decimal degrees (six
# decimals), then turn the spaces in the output into tabs.
cs2cs -f '%.6f' +proj=latlong +datum=WGS84 +to +proj=latlong +datum=WGS84 "$folder"/processing/albLL.txt | tr " " "\t" >"$folder"/processing/albLLdd
# Convert the headerless tab-separated output to CSV in place, naming the
# columns y, x, z and dropping z.
# NOTE(review): presumably y is latitude and x longitude, following the
# lat_dd, lon_dd column order fed to cs2cs -- confirm.
mlr -I --implicit-csv-header --t2c label y,x,z then cut -x -f z "$folder"/processing/albLLdd
# Add an "id" counter field to the decimal-degree coordinates file (in place).
mlr -I --csv cat -N id "$folder"/processing/albLLdd
# Add the same "id" counter field to the trees file (in place).
mlr -I --csv cat -N id "$folder"/processing/alberi.csv
# Join the decimal-degree coordinates onto the trees file by id, drop the
# intermediate lat_dd/lon_dd columns and write the result over
# "$folder"/alberi.csv.
mlr --csv join -j id -f "$folder"/processing/alberi.csv then cut -x -f "lat_dd","lon_dd" "$folder"/processing/albLLdd >"$folder"/alberi.csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment