Last active
July 31, 2020 16:48
-
-
Save aborruso/45b61dd71b3621b7d5f43ef789733805 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Downloads the tree XLS files linked from a politicheagricole.it page,
# converts them to CSV, merges them into "$folder"/alberi.csv and adds
# decimal-degree coordinate columns (x, y) derived from the
# "LATITUDINE SU GIS" / "LONGITUDINE SU GIS" fields.
# It also downloads the ISTAT municipal boundaries and writes a WGS84 copy to
# "$folder"/processing/comuni.shp.
#
# All working files live next to this script, in ./rawdata and ./processing.

### requirements ###
# miller https://github.com/johnkerl/miller
# scrape https://github.com/aborruso/scrape-cli
# pyexcel-cli https://github.com/pyexcel/pyexcel-cli
# pyexcel-xls https://github.com/pyexcel/pyexcel-xls
# yq https://github.com/kislyuk/yq
# parallel https://www.gnu.org/software/parallel/
# (curl, wget, unzip, mapshaper and cs2cs are invoked below as well)
### requirements ###

set -x

folder="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Run from the script directory so that the relative ./rawdata path used by
# the download step points at the same place as "$folder"/rawdata, no matter
# where the script is launched from. ":?" aborts if the path came back empty.
cd "${folder:?}" || exit 1

mkdir -p "$folder"/rawdata
mkdir -p "$folder"/processing

# remove leftovers from previous runs; -f keeps this quiet when the
# directories are already empty, ":?" guards against expanding to "/rawdata/*"
rm -rf -- "${folder:?}"/rawdata/*
rm -rf -- "${folder:?}"/processing/*

# download the XLS files
curl "https://www.politicheagricole.it/flex/cm/pages/ServeBLOB.php/L/IT/IDPagina/11260" |
  # extract the elements that link to the tree XLS files
  scrape -be ".blob-element-download .viewLinkIMG" |
  # extract the href and text properties (URL and file name) as TSV
  xq -r '.html.body.a[]|[."@href",."#text"]|@tsv' |
  # start the downloads: column 1 is the URL, column 2 the output file name
  # NOTE(review): the file name comes straight from the scraped page text
  parallel --colsep '\t' wget {1} -O ./rawdata/{2}.xls

for i in "$folder"/rawdata/*.xls; do
  # nothing was downloaded: the glob is left unexpanded, skip it
  [[ -e "$i" ]] || continue
  # file name without directory and without extension
  filename="${i##*/}"
  filename="${filename%.*}"
  # convert the first sheet of the XLS file to CSV
  pyexcel transcode --sheet-index 0 "$i" "$folder"/processing/"$filename".csv
done

# merge the files, allowing for extra fields in some of them, and clean up
# redundant whitespace
mlr --csv unsparsify then clean-whitespace "$folder"/processing/*.csv >"$folder"/alberi.csv

# in the coordinates remove spaces and replace "," with "."
mlr -I --csv put '${LATITUDINE SU GIS}=gsub(${LATITUDINE SU GIS}," ","");${LONGITUDINE SU GIS}=gsub(${LONGITUDINE SU GIS}," ","");${LATITUDINE SU GIS}=gsub(${LATITUDINE SU GIS},",",".");${LONGITUDINE SU GIS}=gsub(${LONGITUDINE SU GIS},",",".")' "$folder"/alberi.csv

# download the municipal boundaries
curl -Lk "http://www.istat.it/storage/cartografia/confini_amministrativi/non_generalizzati/Limiti01012020.zip" >"$folder"/rawdata/Limiti01012020.zip

# unzip the municipal boundaries into rawdata (the archive is stored in
# rawdata, so it must be read from there)
unzip "$folder"/rawdata/Limiti01012020.zip -d "$folder"/rawdata

# reproject the boundaries from EPSG 32632 to EPSG 4326
mapshaper "$folder"/rawdata/Limiti01012020/Com01012020/Com01012020_WGS84.shp -proj wgs84 -o "$folder"/processing/comuni.shp

# create coordinate columns in the 13d25'25 format
mlr --csv put '$lat_dd=sub(${LATITUDINE SU GIS},"^([0-9]+)[^0-9]+([0-9]+)[^0-9]+([0-9]+\.?[0-9]*)[^0-9]*","\1d\2'\''\3");$lon_dd=sub(${LONGITUDINE SU GIS},"^([0-9]+)[^0-9]+([0-9]+)[^0-9]+([0-9]+\.?[0-9]*)[^0-9]*","\1d\2'\''\3")' "$folder"/alberi.csv >"$folder"/processing/alberi.csv

# extract a file holding only the two columns created above
mlr --csv cut -f "lat_dd","lon_dd" "$folder"/processing/alberi.csv >"$folder"/processing/albLL.csv

# convert it to a space-separated file without header
mlr --c2n cat "$folder"/processing/albLL.csv >"$folder"/processing/albLL.txt

# convert the coordinates in albLL.txt from DDMMSS to decimal degrees
cs2cs -f '%.6f' +proj=latlong +datum=WGS84 +to +proj=latlong +datum=WGS84 "$folder"/processing/albLL.txt | tr " " "\t" >"$folder"/processing/albLLdd

# convert to CSV and add a header to the decimal-degree file, dropping z
mlr -I --implicit-csv-header --t2c label y,x,z then cut -x -f z "$folder"/processing/albLLdd

# add an id field to the decimal-degree file
mlr -I --csv cat -N id "$folder"/processing/albLLdd

# add an id field to the trees file
mlr -I --csv cat -N id "$folder"/processing/alberi.csv

# join the decimal-degree coordinates onto the trees file by id and drop the
# intermediate lat_dd/lon_dd columns
mlr --csv join -j id -f "$folder"/processing/alberi.csv then cut -x -f "lat_dd","lon_dd" "$folder"/processing/albLLdd >"$folder"/alberi.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment