Skip to content

Instantly share code, notes, and snippets.

@ThomasG77
Last active March 2, 2022 16:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ThomasG77/e6f4c2723682e0180ff0bbf59f417beb to your computer and use it in GitHub Desktop.
Save ThomasG77/e6f4c2723682e0180ff0bbf59f417beb to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Stage 1 - download the dam index as GeoJSON and derive the list of page names.
# ">|" overwrites the target even when the shell has noclobber enabled.
curl -s "https://www.barrages-cfbr.eu/spip.php?page=gis_json&id_rubrique=114&objets=articles_branche&limit=1000" >| /tmp/barrages.geojson

# Preview only (stdout): second "'"-delimited field of every feature title.
# The sed pattern below implies the title holds an <a href='...'> anchor, so
# this presumably prints the href value.
# The jq filter is quoted so the shell never treats "[]" as a glob pattern.
jq -r '.features[].properties.title' /tmp/barrages.geojson | cut -d "'" -f2

# Build a cleaned copy of the GeoJSON:
#   1. iconv -c drops every character that has no ASCII representation.
#   2. First sed (basic regex, so the parentheses are literal and "*" binds to
#      ")" only): deletes "(" + one ASCII letter + any run of ")".
#   3. Second sed: deletes the literal "<a href='" and every "'>" together
#      with the following run of characters from the bracket set.
# NOTE(review): the net effect appears to be that each title is reduced to the
# bare href value - confirm against real data.
iconv -c -f utf-8 -t ascii /tmp/barrages.geojson \
  | sed 's#([a-zA-Z])*##g' \
  | sed "s#<a href='\|'>[)<\/A-Za-z0-9> -]*##g" >| /tmp/barrages_cleaned.geojson

# Distinct, sorted list of cleaned titles, one per line.
jq -r '.features[].properties.title' /tmp/barrages_cleaned.geojson | sort -u >| /tmp/files.txt

# Disabled alternative that derives the list straight from the download:
# curl -s "https://www.barrages-cfbr.eu/spip.php?page=gis_json&id_rubrique=114&objets=articles_branche&limit=1000" | jq -r .features[].properties.title | cut -d "'" -f2 >| /tmp/files.txt
#######################################
# Turn a list of page names into absolute URLs, one URL per line.
# Every whitespace-separated word of the input becomes one URL (same
# splitting as the former `for i in $(cat ...)` loop), but words are never
# glob-expanded, so names containing "?", "*" or "[" are kept literally.
# Arguments:
#   $1 - file with page names (default: /tmp/files.txt)
#   $2 - output file, truncated first (default: /tmp/urls.txt)
# Outputs:
#   Writes the URL list to $2.
#######################################
build_url_list() {
  local names_file=${1:-/tmp/files.txt}
  local urls_file=${2:-/tmp/urls.txt}
  local -a names
  local name

  # ">|" truncates even when noclobber is enabled.
  : >| "$urls_file"

  # The "|| (( ... ))" part keeps a final line that lacks a trailing newline.
  while read -r -a names || (( ${#names[@]} )); do
    for name in "${names[@]}"; do
      printf '%s%s\n' 'https://www.barrages-cfbr.eu/' "$name" >> "$urls_file"
    done
  done < "$names_file"
}

build_url_list /tmp/files.txt /tmp/urls.txt
# Stage 3 - scrape the ".chapo" block of every page into one JSON object per line.
# Start from an empty output file (">|" truncates even when noclobber is set).
true >| /tmp/content_invalidated.ndjson
# Iterate over the whitespace-separated words of /tmp/urls.txt.
# NOTE(review): the unquoted $(cat ...) is also subject to pathname expansion,
# so a URL containing "?" or "*" could be replaced by matching file names.
for i in $(cat /tmp/urls.txt);
do echo "$i";
# The pipeline below rewrites the page's HTML into JSON text:
#   - curl fetches the page; "&nbsp;" entities are deleted
#   - pup keeps the elements matching the ".chapo" selector
#   - <p>, </p>, <br class="autobr">, </div> and the opening
#     <div class="chapo surlignable"> tag are deleted
#   - newlines become spaces
#   - each <strong> becomes "," (closes the previous value, opens a key);
#     </strong> is deleted
#   - every ":" becomes ":" wrapped in quotes (closes the key, opens the value)
#   - a leading run of spaces followed by "," is replaced by {" to open the object
#   - echo appends ", "html": "<url>"} to close the object and record the URL
#   - jq -c re-serialises the text compactly and appends it to the output file
# The command substitution is unquoted, so the shell collapses whitespace runs
# to single spaces (and applies pathname expansion) before echo prints it.
# NOTE(review): every colon is rewritten, so a value that itself contains ":"
# would presumably yield different or invalid JSON; jq's output is the only
# thing appended, so text it rejects appears to be dropped - confirm.
echo $(curl -s $i | sed 's#&nbsp;##g' | pup --charset utf8 '.chapo' | sed 's#<p>\|</p>\|<br class="autobr">\|</div>\|<div class="chapo surlignable">##g' | tr '\n' ' ' | sed 's#<strong>#","#g' | sed 's#</strong>##g' | sed 's#:#":"#g' | sed 's#^[ ]*","#{"#g')'", "html": "'$i'"}' | jq -c '.' >> /tmp/content_invalidated.ndjson;
done;
# Stage 4 - attach the scraped attributes to the map features.

# Remove the site prefix everywhere in the scraped file so that each "html"
# value can be compared with the feature titles used as join key below.
sed -i 's#https://www.barrages-cfbr.eu/##g' /tmp/content_invalidated.ndjson;

# Left join: features (keyed by properties.title) against scraped records
# (keyed by html). Each joined pair is then reduced to the feature itself,
# with its properties replaced by the scraped record.
ndjson-join 'd.properties.title' 'd.html' --left \
  <(jq -c '.features[]' /tmp/barrages_cleaned.geojson) \
  /tmp/content_invalidated.ndjson \
  | ndjson-map 'd[0].properties=d[1],d[0]' >| /tmp/barrages_infos.ndgeojson

# Inspection aid (stdout): the distinct property names found across all records.
jq '.properties' /tmp/barrages_infos.ndgeojson \
  | jq -n '[inputs | keys[]] | unique | sort'
#######################################
# Rename the scraped French property labels to short ASCII field names.
# All substitutions run in a single in-place sed pass, in the listed order;
# each one replaces the complete quoted label (including its leading space and
# any trailing space variant) wherever it occurs in the file.
# Arguments:
#   $1 - newline-delimited GeoJSON file to edit in place
#        (default: /tmp/barrages_infos.ndgeojson)
#######################################
rename_property_keys() {
  local ndjson_file=${1:-/tmp/barrages_infos.ndgeojson}

  # Variants with and without a trailing space are listed separately because
  # the closing quote is part of each pattern.
  sed -i '
    s#" Altitude de la crête"#"alt_crete_m"#g
    s#" Année de mise en service"#"annee_mes"#g
    s#" Commune "#"commune"#g
    s#" Département "#"dept"#g
    s#" Hauteur"#"haut_m"#g
    s#" Latitude"#"lat"#g
    s#" Latitude "#"lat"#g
    s#" Longitude"#"lon"#g
    s#" Longitude "#"lon"#g
    s#" Longueur"#"longueur_m"#g
    s#" Propriétaire / Exploitant"#"prop_expl"#g
    s#" Propriétaire / Exploitant "#"prop_expl"#g
    s#" Propriétaire/Exploitant"#"prop_expl"#g
    s#" Rivière"#"riviere"#g
    s#" Surface de la retenue"#"surf_ret_ha"#g
    s#" Surface du bassin versant"#"surf_bv_km2"#g
    s#" Type Barrage"#"type_b"#g
    s#" Usage(s)"#"usages"#g
    s#" Volume de la retenue"#"vol_ret_m3"#g
    s#" latitude"#"lat1"#g
    s#" longitude"#"lon1"#g
  ' "$ndjson_file"
}

rename_property_keys /tmp/barrages_infos.ndgeojson
# Stage 6 - assemble a FeatureCollection and export a typed, trimmed copy.

# Collect every line of the newline-delimited file into the "features" array.
jq --slurp '{"type": "FeatureCollection","features": .}' /tmp/barrages_infos.ndgeojson >| /tmp/barrages_infos.geojson

# The query is built once and shared by the preview (ogrinfo) and the export
# (ogr2ogr) so the two can never drift apart. One output column per line; the
# pieces are concatenated without any added whitespace.
# Numeric columns have their unit text removed and, where present, the decimal
# comma turned into a dot before the cast.
# NOTE(review): vol_ret_m3 only has the "milliers de m³" text stripped, no
# scaling is applied - confirm the intended unit of that column.
barrages_sql="SELECT id,"
barrages_sql+="trim(prop_expl) AS prop_expl,"
barrages_sql+="cast(replace(replace(haut_m, ' m', ''), ',', '.') AS real) AS haut_m,"
barrages_sql+="cast(replace(replace(vol_ret_m3, 'milliers de m³', ''), ' ', '') AS integer) AS vol_ret_m3,"
barrages_sql+="trim(riviere) AS riviere,"
barrages_sql+="trim(type_b) AS type_b,"
barrages_sql+="cast(replace(replace(replace(longueur_m, ' m3', ''), ' m', ''), ',', '.') AS integer) AS longueur_m,"
barrages_sql+="cast(trim(replace(annee_mes, 'm3', '')) AS integer) AS annee_mes,"
barrages_sql+="cast(trim(replace(replace(surf_ret_ha, 'ha', ''), ',', '.')) AS real) AS surf_ret_ha,"
barrages_sql+="cast(trim(replace(replace(surf_bv_km2, 'km²', ''), ',', '.')) AS real) AS surf_bv_km2,"
barrages_sql+="cast(trim(replace(replace(alt_crete_m, 'm', ''), ',', '.')) AS real) AS alt_crete_m,"
barrages_sql+="trim(usages) AS usages,"
barrages_sql+="trim(commune) AS commune,"
barrages_sql+="trim(dept) AS dept,"
barrages_sql+="cast(lat AS real) AS lat,"
barrages_sql+="cast(lon AS real) AS lon,"
barrages_sql+=" 'https://www.barrages-cfbr.eu/' || html AS url,"
barrages_sql+="trim(lat1) AS lat1,"
barrages_sql+="trim(lon1) AS lon1,"
barrages_sql+="geometry FROM barrages_infos"

# Preview the query result on stdout.
ogrinfo -dialect SQLite -sql "$barrages_sql" /tmp/barrages_infos.geojson

# Write the query result as GeoJSON (layer barrages_infos_cleaned, RFC 7946 output).
ogr2ogr -f GeoJSON -dialect SQLite -sql "$barrages_sql" /tmp/barrages_infos_cleaned.geojson /tmp/barrages_infos.geojson -nln barrages_infos_cleaned -lco RFC7946=YES
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment