Skip to content

Instantly share code, notes, and snippets.

@do-me
Created June 19, 2024 10:29
Show Gist options
  • Save do-me/cd246d8fcdef9ed226e58513da1c1883 to your computer and use it in GitHub Desktop.
Save do-me/cd246d8fcdef9ed226e58513da1c1883 to your computer and use it in GitHub Desktop.
Fetches Saarland cadastral parcels from geoportal.saarland.de and saves them to GeoParquet with geopandas.
import requests
import geopandas as gpd
from shapely.geometry import shape
import json
from tqdm import tqdm
import time
# Define the base URL and parameters
# OGC API Features endpoint for the Saarland cadastral parcel collection.
base_url = "https://geoportal.saarland.de/spatial-objects/408/collections/cp:CadastralParcel/items"
# Features requested per page (the API's page size).
limit = 500
# Pages needed to cover the whole collection: 2564 * 500 ≈ 1.28 M parcels.
# NOTE(review): presumably matches the collection size at time of writing — confirm.
num_pages = 2564
# Function to fetch data from a single page
def fetch_page(offset):
    """Fetch one page of parcel features from the OGC API endpoint.

    Retries once after a 5 s pause on any request error. If the retry
    also fails, the URL is appended to the module-level ``fail_list``
    and ``None`` is returned.

    Parameters:
        offset (int): Zero-based feature offset of the page to fetch.

    Returns:
        dict | None: Parsed GeoJSON feature collection, or ``None`` on failure.
    """
    url = f"{base_url}?limit={limit}&offset={offset}&f=json"
    # One initial attempt plus one retry; the timeout prevents a stalled
    # connection from hanging the whole multi-hour run.
    for attempt in range(2):
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            if attempt == 0:
                print(f"Error fetching {url}: {e}")
                time.sleep(5)  # back off briefly before the retry
            else:
                print(f"Retry failed for {url}: {e}")
                fail_list.append(url)  # recorded for manual re-fetching later
    return None
# Accumulators: every fetched GeoJSON feature, and any URL that failed
# even after fetch_page's retry.
all_features = []
fail_list = []

# Walk the collection page by page; offsets go 0, 500, 1000, ...
for offset in tqdm(range(0, num_pages * limit, limit),
                   desc="Fetching pages", unit="page"):
    data = fetch_page(offset)
    if data:
        all_features.extend(data.get("features", []))
# Assemble the collected GeoJSON features into a GeoDataFrame.
if all_features:
    records = [feat["properties"] for feat in all_features]
    shapes = [shape(feat["geometry"]) for feat in all_features]
    gdf = gpd.GeoDataFrame(records, geometry=shapes)

    # Persist everything as a single GeoParquet file.
    output_file = "saarland_parcels.parquet"
    gdf.to_parquet(output_file)
    print(f"Parquet file saved as {output_file}")
# Dump any URLs that failed both attempts so they can be re-fetched by hand.
if fail_list:
    fail_file = "failed_urls.txt"
    with open(fail_file, 'w') as f:
        f.writelines(url + "\n" for url in fail_list)
    print(f"Failed URLs saved to {fail_file}")

print("Data fetching completed.")
# Fetching pages: 100%|██████████| 2564/2564 [1:35:46<00:00, 2.24s/page]
# Parquet file saved as saarland_parcels.parquet
# Data fetching completed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment