Created
June 19, 2024 10:29
-
-
Save do-me/cd246d8fcdef9ed226e58513da1c1883 to your computer and use it in GitHub Desktop.
Fetch Saarland cadastral parcels from geoportal.saarland.de and save them as GeoParquet with GeoPandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import time

import geopandas as gpd
import requests
from shapely.geometry import shape
from tqdm import tqdm
# Items endpoint of the cadastral-parcel collection that is paged through.
base_url = "https://geoportal.saarland.de/spatial-objects/408/collections/cp:CadastralParcel/items"
# Number of features requested per page (sent as the `limit` query parameter).
limit = 500
# Number of pages to request; page i is fetched with offset = i * limit.
# NOTE(review): hard-coded; presumably derived from the collection's total
# feature count at the time of writing -- confirm before re-running.
num_pages = 2564
def _get_json(url, timeout):
    """Issue one GET request for *url* and return the decoded JSON body.

    Raises ``requests.exceptions.RequestException`` for connection errors,
    timeouts and HTTP error statuses (via ``raise_for_status``). A body that
    is not valid JSON surfaces as a ``ValueError`` subclass.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def fetch_page(offset, timeout=60):
    """Fetch one page of features starting at *offset*, retrying once.

    The request URL is built from the module-level ``base_url`` and ``limit``.
    If the first attempt fails, the function waits five seconds and tries
    again. If the retry fails as well, the URL is appended to the
    module-level ``fail_list`` so it can be inspected later.

    Args:
        offset: Index of the first feature to return (``offset`` query
            parameter).
        timeout: Seconds to wait for the server before giving up on an
            attempt. Without a timeout a single stalled connection would
            block the whole run indefinitely.

    Returns:
        The decoded JSON response, or ``None`` if both attempts failed.
    """
    url = f"{base_url}?limit={limit}&offset={offset}&f=json"

    try:
        return _get_json(url, timeout)
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching {url}: {e}")

    # Give the server a short breather before the single retry.
    time.sleep(5)
    try:
        return _get_json(url, timeout)
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Retry failed for {url}: {e}")
        fail_list.append(url)
        return None
# Collect the features of every page into one list. URLs that still fail
# after the retry are gathered in fail_list by fetch_page.
all_features = []
fail_list = []
for page in tqdm(range(num_pages), desc="Fetching pages", unit="page"):
    offset = page * limit
    data = fetch_page(offset)
    # fetch_page returns None when both attempts failed; skip those pages.
    if data:
        features = data.get("features", [])
        all_features.extend(features)
# Build a GeoDataFrame from the collected features and write it to disk.
# Nothing is written when no features were fetched.
if all_features:
    # A feature with a null geometry would make shape() raise and discard the
    # entire download, so such features are kept with a missing geometry.
    geometries = [
        shape(feature["geometry"]) if feature.get("geometry") else None
        for feature in all_features
    ]
    properties = [feature.get("properties") or {} for feature in all_features]
    # NOTE(review): no CRS is assigned here. The GeoJSON responses are
    # presumably WGS 84 lon/lat -- confirm and pass crs=... so the file
    # carries CRS metadata.
    gdf = gpd.GeoDataFrame(properties, geometry=geometries)

    # Save everything to a single (Geo)Parquet file.
    output_file = "saarland_parcels.parquet"
    gdf.to_parquet(output_file)
    print(f"Parquet file saved as {output_file}")
# Write every URL that failed both attempts to a text file, one per line,
# so the missing pages can be inspected or re-fetched manually.
if fail_list:
    fail_file = "failed_urls.txt"
    with open(fail_file, "w", encoding="utf-8") as f:
        f.writelines(f"{url}\n" for url in fail_list)
    print(f"Failed URLs saved to {fail_file}")

print("Data fetching completed.")
# Example output from a full run:
# Fetching pages: 100%|██████████| 2564/2564 [1:35:46<00:00, 2.24s/page]
# Parquet file saved as saarland_parcels.parquet
# Data fetching completed.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment