Skip to content

Instantly share code, notes, and snippets.

@danabauer
Last active February 29, 2024 17:14
Show Gist options
  • Save danabauer/13c3ef917f302026c1b0e8884e82353c to your computer and use it in GitHub Desktop.
Save danabauer/13c3ef917f302026c1b0e8884e82353c to your computer and use it in GitHub Desktop.
duckdb queries to ask questions of the Overture places and buildings datasets
INSTALL spatial;
INSTALL httpfs;
LOAD spatial;
LOAD httpfs;
SET s3_region='us-west-2';
COPY(
SELECT
id,
names.primary as primary_name,
bbox.minx as x,
bbox.miny as y,
ST_GeomFromWKB(geometry) as geometry,
categories.main as main_category,
sources[1].dataset AS primary_source,
confidence
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-02-15-alpha.0/theme=places/type=*/*', filename=true, hive_partitioning=1)
WHERE main_category = 'mountain' AND confidence > .90
ORDER BY confidence DESC
) TO 'overture_places_mountains_gt90.geojson'
WITH (FORMAT GDAL, DRIVER 'GeoJSON');
/* Buildings and their attributes in the Detroit area, with primary names and the primary data source made explicit. */
INSTALL spatial;
INSTALL azure;
LOAD spatial;
LOAD azure;
SET azure_storage_connection_string = 'DefaultEndpointsProtocol=https;AccountName=overturemapswestus2;AccountKey=;EndpointSuffix=core.windows.net';
SELECT
id,
names.primary as primary_name,
height,
level,
sources[1].dataset AS primary_source,
ST_GeomFromWKB(geometry) as geometry
FROM read_parquet('azure://release/2024-02-15-alpha.0/theme=buildings/type=*/*', filename=true, hive_partitioning=1)
WHERE primary_name IS NOT NULL
AND bbox.minX > -84.363175999999953
AND bbox.maxX < -82.418395999999973
AND bbox.minY > 41.706621000000041
AND bbox.maxY < 43.327039000000070
LIMIT 25;
/* This query gives you a CSV file with all the categories of places (~2090) and the count for each category. */
COPY (
SELECT category, count(1) as count
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-02-15-alpha.0/theme=places/type=*/*', filename=true, hive_partitioning=1)
CROSS JOIN UNNEST(ARRAY_CONCAT(ARRAY[categories.main], categories.alternate)) AS t(category)
GROUP BY category ORDER BY count DESC
) TO 'overture_places_categories.csv' (HEADER, DELIMITER ',');
SELECT DISTINCT
t.source.dataset
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-02-15-alpha.0/theme=places/type=*/*', filename=true, hive_partitioning=1)
CROSS JOIN UNNEST(sources) AS t(source);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment