Sample OSM Athena queries
-- | |
-- This will register the "planet" table within your AWS account | |
-- | |
CREATE EXTERNAL TABLE planet ( | |
id BIGINT, | |
type STRING, | |
tags MAP<STRING,STRING>, | |
lat DECIMAL(9,7), | |
lon DECIMAL(10,7), | |
nds ARRAY<STRUCT<ref: BIGINT>>, | |
members ARRAY<STRUCT<type: STRING, ref: BIGINT, role: STRING>>, | |
changeset BIGINT, | |
timestamp TIMESTAMP, | |
uid BIGINT, | |
user STRING, | |
version BIGINT | |
) | |
STORED AS ORCFILE | |
LOCATION 's3://osm-pds/planet/'; |
-- | |
-- This will register the "planet_history" table within your AWS account | |
-- | |
CREATE EXTERNAL TABLE planet_history ( | |
id BIGINT, | |
type STRING, | |
tags MAP<STRING,STRING>, | |
lat DECIMAL(9,7), | |
lon DECIMAL(10,7), | |
nds ARRAY<STRUCT<ref: BIGINT>>, | |
members ARRAY<STRUCT<type: STRING, ref: BIGINT, role: STRING>>, | |
changeset BIGINT, | |
timestamp TIMESTAMP, | |
uid BIGINT, | |
user STRING, | |
version BIGINT, | |
visible BOOLEAN | |
) | |
STORED AS ORCFILE | |
LOCATION 's3://osm-pds/planet-history/' |
-- | |
-- This will register the "changesets" table within your AWS account | |
-- | |
CREATE EXTERNAL TABLE changesets ( | |
id BIGINT, | |
tags MAP<STRING,STRING>, | |
created_at TIMESTAMP, | |
open BOOLEAN, | |
closed_at TIMESTAMP, | |
comments_count BIGINT, | |
min_lat DECIMAL(9,7), | |
max_lat DECIMAL(9,7), | |
min_lon DECIMAL(10,7), | |
max_lon DECIMAL(10,7), | |
num_changes BIGINT, | |
uid BIGINT, | |
user STRING | |
) | |
STORED AS ORCFILE | |
LOCATION 's3://osm-pds/changesets/'; |
-- | |
-- Find health center POIs in West Africa | |
-- | |
SELECT * from planet | |
WHERE type = 'node' | |
AND tags['amenity'] IN ('hospital', 'clinic', 'doctors') | |
AND lon BETWEEN -15.0863 AND -7.3651 | |
AND lat BETWEEN 4.3531 AND 12.6762; |
-- | |
-- Find health center POIs and buildings in West Africa | |
-- | |
-- select out nodes and relevant columns | |
WITH nodes AS ( | |
SELECT | |
type, | |
id, | |
tags, | |
lat, | |
lon | |
FROM planet | |
WHERE type = 'node' | |
), | |
-- select out ways and relevant columns | |
ways AS ( | |
SELECT | |
type, | |
id, | |
tags, | |
nds | |
FROM planet | |
WHERE type = 'way' | |
AND tags['amenity'] IN ('hospital', 'clinic', 'doctors') | |
), | |
-- filter nodes to only contain those present within a bounding box | |
nodes_in_bbox AS ( | |
SELECT * | |
FROM nodes | |
WHERE lon BETWEEN -15.0863 AND -7.3651 | |
AND lat BETWEEN 4.3531 AND 12.6762 | |
) | |
-- find ways intersecting the bounding box | |
SELECT | |
ways.type, | |
ways.id, | |
ways.tags, | |
AVG(nodes.lat) lat, | |
AVG(nodes.lon) lon | |
FROM ways | |
CROSS JOIN UNNEST(nds) AS t (nd) | |
JOIN nodes_in_bbox nodes ON nodes.id = nd.ref | |
GROUP BY (ways.type, ways.id, ways.tags) | |
UNION ALL | |
SELECT | |
type, | |
id, | |
tags, | |
lat, | |
lon | |
FROM nodes_in_bbox | |
WHERE tags['amenity'] IN ('hospital', 'clinic', 'doctors'); |
-- | |
-- Find changesets made during GMU mapathons | |
-- | |
SELECT * | |
FROM changesets | |
WHERE regexp_like(tags['comment'], '(?i)#gmu'); |
-- | |
-- Count unique users who have used the #missingmaps tag in edits | |
-- | |
SELECT COUNT(DISTINCT uid) | |
FROM changesets | |
WHERE regexp_like(tags['comment'], '(?i)#missingmaps'); |
-- | |
-- Find prolific GMU mappers | |
-- | |
SELECT user, count(*) AS edits | |
FROM changesets | |
WHERE regexp_like(tags['comment'], '(?i)#gmu') | |
GROUP BY user | |
ORDER BY count(*) DESC; |
-- | |
-- Find features edited during GMU mapathons | |
-- | |
SELECT planet.*, changesets.tags | |
FROM planet | |
JOIN changesets ON planet.changeset = changesets.id | |
WHERE regexp_like(changesets.tags['comment'], '(?i)#gmu'); |
-- | |
-- Group Dhaka buildings by condition | |
-- | |
-- select out nodes and relevant columns | |
WITH nodes AS ( | |
SELECT | |
id, | |
tags, | |
lat, | |
lon | |
FROM planet | |
WHERE type = 'node' | |
), | |
-- select out ways and relevant columns | |
ways AS ( | |
SELECT | |
id, | |
tags, | |
nds | |
FROM planet | |
WHERE type = 'way' | |
), | |
-- filter nodes to only contain those present within a bounding box | |
nodes_in_bbox AS ( | |
SELECT * | |
FROM nodes | |
WHERE lon BETWEEN 90.3907 AND 90.4235 | |
AND lat BETWEEN 23.6948 AND 23.7248 | |
), | |
-- fetch and expand referenced ways | |
referenced_ways AS ( | |
SELECT | |
ways.*, | |
t.* | |
FROM ways | |
CROSS JOIN UNNEST(nds) WITH ORDINALITY AS t (nd, idx) | |
JOIN nodes_in_bbox nodes ON nodes.id = nd.ref | |
), | |
-- fetch *all* referenced nodes (even those outside the queried bounding box) | |
exploded_ways AS ( | |
SELECT | |
ways.id, | |
ways.tags, | |
idx, | |
nd.ref, | |
nodes.id node_id, | |
ARRAY[nodes.lat, nodes.lon] coordinates | |
FROM referenced_ways ways | |
JOIN nodes ON nodes.id = nd.ref | |
ORDER BY ways.id, idx | |
) | |
-- query ways matching the bounding box | |
SELECT | |
count(*), | |
tags['building:condition'] | |
FROM exploded_ways | |
GROUP BY tags['building:condition'] | |
ORDER BY count(*) DESC; |
This comment has been minimized.
This comment has been minimized.
Sample sets/data: |
This comment has been minimized.
This comment has been minimized.
Hi thank you for the examples. They are very helpful. I am wondering if there is any way to query for buildings by geo-area like city names or county names or state names |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Hi, Can get sample dataset to run above queries