Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sample OSM Athena queries
--
-- This will register the "planet" table within your AWS account
--
CREATE EXTERNAL TABLE planet (
id BIGINT,
type STRING,
tags MAP<STRING,STRING>,
lat DECIMAL(9,7),
lon DECIMAL(10,7),
nds ARRAY<STRUCT<ref: BIGINT>>,
members ARRAY<STRUCT<type: STRING, ref: BIGINT, role: STRING>>,
changeset BIGINT,
timestamp TIMESTAMP,
uid BIGINT,
user STRING,
version BIGINT
)
STORED AS ORCFILE
LOCATION 's3://osm-pds/planet/';
--
-- This will register the "planet_history" table within your AWS account
--
CREATE EXTERNAL TABLE planet_history (
id BIGINT,
type STRING,
tags MAP<STRING,STRING>,
lat DECIMAL(9,7),
lon DECIMAL(10,7),
nds ARRAY<STRUCT<ref: BIGINT>>,
members ARRAY<STRUCT<type: STRING, ref: BIGINT, role: STRING>>,
changeset BIGINT,
timestamp TIMESTAMP,
uid BIGINT,
user STRING,
version BIGINT,
visible BOOLEAN
)
STORED AS ORCFILE
LOCATION 's3://osm-pds/planet-history/'
--
-- This will register the "changesets" table within your AWS account
--
CREATE EXTERNAL TABLE changesets (
id BIGINT,
tags MAP<STRING,STRING>,
created_at TIMESTAMP,
open BOOLEAN,
closed_at TIMESTAMP,
comments_count BIGINT,
min_lat DECIMAL(9,7),
max_lat DECIMAL(9,7),
min_lon DECIMAL(10,7),
max_lon DECIMAL(10,7),
num_changes BIGINT,
uid BIGINT,
user STRING
)
STORED AS ORCFILE
LOCATION 's3://osm-pds/changesets/';
--
-- Find health center POIs in West Africa
--
SELECT * from planet
WHERE type = 'node'
AND tags['amenity'] IN ('hospital', 'clinic', 'doctors')
AND lon BETWEEN -15.0863 AND -7.3651
AND lat BETWEEN 4.3531 AND 12.6762;
--
-- Find health center POIs and buildings in West Africa
--
-- select out nodes and relevant columns
WITH nodes AS (
SELECT
type,
id,
tags,
lat,
lon
FROM planet
WHERE type = 'node'
),
-- select out ways and relevant columns
ways AS (
SELECT
type,
id,
tags,
nds
FROM planet
WHERE type = 'way'
AND tags['amenity'] IN ('hospital', 'clinic', 'doctors')
),
-- filter nodes to only contain those present within a bounding box
nodes_in_bbox AS (
SELECT *
FROM nodes
WHERE lon BETWEEN -15.0863 AND -7.3651
AND lat BETWEEN 4.3531 AND 12.6762
)
-- find ways intersecting the bounding box
SELECT
ways.type,
ways.id,
ways.tags,
AVG(nodes.lat) lat,
AVG(nodes.lon) lon
FROM ways
CROSS JOIN UNNEST(nds) AS t (nd)
JOIN nodes_in_bbox nodes ON nodes.id = nd.ref
GROUP BY (ways.type, ways.id, ways.tags)
UNION ALL
SELECT
type,
id,
tags,
lat,
lon
FROM nodes_in_bbox
WHERE tags['amenity'] IN ('hospital', 'clinic', 'doctors');
--
-- Find changesets made during GMU mapathons
--
SELECT *
FROM changesets
WHERE regexp_like(tags['comment'], '(?i)#gmu');
--
-- Count unique users who have used the #missingmaps tag in edits
--
SELECT COUNT(DISTINCT uid)
FROM changesets
WHERE regexp_like(tags['comment'], '(?i)#missingmaps');
--
-- Find prolific GMU mappers
--
SELECT user, count(*) AS edits
FROM changesets
WHERE regexp_like(tags['comment'], '(?i)#gmu')
GROUP BY user
ORDER BY count(*) DESC;
--
-- Find features edited during GMU mapathons
--
SELECT planet.*, changesets.tags
FROM planet
JOIN changesets ON planet.changeset = changesets.id
WHERE regexp_like(changesets.tags['comment'], '(?i)#gmu');
--
-- Group Dhaka buildings by condition
--
-- select out nodes and relevant columns
WITH nodes AS (
SELECT
id,
tags,
lat,
lon
FROM planet
WHERE type = 'node'
),
-- select out ways and relevant columns
ways AS (
SELECT
id,
tags,
nds
FROM planet
WHERE type = 'way'
),
-- filter nodes to only contain those present within a bounding box
nodes_in_bbox AS (
SELECT *
FROM nodes
WHERE lon BETWEEN 90.3907 AND 90.4235
AND lat BETWEEN 23.6948 AND 23.7248
),
-- fetch and expand referenced ways
referenced_ways AS (
SELECT
ways.*,
t.*
FROM ways
CROSS JOIN UNNEST(nds) WITH ORDINALITY AS t (nd, idx)
JOIN nodes_in_bbox nodes ON nodes.id = nd.ref
),
-- fetch *all* referenced nodes (even those outside the queried bounding box)
exploded_ways AS (
SELECT
ways.id,
ways.tags,
idx,
nd.ref,
nodes.id node_id,
ARRAY[nodes.lat, nodes.lon] coordinates
FROM referenced_ways ways
JOIN nodes ON nodes.id = nd.ref
ORDER BY ways.id, idx
)
-- query ways matching the bounding box
SELECT
count(*),
tags['building:condition']
FROM exploded_ways
GROUP BY tags['building:condition']
ORDER BY count(*) DESC;
@Mactores-Eng

This comment has been minimized.

Copy link

commented Nov 13, 2017

Hi, Can get sample dataset to run above queries

@ron7

This comment has been minimized.

Copy link

commented Mar 13, 2019

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.