Skip to content

Instantly share code, notes, and snippets.

@halfb00t
Forked from mojodna/0_register_planet.sql
Created June 25, 2021 11:49
Show Gist options
  • Save halfb00t/43ec960fd817dfd66a31f2015b39300a to your computer and use it in GitHub Desktop.
Save halfb00t/43ec960fd817dfd66a31f2015b39300a to your computer and use it in GitHub Desktop.
Sample OSM Athena queries
--
-- This will register the "planet" table within your AWS account
--
CREATE EXTERNAL TABLE planet (
id BIGINT,
type STRING,
tags MAP<STRING,STRING>,
lat DECIMAL(9,7),
lon DECIMAL(10,7),
nds ARRAY<STRUCT<ref: BIGINT>>,
members ARRAY<STRUCT<type: STRING, ref: BIGINT, role: STRING>>,
changeset BIGINT,
timestamp TIMESTAMP,
uid BIGINT,
user STRING,
version BIGINT
)
STORED AS ORCFILE
LOCATION 's3://osm-pds/planet/';
--
-- This will register the "planet_history" table within your AWS account
--
CREATE EXTERNAL TABLE planet_history (
id BIGINT,
type STRING,
tags MAP<STRING,STRING>,
lat DECIMAL(9,7),
lon DECIMAL(10,7),
nds ARRAY<STRUCT<ref: BIGINT>>,
members ARRAY<STRUCT<type: STRING, ref: BIGINT, role: STRING>>,
changeset BIGINT,
timestamp TIMESTAMP,
uid BIGINT,
user STRING,
version BIGINT,
visible BOOLEAN
)
STORED AS ORCFILE
LOCATION 's3://osm-pds/planet-history/'
--
-- This will register the "changesets" table within your AWS account
--
CREATE EXTERNAL TABLE changesets (
id BIGINT,
tags MAP<STRING,STRING>,
created_at TIMESTAMP,
open BOOLEAN,
closed_at TIMESTAMP,
comments_count BIGINT,
min_lat DECIMAL(9,7),
max_lat DECIMAL(9,7),
min_lon DECIMAL(10,7),
max_lon DECIMAL(10,7),
num_changes BIGINT,
uid BIGINT,
user STRING
)
STORED AS ORCFILE
LOCATION 's3://osm-pds/changesets/';
--
-- Find health center POIs in West Africa
--
SELECT * from planet
WHERE type = 'node'
AND tags['amenity'] IN ('hospital', 'clinic', 'doctors')
AND lon BETWEEN -15.0863 AND -7.3651
AND lat BETWEEN 4.3531 AND 12.6762;
--
-- Find health center POIs and buildings in West Africa
--
-- select out nodes and relevant columns
WITH nodes AS (
SELECT
type,
id,
tags,
lat,
lon
FROM planet
WHERE type = 'node'
),
-- select out ways and relevant columns
ways AS (
SELECT
type,
id,
tags,
nds
FROM planet
WHERE type = 'way'
AND tags['amenity'] IN ('hospital', 'clinic', 'doctors')
),
-- filter nodes to only contain those present within a bounding box
nodes_in_bbox AS (
SELECT *
FROM nodes
WHERE lon BETWEEN -15.0863 AND -7.3651
AND lat BETWEEN 4.3531 AND 12.6762
)
-- find ways intersecting the bounding box
SELECT
ways.type,
ways.id,
ways.tags,
AVG(nodes.lat) lat,
AVG(nodes.lon) lon
FROM ways
CROSS JOIN UNNEST(nds) AS t (nd)
JOIN nodes_in_bbox nodes ON nodes.id = nd.ref
GROUP BY (ways.type, ways.id, ways.tags)
UNION ALL
SELECT
type,
id,
tags,
lat,
lon
FROM nodes_in_bbox
WHERE tags['amenity'] IN ('hospital', 'clinic', 'doctors');
--
-- Find changesets made during GMU mapathons
--
SELECT *
FROM changesets
WHERE regexp_like(tags['comment'], '(?i)#gmu');
--
-- Count unique users who have used the #missingmaps tag in edits
--
SELECT COUNT(DISTINCT uid)
FROM changesets
WHERE regexp_like(tags['comment'], '(?i)#missingmaps');
--
-- Find prolific GMU mappers
--
SELECT user, count(*) AS edits
FROM changesets
WHERE regexp_like(tags['comment'], '(?i)#gmu')
GROUP BY user
ORDER BY count(*) DESC;
--
-- Find features edited during GMU mapathons
--
SELECT planet.*, changesets.tags
FROM planet
JOIN changesets ON planet.changeset = changesets.id
WHERE regexp_like(changesets.tags['comment'], '(?i)#gmu');
--
-- Group Dhaka buildings by condition
--
-- select out nodes and relevant columns
WITH nodes AS (
SELECT
id,
tags,
lat,
lon
FROM planet
WHERE type = 'node'
),
-- select out ways and relevant columns
ways AS (
SELECT
id,
tags,
nds
FROM planet
WHERE type = 'way'
),
-- filter nodes to only contain those present within a bounding box
nodes_in_bbox AS (
SELECT *
FROM nodes
WHERE lon BETWEEN 90.3907 AND 90.4235
AND lat BETWEEN 23.6948 AND 23.7248
),
-- fetch and expand referenced ways
referenced_ways AS (
SELECT
ways.*,
t.*
FROM ways
CROSS JOIN UNNEST(nds) WITH ORDINALITY AS t (nd, idx)
JOIN nodes_in_bbox nodes ON nodes.id = nd.ref
),
-- fetch *all* referenced nodes (even those outside the queried bounding box)
exploded_ways AS (
SELECT
ways.id,
ways.tags,
idx,
nd.ref,
nodes.id node_id,
ARRAY[nodes.lat, nodes.lon] coordinates
FROM referenced_ways ways
JOIN nodes ON nodes.id = nd.ref
ORDER BY ways.id, idx
)
-- query ways matching the bounding box
SELECT
count(*),
tags['building:condition']
FROM exploded_ways
GROUP BY tags['building:condition']
ORDER BY count(*) DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment