Skip to content

Instantly share code, notes, and snippets.

@bitner
Last active March 3, 2022 23:54
Show Gist options
  • Save bitner/7d3b69f88d197d81db6717101b229371 to your computer and use it in GitHub Desktop.
Save bitner/7d3b69f88d197d81db6717101b229371 to your computer and use it in GitHub Desktop.
pgstac_aggregate.sql
-- Function to take an x, y, z and turn it into a quadkey text string
CREATE OR REPLACE FUNCTION quadkey(zoom int, tx int, ty int) RETURNS text AS $$
DECLARE
i int;
digit int;
quadkey text := '';
mask int;
BEGIN
FOR i IN REVERSE zoom..1 LOOP
digit := 0;
mask := 1 << (i-1);
IF (tx & mask) != 0 THEN
digit := digit + 1;
END IF;
IF (ty & mask) != 0 THEN
digit := digit + 2;
END IF;
quadkey := concat(quadkey, digit::text);
END LOOP;
RETURN quadkey;
END;
$$ LANGUAGE PLPGSQL;
-- Function to take a quadkey text string and return the corresponding z, x, y
CREATE OR REPLACE FUNCTION quadkeytotile(IN quadkey text, OUT zoom int, OUT x int, OUT y int) RETURNS RECORD AS $$
DECLARE
mask int;
c text;
BEGIN
x := 0;
y:= 0;
zoom := length(quadkey);
FOR i IN REVERSE zoom..1 LOOP
mask := 1 << (i-1);
c := substr(quadkey, zoom - i + 1, 1);
IF c = '1' THEN
x := x | mask;
END IF;
IF c = '2' THEN
y := y | mask;
END IF;
IF c = '3' THEN
x := x | mask;
y := y | mask;
END IF;
END LOOP;
RETURN;
END;
$$ LANGUAGE PLPGSQL IMMUTABLE PARALLEL SAFE;
-- Function that makes a global grid using a web mercator grid at a particular zoom level
CREATE OR REPLACE FUNCTION make_quadkey_grid(
IN _zoom int,
OUT quadkey text,
OUT geom geometry
) RETURNS SETOF record AS $$
SELECT
quadkey(_zoom, x, y), ST_TileEnvelope(_zoom, x, y)
FROM
generate_series(0,(2^_zoom)::int - 1, 1) x,
generate_series(0, (2^_zoom)::int -1, 1) y
;
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
-- Function that makes a global grid using a web mercator grid at a particular zoom level
CREATE OR REPLACE FUNCTION make_quadkey_subgrid(
IN _quadkey text,
IN zoom_plus int DEFAULT 4,
OUT zoom int,
OUT x int,
OUT y int,
OUT quadkey text,
OUT geom geometry
) RETURNS SETOF record AS $$
WITH i AS (
SELECT
q.zoom::int as iz,
q.x::int as ix,
q.y::int as iy
FROM quadkeytotile(concat(_quadkey, repeat('0', zoom_plus))) q
),
cells AS (
SELECT
iz as cz,
cx,
cy
FROM
i,
generate_series(ix, ix + (2^zoom_plus)::int - 1, 1) as cx,
generate_series(iy, iy + (2^zoom_plus)::int - 1, 1) as cy
)
SELECT
*,
quadkey(cz, cx, cy) as quadkey,
ST_TileEnvelope(cz, cx, cy) as geom
FROM
cells
;
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
-- Table to hold our created grid
DROP TABLE IF EXISTS quadkey_grid;
CREATE TABLE quadkey_grid (
quadkey text PRIMARY KEY,
bbox box2d
);
INSERT INTO quadkey_grid SELECT quadkey, geom::box2d FROM make_quadkey_grid(9);
CREATE INDEX ON quadkey_grid USING GIST (bbox);
-- since the grid is in web mercator, we create an index transforming the grid cell to geographic so we can have indexed comparison to the items geometry
CREATE INDEX ON quadkey_grid USING GIST((st_transform(st_setsrid(bbox, 3857), 4326)));
CREATE TABLE items_aggregate (
quadkey text NOT NULL,
day timestamptz NOT NULL,
collection_id text NOT NULL REFERENCES pgstac.collections (id),
count bigint NOT NULL,
UNIQUE (collection_id, day, quadkey)
);
CREATE INDEX ON items_aggregate(quadkey text_pattern_ops);
-- Function that allows passing in a collection and day to update the aggregates table, this would need to be run after any ingests, but could be limited to only run for the days/collections that had data added
CREATE OR REPLACE FUNCTION update_items_aggregate(
_collection_id text,
_day timestamptz
) RETURNS VOID AS $$
WITH u AS (
SELECT
quadkey,
date_trunc('day', datetime) as day,
collection_id,
count(*) as count
FROM
pgstac.items
JOIN quadkey_grid ON (st_intersects(geometry, (st_transform(st_setsrid(bbox, 3857), 4326))))
WHERE
collection_id=_collection_id
AND
datetime >= date_trunc('day', _day)
AND
datetime < (date_trunc('day', _day) + '1 day'::interval)
GROUP BY 1,2,3
)
INSERT INTO items_aggregate (quadkey, day, collection_id, count)
SELECT quadkey, day, collection_id, count FROM u
ON CONFLICT (collection_id, day, quadkey)
DO UPDATE SET
count=EXCLUDED.count
;
$$ LANGUAGE SQL;
SELECT update_items_aggregate(id, '2020-01-01') FROM pgstac.collections;
-- Roll up to month
SELECT quadkey, date_trunc('month', day) as month, collection_id, sum(count)
FROM items_aggregate
GROUP BY 1,2,3
;
-- This rollup is WRONG since a larger quadkey cell could have multiple cells that each had a count from a single item
SELECT substring(quadkey, 0, 4) as quadkey, day, collection_id, sum(count)
FROM items_aggregate
GROUP BY 1,2,3
;
-- Get "4 levels up" cells from a request for a zoom level
CREATE OR REPLACE FUNCTION get_agg(
IN _collection text,
IN _day timestamptz,
IN z int,
IN x int,
IN y int,
OUT quadkey text,
OUT day timestamptz,
OUT collection_id text,
OUT count bigint,
OUT geom geometry
) RETURNS SETOF RECORD AS $$
DECLARE
vtilequad text := quadkey(z, x, y);
zoom_plus int := 4;
BEGIN
-- Check if this is in the aggregate cache
IF EXISTS (
SELECT 1 FROM items_aggregate
WHERE
items_aggregate.collection_id=_collection
AND items_aggregate.day=_day
AND items_aggregate.quadkey LIKE CONCAT(vtilequad, repeat('_',zoom_plus))
LIMIT 1
) THEN
RAISE NOTICE 'Getting pre-aggregated data';
RETURN QUERY
WITH vtgrid AS (
SELECT * FROM make_quadkey_subgrid(vtilequad, zoom_plus)
)
SELECT vtgrid.quadkey, items_aggregate.day, items_aggregate.collection_id, items_aggregate.count, vtgrid.geom
FROM items_aggregate
JOIN vtgrid USING (quadkey)
WHERE
items_aggregate.collection_id=_collection
AND items_aggregate.day=_day
AND items_aggregate.quadkey LIKE CONCAT(vtilequad, repeat('_',zoom_plus));
ELSE
RAISE NOTICE 'Getting data from scratch.';
RETURN QUERY
WITH vtgrid AS (
SELECT * FROM make_quadkey_subgrid(vtilequad, zoom_plus)
), i as (
INSERT INTO items_aggregate (quadkey, day, collection_id, count)
SELECT
vtgrid.quadkey,
date_trunc('day', datetime) as day,
items.collection_id,
count(*)
FROM pgstac.items
JOIN vtgrid ON (st_intersects(geometry, (st_transform(vtgrid.geom, 4326))))
WHERE
items.collection_id=_collection
AND
datetime >= date_trunc('day', _day)
AND
datetime < (date_trunc('day', _day) + '1 day'::interval)
GROUP BY 1,2,3
ON CONFLICT DO NOTHING
RETURNING *
)
SELECT vtgrid.quadkey, i.day, i.collection_id, i.count, vtgrid.geom
FROM i
JOIN vtgrid USING (quadkey)
;
END IF;
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION get_agg_asvt(
IN _collection text,
IN _day timestamptz,
IN z int,
IN x int,
IN y int
) RETURNS bytea AS $$
WITH mvtgeom AS (
SELECT
ST_AsMVTGeom(
geom,
ST_TileEnvelope(z, x, y)
),
collection_id,
day,
quadkey,
count
FROM get_agg(_collection, _day, z, x, y)
)
SELECT ST_AsMVT(mvtgeom.*)
FROM mvtgeom;
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
select * from get_agg('landsat-8-c2-l2','2020-01-01 00:00:00+00',5,6,30);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment