Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Minimum spanning tree in SQL... just because
CREATE TYPE minimum_spanning_tree_internal AS (
d NUMERIC[],
a INT[],
b INT[],
geoms GEOMETRY[],
ids TEXT[]
);
CREATE TYPE minimum_spanning_tree_unit AS (
d NUMERIC,
a INT,
b INT
);
CREATE TYPE minimum_spanning_tree_container AS (
t INT,
a INT,
b INT
);
CREATE TYPE minimum_spanning_tree_result AS (
source INT,
target INT,
the_geom GEOMETRY,
cost NUMERIC
);
CREATE OR REPLACE FUNCTION minimum_spanning_tree_pre(data minimum_spanning_tree_internal, in_geom GEOMETRY, in_class TEXT) RETURNS minimum_spanning_tree_internal as $$
DECLARE
d NUMERIC;
n NUMERIC;
BEGIN
n = array_length(data.ids, 1) + 1;
IF array_length(data.ids, 1) > 0 THEN
FOR i IN 1 .. array_upper(data.ids, 1) LOOP
data.d = array_append(data.d, ST_Distance(in_geom, data.geoms[i])::numeric);
data.a = array_append(data.a, n::int);
data.b = array_append(data.b, i::int);
END LOOP;
END IF;
data.ids = array_append(data.ids, in_class) ;
data.geoms = array_append(data.geoms, in_geom) ;
RETURN data;
END;
$$ language plpgsql IMMUTABLE;
CREATE AGGREGATE minimum_spanning_tree ( GEOMETRY, TEXT) (
SFUNC = minimum_spanning_tree_pre,
STYPE = minimum_spanning_tree_internal
-- , FINALFUNC = minimum_spanning_tree_calc
);
CREATE OR REPLACE FUNCTION minimum_spanning_tree_calc(data minimum_spanning_tree_internal) RETURNS SETOF minimum_spanning_tree_result as $$
DECLARE
result DECIMAL;
r minimum_spanning_tree_unit;
tmp_a minimum_spanning_tree_container;
tmp_b minimum_spanning_tree_container;
out minimum_spanning_tree_result;
a INT;
b INT;
-- our master tree index
tree_m INT[];
tree_a INT[];
tree_b INT[];
-- our loose trees index
tree_u INT[];
tree_ua INT[];
tree_ub INT[];
tree_ut INT[];
curr_t INT = 1; --current working tree
target_t INT;
a_in_m INT;
b_in_m INT;
move_tree INT;
move_tree_a INT[];
move_tree_b INT[];
BEGIN
result := 0;
FOR r IN SELECT ii.d, ii.a::int, ii.b::int FROM (SELECT unnest(data.d) d, unnest(data.a) a, unnest(data.b) b) ii ORDER BY ii.d ASC LOOP
-- result = result+1;
IF array_length(tree_m, 1) IS NULL THEN
-- our starting condition
tree_m = ARRAY[r.a, r.b];
tree_a = ARRAY[r.a];
tree_b = ARRAY[r.b];
ELSE
a_in_m = 0;
b_in_m = 0;
IF r.a = ANY(tree_m) THEN
a_in_m = 1;
END IF;
IF r.b = ANY(tree_m) THEN
b_in_m = 1;
END IF;
IF a_in_m+b_in_m != 2 THEN -- make sure we don't have a loop closer
IF array_length(tree_ua, 1) IS NULL THEN
tree_ua = ARRAY[r.a];
tree_ub = ARRAY[r.b];
tree_ut = ARRAY[curr_t];
target_t = curr_t;
ELSE
-- find any case of a in one of the exiting trees
SELECT ii.t, ii.a, ii.b INTO tmp_a FROM (SELECT unnest(tree_ut) t, unnest(tree_ua) a,unnest(tree_ub) b) ii WHERE (ii.a = r.a AND ii.b!= r.b) OR (ii.a != r.b AND ii.b = r.a);
SELECT ii.t, ii.a, ii.b INTO tmp_b FROM (SELECT unnest(tree_ut) t, unnest(tree_ua) a,unnest(tree_ub) b) ii WHERE (ii.a != r.a AND ii.b= r.b) OR (ii.a = r.b AND ii.b != r.a);
target_t = -1;
IF tmp_a.t IS NOT null THEN
IF tmp_b.t IS null THEN
-- Add edge to the tree in tmp_a
tree_ua = array_append( tree_ua, r.a);
tree_ub = array_append( tree_ub, r.b);
tree_ut = array_append( tree_ut, tmp_a.t);
-- Keep track of where we put it
target_t = tmp_a.t;
ELSIF tmp_a.t != tmp_b.t THEN
-- Add the edge to tree in tmp_a
tree_ua = array_append( tree_ua, r.a);
tree_ub = array_append( tree_ub, r.b);
tree_ut = array_append( tree_ut, tmp_b.t);
-- Change the tree in tmp_b to be the tree in tmp_a
tree_ut = array_replace(tree_ut, tmp_a.t, tmp_b.t);
-- make sure we capture a needed move of the tree to master
IF b_in_m = 1 THEN
a_in_m = 1;
b_in_m = 0;
END IF;
target_t = tmp_a.t;
END IF;
ELSIF tmp_b.t IS NOT null THEN
-- Add edge to tree in tmp_b
tree_ua = array_append( tree_ua, r.a);
tree_ub = array_append( tree_ub, r.b);
tree_ut = array_append( tree_ut, tmp_b.t);
target_t = tmp_b.t;
ELSE
-- the edge is from a totally new tree
curr_t = curr_t+1;
tree_ua = array_append( tree_ua, r.a);
tree_ub = array_append( tree_ub, r.b);
tree_ut = array_append( tree_ut, curr_t);
target_t = curr_t;
END IF;
END IF;
IF a_in_m + b_in_m = 1 THEN -- we need to move a tree to master
move_tree = target_t;
SELECT array_agg(ii.a::int), array_agg(ii.b::int) INTO move_tree_a, move_tree_b FROM (SELECT unnest(tree_ua) a, unnest(tree_ub) b, unnest(tree_ut) t) ii WHERE ii.t = move_tree;
tree_m = tree_m || move_tree_a;
tree_m = tree_m || move_tree_b;
tree_a = tree_a || move_tree_a;
tree_b = tree_b || move_tree_b;
SELECT array_agg(ii.a::int), array_agg(ii.b::int), array_agg(ii.t::int) INTO tree_ua, tree_ub, tree_ut FROM (SELECT unnest(tree_ua) a, unnest(tree_ub) b, unnest(tree_ut) t) ii WHERE ii.t != move_tree;
END IF;
END IF;
END IF;
END LOOP;
FOR i IN 1 .. array_upper(tree_a, 1) LOOP
a = tree_a[i];
b = tree_b[i];
out.the_geom = ST_MakeLine(data.geoms[a], data.geoms[b]);
out.source = data.ids[a];
out.target = data.ids[b];
out.cost = ST_Distance(data.geoms[a], data.geoms[b]);
RETURN NEXT out;
END LOOP;
RETURN;
END;
$$ language plpgsql IMMUTABLE;
-- WITH a AS (
-- SELECT (minimum_spanning_tree_calc( minimum_spanning_tree(the_geom , cartodb_id::text ORDER BY cartodb_id ASC) )).* from untitled_table_48 )
-- SELECT *, ST_Transform(the_geom, 3857) the_geom_webmercator FROM a
@apercovich

This comment has been minimized.

Copy link

@apercovich apercovich commented Sep 18, 2020

hi @andrewxhill! this code really helped me, thanks! I have a question, how would you optimize it for a large set of points? (for example 1500 points), is it possible?

@andrewxhill

This comment has been minimized.

Copy link
Owner Author

@andrewxhill andrewxhill commented Sep 19, 2020

I bet there are good optimizations just in the basic algorithm design. There has to be a redundant loop or two. After that... it's probably time to write it in C to make it really zip.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.