Created
January 3, 2014 18:18
-
-
Save andrewxhill/8243282 to your computer and use it in GitHub Desktop.
First pass at a simple point clustering algo for plpgsql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TYPE axh_cdb_row_clusters ( | |
cartodb_id int, | |
cluster_id int, | |
size int | |
); | |
CREATE OR REPLACE FUNCTION axh_cdb_row_cluster(_t text, zoom INT) | |
RETURNS SETOF axh_cdb_row_clusters AS $$ | |
DECLARE | |
sql TEXT; | |
cluster_n INT := 1; | |
clustered INT[]; | |
clusterA INT[]; | |
clusterB INT[]; | |
history INT[]; | |
seed INT; | |
rows INT; | |
alen INT; | |
marker INT; | |
resolution INT := 10; | |
resolution_v INT; | |
ret axh_cdb_row_clusters; | |
BEGIN | |
-- Get the total number of rows we are looking at | |
sql := 'SELECT count(*) FROM (' || _t || ') t'; | |
EXECUTE sql INTO rows; | |
-- Get the first value to start with | |
WHILE COALESCE(array_length(clustered, 1),0) < rows LOOP | |
EXECUTE 'SELECT min(cartodb_id) FROM (' || _t || ') t WHERE ($1 IS NULL OR NOT (cartodb_id = ANY ($1)))' INTO seed USING clustered; | |
-- Get the first cluster | |
sql := 'SELECT array_agg(cartodb_id) FROM (' || _t || ') t WHERE ST_Distance(the_geom::geography, (SELECT the_geom::geography FROM (' || _t || ') g WHERE cartodb_id = ' || seed || ')) < '|| resolution ||' * CDB_XYZ_Resolution( '|| zoom || ')'; | |
EXECUTE sql INTO clusterA; | |
-- Reset variable resolution | |
resolution_v := resolution; | |
alen := array_length(clusterA, 1); | |
-- Evaluate the cluster | |
WHILE alen > resolution_v LOOP | |
--Extend the resolution | |
resolution_v := LEAST(resolution_v + resolution, resolution_v + (alen / resolution_v)); | |
EXECUTE 'SELECT array_agg(cartodb_id) FROM (' || _t || ') t WHERE NOT (cartodb_id = ANY ($1)) AND ST_Distance(the_geom::geography, (SELECT the_geom::geography FROM (' || _t || ') g WHERE cartodb_id = ' || seed || ')) < '|| resolution_v ||' * CDB_XYZ_Resolution( '|| zoom || ')' INTO clusterB USING clusterA; | |
alen := array_length(clusterB, 1); | |
clusterA = array_cat(clusterA, clusterB); | |
END LOOP; | |
SELECT seed cartodb_id, cluster_n cluster_id, array_length(clusterA, 1) size INTO ret.cartodb_id, ret.cluster_id, ret.size; | |
RETURN NEXT ret; | |
cluster_n := cluster_n + 1; | |
clustered = array_cat(clustered, clusterA); | |
END LOOP; | |
RETURN; | |
END | |
$$ language plpgsql; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment