Created
October 11, 2012 19:55
-
-
Save andrewxhill/3875088 to your computer and use it in GitHub Desktop.
Straight SQL Kmean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Number of iterations is at 20, which is probably very high | |
--You can reduce it by replace 20 in both iterations < and iteration = | |
WITH RECURSIVE dims AS (SELECT 3 as d), | |
env AS (SELECT ST_Envelope(ST_Collect(the_geom)) as geom FROM kmean), | |
grid AS (SELECT ST_Centroid(CDB_RectangleGrid( | |
geom, | |
(ST_XMax(geom)-ST_XMin(geom))/d, | |
(ST_YMax(geom)-ST_YMin(geom))/d | |
)) as geom | |
--row_number() OVER (ORDER BY GENERATE_SERIES(1,d*d) ASC) | |
FROM env,dims | |
), | |
clusters AS ( | |
SELECT geom,row_number() OVER (ORDER BY geom ASC) as id FROM grid | |
), | |
means(centroid, id, iteration) AS ( | |
SELECT Array_agg(geom) as centroid, ARRAY_agg(id) as id, 1 FROM clusters | |
UNION ALL | |
(SELECT | |
ARRAY[ | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[1]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[2]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[3]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[4]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[5]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[6]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[7]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[8]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
), | |
(SELECT ST_Centroid(ST_Collect(the_geom)) FROM kmean k WHERE | |
ST_Distance(k.the_geom,m.centroid[9]) = (SELECT min(ST_Distance(p,k.the_geom)) FROM UNNEST(m.centroid) p) | |
) | |
], | |
m.id, | |
iteration+1 | |
FROM means m | |
WHERE | |
iteration < 20 | |
) | |
), | |
result AS (SELECT unnest(centroid) as centroid, unnest(id) as id FROM means WHERE iteration = 20) | |
--TO get the centroid of the clusters | |
--SELECT centroid as the_geom,id, iteration FROM result | |
--All points for each cluster | |
SELECT k.the_geom_webmercator, (SELECT m.id FROM result m ORDER BY m.centroid <-> k.the_geom LIMIT 1 ) as id FROM kmean k |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment