Skip to content

Instantly share code, notes, and snippets.

@clrnd
Created July 1, 2015 13:57
Show Gist options
  • Save clrnd/c0e2ed88ca165fc0f88c to your computer and use it in GitHub Desktop.
Save clrnd/c0e2ed88ca165fc0f88c to your computer and use it in GitHub Desktop.
Cluster/partition spatial data into equal-sized, convex, spatially cohesive parts
# coding: utf-8
import csv
import json
import sys

import numpy as np
def r_tree(points, K):
    """Label 2-D points with Sort-Tile-Recursive (STR) tiles.

    The points are sorted by x and cut into ``int(sqrt(K))`` vertical
    slices of (nearly) equal size; each slice is then sorted by y and cut
    into the same number of tiles.  Every tile gets its own label, numbered
    slice by slice starting at 0, so at most ``int(sqrt(K)) ** 2`` distinct
    labels are produced (fewer than ``K`` when ``K`` is not a perfect
    square, and some tiles stay empty when there are fewer points than
    tiles).

    References:
        http://stats.stackexchange.com/a/40785
        http://www.dtic.mil/dtic/tr/fulltext/u2/a324493.pdf (p. 8)

    Args:
        points: array-like of shape (n, 2) holding one ``[x, y]`` pair per
            row.
        K: requested number of groups; must be at least 1.

    Returns:
        Integer ndarray of length n where entry ``i`` is the group number
        of ``points[i]``.

    Raises:
        ValueError: if ``K`` yields fewer than one tile per axis.
    """
    points = np.asarray(points)
    # number of tiles along each axis
    size = int(np.sqrt(K))
    if size < 1:
        raise ValueError("K must be at least 1, got %r" % (K,))

    # `int` rather than the `np.int` alias, which NumPy 1.24 removed
    labels = np.zeros(points.shape[0], dtype=int)
    if labels.size == 0:
        return labels

    # Work with row indices instead of row values: looking a row up again
    # by value (points == row) matches any row sharing a single coordinate
    # and is quadratic in the number of points.
    order_x = points[:, 0].argsort()
    group = 0
    for slice_idx in np.array_split(order_x, size):
        # order the rows of this vertical slice by y
        order_y = slice_idx[points[slice_idx, 1].argsort()]
        for tile_idx in np.array_split(order_y, size):
            labels[tile_idx] = group
            group += 1
    return labels
# The csv module expects binary-mode files on Python 2 and text-mode files
# opened with newline='' on Python 3.
if sys.version_info[0] >= 3:
    csv_read_mode, csv_write_mode, csv_open_kwargs = 'r', 'w', {'newline': ''}
else:
    csv_read_mode, csv_write_mode, csv_open_kwargs = 'rb', 'wb', {}

with open('data.csv', csv_read_mode, **csv_open_kwargs) as f:
    # id,lon,lat
    # XD.1.1,-58.09203,-34.9823
    # ...
    data = list(csv.DictReader(f))

# points = [[x1, y1], [x2, y2], ...]
# reshape keeps the array two-dimensional even when the CSV has no rows
points = np.array(
    [(float(row['lon']), float(row['lat'])) for row in data],
    dtype=float,
).reshape(-1, 2)

# labels = [n1, n1, n2, n1, ...] where each entry is the cluster number of
# the point at that index
labels = r_tree(points, 81)
print(labels)

# associate each id to its cluster number
clusters = [(row['id'], label) for row, label in zip(data, labels.tolist())]

with open('out.json', 'w') as f:
    json.dump({'data': clusters}, f)

with open('clusters.csv', csv_write_mode, **csv_open_kwargs) as f:
    w = csv.writer(f)
    w.writerow(['id', 'cluster'])
    w.writerows(clusters)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment