Skip to content

Instantly share code, notes, and snippets.

@zarch
Created July 9, 2014 15:43
Show Gist options
  • Save zarch/f849df450d622f9eadee to your computer and use it in GitHub Desktop.
make cluster using vector position with GRASS GIS and sklearn
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 8 15:31:10 2014
@author: pietro
"""
import numpy as np
from sklearn.cluster import (MeanShift, MiniBatchKMeans, Ward, KMeans,
SpectralClustering, DBSCAN, AffinityPropagation)
from sklearn.cluster import estimate_bandwidth
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from grass.pygrass.vector import VectorTopo
from grass.pygrass.vector.geometry import Point
# Attribute-table columns for every output vector map: the GRASS category
# key plus the integer cluster label assigned to each point.
COLS = [("cat", "INTEGER PRIMARY KEY"),
("label", "INTEGER")]
def get_data_clusters(X, n_clusters=3, quantile=0.3, n_neighnors=10,
                      with_mean=True, with_std=True,
                      meanshift=None, minibatchkmeans=None, ward=None,
                      spectral=None, dbscan=None, affinity=None, kmeans=None,
                      n_neighbors=None):
    """Scale the input points and build a dictionary of clustering estimators.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Point coordinates (or features) to cluster.
    n_clusters : int
        Number of clusters for the estimators that require it.
    quantile : float
        Quantile passed to ``estimate_bandwidth`` for MeanShift.
    n_neighnors : int
        Neighbours used to build the Ward connectivity graph.  The name is
        misspelled but kept for backward compatibility; prefer
        ``n_neighbors``, which takes precedence when given.
    with_mean, with_std : bool
        Forwarded to ``StandardScaler``.
    meanshift, minibatchkmeans, ward, spectral, dbscan, affinity, kmeans :
        dict or None
        Extra keyword arguments for the corresponding estimator.
    n_neighbors : int or None
        Correctly-spelled alias for ``n_neighnors``.

    Returns
    -------
    (X_scaled, clusters) : (ndarray, dict)
        The scaled data and a dict mapping a short name to an *unfitted*
        scikit-learn clustering estimator.
    """
    # correctly-spelled alias wins over the legacy misspelled parameter
    if n_neighbors is None:
        n_neighbors = n_neighnors
    # replace None with empty option dicts (avoids mutable default args)
    meanshift = meanshift or {}
    minibatchkmeans = minibatchkmeans or {}
    ward = ward or {}
    spectral = spectral or {}
    dbscan = dbscan or {}
    affinity = affinity or {}
    kmeans = kmeans or {}
    # normalize dataset for easier parameter selection
    scaler = StandardScaler(with_mean=with_mean, with_std=with_std)
    X = scaler.fit_transform(X)
    # estimate bandwidth for mean shift
    bandwidth = estimate_bandwidth(X, quantile=quantile)
    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(X, n_neighbors=n_neighbors)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)
    # create clustering estimators (note: the "kmean" key spelling is part
    # of the public output — it becomes the output vector-map name)
    clusters = {"kmean": KMeans(n_clusters=n_clusters, **kmeans),
                "meanshift": MeanShift(bandwidth=bandwidth, **meanshift),
                "minibatchkmeans": MiniBatchKMeans(n_clusters=n_clusters,
                                                   **minibatchkmeans),
                "ward": Ward(n_clusters=n_clusters, connectivity=connectivity,
                             **ward),
                "spectral": SpectralClustering(n_clusters=n_clusters,
                                               **spectral),
                "dbscan": DBSCAN(**dbscan),
                "affinity": AffinityPropagation(**affinity)}
    return X, clusters
def mkcluster(vname, mapset='', overwrite=False,
              linestyle='', marker='o', markersize=5,
              palette='husl', context='paper',
              figname='%s.svg', save=None, **options):
    """Cluster the points of a GRASS vector map, write and plot the result.

    For every clustering estimator returned by ``get_data_clusters`` this
    fits the estimator on the (scaled) point coordinates, writes a new
    vector map — named after the estimator — whose attribute table holds
    the cluster label of each point, and draws a scatter plot coloured by
    label.

    Parameters
    ----------
    vname : str
        Name of the input vector point map.
    mapset : str
        GRASS mapset of the input map ('' means the current mapset).
    overwrite : bool
        Overwrite existing output vector maps.
    linestyle, marker, markersize : matplotlib point-style options.
    palette, context : seaborn palette name and plotting context.
    figname : str
        ``%``-format template for the saved figure file name; it receives
        the estimator name.
    save : dict or None
        When given, the figure is saved and ``save`` is forwarded to
        ``fig.savefig``.
    **options
        Forwarded to ``get_data_clusters``.
    """
    # open the input vector map and collect the point coordinates
    print("Reading %s vector points" %
          ("%s@%s" % (vname, mapset) if mapset else vname))
    with VectorTopo(vname, mapset, mode="r") as ivct:
        coords = np.array([pnt.coords() for pnt in ivct])
    # keep the original (unscaled) coordinates for writing/plotting
    xcoords, ycoords = coords.T
    scaled, clusters = get_data_clusters(coords, **options)
    for clust in sorted(clusters.keys()):
        # open the output vector map
        with VectorTopo(clust, mode='w', tab_cols=COLS,
                        overwrite=overwrite) as ovct:
            print("Computing the cluster using: %s" % clust)
            # builtin int: the np.int alias was deprecated in NumPy 1.20
            # and removed in 1.24
            labels = clusters[clust].fit(scaled).labels_.astype(int)
            # write the points to the vector map
            print("Writing vector map: %s" % clust)
            for x, y, l in zip(xcoords, ycoords, labels):
                ovct.write(Point(x, y), attrs=(l, ))
        labs = sorted(set(labels))
        fig, ax = plt.subplots()
        ax.set_title(repr(clusters[clust]))
        sns.despine()
        sns.set_context(context)
        # guard against an empty label set before peeking at labs[0]
        if labs and labs[0] == -1:
            # points without label (e.g. DBSCAN noise) => smaller, in black
            indx = labels == -1
            ax.plot(xcoords[indx], ycoords[indx], linestyle='',
                    marker=marker, markersize=int(markersize*0.7),
                    color=(0, 0, 0), label=str(-1))
            labs.pop(0)
        colors = sns.color_palette(palette, len(labs))
        for label, color in zip(labs, colors):
            indx = labels == label
            ax.plot(xcoords[indx], ycoords[indx], linestyle=linestyle,
                    marker=marker, markersize=markersize,
                    color=color, label=str(label))
        ax.legend(*ax.get_legend_handles_labels(), loc='best')
        if save:
            fig.savefig(figname % clust, **save)
# Per-estimator parameters forwarded to get_data_clusters().
# NOTE(review): the "n_neighnors" key is misspelled on purpose — it must
# match the (equally misspelled) keyword of get_data_clusters above.
options = {"n_clusters": 5,
"quantile": 0.1,
"n_neighnors":30,
"meanshift": dict(bin_seeding=True),
"minibatchkmeans": dict(),
"ward": dict(),
"spectral": dict(eigen_solver='arpack',
affinity="nearest_neighbors"),
"dbscan": dict(eps=0.2, min_samples=5),
"affinity": dict(damping=.9, preference=-200),
"kmeans": dict(n_init=1)}
# Run all the clusterings on the "points_of_interest" map of the PERMANENT
# mapset, overwriting existing outputs and saving one SVG per estimator.
mkcluster("points_of_interest", "PERMANENT", overwrite=True,
save=dict(dpi=300, format='svg', bbox_inches='tight'), **options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment