Created
July 9, 2014 15:43
-
-
Save zarch/f849df450d622f9eadee to your computer and use it in GitHub Desktop.
make cluster using vector position with GRASS GIS and sklearn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue Jul 8 15:31:10 2014 | |
@author: pietro | |
""" | |
import numpy as np | |
from sklearn.cluster import (MeanShift, MiniBatchKMeans, Ward, KMeans, | |
SpectralClustering, DBSCAN, AffinityPropagation) | |
from sklearn.cluster import estimate_bandwidth | |
from sklearn.neighbors import kneighbors_graph | |
from sklearn.preprocessing import StandardScaler | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from grass.pygrass.vector import VectorTopo | |
from grass.pygrass.vector.geometry import Point | |
# define columns of the vector output map | |
COLS = [("cat", "INTEGER PRIMARY KEY"), | |
("label", "INTEGER")] | |
def get_data_clusters(X, n_clusters=3, quantile=0.3, n_neighnors=10, | |
with_mean=True, with_std=True, | |
meanshift=None, minibatchkmeans=None, ward=None, | |
spectral=None, dbscan=None, affinity=None, kmeans=None): | |
# set parameters if given | |
meanshift = meanshift if meanshift else dict() | |
minibatchkmeans = minibatchkmeans if minibatchkmeans else dict() | |
ward = ward if ward else dict() | |
spectral = spectral if spectral else dict() | |
dbscan = dbscan if dbscan else dict() | |
affinity = affinity if affinity else dict() | |
kmeans = kmeans if kmeans else dict() | |
# normalize dataset for easier parameter selection | |
scaler = StandardScaler(with_mean=with_mean, with_std=with_std) | |
X = scaler.fit_transform(X) | |
# estimate bandwidth for mean shift | |
bandwidth = estimate_bandwidth(X, quantile=quantile) | |
# connectivity matrix for structured Ward | |
connectivity = kneighbors_graph(X, n_neighbors=n_neighnors) | |
# make connectivity symmetric | |
connectivity = 0.5 * (connectivity + connectivity.T) | |
# create clustering estimators | |
clusters = {"kmean": KMeans(n_clusters=n_clusters, **kmeans), | |
"meanshift": MeanShift(bandwidth=bandwidth, **meanshift), | |
"minibatchkmeans": MiniBatchKMeans(n_clusters=n_clusters, | |
**minibatchkmeans), | |
"ward": Ward(n_clusters=n_clusters, connectivity=connectivity, | |
**ward), | |
"spectral": SpectralClustering(n_clusters=n_clusters, | |
**spectral), | |
"dbscan": DBSCAN(**dbscan), | |
"affinity": AffinityPropagation(**affinity)} | |
return X, clusters | |
def mkcluster(vname, mapset='', overwrite=False, | |
linestyle='', marker='o', markersize=5, | |
palette='husl', context='paper', | |
figname='%s.svg', save=None, **options): | |
# open the input vector map | |
print("Reading %s vector points" % | |
("%s@%s" % (vname, mapset) if mapset else vname)) | |
with VectorTopo(vname, mapset, mode="r") as ivct: | |
coords = np.array([pnt.coords() for pnt in ivct]) | |
# extract coordinates to write the output features | |
xcoords, ycoords = coords.T | |
scaled, clusters = get_data_clusters(coords, **options) | |
for clust in sorted(clusters.keys()): | |
# open the output vector map | |
with VectorTopo(clust, mode='w', tab_cols=COLS, | |
overwrite=overwrite) as ovct: | |
print("Computing the cluster using: %s" % clust) | |
labels = clusters[clust].fit(scaled).labels_.astype(np.int) | |
# write the points to the vector map | |
print("Writing vector map: %s" % clust) | |
for x, y, l in zip(xcoords, ycoords, labels): | |
ovct.write(Point(x, y), attrs=(l, )) | |
labs = sorted(set(labels)) | |
fig, ax = plt.subplots() | |
ax.set_title(repr(clusters[clust])) | |
sns.despine() | |
sns.set_context(context) | |
if labs[0] == -1: | |
# points without label => in black | |
indx = labels == -1 | |
ax.plot(xcoords[indx], ycoords[indx], linestyle='', | |
marker=marker, markersize=int(markersize*0.7), | |
color=(0, 0, 0), label=str(-1)) | |
labs.pop(0) | |
colors = sns.color_palette(palette, len(labs)) | |
for label, color in zip(labs, colors): | |
indx = labels == label | |
ax.plot(xcoords[indx], ycoords[indx], linestyle=linestyle, | |
marker=marker, markersize=markersize, | |
color=color, label=str(label)) | |
ax.legend(*ax.get_legend_handles_labels(), loc='best') | |
if save: | |
fig.savefig(figname % clust, **save) | |
options = {"n_clusters": 5, | |
"quantile": 0.1, | |
"n_neighnors":30, | |
"meanshift": dict(bin_seeding=True), | |
"minibatchkmeans": dict(), | |
"ward": dict(), | |
"spectral": dict(eigen_solver='arpack', | |
affinity="nearest_neighbors"), | |
"dbscan": dict(eps=0.2, min_samples=5), | |
"affinity": dict(damping=.9, preference=-200), | |
"kmeans": dict(n_init=1)} | |
mkcluster("points_of_interest", "PERMANENT", overwrite=True, | |
save=dict(dpi=300, format='svg', bbox_inches='tight'), **options) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment