Skip to content

Instantly share code, notes, and snippets.

@barthr
Created January 16, 2018 11:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save barthr/f919080f2c3f2c493bec43da0a1010c0 to your computer and use it in GitHub Desktop.
Save barthr/f919080f2c3f2c493bec43da0a1010c0 to your computer and use it in GitHub Desktop.
Dbscan algorithm in python
import string
import numpy as np
UNCLASSIFIED = 0
OUTLIER = -1
class DBSCAN(object):
def __init__(self, epsilon=1.0, min_samples=10, dist=np.linalg.norm):
self.epsilon = epsilon
self.min_samples = min_samples
self.dist = dist
def fit_predict(self, X):
# we instantiate all the elements as unclassified
if len(X) == 0:
raise Exception("X is empty")
labels = len(X) * [UNCLASSIFIED]
cluster = 0
for index in range(0, len(X)):
point = X[index]
if not labels[index] == UNCLASSIFIED:
continue
neighbours = self._region_query(point, X)
if len(neighbours) < self.min_samples:
labels[index] = OUTLIER
else:
cluster += 1
self._expand_cluster(index, cluster, X, neighbours, labels)
return labels
def _region_query(self, point, X):
result = []
for other in range(0, len(X)):
if self.dist(point - X[other]) < self.epsilon:
result.append(other)
return result
def _expand_cluster(self, index, cluster, dataset, neighbours, labels):
labels[index] = cluster
i = 0
while i < len(neighbours):
current = neighbours[i]
if labels[current] == OUTLIER:
labels[current] = cluster
elif labels[current] == UNCLASSIFIED:
labels[current] = cluster
neighbour_of_neighbours = self._region_query(current, dataset)
if len(neighbour_of_neighbours) >= self.min_samples:
neighbours = neighbours + neighbour_of_neighbours
i += 1
print(DBSCAN(epsilon=4.5, min_samples=1).fit_predict(
np.matrix('1 2;3 4;519 20; 47 50; 11 3;4 6;7 8;10.0 11;1.4 5.0')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment