Skip to content

Instantly share code, notes, and snippets.

@bisco
Created January 14, 2019 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bisco/4bbcb685019ec7d0caafd0730df03c12 to your computer and use it in GitHub Desktop.
Save bisco/4bbcb685019ec7d0caafd0730df03c12 to your computer and use it in GitHub Desktop.
simple outlier detection using kNN
#!/usr/bin/env python3
from sklearn.neighbors import NearestNeighbors
import numpy as np
import sys
# Percentage threshold: a point is reported as an outlier when the floored
# percentage of dataset entries found within th_ep of it is <= th_pi.
th_pi = 20
# Distance threshold: a neighbour counts towards a point's total when its
# distance to that point is <= th_ep.
th_ep = 5.0
def _load_dataset(path):
    """Read one float per line from *path* and return ``[[value], ...]``.

    Blank (whitespace-only) lines are skipped. Any other line that
    ``float()`` cannot parse raises ``ValueError``.
    """
    with open(path) as f:
        return [[float(line)] for line in f if line.strip()]


def main():
    """Print every value in the input file that has too few close neighbours.

    The file named by ``sys.argv[1]`` is read as one number per line. For
    each value, its nearest neighbours are queried and those at distance
    ``<= th_ep`` are counted. The value is printed as an outlier when
    ``100 * count // num_entries <= th_pi``.

    Exits with a usage message when no file argument is given. Prints
    nothing for a file that contains no values.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {} DATAFILE".format(sys.argv[0]))

    dataset = _load_dataset(sys.argv[1])
    num_entries = len(dataset)
    if num_entries == 0:
        # Nothing to fit and nothing to report.
        return

    X = np.array(dataset)
    # scikit-learn rejects n_neighbors < 1, so a single-entry file still
    # asks for one neighbour instead of num_entries - 1 == 0.
    nbrs = NearestNeighbors(
        n_neighbors=max(num_entries - 1, 1), algorithm='ball_tree'
    ).fit(X)

    # Query one point at a time so only a single row of distances is held
    # in memory, rather than a num_entries x (num_entries - 1) matrix.
    for i in dataset:
        distances, _ = nbrs.kneighbors(np.array([i]))
        # NOTE(review): the query point is passed explicitly, so per the
        # scikit-learn docs it is returned as its own neighbour (distance 0)
        # and is included in this count - confirm that is intended.
        count = int(np.count_nonzero(distances[0] <= th_ep))
        if 100 * count // num_entries <= th_pi:
            print("outlier ->", i)
# Run the detector only when executed as a script, so importing this module
# does not read sys.argv or touch any file.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment