Skip to content

Instantly share code, notes, and snippets.

@bhaskara
Created July 15, 2020 04:55
Show Gist options
  • Save bhaskara/6a740ca19d69838fc310fffbf08b07f7 to your computer and use it in GitHub Desktop.
Save bhaskara/6a740ca19d69838fc310fffbf08b07f7 to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.cluster import DBSCAN
class OutlierDetector(object):
    """Detect outlier users from the policies attached to each user.

    Every user is encoded as a 0/1 float vector with one column per distinct
    policy seen across all users; the configured detector is then run on the
    resulting (n_users, n_policies) matrix.

    Parameters
    ----------
    users : iterable of User
        Each user must expose an iterable ``policies`` attribute whose items
        are hashable. The iterable is materialized once, so one-shot
        iterators such as generators are accepted.
    outlier_detector : DBScan or IsolationForest or other outlier detector or None
        Any object providing ``fit_predict(X)``. Defaults to
        ``DBSCAN(eps=1, metric="euclidean", min_samples=3, n_jobs=-1)``.
    """

    def __init__(self, users, outlier_detector=None):
        # Materialize first: ``users`` is walked twice below, and a one-shot
        # iterator would otherwise be exhausted after the first pass.
        self._users = list(users)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # column layout is reproducible from run to run (set order is not).
        self._policies = list(
            dict.fromkeys(pol for u in self._users for pol in u.policies)
        )
        if outlier_detector is None:
            outlier_detector = DBSCAN(
                eps=1,
                metric="euclidean",
                min_samples=3,
                n_jobs=-1,
            )
        self._outlier_detector = outlier_detector
        # Reshape so the matrix stays 2-D even when there are no users.
        self._user_policies = np.array(
            [self._policy_vec(u) for u in self._users], dtype=float
        ).reshape(len(self._users), len(self._policies))

    def _policy_vec(self, user):
        """Return the 0/1 float vector of ``user`` over ``self._policies``."""
        # Build the lookup set once instead of scanning user.policies per column.
        held = set(user.policies)
        return np.array([pol in held for pol in self._policies], dtype=float)

    def current_outliers(self):
        """Return outliers among input users

        Returns
        -------
        cluster_vals : [int]
            One value per input user, in input order, as produced by the
            detector's ``fit_predict``. Negative values correspond to
            outlier indexes among users. Empty when there are no users.
        """
        if not self._users:
            # Nothing to cluster; skip the detector, which is not guaranteed
            # to accept a matrix with zero rows.
            return np.empty(0, dtype=int)
        return self._outlier_detector.fit_predict(self._user_policies)
# Demo: run the detector over `users` (expected to be defined before this
# point) and show both the raw labels and the users flagged as outliers.
det = OutlierDetector(users)
outliers = det.current_outliers()
print(outliers)
# Keep only the users whose label at the same position is negative.
print([
    candidate
    for position, candidate in enumerate(users)
    if outliers[position] < 0
])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment