drmingle/Detecting Outliers.md

## Detecting Outliers.md

      
    Raw
  

              Detecting Outliers.md
            
          
  title
  author
  date
  
  
  Detecting Outliers
  Damian Mingle
  06/10/2018
  
  
Preliminaries

# Load libraries
import numpy as np
from sklearn.covariance import EllipticEnvelope
from sklearn.datasets import make_blobs
Create Data

# Generate 255 observations with 3 features each, drawn around a single
# center; the fixed random_state keeps the draw reproducible. The second
# return value of make_blobs is not needed here and is discarded.
simulated_data, _ = make_blobs(n_samples=255, n_features=3, centers=1, random_state=1)

# Plant an obvious outlier: overwrite the first two features of the first
# observation with an extreme value
simulated_data[0, :2] = 99999
Detect Outliers

Using EllipticEnvelope forces you to specify a contamination parameter (the proportion of outliers you think are in the data), which is a significant limitation of this approach.
# Construct the detector with an assumed outlier proportion of 10% and fit it
# to the simulated data in one chained step (fit returns the estimator itself)
outlier_detector = EllipticEnvelope(contamination=0.1).fit(simulated_data)

# Label every observation: -1 marks a predicted outlier, 1 a predicted inlier.
# Left as the final expression so a notebook cell displays the resulting array.
outlier_detector.predict(simulated_data)
array([-1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1,
        1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1, -1,  1, -1,  1,  1,  1,
        1, -1,  1,  1,  1,  1,  1,  1, -1,  1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1])