Skip to content

Instantly share code, notes, and snippets.

@robsannaa
Created March 17, 2019 10:21
Show Gist options
  • Save robsannaa/e33d3a83e929a79e44fc97f0aa58994e to your computer and use it in GitHub Desktop.
Save robsannaa/e33d3a83e929a79e44fc97f0aa58994e to your computer and use it in GitHub Desktop.
Two very simple functions to estimate the number of outliers according to the 3-sigma rule and quantile rule
import numpy as np
import pandas as pd
def get_quantile_outliers(series):
    """Find the outliers of *series* with the 1.5 * IQR (quartile fence) rule.

    A value counts as an outlier when it lies strictly below
    ``Q1 - 1.5 * IQR`` or strictly above ``Q3 + 1.5 * IQR``, where ``Q1`` and
    ``Q3`` are the 0.25 and 0.75 quantiles of the series and
    ``IQR = Q3 - Q1``. Missing values (NaN) are never reported, because every
    comparison against NaN is false.

    Args:
        series: A numeric ``pandas.Series``.

    Returns:
        A dict with the keys:

        * ``'lower_bound'`` -- the lower fence, ``Q1 - 1.5 * IQR``.
        * ``'upper_bound'`` -- the upper fence, ``Q3 + 1.5 * IQR``.
        * ``'number_of_outliers'`` -- how many values fall outside the fences.
        * ``'outliers_list'`` -- a NumPy array holding those values, in their
          original order.
    """
    # Compute each quartile once and reuse it for the IQR and both fences.
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    iqr = third_quartile - first_quartile
    lower_bound = first_quartile - (1.5 * iqr)
    upper_bound = third_quartile + (1.5 * iqr)

    # Strict comparisons on purpose: a value sitting exactly on a fence is
    # still inside it. With inclusive comparisons a constant series
    # (IQR == 0, both fences equal to the value) would flag every element.
    is_outlier = (series < lower_bound) | (series > upper_bound)
    outliers = series[is_outlier].values

    return {
        'lower_bound': lower_bound,
        'upper_bound': upper_bound,
        'number_of_outliers': len(outliers),
        'outliers_list': outliers,
    }
def get_3sigma_outliers(series):
    """Find the outliers of *series* with the 3-sigma rule.

    A value counts as an outlier when its absolute distance from the series
    mean is strictly greater than three times the series standard deviation
    (as returned by ``series.std()``).

    Args:
        series: A numeric ``pandas.Series``.

    Returns:
        A dict with the keys:

        * ``'outliers_list'`` -- a NumPy array of the values beyond three
          standard deviations from the mean, in their original order.
        * ``'number_of_outliers'`` -- the length of that array.
    """
    centre = series.mean()
    threshold = 3 * series.std()
    distance_from_centre = (series - centre).abs()
    flagged = series[distance_from_centre > threshold].values
    return {
        'outliers_list': flagged,
        'number_of_outliers': len(flagged),
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment