Skip to content

Instantly share code, notes, and snippets.

@usmcamp0811
Created February 3, 2017 17:32
Show Gist options
  • Save usmcamp0811/868738a2eba9dbb38d43eb27607b2085 to your computer and use it in GitHub Desktop.
Save usmcamp0811/868738a2eba9dbb38d43eb27607b2085 to your computer and use it in GitHub Desktop.
A function to flag the outliers in a DataFrame field using the interquartile range (IQR)
import pandas as pd
import numpy as np
def is_outlier(value, p25, p75):
    """Return True when ``value`` lies outside the 1.5 * IQR fences.

    Args:
        value: The number to classify.
        p25: 25th percentile (first quartile) of the data.
        p75: 75th percentile (third quartile) of the data.

    Returns:
        True if ``value`` is strictly below ``p25 - 1.5 * IQR`` or strictly
        above ``p75 + 1.5 * IQR``; False otherwise.
    """
    iqr = p75 - p25
    lower = p25 - 1.5 * iqr
    upper = p75 + 1.5 * iqr
    # Strict comparisons: a value sitting exactly on a fence is still inside
    # the accepted range. With inclusive comparisons, constant data
    # (IQR == 0, so both fences equal the quartile) would be flagged wholesale.
    return value < lower or value > upper
def get_indices_of_outliers(values):
    """Return the positions of the IQR outliers in ``values``.

    Args:
        values: An iterable that also provides a ``quantile`` method
            (it is called with .25 and .75 to obtain the quartiles).

    Returns:
        A list of zero-based positions (iteration order, not index labels)
        of the elements that ``is_outlier`` classifies as outliers. The list
        is empty when there are none.
    """
    p25 = values.quantile(.25)
    p75 = values.quantile(.75)
    # The fence arithmetic lives in is_outlier; only the quartiles are
    # needed here.
    return [
        position
        for position, value in enumerate(values)
        if is_outlier(value, p25, p75)
    ]
def IQR_outliers(dataframe, field):
    """Label every row of ``dataframe`` as an IQR outlier or not for ``field``.

    A column named ``field + '_outlier'`` is added (or overwritten) holding
    the string 'Outlier' or 'Not Outlier' for each row.

    Args:
        dataframe: The DataFrame to annotate; it is modified in place.
        field: Name of the column whose values are checked.

    Returns:
        The same ``dataframe`` object, with the label column set.
    """
    # Operate on the frame that was passed in, not on a module-level variable.
    outlier_positions = set(get_indices_of_outliers(dataframe[field]))
    # get_indices_of_outliers reports row positions, so label rows by position
    # rather than by index label. This stays correct for non-default or
    # duplicated indexes.
    labels = [
        'Outlier' if position in outlier_positions else 'Not Outlier'
        for position in range(len(dataframe))
    ]
    dataframe[field + '_outlier'] = labels
    return dataframe
if __name__ == "__main__":
    # Demo: 100 rows of uniform random values in [0, 100) across three
    # columns, then print the frame with outlier labels for column 'C'.
    samples = np.random.uniform(0, 100, size=(100, 3))
    df = pd.DataFrame(samples, columns=['A', 'B', 'C'])
    print(IQR_outliers(df, 'C'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment