Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save alexrios22/c166720eabc6a655ab25ecbd108ffbf2 to your computer and use it in GitHub Desktop.
Save alexrios22/c166720eabc6a655ab25ecbd108ffbf2 to your computer and use it in GitHub Desktop.
Function to remove or cap outliers in columns of a `pandas.DataFrame`
def treatoutliers(self, df=None, columns=None, factor=1.5, method='IQR', treament='cap'):
    """
    Cap or remove outliers in the selected columns and store the result on ``self.df``.

    For every column an allowed interval ``[floor, ceil]`` is computed:

    * ``method='IQR'``: ``[Q1 - factor * IQR, Q3 + factor * IQR]``
    * ``method='STD'``: ``[mean - factor * std, mean + factor * std]``

    Columns are processed in order, so with ``treament='remove'`` the bounds
    of later columns are computed on the rows that survived earlier columns.

    :param df: DataFrame to treat; ``self.df`` is used when ``None``.
    :param columns: iterable of column labels; when ``None`` or empty,
        ``self.mandatory_cols_ + self.optional_cols_ + [self.target_col]`` is used.
    :param factor: multiplier applied to the IQR or the standard deviation.
    :param method: ``'IQR'`` or ``'STD'``.
    :param treament: ``'cap'`` clips values to the interval; ``'remove'`` drops
        rows outside it (rows that are NaN in the column fail the comparison
        and are dropped too). Any other value leaves the data unchanged.
        (The parameter name is kept as-is for backward compatibility.)
    :return: the treated DataFrame, which is also assigned to ``self.df``.
    :raises ValueError: if ``method`` is neither ``'IQR'`` nor ``'STD'``.
    """
    if method not in ('IQR', 'STD'):
        # Previously an unknown method surfaced as a NameError on floor/ceil.
        raise ValueError("method must be 'IQR' or 'STD', got %r" % (method,))

    # list() avoids the ambiguous truth value of pandas Index / numpy arrays.
    columns = [] if columns is None else list(columns)
    if not columns:
        columns = self.mandatory_cols_ + self.optional_cols_ + [self.target_col]

    # `not df` raises ValueError for a DataFrame, so compare against None.
    if df is None:
        df = self.df

    if treament == 'cap':
        # Work on a copy: capping assigns columns, which would otherwise
        # mutate the caller's frame (or warn when it is a slice of another).
        df = df.copy()

    for column in columns:
        values = df[column]
        if method == 'STD':
            spread = factor * values.std()
            center = values.mean()
            floor, ceil = center - spread, center + spread
        else:  # 'IQR'
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            iqr = q3 - q1
            floor, ceil = q1 - factor * iqr, q3 + factor * iqr

        if treament == 'remove':
            df = df[(values >= floor) & (values <= ceil)]
        elif treament == 'cap':
            df[column] = values.clip(floor, ceil)

    self.df = df
    return df
def treatoutliers(df, columns=None, factor=1.5, method='IQR', treament='cap'):
    """
    Cap or remove outliers in the selected columns of ``df``.

    For every column an allowed interval ``[floor, ceil]`` is computed:

    * ``method='IQR'``: ``[Q1 - factor * IQR, Q3 + factor * IQR]``
    * ``method='STD'``: ``[mean - factor * std, mean + factor * std]``

    Columns are processed in order, so with ``treament='remove'`` the bounds
    of later columns are computed on the rows that survived earlier columns.

    :param df: DataFrame to treat; it is not modified.
    :param columns: iterable of column labels; when ``None`` or empty, every
        column of ``df`` is used (each must support ``quantile``/``std``).
    :param factor: multiplier applied to the IQR or the standard deviation.
    :param method: ``'IQR'`` or ``'STD'``.
    :param treament: ``'cap'`` clips values to the interval; ``'remove'`` drops
        rows outside it (rows that are NaN in the column fail the comparison
        and are dropped too). Any other value leaves the data unchanged.
        (The parameter name is kept as-is for backward compatibility.)
    :return: the treated DataFrame.
    :raises ValueError: if ``method`` is neither ``'IQR'`` nor ``'STD'``.
    """
    if method not in ('IQR', 'STD'):
        # Previously an unknown method surfaced as a NameError on floor/ceil.
        raise ValueError("method must be 'IQR' or 'STD', got %r" % (method,))

    # list() avoids the ambiguous truth value of pandas Index / numpy arrays.
    columns = [] if columns is None else list(columns)
    if not columns:
        columns = list(df.columns)

    if treament == 'cap':
        # Work on a copy: capping assigns columns, which would otherwise
        # mutate the caller's frame (or warn when it is a slice of another).
        df = df.copy()

    for column in columns:
        values = df[column]
        if method == 'STD':
            spread = factor * values.std()
            center = values.mean()
            floor, ceil = center - spread, center + spread
        else:  # 'IQR'
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            iqr = q3 - q1
            floor, ceil = q1 - factor * iqr, q3 + factor * iqr

        if treament == 'remove':
            df = df[(values >= floor) & (values <= ceil)]
        elif treament == 'cap':
            df[column] = values.clip(floor, ceil)

    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment