Skip to content

Instantly share code, notes, and snippets.

@jponf
Last active May 3, 2019 14:18
Show Gist options
  • Save jponf/9866d6648535df9a0a3a671165788643 to your computer and use it in GitHub Desktop.
Save jponf/9866d6648535df9a0a3a671165788643 to your computer and use it in GitHub Desktop.
Remove outliers from a numpy array using IQR
import numpy as np
# `data` is expected to be a numpy array
def filter_outliers_iqr(data, iqr_factor=1.5):
    """Return the elements of ``data`` that lie within the IQR fences.

    The fences are ``Q1 - iqr_factor * IQR`` and ``Q3 + iqr_factor * IQR``,
    where ``Q1``/``Q3`` are the 25th/75th percentiles of ``data`` and
    ``IQR = Q3 - Q1``. Both fences are inclusive.

    Args:
        data: numpy array of values to filter.
        iqr_factor: multiplier applied to the IQR to place the fences.

    Returns:
        The subset of ``data`` selected by boolean-mask indexing.
    """
    # NOTE: this file also defines a two-array variant under the same name
    # further down; if both live in one module the later definition wins.
    quartile_1, quartile_3 = np.percentile(data, (25, 75))
    iqr = quartile_3 - quartile_1
    lw_bound = quartile_1 - (iqr * iqr_factor)
    up_bound = quartile_3 + (iqr * iqr_factor)
    # Each comparison must be parenthesized: `&` binds tighter than `>=` and
    # `<=`, so the unparenthesized form evaluates `lw_bound & data` first.
    mask = (data >= lw_bound) & (data <= up_bound)
    return data[mask]
# same as above but adapted for datasets with features
# and target values (X, Y).
# `data_x` and `data_y` are expected to be numpy arrays
def filter_outliers_iqr(data_x, data_y, iqr_factor=1.5):
    """Drop samples whose target value falls outside the IQR fences.

    The fences are computed from ``data_y`` only:
    ``Q1 - iqr_factor * IQR`` and ``Q3 + iqr_factor * IQR``, where
    ``Q1``/``Q3`` are the 25th/75th percentiles of ``data_y`` and
    ``IQR = Q3 - Q1``. Both fences are inclusive. The same boolean mask is
    then applied to ``data_x`` and ``data_y`` so they stay aligned.

    Args:
        data_x: numpy array of features, indexed by the mask built from
            ``data_y``.
        data_y: numpy array of target values used to detect outliers.
        iqr_factor: multiplier applied to the IQR to place the fences.

    Returns:
        A ``(data_x[mask], data_y[mask])`` tuple.
    """
    # NOTE: this reuses the name of the single-array variant defined earlier
    # in this file; within one module this definition replaces that one.
    quartile_1, quartile_3 = np.percentile(data_y, (25, 75))
    iqr = quartile_3 - quartile_1
    lw_bound = quartile_1 - (iqr * iqr_factor)
    up_bound = quartile_3 + (iqr * iqr_factor)
    mask = (data_y >= lw_bound) & (data_y <= up_bound)
    return data_x[mask], data_y[mask]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment