Last active
May 3, 2019 14:18
-
-
Save jponf/9866d6648535df9a0a3a671165788643 to your computer and use it in GitHub Desktop.
Remove outliers from a numpy array using IQR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# `data` is expected to be a numpy array | |
def filter_outliers_iqr(data, iqr_factor=1.5):
    """Return the elements of `data` that lie inside the IQR fences.

    The fences are ``Q1 - iqr_factor * IQR`` and ``Q3 + iqr_factor * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of `data` and
    ``IQR = Q3 - Q1``. Both fences are inclusive.

    Parameters
    ----------
    data : numpy.ndarray
        Values to filter. Must support boolean-mask indexing.
    iqr_factor : float, optional
        Multiplier applied to the IQR to obtain the fence distance
        (default 1.5).

    Returns
    -------
    numpy.ndarray
        The elements of `data` within ``[lower fence, upper fence]``.
    """
    quartile_1, quartile_3 = np.percentile(data, (25, 75))
    iqr = quartile_3 - quartile_1
    lw_bound = quartile_1 - (iqr * iqr_factor)
    up_bound = quartile_3 + (iqr * iqr_factor)
    # Each comparison needs its own parentheses: `&` binds tighter than
    # `>=` / `<=`, so without them the expression is parsed as
    # `data >= (lw_bound & data) <= up_bound` and fails at runtime.
    mask = (data >= lw_bound) & (data <= up_bound)
    return data[mask]
# same as above but adapted for datasets with features | |
# and target values (X, Y). | |
# `data_x` and `data_y` are expected to be numpy arrays
def filter_outliers_iqr(data_x, data_y, iqr_factor=1.5):
    """Drop samples whose target value is an IQR outlier.

    The 25th and 75th percentiles of `data_y` define the interquartile
    range; a sample is kept when its target lies within
    ``[Q1 - iqr_factor * IQR, Q3 + iqr_factor * IQR]`` (inclusive).
    The same boolean mask is applied to `data_x` and `data_y`.

    Note: this definition shares its name with the single-array variant
    defined earlier in the file; when both live in one module, this one
    replaces it.

    Parameters
    ----------
    data_x : numpy.ndarray
        Features, indexed by the mask computed from `data_y`.
    data_y : numpy.ndarray
        Target values used to compute the fences.
    iqr_factor : float, optional
        Multiplier applied to the IQR (default 1.5).

    Returns
    -------
    tuple
        ``(data_x[mask], data_y[mask])``.
    """
    q1, q3 = np.percentile(data_y, (25, 75))
    spread = q3 - q1
    lower = q1 - (spread * iqr_factor)
    upper = q3 + (spread * iqr_factor)

    above_lower = data_y >= lower
    below_upper = data_y <= upper
    keep = above_lower & below_upper

    kept_x = data_x[keep]
    kept_y = data_y[keep]
    return kept_x, kept_y
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment