338rajesh/remove_outliers_numpy.py

## remove_outliers_numpy.py
import numpy as np

def remove_outliers(arr, col_indices=None, k=1.5):
    """
        Removes outlier rows from the given 2D array using inter-quartile range.

    The columns in ``col_indices`` are processed one after another. For each
    column, rows whose value lies outside ``[Q1 - k * IQR, Q3 + k * IQR]``
    are dropped, so the quartiles of a later column are computed on the rows
    that survived the earlier columns.

    arr: array-like
        2D data; converted with ``np.asarray`` so nested lists are accepted
        as well as ``np.ndarray``.

    col_indices=[0,]
        These columns are used to find the outliers and
        the corresponding rows are removed.

    k=1.5
        Multiplier of the inter-quartile range that sets how far beyond the
        quartiles a value may lie before it is treated as an outlier.

    Returns the filtered array. The input is never modified; when no row is
    removed the returned array may be the input itself. One progress line
    with the current shape is printed per processed column.
    """
    arr = np.asarray(arr)
    col_indices = [0, ] if col_indices is None else col_indices
    for i in col_indices:
        ith_col = arr[:, i]
        # A single percentile call yields both quartiles.
        q25, q75 = np.percentile(ith_col, [25, 75])
        margin = k * (q75 - q25)
        mask = (ith_col >= q25 - margin) & (ith_col <= q75 + margin)
        # When no row passes the test (e.g. NaN bounds), keep the array as it
        # is instead of returning an empty result.
        if mask.any():
            arr = arr[mask]
        print(f"Based on column #{i}: Array shape changed to {arr.shape}")
    return arr


# Demo: 10000 x 3 samples drawn as 5 * standard-normal + 50, filtered on
# columns 1 and 2; the shapes before and after filtering are printed.
A_raw_data = 50 + (np.random.randn(10000, 3) * 5)
A_woo_data = remove_outliers(A_raw_data, [1, 2], 1.5)
print(A_raw_data.shape, A_woo_data.shape, sep="\n")
	import numpy as np

	def remove_outliers(arr, col_indices=None, k=1.5):
		"""
		Removes outlier rows from the given 2D array using inter-quartile range.

		The columns in ``col_indices`` are processed one after another. For each
		column, rows whose value lies outside ``[Q1 - k * IQR, Q3 + k * IQR]``
		are dropped, so the quartiles of a later column are computed on the rows
		that survived the earlier columns.

		arr: array-like
			2D data; converted with ``np.asarray`` so nested lists are accepted
			as well as ``np.ndarray``.

		col_indices=[0,]
			These columns are used to find the outliers and
			the corresponding rows are removed.

		k=1.5
			Multiplier of the inter-quartile range that sets how far beyond the
			quartiles a value may lie before it is treated as an outlier.

		Returns the filtered array. The input is never modified; when no row is
		removed the returned array may be the input itself. One progress line
		with the current shape is printed per processed column.
		"""
		arr = np.asarray(arr)
		col_indices = [0, ] if col_indices is None else col_indices
		for i in col_indices:
			ith_col = arr[:, i]
			# A single percentile call yields both quartiles.
			q25, q75 = np.percentile(ith_col, [25, 75])
			margin = k * (q75 - q25)
			mask = (ith_col >= q25 - margin) & (ith_col <= q75 + margin)
			# When no row passes the test (e.g. NaN bounds), keep the array as
			# it is instead of returning an empty result.
			if mask.any():
				arr = arr[mask]
			print(f"Based on column #{i}: Array shape changed to {arr.shape}")
		return arr


	# Demo: 10000 x 3 samples drawn as 5 * standard-normal + 50, filtered on
	# columns 1 and 2; the shapes before and after filtering are printed.
	A_raw_data = 50 + (np.random.randn(10000, 3) * 5)
	A_woo_data = remove_outliers(A_raw_data, [1, 2], 1.5)
	print(A_raw_data.shape, A_woo_data.shape, sep="\n")