Skip to content

Instantly share code, notes, and snippets.

@jurand71
Created August 30, 2022 05:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jurand71/91715f1120351e0da0b3816d76449c37 to your computer and use it in GitHub Desktop.
Save jurand71/91715f1120351e0da0b3816d76449c37 to your computer and use it in GitHub Desktop.
def find_boundaries(df, variable, distance=3):
    """Return IQR-based lower and upper outlier limits for one column.

    The limits sit ``distance`` interquartile ranges below the first
    quartile and above the third quartile of ``df[variable]``.

    Args:
        df: Object indexable by column name whose columns expose
            ``.quantile()`` (e.g. a pandas DataFrame).
        variable: Name of the column to analyse.
        distance: Multiplier applied to the interquartile range. The
            default of 3 reproduces the original hard-coded behaviour.

    Returns:
        A ``(lower_boundary, upper_boundary)`` tuple.
    """
    # Compute each quartile once and reuse it for both the IQR and the limits.
    first_quartile = df[variable].quantile(0.25)
    third_quartile = df[variable].quantile(0.75)
    iqr = third_quartile - first_quartile

    lower_boundary = first_quartile - distance * iqr
    upper_boundary = third_quartile + distance * iqr
    return lower_boundary, upper_boundary
# find limits for WoodDeckSF
WoodDeckSF_lower_limit, WoodDeckSF_upper_limit = find_boundaries(df, 'WoodDeckSF')
# find limits for TotalBsmtSF
TotalBsmtSF_lower_limit, TotalBsmtSF_upper_limit = find_boundaries(df, 'TotalBsmtSF')
# find limits for GarageArea
GarageArea_lower_limit, GarageArea_upper_limit = find_boundaries(df, 'GarageArea')

# find outliers in variables: a row is flagged when its value lies strictly
# above the upper limit or strictly below the lower limit of that variable
outliers_WoodDeckSF = np.asarray(
    (df['WoodDeckSF'] > WoodDeckSF_upper_limit)
    | (df['WoodDeckSF'] < WoodDeckSF_lower_limit)
)
outliers_TotalBsmtSF = np.asarray(
    (df['TotalBsmtSF'] > TotalBsmtSF_upper_limit)
    | (df['TotalBsmtSF'] < TotalBsmtSF_lower_limit)
)
# The GarageArea mask gets its own name; it previously overwrote
# outliers_TotalBsmtSF, so TotalBsmtSF outliers were never removed.
outliers_GarageArea = np.asarray(
    (df['GarageArea'] > GarageArea_upper_limit)
    | (df['GarageArea'] < GarageArea_lower_limit)
)

# keep only the rows that are not an outlier in any of the three variables
df_removed_outliers = df.loc[
    ~(outliers_WoodDeckSF | outliers_TotalBsmtSF | outliers_GarageArea)
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment