Skip to content

Instantly share code, notes, and snippets.

@omairaasim
Created September 23, 2020 05:47
Show Gist options
  • Save omairaasim/1f333ae4e7a1c2607f8b2a993d7e4fd5 to your computer and use it in GitHub Desktop.
Save omairaasim/1f333ae4e7a1c2607f8b2a993d7e4fd5 to your computer and use it in GitHub Desktop.
How to deal with outliers in Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"**How we can deal with outliers in Python** \n",
"\n",
" Price Rooms Square_Feet\n",
"0 632541 2 1600\n",
"1 425618 5 2850\n",
"2 356471 3 1780\n",
"3 7412512 100 90000\n",
"\n",
" Price Rooms Square_Feet\n",
"0 632541 2 1600\n",
"1 425618 5 2850\n",
"2 356471 3 1780\n",
"\n",
" Price Rooms Square_Feet Outlier\n",
"0 632541 2 1600 0\n",
"1 425618 5 2850 0\n",
"2 356471 3 1780 0\n",
"3 7412512 100 90000 1\n",
"\n",
" Price Rooms Square_Feet Outlier Log_Of_Square_Feet\n",
"0 632541 2 1600 0 7.377759\n",
"1 425618 5 2850 0 7.955074\n",
"2 356471 3 1780 0 7.484369\n",
"3 7412512 100 90000 1 11.407565\n"
]
}
],
"source": [
"## How we can deal with outliers in Python \n",
"def ProjectPro_Ex_38():\n",
" \n",
" print('**How we can deal with outliers in Python** ')\n",
"\n",
" # Loading library\n",
" import numpy as np\n",
" import pandas as pd\n",
"\n",
" # Creating DataFrame\n",
" farm = pd.DataFrame()\n",
" farm['Price'] = [632541, 425618, 356471, 7412512]\n",
" farm['Rooms'] = [2, 5, 3, 100]\n",
" farm['Square_Feet'] = [1600, 2850, 1780, 90000]\n",
" print(); print(farm)\n",
"\n",
" # Outlier Handling Option 1: Drop\n",
" # Dropping observations greater than some value\n",
" h = farm[farm['Rooms'] < 20]\n",
" print(); print(h)\n",
"\n",
" # Outlier Handling Option 2: Mark\n",
" # Creating feature based on boolean condition\n",
" farm['Outlier'] = np.where(farm['Rooms'] < 20, 0, 1)\n",
"\n",
" # Show data\n",
" print(); print(farm)\n",
"\n",
" # Outlier Handling Option 3: Rescale\n",
" # Taking the natural log of the feature\n",
" farm['Log_Of_Square_Feet'] = [np.log(x) for x in farm['Square_Feet']]\n",
"\n",
" # Show data\n",
" print(); print(farm)\n",
"\n",
"ProjectPro_Ex_38()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment