Created
September 23, 2020 05:47
-
-
Save omairaasim/1f333ae4e7a1c2607f8b2a993d7e4fd5 to your computer and use it in GitHub Desktop.
How to deal with outliers in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"**How we can deal with outliers in Python** \n", | |
"\n", | |
" Price Rooms Square_Feet\n", | |
"0 632541 2 1600\n", | |
"1 425618 5 2850\n", | |
"2 356471 3 1780\n", | |
"3 7412512 100 90000\n", | |
"\n", | |
" Price Rooms Square_Feet\n", | |
"0 632541 2 1600\n", | |
"1 425618 5 2850\n", | |
"2 356471 3 1780\n", | |
"\n", | |
" Price Rooms Square_Feet Outlier\n", | |
"0 632541 2 1600 0\n", | |
"1 425618 5 2850 0\n", | |
"2 356471 3 1780 0\n", | |
"3 7412512 100 90000 1\n", | |
"\n", | |
" Price Rooms Square_Feet Outlier Log_Of_Square_Feet\n", | |
"0 632541 2 1600 0 7.377759\n", | |
"1 425618 5 2850 0 7.955074\n", | |
"2 356471 3 1780 0 7.484369\n", | |
"3 7412512 100 90000 1 11.407565\n" | |
] | |
} | |
], | |
"source": [ | |
"## How we can deal with outliers in Python \n", | |
"def ProjectPro_Ex_38():\n", | |
" \n", | |
" print('**How we can deal with outliers in Python** ')\n", | |
"\n", | |
" # Loading library\n", | |
" import numpy as np\n", | |
" import pandas as pd\n", | |
"\n", | |
" # Creating DataFrame\n", | |
" farm = pd.DataFrame()\n", | |
" farm['Price'] = [632541, 425618, 356471, 7412512]\n", | |
" farm['Rooms'] = [2, 5, 3, 100]\n", | |
" farm['Square_Feet'] = [1600, 2850, 1780, 90000]\n", | |
" print(); print(farm)\n", | |
"\n", | |
" # Outlier Handling Option 1: Drop\n", | |
" # Droping observations greater than some value\n", | |
" h = farm[farm['Rooms'] < 20]\n", | |
" print(); print(h)\n", | |
"\n", | |
" # Outlier Handling Option 2: Mark\n", | |
" # Creating feature based on boolean condition\n", | |
" farm['Outlier'] = np.where(farm['Rooms'] < 20, 0, 1)\n", | |
"\n", | |
" # Show data\n", | |
" print(); print(farm)\n", | |
"\n", | |
" # Outlier Handling Option 3: Rescale\n", | |
" # Log feature\n", | |
" farm['Log_Of_Square_Feet'] = [np.log(x) for x in farm['Square_Feet']]\n", | |
"\n", | |
" # Show data\n", | |
" print(); print(farm)\n", | |
"\n", | |
"ProjectPro_Ex_38()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment