Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save AllieUbisse/4676dd20f33196de61f7e0750ab80c44 to your computer and use it in GitHub Desktop.
Save AllieUbisse/4676dd20f33196de61f7e0750ab80c44 to your computer and use it in GitHub Desktop.
Outlier-Detection-3WAYS-Methods.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Outlier-Detection-3WAYS-Methods.ipynb",
"provenance": [],
"collapsed_sections": [
"Y41mCGlZAk-s",
"lqLrIC7RBVVc",
"NQmEUGBABmwb",
"jwZuNSz0VG09",
"VvY2jmV4ZM6r",
"QTCBRuExaiqM",
"X8C_qMutc1V5"
],
"authorship_tag": "ABX9TyMhPr928L/rLGxIl7e68RkB",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/AllieUbisse/4676dd20f33196de61f7e0750ab80c44/outlier-detection-3ways-methods.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Y41mCGlZAk-s",
"colab_type": "text"
},
"source": [
"# **1. Library Imports**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ODlzYMLK_6kL",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from scipy.stats import norm\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "lqLrIC7RBVVc",
"colab_type": "text"
},
"source": [
"# **2. Import Dataset**\n",
"---\n",
"\n",
"source: [New York City Airbnb Open Data](https://www.kaggle.com/dgomonov/new-york-city-airbnb-open-data)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "npW-YRWCA9AW",
"colab_type": "code",
"colab": {}
},
"source": [
"bnb = pd.read_csv('/content/AB_NYC_2019.csv')\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "NQmEUGBABmwb",
"colab_type": "text"
},
"source": [
"# **3. Outlier Detection Using Percentiles (Airbnb)**\n",
"---\n",
"Assume `price` is the listing price per night."
]
},
{
"cell_type": "code",
"metadata": {
"id": "ck-e-iC6BxsT",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 401
},
"outputId": "7be52dfb-d49f-40ba-9101-996d84a70834"
},
"source": [
"bnb.head()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>host_id</th>\n",
" <th>host_name</th>\n",
" <th>neighbourhood_group</th>\n",
" <th>neighbourhood</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>room_type</th>\n",
" <th>price</th>\n",
" <th>minimum_nights</th>\n",
" <th>number_of_reviews</th>\n",
" <th>last_review</th>\n",
" <th>reviews_per_month</th>\n",
" <th>calculated_host_listings_count</th>\n",
" <th>availability_365</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2539</td>\n",
" <td>Clean &amp; quiet apt home by the park</td>\n",
" <td>2787</td>\n",
" <td>John</td>\n",
" <td>Brooklyn</td>\n",
" <td>Kensington</td>\n",
" <td>40.64749</td>\n",
" <td>-73.97237</td>\n",
" <td>Private room</td>\n",
" <td>149</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>2018-10-19</td>\n",
" <td>0.21</td>\n",
" <td>6</td>\n",
" <td>365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2595</td>\n",
" <td>Skylit Midtown Castle</td>\n",
" <td>2845</td>\n",
" <td>Jennifer</td>\n",
" <td>Manhattan</td>\n",
" <td>Midtown</td>\n",
" <td>40.75362</td>\n",
" <td>-73.98377</td>\n",
" <td>Entire home/apt</td>\n",
" <td>225</td>\n",
" <td>1</td>\n",
" <td>45</td>\n",
" <td>2019-05-21</td>\n",
" <td>0.38</td>\n",
" <td>2</td>\n",
" <td>355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3647</td>\n",
" <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
" <td>4632</td>\n",
" <td>Elisabeth</td>\n",
" <td>Manhattan</td>\n",
" <td>Harlem</td>\n",
" <td>40.80902</td>\n",
" <td>-73.94190</td>\n",
" <td>Private room</td>\n",
" <td>150</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3831</td>\n",
" <td>Cozy Entire Floor of Brownstone</td>\n",
" <td>4869</td>\n",
" <td>LisaRoxanne</td>\n",
" <td>Brooklyn</td>\n",
" <td>Clinton Hill</td>\n",
" <td>40.68514</td>\n",
" <td>-73.95976</td>\n",
" <td>Entire home/apt</td>\n",
" <td>89</td>\n",
" <td>1</td>\n",
" <td>270</td>\n",
" <td>2019-07-05</td>\n",
" <td>4.64</td>\n",
" <td>1</td>\n",
" <td>194</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5022</td>\n",
" <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
" <td>7192</td>\n",
" <td>Laura</td>\n",
" <td>Manhattan</td>\n",
" <td>East Harlem</td>\n",
" <td>40.79851</td>\n",
" <td>-73.94399</td>\n",
" <td>Entire home/apt</td>\n",
" <td>80</td>\n",
" <td>10</td>\n",
" <td>9</td>\n",
" <td>2018-11-19</td>\n",
" <td>0.10</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id ... availability_365\n",
"0 2539 ... 365\n",
"1 2595 ... 355\n",
"2 3647 ... 365\n",
"3 3831 ... 194\n",
"4 5022 ... 0\n",
"\n",
"[5 rows x 16 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ShT93U79HgPK",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 287
},
"outputId": "d1402857-e666-4975-a9dd-d59817420d2b"
},
"source": [
"bnb[['price', 'minimum_nights','number_of_reviews', 'reviews_per_month']].describe()"
],
"execution_count": 56,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>price</th>\n",
" <th>minimum_nights</th>\n",
" <th>number_of_reviews</th>\n",
" <th>reviews_per_month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>48895.000000</td>\n",
" <td>48895.000000</td>\n",
" <td>48895.000000</td>\n",
" <td>38843.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>152.720687</td>\n",
" <td>7.029962</td>\n",
" <td>23.274466</td>\n",
" <td>1.373221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>240.154170</td>\n",
" <td>20.510550</td>\n",
" <td>44.550582</td>\n",
" <td>1.680442</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.010000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>69.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.190000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>106.000000</td>\n",
" <td>3.000000</td>\n",
" <td>5.000000</td>\n",
" <td>0.720000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>175.000000</td>\n",
" <td>5.000000</td>\n",
" <td>24.000000</td>\n",
" <td>2.020000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>10000.000000</td>\n",
" <td>1250.000000</td>\n",
" <td>629.000000</td>\n",
" <td>58.500000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" price minimum_nights number_of_reviews reviews_per_month\n",
"count 48895.000000 48895.000000 48895.000000 38843.000000\n",
"mean 152.720687 7.029962 23.274466 1.373221\n",
"std 240.154170 20.510550 44.550582 1.680442\n",
"min 0.000000 1.000000 0.000000 0.010000\n",
"25% 69.000000 1.000000 1.000000 0.190000\n",
"50% 106.000000 3.000000 5.000000 0.720000\n",
"75% 175.000000 5.000000 24.000000 2.020000\n",
"max 10000.000000 1250.000000 629.000000 58.500000"
]
},
"metadata": {
"tags": []
},
"execution_count": 56
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "9m2kReN0DNzB",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "232ed44d-f219-4703-dfbf-081fc9f6c2dd"
},
"source": [
"sns.boxplot(x='price', data=bnb)"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7ff28eb262b0>"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWEAAAEGCAYAAAC0DiQ1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAASKUlEQVR4nO3df4zUdX7H8deb3RX5cSqiMRyeXc2IgdS0J2sCtmk2FoTdbcQ/NOU07tpWKNAA1ZgGvU1cGv/w2qYpR9vj9KxCS09be6lGBANXNVqC5+4VVqsio8dVUOoyaSUeXV3WT/+Yz8x9Z5iBHXZm38Pu85Fs+M7n+5nv5/35foYX3/3O7mAhBAEAfEzyLgAAJjJCGAAcEcIA4IgQBgBHhDAAOGqspPNll10Wmpuba1QKAIxPfX19x0MIl5faV1EINzc3q7e3tzpVAcAEYWY/L7eP2xEA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCIEAYAR4QwADiq6P+YG63Nmzfr1VdflSTNnj1bqVRKa9euHcsSAKCujGkIp9NpDRzPSA2NGvifE2M5NADUpTENYUlSQ6OGp84c82EBoB5xTxgAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABw1jsUgmzdvrqjf2rVra1kOANSNMQnhdDpd1X4AMF5wOwIAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjuoqhA8cOKADBw6otbW1Zl+LFi3Kb3d0dBTsW7x4sZYsWaIlS5bojjvuKNj3+OOPq7W1VU888YQ6Ozvz7cuWLVNra6uef/55rVixQm1tberr68vvX7JkidLptDo6OpROp5XJZLRy5cp8v9WrV2vNmjVKp9Nat26dMpmMHnroIbW2turWW29Vb2+v2tratHLlynyfdDpd8Lw1a9Zo9erVymQyeuCBB9Ta2qoNGzYUnNtMJlPQr5xcrcnacv2T+3K1ZjKZ/HZvb69uvvlm9fX15dvT6bTuvfdetbe3K51On3XcM/UpZ/v27WptbdXTTz9dUE9y7sVt1VJp3eXO10j6V0u5mkcy1mjrqeVa1Ery9VULDT09PSPu/Nhjj/WsXLmy4kF27dqV3z726YBC01RJ0tdnTFdbW1t+31NPPVXxsSv11Vdf5beHhoZO2zc8PKzh4WGdPHmyYN9bb70lServ79dnn32Wb//iiy8kSfv27VMmk9GpU6e0d+9effnll5Kk4eFh9ff3a2BgQP39/Tp27Jj27t2b7/fxxx/n973//vsaHBzUnj178sfeu3evTp48qUwmk+/T39+vdDqdf96hQ4d0/PhxffHFF3r55ZclSUeOHNE999yTr3PLli16/fXX8/0WLlxY8vzcf//9GhgYKKgt1z+578iRIxocHNT+/fv12muvaXBwUE8++WS+5hMnTui1115Tf3+/PvzwQw0NDam/v1+33XbbGcc9U59y1q1bJ0nq7e3V4OBgvp7cHLds2XJaW7VUWneyluT5uvPOO8/av1q1l6t5JGONtp5ark
WtJF9fyb9Tldi4ceMnPT09j5XaVzdXwq2trd4ljEoIIb/9+eefF+w7fPhw/s8dO3aU7Hf48GGFEPTcc88VPLdUn9zxkseWdNpzc1fDmUym4B/CnTt3lrwSSafT+eMlx925c6f6+voK9oUQtHPnTu3atUshBL344ov553z++ed68cUXS9Za6ooxOW65PuVs37694PGOHTsUQtCuXbvyV+q5GnNt1VJp3claduzYUXC+Sl0N16L2cjWPZKzR1lPLtaiV4tdXLa6GxySEjx49qnQ6nV3wr4azAw+eUDqd1vr167V+/fqxKKMuDA8Pj9lY+/btkyRt3bq14Kp/aGhI27ZtO63/I488UvI4Q0NDevjhh0u2545b/F3FqVOnSh6r1BjFbeXqKOXxxx8veJw7v8PDw9q2bZu2bt2a/+4n11YtldadrKX4/JQ6v7WovVzNIxlrtPXUci1qpfj1tWXLlqqPcdYQNrOVZtZrZr0DAwNVLwC1t2fPnoIr9RCCdu/efVq/5FVrUgjhtKv7XHvyuCNRaozitnJ1VOLUqVPavXu39uzZkw+8XFu1VFp3spZipc5vLWovV/NIxhptPbVci/PZWUM4hPBYCKElhNBy+eWXn9Mgs2fPViqVUiqVkiY1SJK+uvAipVIpbdq0SZs2bTqn42JkFi1aJDPLPzYzLV68+LR+zc3NJZ9vZpo+fXrJ9uRxR6LUGMVt5eqoRGNjoxYvXqxFixapsbGxoK1aKq07WUuxUue3FrWXq3kkY422nlquxfmsbu4JTxQNDQ1jNtaCBQskSV1dXWpqasq3NzU1qbOz87T+3d3dJY/T1NSkjRs3lmzPHTd5fEllw6bUGMVt5eooZcWKFQWPc+e3oaFBnZ2d6urq0qRJkwraqqXSupO1FJ+fUue3FrWXq3kkY422nlquRa0Uv75WrVpV9THqJoRfeeUV7xJGJXlFWHxVk7vaaG5uVkdHR8l+zc3NMjMtW7as4Lml+iSvZpLbxc999NFHJUkzZ87U0qVL8+1tbW2aOXPmaXNIpVL54yXHbWtr0/z58wv2mZna2tq0dOlSmZna29vzz5k+fbra29tL1ppKpc44brk+5dx1110Fjzs6OmRmWrp0qWbOnJmfe7KtWiqtO1lLR0dHwfmaP3/+GftXq/ZyNY9krNHWU8u1qJXi19fy5curPkbdhPBYSV6BTJs2rWBfU1OTJk+erMmTJ6v41ktuMe6++25dddVV+faLL75YknTffffp2muv1ZQpUwquaiZPnqzu7m5NmzZN3d3d6urq0pw5c/L95s6dq3nz5qm7u1vXX3+9Ojs7ddNNN0mSLrroIvX09GjKlCmaM2dOvk93d3fB8+bNm6e5c+eqs7NTLS0tkn55FZzT1dVV0K+cXK3J2nL9k/tytXZ1deW3e3p6NGnSJG3cuDHf3t3drVQqpalTp57xSjF5jiqVu1pZtWpVQT3JuRe3VUuldZc7XyPpXy3lah7JWKOtp5ZrUSvJ11ctWCVvrLS0tITe3t6KB0n+9MP+t9/R8NTsv4Dzr7mi4H5wrh/3iAGMJ2bWF0JoKbVvwl0JA0A9IYQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcNY7FIKlUSpKUTqdH1A8AJooxCeG1a9dKktavXz+ifgAwUXA7AgAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCIEAYAR4QwADgihAHAESEMAI4IYQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOC
KEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4CjxjEfcfiUGk5mJJmkK8Z8eACoJ2MawqlUSkePHpUkzZ49W6lUaiyHB4C6YyGEEXduaWkJvb29NSwHAMYfM+sLIbSU2sc9YQBwRAgDgCNCGAAcEcIA4IgQBgBHhDAAOCKEAcARIQwAjghhAHBECAOAI0IYABwRwgDgiBAGAEeEMAA4IoQBwBEhDACOCGEAcEQIA4AjQhgAHBHCAOCoov/o08wGJP38HMe6TNLxc3zu+Yo5TwwTbc4Tbb7S6Of8KyGEy0vtqCiER8PMesv9b6PjFXOeGCbanCfafKXazpnbEQDgiBAGAEdjGcKPjeFY9YI5TwwTbc4Tbb5SDec8ZveEAQCn43YEADgihAHAUc1D2MyWmtlBM0ub2YZaj1dLZvYNM3vZzN4xs/80s/Wx/VIz221mh+KfM2K7mdl349z7zeyGxLG6Yv9DZtblNaeRMrMGM/sPM3shPr7azN6Ic3vGzC6I7ZPj43Tc35w4xoOx/aCZLfGZyciY2SVm9qyZvWdm75rZwvG+zmZ2X3xdv21mPzSzC8fbOpvZ35nZp2b2dqKtautqZvPN7K34nO+amZ21qBBCzb4kNUj6QNI1ki6QdEDSvFqOWeP5zJJ0Q9z+mqT3Jc2T9GeSNsT2DZK+E7fbJe2UZJIWSHojtl8q6cP454y4PcN7fmeZ+/2S/lHSC/HxP0laHre3SFodt9dI2hK3l0t6Jm7Pi+s/WdLV8XXR4D2vM8x3q6R74/YFki4Zz+ssabakn0makljfe8bbOkv6LUk3SHo70Va1dZX0k9jX4nPbzlpTjSe8UNJLiccPSnrQeyGqOL/nJC2WdFDSrNg2S9LBuP19Sd9K9D8Y939L0vcT7QX96u1L0pWSfizpZkkvxBfYcUmNxess6SVJC+N2Y+xnxWuf7FdvX5IujoFkRe3jdp1jCH8Ug6UxrvOS8bjOkpqLQrgq6xr3vZdoL+hX7qvWtyNyC5tzJLad9+K3X9+U9IakK0IIn8RdxyRdEbfLzf98Oy9/JelPJH0VH8+U9L8hhFPxcbL+/Nzi/s9i//NpzldLGpD0ZLwF8wMzm6ZxvM4hhKOS/kLSf0n6RNl169P4Xuecaq3r7Lhd3H5GvDF3DsxsuqR/kfTHIYQTyX0h+0/guPm5PzP7HUmfhhD6vGsZQ43Kfsv6vRDCNyX9QtlvU/PG4TrPkLRM2X+Avi5pmqSlrkU58FjXWofwUUnfSDy+Mradt8ysSdkA3h5C+FFs/m8zmxX3z5L0aWwvN//z6bz8hqRbzeywpKeVvSWxSdIlZtYY+yTrz88t7r9YUkbn15yPSDoSQngjPn5W2VAez+u8SNLPQggDIYQhST9Sdu3H8zrnVGtdj8bt4vYzqnUIvynp2vgO6wXK3sB/vsZj1kx8p/MJSe+GEP4yset5Sbl3SLuUvVeca++M77IukPRZ/LbnJUm3mNmMeAVyS2yrOyGEB0MIV4YQmpVdv38LIdwl6WVJt8duxXPOnYvbY/8Q25fHd9WvlnStsm9i1J0QwjFJH5nZdbHptyW9o3G8zsrehlhgZlPj6zw353G7zglVWde474SZLYjnsDNxrPLG4CZ4u7I/RfCBpG9735Qf5Vx+U9lvVfol7Y9f7creC/uxpEOS9ki6NPY3SX8T5/6WpJbEsX5fUjp+/Z733EY4/1b98qcjrlH2L1da0j9LmhzbL4yP03H/NYnnfzuei4MawbvGznP9dUm9ca3/Vdl3wcf1OkvaKOk9SW9L+ntlf8JhXK2zpB8qe897SNnveP6gmusqqSWevw8k/bWK3twt9cWvLQOAI96YAwBHhDAAOCKEAcARIQwAjghhAHBECOO8Z2Z/amaLvOsAzgU/oobzmpk1hBCGvesAzhVXwqhbZtYcP893e/xM32fjb3QdNrPvmNlPJd1hZk+Z2e3xOTea2V4zO2BmPz
Gzr1n2s5D/3MzejJ8L+4fOUwPyCGHUu+sk/W0IYa6kE8p+jq0kZUIIN4QQns51jL8a/4yk9SGEX1P28xD+T9nfivoshHCjpBslrYi/Ugu4I4RR7z4KIfx73P4HZX91XMqGbbHrJH0SQnhTkkIIJ0L2YxZvUfYzAPYr+9GjM5X9TAPAXePZuwCuit+0yD3+RQXHMElrQwj1+uE5mMC4Eka9u8rMFsbtOyW9foa+ByXNMrMbJSneD25U9lOvVsePIZWZzYkf0g64I4RR7w5K+iMze1fZTzL7XrmOIYQvJf2upM1mdkDSbmU/7esHyn4s40/jf/D4ffFdIOoEP6KGuhX/C6kXQgi/6lwKUDNcCQOAI66EAcARV8IA4IgQBgBHhDAAOCKEAcARIQwAjv4f/hBNri7JG6gAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "74O8VjkXJf8T",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "871b0ff0-a8a3-451f-e045-e73d06db1eed"
},
"source": [
"sns.distplot(bnb.price, bins=10, )"
],
"execution_count": 97,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7ff28dc05588>"
]
},
"metadata": {
"tags": []
},
"execution_count": 97
},
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "6YWWbaXfDrYx",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "11bedaf4-d910-4e35-bf99-b7f993a897cb"
},
"source": [
"# set threshold values: 1st and 90th percentiles of price\n",
"min_price_threshold, max_price_treshold = bnb.price.quantile([0.01, 0.90])\n",
"min_price_threshold, max_price_treshold"
],
"execution_count": 90,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(30.0, 269.0)"
]
},
"metadata": {
"tags": []
},
"execution_count": 90
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Vsbg4MN9ERnS",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "84c31a19-936b-4ca0-b117-72684d42ef2d"
},
"source": [
"# count the listings priced below the lower threshold\n",
"bnb[bnb.price<min_price_threshold].price.value_counts().sum()"
],
"execution_count": 91,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"404"
]
},
"metadata": {
"tags": []
},
"execution_count": 91
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "8uMQU4srFSHf",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "53647172-8f60-46f8-c1cb-0fb53a5b966f"
},
"source": [
"# count the listings priced above the upper threshold\n",
"bnb[bnb.price>max_price_treshold].price.value_counts().sum()"
],
"execution_count": 92,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"4878"
]
},
"metadata": {
"tags": []
},
"execution_count": 92
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-PLMXlcBGFgO",
"colab_type": "code",
"colab": {}
},
"source": [
"bnb_no_outliers = bnb[(bnb.price>min_price_threshold) & (bnb.price<max_price_treshold) ]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gMKUAoMYGrr-",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "1fd82e55-3b9a-4041-f2f4-2ce558818a5c"
},
"source": [
"bnb.shape, bnb_no_outliers.shape"
],
"execution_count": 94,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((48895, 16), (43325, 16))"
]
},
"metadata": {
"tags": []
},
"execution_count": 94
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nPgHbSDUG3kI",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "d51e32ba-ebeb-496d-d0e6-b3ab0a9c0039"
},
"source": [
"sns.boxplot(x='price', data=bnb_no_outliers)"
],
"execution_count": 95,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7ff28d8a1a90>"
]
},
"metadata": {
"tags": []
},
"execution_count": 95
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAEGCAYAAABbzE8LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAJ+klEQVR4nO3dX4jl91nH8c+TXW03GrHpliWM1W0drYqgLhupUHpVlOYmCoriRXshVvwzrBdeVHpTvKuiEAcVoxarFiv4B4tUahVBtNh2N2zStJvYo7bUIU3SBpLirq1Nv16c3+IwzGx2kjPnmTPn9YJhzv7m7JznO/vdN+f8zsyZGmMEgOW7o3sAgHUlwABNBBigiQADNBFggCanD3Pls2fPjvPnzx/RKAAn05UrVz4/xnjV3uOHCvD58+dz+fLlxU0FsAaq6jP7HXcKAqCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCaH+p1w62J7ezuz2ax7jEPZ2dlJkmxsbDRP0mdzczNbW1vdY8BtE+B9zGazXH30Wp6/8+7uUW7bqevPJkk+96X1/Cc9df2Z7hHg0Nbzf+tteP7Ou3PjO+7rHuO2nXnsA0myUjMv0s31wypxDhigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoMlSAry9vZ3t7e1l3BTAQh1lv04fyWfdYzabLeNmABbuKPvlFARAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQJPTy7iRnZ2d3LhxI5cuXVrGzb1ks9ksd3x5dI/BIdzxP89lNvviyuwxVsdsNsuZM2eO5HO/4D3gqnpbVV2uqstPP/30kQwBsI5e8B7wGOPBJA8mycWLF1/U3cKNjY0kyQMPPPBi/vrSXbp0KVf+48nuMTiEr778G7L52nMrs8dYHUf5qMo5YIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0OT0Mm5kc3NzGTcDsHBH2a+lBHhra2sZNwOwcEfZL6cgAJoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNDkdPcAx9Wp68/kzGMf6B7jtp26/oUkWamZF+nU9WeSnOseAw5FgPexubnZPcKh7ex8JUmysbGuETq3kv9urDcB3sfW1lb3CMAacA4YoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0KTGGLd/5aqnk3zm6MZ5Sc4m+Xz3EM3W/Wtg/dZ/XNf/LW
OMV+09eKgAH2dVdXmMcbF7jk7r/jWwfutftfU7BQHQRIABmpykAD/YPcAxsO5fA+tfbyu3/hNzDhhg1Zyke8AAK0WAAZqsbICr6tNV9fGqulpVl6djd1fVh6rqU9P7V3TPuShV9e6qeqqqHt11bN/11txvVtWsqh6pqgt9ky/GAet/Z1XtTHvgalXdt+tjvzyt//Gq+qGeqRenql5dVf9YVZ+sqk9U1aXp+FrsgVusf7X3wBhjJd+SfDrJ2T3HfjXJ26fLb0/yru45F7jeNya5kOTRF1pvkvuS/G2SSvL6JB/pnv+I1v/OJL+0z3W/K8nDSV6W5DVJ/j3Jqe41vMT135PkwnT5riT/Nq1zLfbALda/0ntgZe8BH+D+JO+ZLr8nyQ83zrJQY4x/SvLMnsMHrff+JH805v41yTdW1T3LmfRoHLD+g9yf5H1jjC+NMf4zySzJ9x/ZcEswxnhijPHQdPmLSa4l2cia7IFbrP8gK7EHVjnAI8nfVdWVqnrbdOzcGOOJ6fLnkpzrGW1pDlrvRpLP7rref+XWm3WV/cL0EPvdu045nej1V9X5JN+X5CNZwz2wZ/3JCu+BVQ7wG8YYF5K8OcnPV9Ubd39wzB+HrM332K3beie/k+Rbk3xvkieS/HrvOEevqr4+yV8k+cUxxnO7P7YOe2Cf9a/0HljZAI8xdqb3TyX5q8wfXjx582HW9P6pvgmX4qD17iR59a7rfdN07EQZYzw5xnh+jPHVJL+X/3+IeSLXX1Vfk3l83jvG+Mvp8Nrsgf3Wv+p7YCUDXFVfV1V33byc5AeTPJrk/UneOl3trUn+umfCpTlove9P8pbpmfDXJ3l218PUE2PPOc0fyXwPJPP1/0RVvayqXpPk25J8dNnzLVJVVZI/SHJtjPEbuz60FnvgoPWv/B7ofhbwxbwleW3mz3A+nOQTSd4xHX9lkn9I8qkkf5/k7u5ZF7jmP838Idb/Zn4+66cOWm/mz3z/VubP/H48ycXu+Y9o/X88re+RzP/D3bPr+u+Y1v94kjd3z7+A9b8h89MLjyS5Or3dty574BbrX+k94EeRAZqs5CkIgJNAgAGaCDBAEwEGaCLAAE0EmJVXVb9SVW/qngMOy7ehsdKq6tQY4/nuOeDFcA+YY6uqzlfVY1X13qq6VlV/XlV3Tq8F/a6qeijJj1XVH1bVj05/596q+nBVPVxVH62qu6rqVFX9WlV9bHrRlp9pXhokEWCOv9cl+e0xxncmeS7Jz03HvzDGuDDGeN/NK1bV1yb5sySXxhjfk+RNSW5k/lNzz44x7k1yb5Kfnn48FVoJMMfdZ8cY/zJd/pPMfyQ1mYd2r9cleWKM8bEkGWM8N8b4SuavFfKWqrqa+UsYvjLz1waAVqe7B4AXsPdJipt//u9DfI5KsjXG+OBiRoLFcA+Y4+6bq+oHpss/meSfb3Hdx5PcU1X3Jsl0/vd0kg8m+dnp5QxTVd8+vYoetBJgjrvHM3/B/WtJXpH5C3Dva4zx5SQ/nmS7qh5O8qEkL0/y+0k+meSh6Zd6/m48+uMY8G1oHFvTr575mzHGdzePAkfCPWCAJu4BAzRxDxigiQADNBFggCYCDNBEgAGa/B8quxjd1RvOEQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Km5wR5QtJrsj",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "39966afb-6ad3-45a0-9d6e-322acaf66197"
},
"source": [
"sns.distplot(bnb_no_outliers.price, bins=10)"
],
"execution_count": 96,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7ff28d7cedd8>"
]
},
"metadata": {
"tags": []
},
"execution_count": 96
},
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "YtNwvtBsTZSG",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Nm-NTqEFTa9-",
"colab_type": "text"
},
"source": [
"# **Exercise 2**\n",
"---\n",
"source: [house prices](https://raw.githubusercontent.com/codebasics/py/master/ML/FeatureEngineering/2_outliers_z_score/Exercise/bhp.csv)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jwZuNSz0VG09",
"colab_type": "text"
},
"source": [
"### Import dataset"
]
},
{
"cell_type": "code",
"metadata": {
"id": "CXc1NgdXTzsW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 197
},
"outputId": "a9b0f9f5-6532-4e86-9cf0-4efe1cb2d6a3"
},
"source": [
"house = pd.read_csv('https://raw.githubusercontent.com/codebasics/py/master/ML/FeatureEngineering/2_outliers_z_score/Exercise/bhp.csv')\n",
"house.head()"
],
"execution_count": 99,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>size</th>\n",
" <th>total_sqft</th>\n",
" <th>bath</th>\n",
" <th>price</th>\n",
" <th>bhk</th>\n",
" <th>price_per_sqft</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Electronic City Phase II</td>\n",
" <td>2 BHK</td>\n",
" <td>1056.0</td>\n",
" <td>2.0</td>\n",
" <td>39.07</td>\n",
" <td>2</td>\n",
" <td>3699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Chikka Tirupathi</td>\n",
" <td>4 Bedroom</td>\n",
" <td>2600.0</td>\n",
" <td>5.0</td>\n",
" <td>120.00</td>\n",
" <td>4</td>\n",
" <td>4615</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Uttarahalli</td>\n",
" <td>3 BHK</td>\n",
" <td>1440.0</td>\n",
" <td>2.0</td>\n",
" <td>62.00</td>\n",
" <td>3</td>\n",
" <td>4305</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Lingadheeranahalli</td>\n",
" <td>3 BHK</td>\n",
" <td>1521.0</td>\n",
" <td>3.0</td>\n",
" <td>95.00</td>\n",
" <td>3</td>\n",
" <td>6245</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Kothanur</td>\n",
" <td>2 BHK</td>\n",
" <td>1200.0</td>\n",
" <td>2.0</td>\n",
" <td>51.00</td>\n",
" <td>2</td>\n",
" <td>4250</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" location size total_sqft ... price bhk price_per_sqft\n",
"0 Electronic City Phase II 2 BHK 1056.0 ... 39.07 2 3699\n",
"1 Chikka Tirupathi 4 Bedroom 2600.0 ... 120.00 4 4615\n",
"2 Uttarahalli 3 BHK 1440.0 ... 62.00 3 4305\n",
"3 Lingadheeranahalli 3 BHK 1521.0 ... 95.00 3 6245\n",
"4 Kothanur 2 BHK 1200.0 ... 51.00 2 4250\n",
"\n",
"[5 rows x 7 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 99
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wkFbg0N7VXUV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 264
},
"outputId": "67e45da5-7a8d-416c-a80b-a83a37648b3c"
},
"source": [
"plt.hist(house.price, bins=20, rwidth=0.8, density=True)\n",
"\n",
"rng = np.arange(house.price.min(), house.price.max(), 0.1)\n",
"plt.plot(rng, norm.pdf(rng,house.price.mean(), house.price.std()))\n",
"plt.show()"
],
"execution_count": 126,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VvY2jmV4ZM6r",
"colab_type": "text"
},
"source": [
"## **Percentile**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "3vGoC_a1ZKGk",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "ae5b9621-7977-469a-d519-fb7ee5c73977"
},
"source": [
"min_house_price_threshold, max_house_price_threshold = house.price.quantile([0.001, 0.999])\n",
"min_house_price_threshold, max_house_price_threshold"
],
"execution_count": 128,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(11.5, 2000.0)"
]
},
"metadata": {
"tags": []
},
"execution_count": 128
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Plo0n0XcZ0hN",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "f310fb5b-df0b-4acf-e180-feaeced8901b"
},
"source": [
"house_no_outliers = house[(house.price>min_house_price_threshold) & (house.price<max_house_price_threshold)]\n",
"house.shape, house_no_outliers.shape"
],
"execution_count": 129,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((13200, 7), (13169, 7))"
]
},
"metadata": {
"tags": []
},
"execution_count": 129
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "R-bgC2OhaTmV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "8fb79e04-452a-4676-9c8e-d753fc19c92b"
},
"source": [
"# number of rows removed as outliers\n",
"house.shape[0] - house_no_outliers.shape[0]"
],
"execution_count": 131,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"31"
]
},
"metadata": {
"tags": []
},
"execution_count": 131
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "LsY-wHmWbNEW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 264
},
"outputId": "e14874e0-b38f-4ce7-8bab-21a54e3b4290"
},
"source": [
"plt.hist(house_no_outliers.price, bins=20, rwidth=0.8, density=True)\n",
"\n",
"rng = np.arange(house_no_outliers.price.min(), house_no_outliers.price.max(), 0.1)\n",
"plt.plot(rng, norm.pdf(rng,house_no_outliers.price.mean(), house_no_outliers.price.std()))\n",
"plt.show()"
],
"execution_count": 132,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QTCBRuExaiqM",
"colab_type": "text"
},
"source": [
"## **Standard Deviation (mean +/- 4 std)**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "7eVhLOkSbHO1",
"colab_type": "code",
"colab": {}
},
"source": [
"# Set house price bounds\n",
"upper_bound = house.price.mean() + 4*house.price.std()\n",
"lower_bound = house.price.mean() - 4*house.price.std()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "FP7O-13aahRv",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "e6e534d1-1e0c-4a5a-88c5-9ec52bb421dd"
},
"source": [
"house_no_outliers_iqr = house[(house.price>lower_bound) & (house.price<upper_bound)]\n",
"house.shape, house_no_outliers_iqr.shape"
],
"execution_count": 137,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((13200, 7), (13093, 7))"
]
},
"metadata": {
"tags": []
},
"execution_count": 137
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "7eqGL0R9cVQk",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "b0e7cb60-2b28-4ec7-e99e-4c82e39769c8"
},
"source": [
"house.shape[0] - house_no_outliers_iqr.shape[0]"
],
"execution_count": 138,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"107"
]
},
"metadata": {
"tags": []
},
"execution_count": 138
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "aiUXHS4zclLt",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 267
},
"outputId": "ec5fe407-b3d1-476f-c5ee-05ebcfc90f4c"
},
"source": [
"plt.hist(house_no_outliers_iqr.price, bins=20, rwidth=0.8, density=True)\n",
"\n",
"rng = np.arange(house_no_outliers_iqr.price.min(), house_no_outliers_iqr.price.max(), 0.1)\n",
"plt.plot(rng, norm.pdf(rng,house_no_outliers_iqr.price.mean(), house_no_outliers_iqr.price.std()))\n",
"plt.show()"
],
"execution_count": 139,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD7CAYAAABjVUMJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAfEUlEQVR4nO3dfZwWdb3/8dd7b1FUQFjvgGRN1LDSdMPM7k1FTdHCI95XmGZyuj0V5snK8hzt0TlZaZo/UVFJMMzch2J470krZEVLAakVUECTFQGDBPbm8/tjBr3adtkL2N252Hk/H4997Mx3vjPXZ/Zxcb2Z+c7MpYjAzMzypyzrAszMLBsOADOznHIAmJnllAPAzCynHABmZjnlADAzy6miAkDSGEkLJTVKmtTB8mpJ09PlsyWNSNsHS3pY0lpJVxX031HSPZKekzRP0uXdtUNmZlacLgNAUjlwNXAsMAo4TdKodt0mAKsiYl/gx8AVaft64NvAf3Sw6R9FxAHAe4AjJB27dbtgZmZbo6KIPqOBxohYBCBpGjAWmF/QZyzw3XR6BnCVJEXEOuAxSfsWbjAi/gE8nE5vlDQXGNZVIUOGDIkRI0YUUbKZmQEMGTKEWbNmzYqIMe2XFRMAQ4GlBfPLgMM66xMRLZLWAIOBV7vauKSBwAnAT7rqO2LECBoaGooo2czMNpE0pKP2TAeBJVUAtwE/3XSE0UGf8yQ1SGpoamrq3QLNzPqwYgJgOTC8YH5Y2tZhn/RDfQCwsohtXwf8NSKu7KxDRFwXEXURUVdTU1PEJs3MrBjFBMAcYKSkWklVwHigvl2feuCcdHoc8FB08ZQ5ST8gCYovb1nJZmbWHbocA0jP6U8EZgHlwA0RMU/SpUBDRNQDk4FbJDUCr5GEBACSlgC7AFWSTgKOBl4HLgaeA+ZKArgqIq7vzp0zM7POFTMITETMBGa2a7ukYHo9cEon647oZLMqrkQzM+sJvhPYzCynHABmZjnlADAzyykHgJlZThU1CGyJEZPu2aL+Sy4/vocqMTPbdj4CMDPLKQeAmVlOOQDMzHLKAWBmllMOADOznHIAmJnllAPAzCynHABmZjnlADAzyykHgJlZTjkAzMxyygFgZpZTDgAzs5xyAJiZ5ZQDwMwspxwAZmY55QAwM8spB4CZWU45AMzMcsoBYGaWUw4AM7OccgCYmeVUUQEgaYykhZIaJU3qYHm1pOnp8tmSRqTtgyU9LGmtpKvarXOopGfSdX4qSd2xQ2ZmVpwuA0BSOXA1cCwwCjhN0qh23SYAqyJiX+DHwBVp+3rg28B/dLDpa4DPASPTnzFbswNmZrZ1ijkCGA00RsSiiNgITAPGtuszFpiSTs8AjpSkiFgXEY+RBMGbJO0J7BIRf4yIAG4GTtqWHTEzsy1TTAAMBZYWzC9L2zrsExEtwBpgcBfbXNbFNs3MrAeV/CCwpPMkNUhqaGpqyrocM7M+o5gAWA4ML5gflrZ12EdSBTAAWNnFNod1sU0AIuK6iKiLiLqampoiyjUzs2IUEwBzgJGSaiVVAeOB+nZ96oFz0ulxwEPpuf0ORcTLwOuS3pde/XM2cNcWV29mZlutoqsOEdEiaSIwCygHboiIeZIuBRoioh6YDNwiqRF4jSQkAJC0BNgFqJJ0EnB0RMwHvgDcBOwA3Jv+mJlZL+kyAAAiYiYws13bJQXT64FTOll3RCftDcA7iy3UzMy6V8kPApuZWc9wAJiZ5ZQDwMwspxwAZmY55QAwM8spB4CZWU45AMzMcsoBYGaWUw4AM7OccgCYmeWUA8DMLKccAGZmOeUAMDPLKQeAmVlOOQDMzHLKAWBmllMOADOznHIAmJnllAPAzCynHABmZjnlADAzyykHgJlZTjkAzMxyygFgZpZTDgAzs5xyAJiZ5ZQDwMwsp4oKAEljJC2U1ChpUgfLqyVNT5fPljSiYNlFaftCSccUtH9F0j
xJz0q6TVK/7tghMzMrTpcBIKkcuBo4FhgFnCZpVLtuE4BVEbEv8GPginTdUcB44EBgDPBzSeWShgJfBOoi4p1AedrPzMx6STFHAKOBxohYFBEbgWnA2HZ9xgJT0ukZwJGSlLZPi4gNEbEYaEy3B1AB7CCpAtgReGnbdsXMzLZEMQEwFFhaML8sbeuwT0S0AGuAwZ2tGxHLgR8BLwIvA2si4r6t2QEzM9s6mQwCSxpEcnRQC+wF9Jd0Zid9z5PUIKmhqampN8s0M+vTigmA5cDwgvlhaVuHfdJTOgOAlZtZ9+PA4ohoiohm4NfA+zt68Yi4LiLqIqKupqamiHLNzKwYxQTAHGCkpFpJVSSDtfXt+tQD56TT44CHIiLS9vHpVUK1wEjgCZJTP++TtGM6VnAksGDbd8fMzIpV0VWHiGiRNBGYRXK1zg0RMU/SpUBDRNQDk4FbJDUCr5Fe0ZP2ux2YD7QAF0ZEKzBb0gxgbtr+FHBd9++emZl1Rsl/1LcPdXV10dDQkNnrj5h0zxb1X3L58T1UiZlZ8SQ9GRF17dt9J7CZWU45AMzMcsoBYGaWUw4AM7OccgCYmeWUA8DMLKccAGZmOeUAMDPLKQeAmVlOOQDMzHLKAWBmllMOADOznHIAmJnllAPAzCynHABmZjnlADAzyykHgJlZTjkAzMxyygFgZpZTDgAzs5xyAJiZ5ZQDwMwspxwAZmY55QAwM8spB4CZWU45AMzMcsoBYGaWU0UFgKQxkhZKapQ0qYPl1ZKmp8tnSxpRsOyitH2hpGMK2gdKmiHpOUkLJB3eHTtkZmbF6TIAJJUDVwPHAqOA0ySNatdtArAqIvYFfgxcka47ChgPHAiMAX6ebg/gJ8BvI+IA4CBgwbbvjpmZFauYI4DRQGNELIqIjcA0YGy7PmOBKen0DOBISUrbp0XEhohYDDQCoyUNAD4ETAaIiI0RsXrbd8fMzIpVTAAMBZYWzC9L2zrsExEtwBpg8GbWrQWagBslPSXpekn9t2oPzMxsq2Q1CFwBHAJcExHvAdYB/zK2ACDpPEkNkhqampp6s0Yzsz6tmABYDgwvmB+WtnXYR1IFMABYuZl1lwHLImJ22j6DJBD+RURcFxF1EVFXU1NTRLlmZlaMYgJgDjBSUq2kKpJB3fp2feqBc9LpccBDERFp+/j0KqFaYCTwRET8DVgqaf90nSOB+du4L2ZmtgUquuoQES2SJgKzgHLghoiYJ+lSoCEi6kkGc2+R1Ai8RhISpP1uJ/lwbwEujIjWdNP/DkxNQ2UR8Jlu3jczM9uMLgMAICJmAjPbtV1SML0eOKWTdS8DLuug/WmgbkuKNTOz7uM7gc3McsoBYGaWUw4AM7OccgCYmeWUA8DMLKccAGZmOeUAMDPLKQeAmVlOOQDMzHKqqDuBbduNmHTPFvVfcvnxPVSJmVnCRwBmZjnlADAzyykHgJlZTjkAzMxyygFgZpZTDgAzs5xyAJiZ5ZQDwMwspxwAZmY55QAwM8spB4CZWU7l6llAW/o8HvAzecys7/IRgJlZTjkAzMxyygFgZpZTuRoDKCW7sYoDy5YwUssYqlcZoHX0ZwMbqOSNqIKHnoZda2GPd8Nu74Cy8qxLNrM+xgHQa4KD9Twnlv+eD5Y9w8iy5W8uWR39WR078Q/6UUUz/cvWw+8eg2hLOlQPgNoPwqiTYP8xUL1zRvtgZn1JUQEgaQzwE6AcuD4iLm+3vBq4GTgUWAmcGhFL0mUXAROAVuCLETGrYL1yoAFYHhGf2Oa9KUUb1sLcm3mw6qe8vexl1kclf2wbxfTmj/BU2778NYbyOjv9y2pLfnAUrH4Bls+FF38Pf7kPnrsbKvrBu0+F912QHBmYmW2lLgMg/ZC+GjgKWAbMkVQfEfMLuk0AVkXEvpLGA1cAp0oaBYwHDgT2Ah6QtF9EtKbrfQlYAOzSbXtUInZkPTxyOcy+Ft5YxWvsx7XNJ3Bv62jWsmPXG6iogiEjk5+DToW2Nl
j2BDw9Ff48HeZOgf2OhSMvgd1H9fwOmVmfU8wg8GigMSIWRcRGYBowtl2fscCUdHoGcKQkpe3TImJDRCwGGtPtIWkYcDxw/bbvRukQbYwrf5RHqr8Kj/w3vO1wmHA/p2z8Lr9q/UhxH/4dKSuDt70PTvwZfGU+fPQ/4YXfwzXvhzsvgLUrundHzKzPKyYAhgJLC+aXpW0d9omIFmANMLiLda8EvgG0bXHVJWofvcSMqu/xo8pfsDyGwIQH4LTbYPjo7n2h/oPhw1+HLz0N758Iz86Aq94Lc2+GiO59LTPrszK5DFTSJ4AVEfFkEX3Pk9QgqaGpqakXqttyZbQxoXwmM6su4u16ia9t/Dyf3PhdGP7enn3hHXeFo38An38cdhsF9f8ON4+F11/u2dc1sz6hmABYDgwvmB+WtnXYR1IFMIBkMLizdY8ATpS0hOSU0sck3drRi0fEdRFRFxF1NTU1RZTbu3bldaZUXs63K2/ld23v4qgNP+SOtg8RvZmtNfvBp++BT/wYlj4B1x6RDBqbmW1GMZ9Sc4CRkmolVZEM6ta361MPnJNOjwMeiohI28dLqpZUC4wEnoiIiyJiWESMSLf3UESc2Q3706sO0V+4u/pbjC5byKTmc/lc89doYlA2xZSVQd1n4fxHYec94ZenwP3fgbbWrtc1s1zq8iqgiGiRNBGYRXIZ6A0RMU/SpUBDRNQDk4FbJDUCr5F8qJP2ux2YD7QAFxZcAbRdO738Qb5XcRMvxWA+2fw95sWIrEtK1OwP5z4Iv50Ej18JKxbAp66Hfn3uQisz20ZF3QcQETOBme3aLimYXg+c0sm6lwGXbWbbjwCPFFNHKRBtTKq4jfMr7uGh1oP5cvOFvE7/rMv6Z5X94IQrYY93wr3fhOs/ngxGD3571pWZWQnxs4C2QDUbuaryp5xfcQ83txzF55q/Vnof/oXeey6c9RtY1wSTj4aXnsq6IjMrIQ6AYr2xmqlV/8WxZXP4fvMZXNLyaVrZDp7PU/tBmHAfVO4AN30Cnn8464rMrEQ4AIqxbiVMOYF363kubP4ik1uPB5R1VcUbMjIJgYFvg6mnwLO/zroiMysBDoCu/P1vcNNx8OpfOK/5a9zbdljWFW2dXfaCz8yEYXVwxwT40/SsKzKzjDkANmMvXoUbj4XVS+GMGTzSdnDWJW2bHQbBmXfA3kfAnefD07dlXZGZZcgB0Ik9WMm0qu8np3/Ovis5l94XVPWH02+H2g/Bby6Ap6ZmXZGZZcQB0IEakgHfQVoLZ9/Z84906G1VO8Lp02Gfj8BdF8JTHd6EbWZ9nAOgnUG8zq1V/8Ueeo1Pb/wGDD0065J6RuUOyb0Bb/9o8gyheXdmXZGZ9TIHQIFdWMstVZezt17h3Ob/4MnYP+uSelblDnDqVBh+GNzxOfjr/VlXZGa9yAGQ6s8bTKn6ISO1jPObv8of2g7MuqTesel00O6jYPqZsOTxrCsys17iAAAqaeGayit5lxYxsfmLPNp2UNYl9a5+A+DMO2Hg3vDLU5OvoTSzPi/3ASDa+GHlL/hQ+TNMavkc97fVZV1SNvoPhrN/k3zHwK2fgqaFWVdkZj0s9wEwqeI2Ti5/nB82n8qM1g9nXU62dtkrueS1vDIJgddfyroiM+tBuQ6ACeX3cH7FPdzUcjQ/bz0x63JKw661cMav4I3VcOu45LeZ9Um5DYATyx7n25VTubv1MC5tOZvt6tk+PW3Pg2D8rfDqX2DaGdC8PuuKzKwH5DIAPlD2DD+qvJY/tI7ia80X0JbPP8Pm7fMROPlaeOExuPM8f7OYWR+Uu0++A7WYayt/zPMxlPOav8oGqrIuqXS9axwcfRnMvwt+exFEZF2RmXWjor4RrK94m17hpqorWM1OnLPxm/ydHbMuqfS9fyL8/WX4w1Wwy57wga9kXZGZdZP8BMDaJqZUXk45bZyz8ZusyOrL27dHR30/eSz2A9+FnXaHg0/PuiIz6wb5CIANa2HqOPbQKk
7feDHPx9CsK9q+lJXBST9PvlryronQvwZGHpV1VWa2jfp+ALQ2w+1nwd+e4cLmr/BUjMy6oi02YtI9W9R/yeXHd38RFdVw6q3Jl+PcfjacczcM66MPyjPLib4/CFxWkVzWeMJPeKjtkKyr2b712wXOuCM5AvjlKbDy+awrMrNt0PcDQIKPfxcOOSvrSvqGnXeHM9PvFL7lZPj7K9nWY2Zbre8HgHW/IfvC6b9KxgSmjoP1r2ddkZltBQeAbZ1hh8K/3QyvzEseI92yMeuKzGwLOQBs6408CsZeBYsfTb5fuK0t64rMbAv0/auArGcdfHpyj8CD34Od94BjLsu6IjMrUlFHAJLGSFooqVHSpA6WV0uani6fLWlEwbKL0vaFko5J24ZLeljSfEnzJH2pu3bIMvCBr8Bhn0/uFv79z7KuxsyK1OURgKRy4GrgKGAZMEdSfUTML+g2AVgVEftKGg9cAZwqaRQwHjgQ2At4QNJ+QAvwtYiYK2ln4ElJ97fbpm0vJDjmv2HtK3DffyZ3C7/737Kuysy6UMwRwGigMSIWRcRGYBowtl2fscCUdHoGcKQkpe3TImJDRCwGGoHREfFyRMwFiIi/AwsA3567PSsrg5N/ASM+mIwHND6YdUVm1oViAmAosLRgfhn/+mH9Zp+IaAHWAIOLWTc9XfQeYHbxZVtJqqiG8VOh5gCYfhYsnZN1RWa2GZleBSRpJ+AO4MsR0eHF5JLOk9QgqaGpqal3C7Qt128AnHlHcsPYrZ+Cl/+UdUVm1oliAmA5MLxgflja1mEfSRXAAGDl5taVVEny4T81In7d2YtHxHURURcRdTU1NUWUa5nbeQ84uz55dMQtJ8OK57KuyMw6UEwAzAFGSqqVVEUyqFvfrk89cE46PQ54KCIibR+fXiVUC4wEnkjHByYDCyLif7tjR6zEDByefMF8WSXcPNbPDTIrQV0GQHpOfyIwi2Sw9vaImCfpUkmbvkl9MjBYUiPwVWBSuu484HZgPvBb4MKIaAWOAM4CPibp6fTnuG7eN8va4LcnIdDWnITA6hezrsjMChR1I1hEzARmtmu7pGB6PXBKJ+teBlzWru0x/C3s+bDbAXDWnXDTCTDlRPj0PTDAF3yZlQI/CsJ63p4HJQPD615Nvk/ARwJmJcEBYL1j+HuT00H/WAU3HgevLc66IrPc87OA+riS+DaxTYYdCufUwy0nJSHw6buTcQIzy4SPAKx37XVw8nWSrRvhxmN9iahZhhwA1vv2eGcyGAxw4xhY+kS29ZjllAPAsrHbAfDZWbDDoOTqoIW/zbois9xxAFh2dq2Fz96XhMG002HuLVlXZJYrDgDL1k41yZjAPh+B+onwyOUQkXVVZrngALDsVe8Ep0+Hg06HR/4bZnwGNv4j66rM+jwHgJWG8ko46edw1KUw7zfJ4PCaZVlXZdanOQCsdEhwxJeSo4GVi+C6j8KLf8y6KrM+ywFgpWe/Y+DcB6Cqf3LD2GNXQltb1lWZ9TkOACtNux0A5z8K7zgBHvgO/PLfYN3KrKsy61McAFa6+g2AU26C4/8HFj8K134AFj2SdVVmfYYDwEqbBO89961TQjePhbu/ChvWZl2Z2XbPAWDbhz0Pgs//Dg6fCA03wDXvhyWPZV2V2XbNAWDbj8od4JjL4DMzQWVw0/Fw5wWwdkXWlZltl/w4aNusbXmcdI89inrv98MFj8P//Qh+/zN47h742MVQNwHK/ZY2K5aPAGz7VNUfPv4d+MIfYOghcO834NojYMHdfpSEWZEcALZ9GzIy+c7hU2+FtlaYfgZc/3FY/H9ZV2ZW8hwAtv2TkvsFvvBHOPFn8PeXYcoJMPkYWHivbyIz64QDwPqO8go45Gz497lw7A/h9ZfgtvFwzeHw1K3Q/EbWFZqVFAeA9T2V/eCw8+GLc+GT10NZJdx1IfzP/jDzG/DK/KwrNCsJvmTC+q7ySnj3KfCucck9A3OnwJM3whO/gKGHwoGfhANPggHDsq
7ULBMOACtJPXP56ckM4kg+Wf47Tl76OO9cfjHcdzEMPwxGjYV9j0oGlaVtqNxs++EAsFxZxS5Mbj2eya3HU6uXOa5sNl/fMA9mfQtmfYtlMYRHWw/i/9reRUPb/qxkQJfbLPr+BbMS4wCw3Foce3J160l8/Qv/D1a9AM8/yLy7pjK2/HHOqHgw6dO2O3NjPxra9uOZtlr+GsPYQFXGlZt1j6ICQNIY4CdAOXB9RFzebnk1cDNwKLASODUilqTLLgImAK3AFyNiVjHbNOtVg/aGus9y/ozdqaSFd2kRdWULqSv7Cx8u+xOfKv8dAK0hFsVePBfDWdD2NpbEHvDyMBhUC/122aYSeuzOabNOdBkAksqBq4GjgGXAHEn1EVF4KcUEYFVE7CtpPHAFcKqkUcB44EBgL+ABSful63S1TbNMNFPB3NiPua37cV0rQLC3XmGUXuCAshd5h17kYD3PCZXpt5X94qfJ7x0HM3ftIF6KwayIQayIgayIgbzCIFbEIFbFzqyhP80F/+y660Pc4WFbo5gjgNFAY0QsApA0DRgLFH5YjwW+m07PAK6SpLR9WkRsABZLaky3RxHbNCsR4oXYgxdiD+5tO+zN1v68wdu0gnvPGgqvLYZVi3njiTm8Qy/y4bI/s7M6vu9gXVSzhv6sif5w49XQbyBU78RlFa/yBlW8QTVvRBXrqeYNqvhHVLOeKpqpoIXy5CfKYWkNlFVAeSX7alm6rILmKKeVcgJoQ7RRRgCR/m6jDDauSx6opzJA6XTB7y1Qks+LsqIUEwBDgaUF88uAwzrrExEtktYAg9P2P7Zbd2g63dU2zUraOnZgQewNo976UDrj8bc+0HZkPbtpFbuxmt21ioFaywDWMUDr3vz9DgSrX4CNazm6fBX92MiOrKdcRTzPaPJbkw9Ub2Hx/7X5xW0h2hCB0vB4KxSqK/759qGF1ZvutC4yOH5Q/ubkc9WtHXaJzrZVsG7yksW85r/2WbexpfjXBHaqfuujcu2Gf123K9uy/pvrfv355B6XblTyg8CSzgPOS2fXSlpY5KpDgFe3+fWv6LV1/6neXnzdrV23w79vCdfd6fthW163q/UXbP1mu+X924tcb88awsU7bG29na5XTAAsB4YXzA9L2zrqs0xSBTCAZDB4c+t2tU0AIuI64Loi6vwnkhoiom5L18uK6+1Zrrdnud6e1VP1FvMoiDnASEm1kqpIBnXr2/WpB85Jp8cBD0VEpO3jJVVLqgVGAk8UuU0zM+tBXR4BpOf0JwKzSC7ZvCEi5km6FGiIiHqSs5G3pIO8r5F8oJP2u51kcLcFuDAiWgE62mb3756ZmXWmqDGAiJgJzGzXdknB9HrglE7WvQy4rJhtdrMtPm2UMdfbs1xvz3K9PatH6lX425PMzHLJj4M2M8upPhkAksZIWiipUdKkrOsBkHSDpBWSni1o21XS/ZL+mv4elLZL0k/T+v8s6ZAM6h0u6WFJ8yXNk/SlUq5ZUj9JT0j6U1rv99L2Wkmz07qmpxcdkF6YMD1tny1pRG/Wm9ZQLukpSXeXeq1pHUskPSPpaUkNaVtJvh/SGgZKmiHpOUkLJB1eqvVK2j/9u276eV3Sl3u83ojoUz8kg8rPA/sAVcCfgFElUNeHgEOAZwvafghMSqcnAVek08cB95LcwfI+YHYG9e4JHJJO7wz8BRhVqjWnr7tTOl0JzE7ruB0Yn7ZfC1yQTn8BuDadHg9Mz+Bv/FXgl8Dd6XzJ1pq+9hJgSLu2knw/pDVMAc5Np6uAgaVcb0Hd5cDfgL17ut5MdrCH/3iHA7MK5i8CLsq6rrSWEe0CYCGwZzq9J7Awnf4FcFpH/TKs/S6SZzeVfM3AjsBckrvLXwUq2r83SK5AOzydrkj7qRdrHAY8CHwMuDv9h1yStRbU3FEAlOT7geRepMXt/06lWm+7Go8GHu+NevviKaCOHl0xtJO+Wds9Il5Op/8G7J5Ol9Q+pKcc3kPyv+qSrTk9pfI0sA
K4n+RIcHVEbLr3vrCmf3p8CbDp8SW95UrgG8Cm5ygMpnRr3SSA+yQ9qeQOfSjd90Mt0ATcmJ5mu15Sf0q33kLjgdvS6R6tty8GwHYpkhgvuUuyJO0E3AF8OSJeL1xWajVHRGtEHEzyv+vRwAEZl9QhSZ8AVkTEk1nXsoU+EBGHAMcCF0r6UOHCEns/VJCccr0mIt4DrCM5hfKmEqsXgHTc50TgV+2X9US9fTEAinl0Ral4RdKeAOnvFWl7SeyDpEqSD/+pEfHrtLmkawaIiNXAwySnUQYqeTxJ+5rerFf//PiS3nAEcKKkJcA0ktNAPynRWt8UEcvT3yuAO0lCtlTfD8uAZRExO52fQRIIpVrvJscCcyPilXS+R+vtiwGwPT1movARGueQnGff1H52OtL/PmBNwWFgr5Akkju8F0TE/xYsKsmaJdVIGphO70AyXrGAJAjGdVJvR48v6XERcVFEDIuIESTvz4ci4oxSrHUTSf0l7bxpmuQ89bOU6PshIv4GLJW0f9p0JMkTCUqy3gKn8dbpn0119Vy9WQxy9MIgynEkV608D1ycdT1pTbcBLwPNJP87mUByHvdB4K/AA8CuaV+RfGHO88AzQF0G9X6A5HDzz8DT6c9xpVoz8G7gqbTeZ4FL0vZ9SJ4/1UhyWF2dtvdL5xvT5ftk9L74CG9dBVSytaa1/Sn9mbfp31Wpvh/SGg4GGtL3xG+AQSVeb3+SI7sBBW09Wq/vBDYzy6m+eArIzMyK4AAwM8spB4CZWU45AMzMcsoBYGaWUw4AM7OccgCYmeWUA8DMLKf+P7jwNDJwWbT9AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X8C_qMutc1V5",
"colab_type": "text"
},
"source": [
"## **Z-Score**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "MDl9sRuuc6t3",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 197
},
"outputId": "776cb13f-5e1c-48e7-e435-6010288c7c50"
},
"source": [
"# calculate the z-score for each house price\n",
"house['price_Zscore'] = (house.price - house.price.mean()) / house.price.std()\n",
"house[['price','price_Zscore']].head()"
],
"execution_count": 143,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>price</th>\n",
" <th>price_Zscore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>39.07</td>\n",
" <td>-0.490737</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>120.00</td>\n",
" <td>0.051777</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>62.00</td>\n",
" <td>-0.337026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>95.00</td>\n",
" <td>-0.115811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>51.00</td>\n",
" <td>-0.410764</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" price price_Zscore\n",
"0 39.07 -0.490737\n",
"1 120.00 0.051777\n",
"2 62.00 -0.337026\n",
"3 95.00 -0.115811\n",
"4 51.00 -0.410764"
]
},
"metadata": {
"tags": []
},
"execution_count": 143
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "G6sNLgW6eBc1",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "0834b913-5e29-4c92-f014-ae57783a81a8"
},
"source": [
"house_no_outliers_zscore = house[(house.price_Zscore>-4) & (house.price_Zscore<4)]\n",
"house.shape, house_no_outliers_zscore.shape"
],
"execution_count": 144,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((13200, 8), (13093, 8))"
]
},
"metadata": {
"tags": []
},
"execution_count": 144
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "FV0D65e6eqFl",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "51744695-d42b-4303-85a5-963bcb3297da"
},
"source": [
"house.shape[0] - house_no_outliers_zscore.shape[0]"
],
"execution_count": 145,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"107"
]
},
"metadata": {
"tags": []
},
"execution_count": 145
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Gr7ituEye3Z-",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 267
},
"outputId": "86109545-edec-4f59-82b0-63ee001858ec"
},
"source": [
"plt.hist(house_no_outliers_zscore.price, bins=20, rwidth=0.8, density=True)\n",
"\n",
"rng = np.arange(house_no_outliers_zscore.price.min(), house_no_outliers_zscore.price.max(), 0.1)\n",
"plt.plot(rng, norm.pdf(rng,house_no_outliers_zscore.price.mean(), house_no_outliers_zscore.price.std()))\n",
"plt.show()"
],
"execution_count": 146,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
}
]
}
@codebasics
Copy link

Good job.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment