codebrain001/story_of_Descriptive_Statistics_with_numbers.ipynb

## story_of_Descriptive_Statistics_with_numbers.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy import stats \n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = pd.read_csv('housing.csv') #reading data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>median_house_value</th>\n",
       "      <th>ocean_proximity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-122.23</td>\n",
       "      <td>37.88</td>\n",
       "      <td>41.0</td>\n",
       "      <td>880.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>322.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>8.3252</td>\n",
       "      <td>452600.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-122.22</td>\n",
       "      <td>37.86</td>\n",
       "      <td>21.0</td>\n",
       "      <td>7099.0</td>\n",
       "      <td>1106.0</td>\n",
       "      <td>2401.0</td>\n",
       "      <td>1138.0</td>\n",
       "      <td>8.3014</td>\n",
       "      <td>358500.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-122.24</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1467.0</td>\n",
       "      <td>190.0</td>\n",
       "      <td>496.0</td>\n",
       "      <td>177.0</td>\n",
       "      <td>7.2574</td>\n",
       "      <td>352100.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-122.25</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1274.0</td>\n",
       "      <td>235.0</td>\n",
       "      <td>558.0</td>\n",
       "      <td>219.0</td>\n",
       "      <td>5.6431</td>\n",
       "      <td>341300.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-122.25</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1627.0</td>\n",
       "      <td>280.0</td>\n",
       "      <td>565.0</td>\n",
       "      <td>259.0</td>\n",
       "      <td>3.8462</td>\n",
       "      <td>342200.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \\\n",
       "0    -122.23     37.88                41.0        880.0           129.0   \n",
       "1    -122.22     37.86                21.0       7099.0          1106.0   \n",
       "2    -122.24     37.85                52.0       1467.0           190.0   \n",
       "3    -122.25     37.85                52.0       1274.0           235.0   \n",
       "4    -122.25     37.85                52.0       1627.0           280.0   \n",
       "\n",
       "   population  households  median_income  median_house_value ocean_proximity  \n",
       "0       322.0       126.0         8.3252            452600.0        NEAR BAY  \n",
       "1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  \n",
       "2       496.0       177.0         7.2574            352100.0        NEAR BAY  \n",
       "3       558.0       219.0         5.6431            341300.0        NEAR BAY  \n",
       "4       565.0       259.0         3.8462            342200.0        NEAR BAY  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.head() #peeking to see what the data set is like"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_array_df(measure, col):\n",
    "    '''\n",
    "    This function converts the inputed the array to a rectangular data structure(dataframe), so we can appreciate \n",
    "    the results better. This function accepts two parameters\n",
    "    1. Array of the measure\n",
    "    2. Columns\n",
    "    '''\n",
    "    results = {\n",
    "        'Features': col,\n",
    "        'Measure' : measure\n",
    "    }\n",
    "    results = pd.DataFrame(results)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def only_num(data):\n",
    "    '''\n",
    "    This function aids the extraction of the numeric values and columns\n",
    "    '''\n",
    "    numerical_values = data.select_dtypes(exclude=['object'])\n",
    "    numerical_columns = data.select_dtypes(exclude=['object']).columns\n",
    "    return numerical_values, numerical_columns\n",
    "    \n",
    "    \n",
    "def only_cat(data):\n",
    "    '''\n",
    "    This function aids the extraction of the categorical values and columns\n",
    "    '''\n",
    "    categorical_values = data.select_dtypes(include=['object'])\n",
    "    categorical_columns = data.select_dtypes(include=['object']).columns\n",
    "    return categorical_values, categorical_columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Measures of central tendency to be considered\n",
    "- Mean\n",
    "- Trimmed Mean\n",
    "- Weighted Mean\n",
    "- Median\n",
    "- Weighted Median \n",
    "- Mode"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_mean(data):\n",
    "    '''\n",
    "    This function calculates the mean of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    array = np.array(num)\n",
    "    n_mean = np.nanmean(array, axis=0) \n",
    "    return convert_array_df(n_mean, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>-119.569704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>35.631861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>28.639486</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>2635.763081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>537.870553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1425.476744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>499.539680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>3.870671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>206855.816909</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features        Measure\n",
       "0           longitude    -119.569704\n",
       "1            latitude      35.631861\n",
       "2  housing_median_age      28.639486\n",
       "3         total_rooms    2635.763081\n",
       "4      total_bedrooms     537.870553\n",
       "5          population    1425.476744\n",
       "6          households     499.539680\n",
       "7       median_income       3.870671\n",
       "8  median_house_value  206855.816909"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the mean of the dataset\n",
    "get_mean(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Trimmed Mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_trimmed_mean(data, truncation):\n",
    "    '''\n",
    "    This function calculates the trimmed mean and makes use of the scipy library, the truncation represents the \n",
    "    portion (in percentage) of the  extreme values that is desired to be removed. It accepts two parameters\n",
    "    1. Dataset\n",
    "    2. Truncation value\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    array = np.array(num)\n",
    "    tmean = stats.trim_mean(array, truncation)\n",
    "    return convert_array_df(tmean, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>-119.518129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>35.508249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>28.494549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>2294.557837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>477.576248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1256.512900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>441.201793</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>3.654012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>192772.995397</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features        Measure\n",
       "0           longitude    -119.518129\n",
       "1            latitude      35.508249\n",
       "2  housing_median_age      28.494549\n",
       "3         total_rooms    2294.557837\n",
       "4      total_bedrooms     477.576248\n",
       "5          population    1256.512900\n",
       "6          households     441.201793\n",
       "7       median_income       3.654012\n",
       "8  median_house_value  192772.995397"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the trimmed mean of the dataset\n",
    "get_trimmed_mean(dataset,0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Weighted Mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_weighted_mean(data, weight_name):\n",
    "    '''\n",
    "    This function calculates the weighted mean and makes use on masked arrays. it accepts two parameters\n",
    "    1. Dataset\n",
    "    2. Name of feature desired to be used as weight as a string\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    col = data.select_dtypes(exclude=['object']).columns \n",
    "    weight = data[weight_name]\n",
    "    wt_avg = []\n",
    "    for item in col:\n",
    "        if item == weight_name:\n",
    "            ma = np.ma.MaskedArray(data[item], mask=np.isnan((data[item])))\n",
    "            w_avg = np.ma.average(ma, axis=0, weights=weight)\n",
    "            ### This tends to calculate the weighted mean on the weights, so to correct this we divide by 2\n",
    "            w_avg = w_avg/2\n",
    "            wt_avg.append(w_avg)\n",
    "        else:\n",
    "            ma = np.ma.MaskedArray(data[item], mask=np.isnan((data[item])))\n",
    "            w_avg = np.ma.average(ma, axis =0, weights=weight)\n",
    "            wt_avg.append(w_avg)\n",
    "            \n",
    "    return convert_array_df(wt_avg,col)    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>-119.410904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>35.447274</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>25.677624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>4121.237998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>831.999819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1162.555715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>775.085802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>3.877967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>204596.156992</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features        Measure\n",
       "0           longitude    -119.410904\n",
       "1            latitude      35.447274\n",
       "2  housing_median_age      25.677624\n",
       "3         total_rooms    4121.237998\n",
       "4      total_bedrooms     831.999819\n",
       "5          population    1162.555715\n",
       "6          households     775.085802\n",
       "7       median_income       3.877967\n",
       "8  median_house_value  204596.156992"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the weighted mean of the dataset and using the feature 'population' as the weight\n",
    "get_weighted_mean(dataset, 'population')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Median"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_median(data):\n",
    "    '''\n",
    "    This function calculates the median of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    array = np.array(num)\n",
    "    n_median = np.nanmedian(array, axis=0)\n",
    "    return convert_array_df(n_median, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>-118.4900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>34.2600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>29.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>2127.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>435.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1166.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>409.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>3.5348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>179700.0000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features      Measure\n",
       "0           longitude    -118.4900\n",
       "1            latitude      34.2600\n",
       "2  housing_median_age      29.0000\n",
       "3         total_rooms    2127.0000\n",
       "4      total_bedrooms     435.0000\n",
       "5          population    1166.0000\n",
       "6          households     409.0000\n",
       "7       median_income       3.5348\n",
       "8  median_house_value  179700.0000"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the median of the dataset\n",
    "get_median(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Weighted Median"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_weighted_median(data, weight_name):\n",
    "    '''\n",
    "    This function calculates weighted median, it makes use of wquantile library. it accepts only two parameter\n",
    "    1. Dataset\n",
    "    2. Name of feature desired to be used as weight as a string\n",
    "    '''\n",
    "    # ! pip install wquantiles\n",
    "    import weighted\n",
    "    \n",
    "    num,col =  only_num(data)\n",
    "    weight_name = weight_name.lower()\n",
    "    weight = data[weight_name]\n",
    "    wt_median = []\n",
    "    \n",
    "    for item in col:\n",
    "        if item == weight_name:\n",
    "            w_median = weighted.median(data[item], weights=weight)\n",
    "            w_median = w_median/2\n",
    "            wt_median.append(w_median)\n",
    "        else:\n",
    "            w_median = weighted.median(data[item], weights=weight)\n",
    "            wt_median.append(w_median)\n",
    "            \n",
    "    return convert_array_df(wt_median, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>-119.410904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>35.447274</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>25.677624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>4121.237998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>831.999819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1162.555715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>775.085802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>3.877967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>204596.156992</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features        Measure\n",
       "0           longitude    -119.410904\n",
       "1            latitude      35.447274\n",
       "2  housing_median_age      25.677624\n",
       "3         total_rooms    4121.237998\n",
       "4      total_bedrooms     831.999819\n",
       "5          population    1162.555715\n",
       "6          households     775.085802\n",
       "7       median_income       3.877967\n",
       "8  median_house_value  204596.156992"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the weighted_median of the dataset\n",
    "get_weighted_mean(dataset, 'population')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_mode(data):\n",
    "    '''\n",
    "    This function calculates the mode of the categorical features, it makes use of wquantile library. it accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    cat,col = only_cat(data)\n",
    "    n_mode = stats.mode(cat)\n",
    "    return n_mode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ModeResult(mode=array([['<1H OCEAN']], dtype=object), count=array([[9136]]))"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the mode of the dataset\n",
    "get_mode(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Measure of Dispersion\n",
    "- Range\n",
    "- Interquartile range\n",
    "- Mean absolute deviation\n",
    "- Variance\n",
    "- Standard deviation \n",
    "- Median absolute deviation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_range(data):\n",
    "    '''\n",
    "    This function calculates the range of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    \n",
    "    num, col = only_num(data)\n",
    "    \n",
    "    results = []\n",
    "    for item in col:\n",
    "        item_range = []\n",
    "        for value in data[item]:\n",
    "            item_range.append(value)\n",
    "            limit1 = min(item_range)\n",
    "            limit2 = max(item_range)\n",
    "            range_ = limit2- limit1\n",
    "        results.append(range_)\n",
    "    return convert_array_df(results, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>10.0400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>9.4100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>51.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>39318.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>6444.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>35679.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>6081.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>14.5002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>485002.0000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features      Measure\n",
       "0           longitude      10.0400\n",
       "1            latitude       9.4100\n",
       "2  housing_median_age      51.0000\n",
       "3         total_rooms   39318.0000\n",
       "4      total_bedrooms    6444.0000\n",
       "5          population   35679.0000\n",
       "6          households    6081.0000\n",
       "7       median_income      14.5002\n",
       "8  median_house_value  485002.0000"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the range of the dataset\n",
    "get_range(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Interquartile range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_IQR(data):\n",
    "    '''\n",
    "    This function calculates the Interquartile range of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    results = []\n",
    "    for item in col:\n",
    "        result = stats.iqr(data[item],nan_policy='omit') # The NaN defines how to handle when input contains nan.\n",
    "        results.append(result)\n",
    "    return convert_array_df(results, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>3.79000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>3.78000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>19.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>1700.25000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>351.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>938.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>325.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>2.17985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>145125.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features       Measure\n",
       "0           longitude       3.79000\n",
       "1            latitude       3.78000\n",
       "2  housing_median_age      19.00000\n",
       "3         total_rooms    1700.25000\n",
       "4      total_bedrooms     351.00000\n",
       "5          population     938.00000\n",
       "6          households     325.00000\n",
       "7       median_income       2.17985\n",
       "8  median_house_value  145125.00000"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the IQR of the dataset\n",
    "get_IQR(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean absolute deviation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mean_absolute_dev(data):\n",
    "    '''\n",
    "    This function calculates the Mean absolute deviation of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    results = []\n",
    "    for item in col:\n",
    "        ### computing the formula 'mean(abs(data-mean(data)))'\n",
    "        a = np.nanmean(np.array(data[item]))\n",
    "        b = np.array(data[item])\n",
    "        result = np.nanmean(np.absolute(b-a))\n",
    "        results.append(result)\n",
    "    return convert_array_df(results, col)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>1.830206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>1.975024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>10.551539</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>1344.462236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>270.923606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>714.237277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>247.195367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>1.401614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>91170.439944</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features       Measure\n",
       "0           longitude      1.830206\n",
       "1            latitude      1.975024\n",
       "2  housing_median_age     10.551539\n",
       "3         total_rooms   1344.462236\n",
       "4      total_bedrooms    270.923606\n",
       "5          population    714.237277\n",
       "6          households    247.195367\n",
       "7       median_income      1.401614\n",
       "8  median_house_value  91170.439944"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the Mean absolute deviation of the dataset\n",
    "mean_absolute_dev(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Variance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_var(data):\n",
    "    '''\n",
    "    This function calculates the Variance of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    array = np.array(num)\n",
    "    var = np.nanvar(array, axis=0) \n",
    "    return convert_array_df(var, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>4.013945e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>4.562072e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>1.583886e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>4.759215e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>1.775567e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1.282408e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>1.461690e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>3.609148e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>1.331550e+10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features       Measure\n",
       "0           longitude  4.013945e+00\n",
       "1            latitude  4.562072e+00\n",
       "2  housing_median_age  1.583886e+02\n",
       "3         total_rooms  4.759215e+06\n",
       "4      total_bedrooms  1.775567e+05\n",
       "5          population  1.282408e+06\n",
       "6          households  1.461690e+05\n",
       "7       median_income  3.609148e+00\n",
       "8  median_house_value  1.331550e+10"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the Variance of the dataset\n",
    "get_var(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Standard deviation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_std(data):\n",
    "    '''\n",
    "    This function calculates the Standard deviation of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    array = np.array(num)\n",
    "    std = np.nanstd(array, axis=0) \n",
    "    return convert_array_df(std, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>2.003483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>2.135901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>12.585253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>2181.562402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>421.374759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>1132.434688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>382.320491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>1.899776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>115392.820404</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features        Measure\n",
       "0           longitude       2.003483\n",
       "1            latitude       2.135901\n",
       "2  housing_median_age      12.585253\n",
       "3         total_rooms    2181.562402\n",
       "4      total_bedrooms     421.374759\n",
       "5          population    1132.434688\n",
       "6          households     382.320491\n",
       "7       median_income       1.899776\n",
       "8  median_house_value  115392.820404"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the Standard deviation of the dataset\n",
    "get_std(dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Median absolute deviation (MAD)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_MAD(data):\n",
    "    '''\n",
    "    This function calculates the Median absolute deviation of the numeric variables in the data set. It accepts only one parameter\n",
    "    1. Dataset\n",
    "    '''\n",
    "    num,col =  only_num(data)\n",
    "    results = []\n",
    "    for item in col:\n",
    "        result = stats.median_absolute_deviation(data[item],nan_policy='omit') # The NaN defines how to handle when input contains nan.\n",
    "        results.append(result)\n",
    "    return convert_array_df(results, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Features</th>\n",
       "      <th>Measure</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>longitude</td>\n",
       "      <td>1.897728</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>latitude</td>\n",
       "      <td>1.823598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>housing_median_age</td>\n",
       "      <td>14.826000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_rooms</td>\n",
       "      <td>1181.632200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>total_bedrooms</td>\n",
       "      <td>240.181200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>population</td>\n",
       "      <td>652.344000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>households</td>\n",
       "      <td>223.872600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>median_income</td>\n",
       "      <td>1.577783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>median_house_value</td>\n",
       "      <td>101409.840000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Features        Measure\n",
       "0           longitude       1.897728\n",
       "1            latitude       1.823598\n",
       "2  housing_median_age      14.826000\n",
       "3         total_rooms    1181.632200\n",
       "4      total_bedrooms     240.181200\n",
       "5          population     652.344000\n",
       "6          households     223.872600\n",
       "7       median_income       1.577783\n",
       "8  median_house_value  101409.840000"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#calulcating the Median absolute deviation of the dataset\n",
    "get_MAD(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}