Last active
April 4, 2020 15:01
-
-
Save Createdd/c86103e45422cab83c808cdb0a885365 to your computer and use it in GitHub Desktop.
Exercisesheet Statistics on Variability and Boxplots
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Measures of Variability and Boxplots" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.554474Z", | |
"start_time": "2020-04-04T14:34:00.430557Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd \n", | |
"import numpy as np \n", | |
"import matplotlib.pyplot as plt \n", | |
"import seaborn as sns \n", | |
"\n", | |
"import statsmodels\n", | |
"from scipy import stats" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.561297Z", | |
"start_time": "2020-04-04T14:34:01.556468Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"%precision 4\n", | |
"np.set_printoptions(precision=4)\n", | |
"\n", | |
"# 4.123457\n", | |
"# np.array([1.123456789])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Travel distances over 11 days" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.568171Z", | |
"start_time": "2020-04-04T14:34:01.564806Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"data = np.array([39, 35, 25, 37, 15, 36, 50, 52, 37, 51, 39])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## a) variation for given data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.578324Z", | |
"start_time": "2020-04-04T14:34:01.572201Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The first measure can be the variance, which is 111.2397. The second measure can be the standard deviation, which can be 10.547\n" | |
] | |
} | |
], | |
"source": [ | |
"variance = np.var(data).round(4)\n", | |
"std = np.std(data).round(4)\n", | |
"\n", | |
"print(f'The first measure can be the variance, which is {variance}. The second measure can be the standard deviation, which can be {std}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## b) boxplot of given data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.752446Z", | |
"start_time": "2020-04-04T14:34:01.580479Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAKQklEQVR4nO3dX4id+V3H8c93M9Wm/qHupIQlK0QdoRTBVVapVKSWXRmr+AdEFOvmolAFzcZFtNYbFRT0QutuLoT1XxOs1qIWpYSs2Xahd9WsjWbtLjhqhA7b3W3W2squK5P8vJhnZMzfScyZ75mZ1wuGmXPOk3m+8+M873nmyZmkxhgBYPvd1T0AwF4lwABNBBigiQADNBFggCYLt7LxgQMHxuHDh2c0CsDuc+DAgTzxxBNPjDGWr3zslgJ8+PDhnD179s5NBrAHVNWBa93vEgRAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNbun/hIPd6vjx41lZWdny9qurq0mSQ4cOzWqkubW0tJSjR492j7ErCDAkWVlZyblnns2lN9y9pe33vfIfSZLPvra3DqF9r7zcPcKusreePXADl95wd1598zu3tO3+504lyZa33y02vm7uDNeAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCPAOcvz48Rw/frx7DNhTZnncLczkszITKysr3SPAnjPL484ZMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCbbEuCLFy/m4YcfzsWLF7djdwA7wrYE+MSJEzl//nxOnjy5HbsD2BFmHuCLFy/m9OnTGWPk9OnTzoIBJguz3sGJEydy+fLlJMmlS5dy8uTJPPLII7Pe7a60urqaV199NceOHeseZddZWVnJXf89useYe3f91xeysvLFPfUcXFlZyf79+2fyuW96BlxV76mqs1V19qWXXrrlHTz55JNZW1tLkqytreXMmTO3PiXALnTTM+AxxuNJHk+S+++//5ZPER544IGcOnUqa2trWVhYyIMPPngbY5Ikhw4dSpI8+uijzZPsPseOHcvT//JC9xhz7/LrvzJLX3twTz0HZ3m2P/NrwEeOHMldd63vZt++fXnooYdmvUuAHWHmAV5cXMzy8nKqKsvLy1lcXJz1LgF2hJn/JVyyfhZ84cIFZ78Am2xLgBcXF/PYY49tx64Adgy/igzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKDJQvcAbN3S0lL3CLDnzPK4E+Ad5OjRo90jwJ4zy+POJQiAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNFnoHgDmxb5XXs7+505tcduLSbLl7XeLfa+8nORg9xi7hgBDkqWlpVvafnV1LUly6NBei9HBW14rrk+AIcnRo0e7R2APcg0YoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0KTGGFvfuOqlJP92m/s6kORzt/lnt4sZ7wwz3jk7YU4z3tjnkmSMsXzlA7cU4P+Pqjo7xrh/W3Z2m8x4Z5jxztkJc5rx9rkEAdBEgAGabGeAH9/Gfd0uM94ZZrxzdsKcZrxN23YNGID/yyUIgCYCDNBkJgGuqj+oqher6plN9/1yVa1W1bnp7Z2z2PcW5/vqqnqqqj5dVf9YVcem+++uqjNV9U/T+6/qmvEmc87TWr6+qv6mqv5+mvFXpvu/pqo+WVUrVfWnVfUlczjjB6rqXzet431dM26adV9VfaqqPjrdnpt1vMGMc7WOVXWhqs5Ps5yd7purY3vDrM6AP5DkqhcdJ3n/GOO+6e3UjPa9FWtJfnaM8ZYkb03yU1X1liS/kORjY4yvT/Kx6Xan682ZzM9avpbkHWOMb0xyX5Llqnprkt+YZlxK8u9J3j2HMybJz21ax3N9I/6vY0me3XR7ntZxw5UzJvO3jt85zbLx2t95O7aTzCjAY4xPJHl5Fp/7ThhjPD/G+Lvp4y9m/cl0KMn3JzkxbXYiyQ/0TLjuBnPOjbHuP6ebr5veRpJ3JPmz6f7WtbzBjHOlqu5N8j1Jfm+6XZmjdUyunnEHmatje8N2XwP+6ar6h+kSxVz8CFBVh5N8U5JPJjk4xnh+euizSQ42jXWVK+ZM5mgtpx9JzyV5McmZJP+c5PNjjLVpk8+k+RvHlTOOMTbW8demdXx/VX1p44hJ8ttJfj7J5en2YuZsHXP1jBvmaR1Hkr+uqqer6j3TfXN5bG9ngH8nyddl/UfA55P85jbu+5qq6suT/HmSnxljfGHzY2P99XlzcZZ0jTnnai3HGJfGGPcluTfJtyZ5c+c813LljFX1DUnel/VZvyXJ3Une2zVfVX1vkhfHGE93zXAzN5hxbtZx8u1jjG9O8t1Zv2z3HZsfnKdje9sCPMZ4YToILif53awfqG2q6nVZj9oHxxh/Md39QlXdMz1+T9bPllpda855W8sNY4zPJ3kqybcleWNVLUwP3ZtktW2wTTbNuDxd4hljjNeS/GF61/FtSb6vqi4k+VDWLz08mvlax6tmrKo/mrN1zBhjdXr/YpKPTPPM3bGdbGOAN774yQ8meeZ6227DLJXk95M8O8b4rU0P/VWSI9PHR5L85XbPttn15pyztXxTVb1x+nh/kgezfq36qSQ/NG3WupbXmfG5TQdkZf2aYNs6jjHeN8a4d4xxOMmPJPn4GOPHMkfreJ0Z3zVP61hVX1ZVX7HxcZLvmuaZq2N7w8LNN7l1VfUnSd6e5EBVfSbJLyV5+/TylJHkQpKfmMW+t+htSX48yfnpumCS/GKSX0/y4ap6d9b/2c0fbppvw/Xm/NE5Wst7kpyoqn1Z/4b+4THGR6vq00k+VFW/muRTWf9GMm8zfryq3pSkkpxL8pONM17PezM/63g9H5yjdTyY5CPr3wuykOSPxxinq+pvM1/HdhK/igzQxm/CATQRYIAmAgzQRIABmggwQBMBBmgiwABN/gdFveJCrlKx4gAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sns.boxplot(data);" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Boxplot of frequency table" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.780578Z", | |
"start_time": "2020-04-04T14:34:01.755433Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>range</th>\n", | |
" <th>value</th>\n", | |
" <th>freq_A</th>\n", | |
" <th>freq_B</th>\n", | |
" <th>freq_C</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>[1, 6]</td>\n", | |
" <td>3.5</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>[6, 11]</td>\n", | |
" <td>8.5</td>\n", | |
" <td>10</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>[11, 16]</td>\n", | |
" <td>13.5</td>\n", | |
" <td>26</td>\n", | |
" <td>24</td>\n", | |
" <td>24</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>[16, 21]</td>\n", | |
" <td>18.5</td>\n", | |
" <td>28</td>\n", | |
" <td>40</td>\n", | |
" <td>40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>[21, 26]</td>\n", | |
" <td>23.5</td>\n", | |
" <td>25</td>\n", | |
" <td>20</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>[26, 31]</td>\n", | |
" <td>28.5</td>\n", | |
" <td>11</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>[31, 36]</td>\n", | |
" <td>33.5</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" range value freq_A freq_B freq_C\n", | |
"0 [1, 6] 3.5 0 3 0\n", | |
"1 [6, 11] 8.5 10 5 4\n", | |
"2 [11, 16] 13.5 26 24 24\n", | |
"3 [16, 21] 18.5 28 40 40\n", | |
"4 [21, 26] 23.5 25 20 20\n", | |
"5 [26, 31] 28.5 11 4 4\n", | |
"6 [31, 36] 33.5 0 4 8" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data_arr = np.array([\n", | |
"[np.array([1, 6]), 0, 3, 0],\n", | |
"[np.array([6, 11]), 10, 5, 4],\n", | |
"[np.array([11, 16]), 26, 24, 24],\n", | |
"[np.array([16, 21]), 28, 40, 40],\n", | |
"[np.array([21, 26]), 25, 20, 20],\n", | |
"[np.array([26, 31]), 11, 4, 4],\n", | |
"[np.array([31, 36]), 0, 4, 8]])\n", | |
"data = pd.DataFrame(data_arr, columns=['range', 'freq_A', 'freq_B', 'freq_C'])\n", | |
"x = np.arange(3.5, 38.5, 5)\n", | |
"data.insert(1,'value', x)\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.787975Z", | |
"start_time": "2020-04-04T14:34:01.783497Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"array_a = data.freq_A.to_list()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.961771Z", | |
"start_time": "2020-04-04T14:34:01.793444Z" | |
}, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAIT0lEQVR4nO3dT4jndR3H8dfb3aLVCpKVRSZrsgk6dKhYOkl4KcqLdZE82ckONUy3oktegoiKZA6BkWDQH4L+efBQh6BO4a5Imlr9ECUHXdcWUtktST8d5rcwLrtrOzO/39vfbx6Py/zmO38+n8985vfc33zmz9YYIwDM31XdEwA4qAQYoIkAAzQRYIAmAgzQ5PCVvPLRo0fH6urqjKYCsJxOnjz5whjjuguvX1GAV1dXc+LEif2bFcABUFVPX+y6IwiAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaXNH/CcfrbW5uZjKZdE+j1dbWVpJkZWWleSYcNGtra1lfX++exp4I8B5MJpM8/OjjefXqa7un0ubQ2X8lSZ77j08l5ufQ2TPdU9gX7jV79OrV1+bcB2/pnkabI088kCQH+mPA/J3/vFt0zoABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZrMJcCbm5vZ3Nycx1AA+2qW/To8k/d6gclkMo9hAPbdLPvlCAKgiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoMnheQyytbWVc+fOZWNjYx7Dzc1kMslVr4zuacCBc9W/X8xk8tJcmjKZTHLkyJGZvO83fARcVXdW1YmqOnH69OmZTALgIHrDR8BjjHuS3JMkx48f39XDvZWVlSTJ3XffvZs3f9Pa2NjIySdPdU8DDpzX3vbOrN14bC5NmeWjbGfAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigyeF5DLK2tjaPYQD23Sz7NZcAr6+vz2MYgH03y345ggBoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAk8PdE1h0h86eyZEnHuieRptDZ/+ZJAf6Y8D8HTp7Jsmx7mnsmQDvwdraWvcU2m1t/TdJsrKy+HcGFsmxpbj/CfAerK+vd08BWGDOgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNaozx/79y1ekkT+9yrKNJXtjl276ZLeu6kuVdm3UtnkVf23vHGNddePGKArwXVXVijHF8LoPN0bKuK1netVnX4lnWtTmCAGgiwABN5hnge+Y41jwt67qS5V2bdS2epVzb3M6AAXg9RxAATQQYoMnMA1xVn6qqv1bVpKq+Ouvx5qmqnqqqR6rq4ao60T2f3aqqe6vq+ap6dMe1a6vqd1X19+nTd3XOcbcusba7qmprum8PV9UtnXPcjaq6oap+X1WPVdVfqmpjen2h9+0y61r4PbuYmZ4BV9WhJH9L8okkzyR5MMntY4zHZjboHFXVU0mOjzEW+QfEU1UfT/Jykh+NMT40vfatJGfGGN+c/sP5rjHGVzrnuRuXWNtdSV4eY3y7c257UVXXJ7l+jPFQVb0jyckkn0ny+Szwvl1mXbdlwffsYmb9CPhjSSZjjCfHGK8k+VmSW2c8JldojPGHJGcuuHxrkvumt+/L9p1g4VxibQtvjPHsGOOh6e2XkjyeZCULvm+XWddSmnWAV5L8Y8fzz2S5PpgjyW+r6mRV3dk9mX12bIzx7PT2c0mOdU5mBr5UVX+eHlEs1JfpF6qq1SQfSfKnLNG+XbCuZIn27DzfhNubm8YYH03y6SRfnH65u3TG9jnVMv284veTvD/Jh5M8m+Q7vdPZvap6e5JfJPnyGOPFnS9b5H27yLqWZs92mnWAt5LcsOP5d0+vLYUxxtb06fNJfpXtI5dlcWp6Hnf+XO755vnsmzHGqTHGq2OM15L8IAu6b1X1lmxH6sdjjF9OLy/8vl1sXcuyZxeadYAfTPKBqnpfVb01yeeS3D/jMeeiqq6ZfpMgVXVNkk8mefTyb7VQ7k9yx/T2HUl+0ziXfXU+UFOfzQLuW1VVkh8meXyM8d0dL1rofbvUupZhzy5m5r8JN/1xke8lOZTk3jHGN2Y64JxU1Y3ZftSbJIeT/GRR11ZVP01yc7b/5N+pJF9P8uskP0/ynmz/CdLbxhgL982sS6zt5mx/KTuSPJXkCzvOTRdCVd2U5I9JHkny2vTy17J9Xrqw+3aZdd2eBd+zi/GryABNfBMOoIkAAzQRYIAmAgzQRIABmggwQBMBBmjyP5fi3RYCi2GrAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sns.boxplot(array_a);" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Comparing body sizes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:01.983329Z", | |
"start_time": "2020-04-04T14:34:01.967763Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"data = pd.DataFrame({'species A: body size [mm]': [99.0, 103.0, 106.0, 93.0, 98.0],\n", | |
" 'species B: body size [cm]': [9.6, 10.2, 10.1, 9.7, 11.6]})\n", | |
"data = data.T\n", | |
"data.iloc[0] = data.iloc[0] / 10\n", | |
"data = data.rename({'species A: body size [mm]': 'species A: body size [cm]'}, axis=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.015783Z", | |
"start_time": "2020-04-04T14:34:01.987230Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The variance of specias A is 0.1976; The variance of specias B is 0.5144\n" | |
] | |
} | |
], | |
"source": [ | |
"var_a = np.var(data.iloc[0]).round(4)\n", | |
"var_b = np.var(data.iloc[1]).round(4)\n", | |
"description = data.T.describe()\n", | |
"\n", | |
"# print(f'{description}')\n", | |
"print(f'The variance of specias A is {var_a}; The variance of specias B is {var_b}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Change of average heights" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.028845Z", | |
"start_time": "2020-04-04T14:34:02.019997Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The new average weight is 73.1429 and the change is 1.1583 %\n" | |
] | |
} | |
], | |
"source": [ | |
"new = (74*28 -24)/28 \n", | |
"change = 1 - new / 74 \n", | |
"print(f'The new average weight is {round(new,4)} and the change is {round(change * 100, 4)} %')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Categorical hours of listening music" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:35:19.812848Z", | |
"start_time": "2020-04-04T14:35:19.800637Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>2000</th>\n", | |
" <th>2019</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0 , 1</th>\n", | |
" <td>5</td>\n", | |
" <td>35</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1 , 2</th>\n", | |
" <td>3</td>\n", | |
" <td>24</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2 , 3</th>\n", | |
" <td>10</td>\n", | |
" <td>13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3 , 4</th>\n", | |
" <td>9</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4 , 5</th>\n", | |
" <td>13</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5 , 6</th>\n", | |
" <td>18</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6 , 7</th>\n", | |
" <td>21</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7 , 8</th>\n", | |
" <td>27</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8 , 9</th>\n", | |
" <td>10</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9 , 10</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 2000 2019\n", | |
"0 , 1 5 35\n", | |
"1 , 2 3 24\n", | |
"2 , 3 10 13\n", | |
"3 , 4 9 8\n", | |
"4 , 5 13 9\n", | |
"5 , 6 18 4\n", | |
"6 , 7 21 2\n", | |
"7 , 8 27 0\n", | |
"8 , 9 10 0\n", | |
"9 , 10 5 2" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.DataFrame({\n", | |
"'0 , 1': [5 ,35],\n", | |
"'1 , 2': [3 ,24],\n", | |
"'2 , 3': [10 ,13],\n", | |
"'3 , 4': [9, 8],\n", | |
"'4 , 5': [13, 9],\n", | |
"'5 , 6': [18, 4],\n", | |
"'6 , 7': [21, 2],\n", | |
"'7 , 8': [27, 0],\n", | |
"'8 , 9': [10, 0],\n", | |
"'9 , 10': [5, 2]})\n", | |
"df = df.T\n", | |
"df.columns= ['2000', '2019']\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## a) mean and median for each year" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.063673Z", | |
"start_time": "2020-04-04T14:34:02.051695Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The mean for 2000 is 12.1, for 2019 is 9.7. The median for 2000 is 10.0, for 2019 is 6.0\n" | |
] | |
} | |
], | |
"source": [ | |
"mean00 = np.mean(df['2000'])\n", | |
"mean19 = np.mean(df['2019'])\n", | |
"\n", | |
"median00 = np.median(df['2000'])\n", | |
"median19 = np.median(df['2019'])\n", | |
"\n", | |
"print(f'The mean for 2000 is {mean00}, for 2019 is {mean19}. The median for 2000 is {median00}, for 2019 is {median19}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## b) modal class for each year" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.078828Z", | |
"start_time": "2020-04-04T14:34:02.069376Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The modal for 2000 is 5, and for 2019 is 0\n" | |
] | |
} | |
], | |
"source": [ | |
"mode00 = stats.mode(df['2000'])[0][0]\n", | |
"mode19 = stats.mode(df['2019'])[0][0]\n", | |
"\n", | |
"print(f'The modal for 2000 is {mode00}, and for 2019 is {mode19}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## c) standard deviation for each year" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.091141Z", | |
"start_time": "2020-04-04T14:34:02.082595Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The standard deviation for 2000 is 7.341, and for 2019 is 10.9458\n" | |
] | |
} | |
], | |
"source": [ | |
"std00 = np.std(df['2000']).round(4)\n", | |
"std19 = np.std(df['2019']).round(4)\n", | |
"\n", | |
"print(f'The standard deviation for 2000 is {std00}, and for 2019 is {std19}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## d) compare with boxplots" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.617822Z", | |
"start_time": "2020-04-04T14:34:02.095825Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAGpCAYAAACwHSBdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAARuUlEQVR4nO3df6zd933X8dc7drc63aomTjDVbTtT7qRSTaNjhg2pTBNqi4mQOhCqNmmqCxLdJLh1O5BWJiQGotJAMHAv0lBQN2xpo6tot1UoCku7aSsSlNolI2HJxqGkWq/SJLW7tsFZi50Pf5yvJdfyz/i+zw/fx0OKfP09557P55PvPefp8/0ef11jjADAbrtr2RMA4M4kMAC0EBgAWggMAC0EBoAW+2/lzvfdd984fPhw01QAWDdnzpz50hjj/qvddkuBOXz4cE6fPr07swJg7VXV5691m0NkALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANDilv49GG7d9vZ2ZrPZsqdxXTs7O0mSjY2NJc9kdWxubmZra2vZ04C1JjDNZrNZHn38iVy8+95lT+Wa9p3/SpLki1/345Ak+86fW/YU4I7gFWUBLt59b154wwPLnsY1HXjyoSRZ6Tku0qX/H8DtcQ4GgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWuyZwGxvb2d7e3vZ0wBuk+fy+ti/7Aksymw2W/YUgF3gubw+9sw7GAAWS2AAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0GL/siewKDs7O3nhhRdy/PjxhY47m81y1zfGQsfk9tz1R1/NbPa1hf+scHNms1kOHDiw7GlwE274Dqaq3l1Vp6vq9HPPPbeIOQFwB7jhO5gxxoNJHkySI0eOrO0fxTc2NpIkJ06cWOi4x48fz5nPPbPQMbk9L778ldl8/aGF/6xwc7yzXB/OwQDQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGgxf5lT2BRNjc3lz0FYBd4Lq+PPROYra2tZU8B2AWey+vDITIAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtNi/7AnsBfvOn8uBJx9a9jSuad/5s0my0nNcpH3nzyU5tOxpwNoTmGabm5vLnsIN7excSJJsbHhRnTu0FvsNVp3ANNva2lr2FACWwjkYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGhRY4ybv3PVc0k+f5tj3pfkS7f5GKvCWlbTnbKWO2UdibWsqt1Yy3eMMe6/2g23FJjdUFWnxxhHFjpoE2tZTXfKWu6UdSTWsqq61+IQGQAtBAaAFssIzINLGLOLtaymO2Utd8o6EmtZVa1rWfg5GFhFVfXaJKeSHEoykjw4xjhRVfcm+eUkh5M8leQdY4wvV1UlOZHkgSTnk7xrjPHZ6bGOJfkH00P/kzHGyUWuBVaFwECSqnp1klePMT5bVd+e5EySH0ryriTnxhg/U1XvT3LPGOMnq+qBJFuZB+b7kpwYY3zfFKTTSY5kHqozSb53jPHlxa8Klss5GEgyxnj60juQMcbXkjyRZCPJ25NcegdyMvPoZNp+asz91ySvmiL1l5I8MsY4N0XlkSRHF7gUWBkCA1eoqsNJvifJp5McGmM8Pd30xcwPoSXz+PzBZd/2hWnbtbbDniMwcJmq+rYkH03y3jHGVy+/bcyPJzumDDdJYGBSVS/LPC6/OMb42LT5menQ16XzNM9O23eSvPayb3/NtO1a22HPERhIMn0q7ENJnhhj/OxlN308ybHp62NJfu2y7e+sue9P8pXpUNp/SvK2qrqnqu5J8rZpG+w5PkUGSarqzUk+leSxJC9Om38q8/MwH0nyusyvw/eOMca5KUj/OvMT+OeT/I0xxunpsf7m9L1J8oExxi8sbCGwQgQGgBYOkQHQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAt9t/Kne+7775x+PDhpqkAsG7OnDnzpTHG/Ve77ZYCc/jw4Zw+fXp3ZgXA2quqz1/rNofIAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQItbulz/Ktre3s5sNlv2NK5pZ2cnSbKxsbHkmSzW5uZmtra2lj0NYInWPjCz2SyPPv5ELt5977KnclX7zn8lSfLFr6/9/+qbtu/8uWVPAVgBd8Sr3sW7780Lb3hg2dO4qgNPPpQkKzu/DpfWDOxtzsEA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoMVCA7O9vZ3t7e1FDgk08XzmRvYvcrDZbLbI4YBGns/ciENkALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQHYg86ePZv3vOc9OXv2bNsYAgOwB508eTKPPfZYTp061TaGwADsMWfPns3DDz+cMUYefvjhtncx+1se9Rp2dnbywgsv5Pjx47v2mLPZLHd9Y+za43H77vqjr2Y2+9qu7mdWz2w2y4EDB5Y9DV6CkydP5sUXX0ySXLx4MadOncr73ve+XR/nhu9gqurdVXW6qk4/99xzuz4BABbrE5/4RC5cuJAkuXDhQh555JGWcW74DmaM8WCSB5PkyJEjt/VWYWNjI0ly4sSJ23mYb3L8+PGc+dwzu/Z43L4XX/7KbL7+0K7uZ1aPd6jr6y1veUseeuihXLhwIfv3789b3/rWlnGcgwHYY44dO5a77pq//O/bty/vfOc7W8YRGIA95uDBgzl69GiqKkePHs3BgwdbxlnoSX4AVsOxY8fy1FNPtb17SQQGYE86ePBgPvjBD7aO4RAZAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBi/yIH29zcXORwQCPPZ25koYHZ2tpa5HBAI89nbsQhMgBaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC02L/sCeyGfefP5cCTDy17Gle17/zZJFnZ+XXYd/5ckkPLngawZGsfmM3NzWVP4bp2di4kSTY29tIL7qGV3y9Av7UPzNbW1rKnAMBVOAcDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC1qjHHzd656Lsnnb3PM+5J86TYfY1VYy2q6U9Zyp6wjsZZVtRtr+Y4xxv1Xu+GWArMbqur0GOPIQgdtYi2r6U5Zy52yjsRaVlX3WhwiA6CFwADQYhmBeXAJY3axltV0p6zlTllHYi2rqnUtCz8HA8De4BAZAC0EBoAWCw1MVR2tqt+rqllVvX+RY++2qnqqqh6rqker6vSy53Mrqurnq+rZqnr8sm33VtUjVfW/pl/vWeYcb8Y11vHTVbUz7ZdHq+qBZc7xZlXVa6vqN6vqd6vqf1bV8Wn7Wu2X66xj7fZLVb28qv5bVf3OtJZ/NG3/E1X16el17Jer6luWPdcbuc5a/l1V/Z/L9subdnXcRZ2Dqap9SX4/yVuTfCHJZ5L8yBjjdxcygV1WVU8lOTLGWLu/cFVVP5Dk+SSnxhjfNW37Z0nOjTF+Zor/PWOMn1zmPG/kGuv46STPjzH++TLndquq6tVJXj3G+GxVfXuSM0l+KMm7skb75TrreEfWbL9UVSV5xRjj+ap6WZL/nOR4kp9I8rExxoer6t8k+Z0xxs8tc643cp21/HiS/zjG+A8d4y7yHcyfSzIbY3xujPGNJB9O8vYFjs9kjPHbSc5dsfntSU5OX5/M/EVhpV1jHWtpjPH0GOOz09dfS/JEko2s2X65zjrWzph7fvrty6b/RpK/mOTSC/LK75PkumtptcjAbCT5g8t+/4Ws6Q/eZCT59ao6U1XvXvZkdsGhMcbT09dfTHJomZO5TX+nqv7HdAhtpQ8pXU1VHU7yPUk+nTXeL1esI1nD/VJV+6rq0STPJnkkyf9O8odjjAvTXdbmdezKtYwxLu2XD0z75V9W1bfu5phO8r90bx5j/JkkfznJ354O19wRxvy46bp+fv3nkvzJJG9K8nSSf7Hc6dyaqvq2JB9N8t4xxlcvv22d9stV1rGW+2WMcXGM8aYkr8n8KMwbljyll+zKtVTVdyX5+5mv6c8muTfJrh5+XWRgdpK89rLfv2batpbGGDvTr88m+ZXMf/jW2TPT8fNLx9GfXfJ8XpIxxjPTE+nFJP82a7RfpmPjH03yi2OMj02b126/XG0d67xfkmSM8YdJfjPJn0/yqqraP920dq9jl63l6HRIc4wxvp7kF7LL+2WRgflMku+cPoHxLUl+OMnHFzj+rqmqV0wnMFNVr0jytiSPX/+7Vt7Hkxybvj6W5NeWOJeX7NKL8eSvZk32y3QS9kNJnhhj/OxlN63VfrnWOtZxv1TV/VX1qunrA5l/QOmJzF+c//p0t5XfJ8k11/LkZX94qczPJe3qflno3+SfPpr4r5LsS/LzY4wPLGzwXVRVr8/8XUuS7E/yS+u0lqr690l+MPNLdT+T5B8m+dUkH0nyusz/SYZ3jDFW+gT6Ndbxg5kfhhlJnkryY5edw1hZVfXmJJ9K8liSF6fNP5X5+Yu12S/XWcePZM32S1V9d+Yn8fdl/ofxj4wx/vH0/P9w5oeU/nuSH53eAays66zlN5Lcn6SSPJrkxy/7MMDtj+tSMQB0cJIfgBYCA0ALgQGghcAA0EJgAGghMJBbv5pxVb2hqv5LVX29qv7eFY91vKoenx7nvctYD6wCgYG5C0n+7hjjjUm+P/PL/7wxyfuTfHKM8Z1JPjn9PplfZPM9Sb7p6sDT5Tf+VuZ/I/pPJ/krVbW5mCXAahEYyK1fzXiM8ewY4zNJ/t8VD/Wnknx6jHF+uiDibyX5awtYAqwcgYEr3ObVjB9P8heq6mBV3Z3kgXzzNfhgz9h/47vA3nHlVYDnl2iaG2OMqrrupS/GGE9U1T9N8utJ/m/ml9+42DhlWFnewcBkt65mPMb40Bjje8cYP5Dky5n/S66w5wgMZHevZlxVf2z69XWZn3/5pd2dLawHF7uE3PrVjKvqjyc5neSV0/2fT/LG6bDap5IczPwDAD8xxvjkQhcDK0JgAGjhEBkALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0+P8bQY0+qFOt1QAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<Figure size 504x504 with 2 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"f, ax = plt.subplots(2, 1, figsize=(7, 7), sharex=True)\n", | |
"\n", | |
"sns.boxplot(df['2000'], ax=ax[0])\n", | |
"sns.boxplot(df['2019'], ax=ax[1]);" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## e) values above 25% all values are" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.634406Z", | |
"start_time": "2020-04-04T14:34:02.622902Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The first qunatile (25%) for 2000 is 6.0, and for 2019 is 2.0\n" | |
] | |
} | |
], | |
"source": [ | |
"fquant00 = df['2000'].quantile(0.25)\n", | |
"fquant19 = df['2019'].quantile(0.25)\n", | |
"\n", | |
"print(f'The first qunatile (25%) for 2000 is {fquant00}, and for 2019 is {fquant19}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# BMI of 50 people" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.869654Z", | |
"start_time": "2020-04-04T14:34:02.853131Z" | |
}, | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame({'16;20': [18, 3],\n", | |
" '20;24': [22, 21],\n", | |
" '24;28': [26, 19],\n", | |
" '28;32': [30, 5],\n", | |
" '32;36': [34, 2], })\n", | |
"df = df.T\n", | |
"df.columns = ['midpoint', 'freq']\n", | |
"df['multiplied'] = df.midpoint * df.freq\n", | |
"# df.append(df.sum().rename('Total'))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## a)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.898010Z", | |
"start_time": "2020-04-04T14:34:02.879961Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The mean is 24.56\n" | |
] | |
} | |
], | |
"source": [ | |
"mean = sum(df.multiplied) / sum(df.freq)\n", | |
"print(f'The mean is {mean}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## b) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.913939Z", | |
"start_time": "2020-04-04T14:34:02.901070Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The standard deviation is 3.5562\n" | |
] | |
} | |
], | |
"source": [ | |
"freq_list = np.array([])\n", | |
"for i, num in enumerate(df.midpoint):\n", | |
" freq_list = np.append(freq_list, np.repeat(num, df.freq[i]))\n", | |
"\n", | |
"freq_list\n", | |
"new = pd.DataFrame(freq_list)\n", | |
"std = np.std(freq_list).round(4)\n", | |
"print(f'The standard deviation is {std}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## c) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.924903Z", | |
"start_time": "2020-04-04T14:34:02.918062Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Above value 22.0 lay 60% of the distirbution mass\n" | |
] | |
} | |
], | |
"source": [ | |
"np.std(freq_list)\n", | |
"quant40 = np.quantile(freq_list, [0.40])\n", | |
"print(f'Above value {quant40[0]} lay 60% of the distirbution mass')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Frequency table of life of bacteria" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-04-04T14:34:02.957383Z", | |
"start_time": "2020-04-04T14:34:02.942876Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Under value 75.0 are 10% of all cases of the distirbution\n" | |
] | |
} | |
], | |
"source": [ | |
"midpoints = np.array([100, 350/2, 450 /2, 550])\n", | |
"freqs = np.array([30, 40, 30, 50])\n", | |
"d = 100 \n", | |
"N = sum(freqs)\n", | |
"rel_freqs = freqs/N \n", | |
"cum_freqs = np.cumsum(rel_freqs)*N\n", | |
"\n", | |
"x10 = 50 + d/freqs[1] * (N* rel_freqs[1] - cum_freqs[0])\n", | |
"print(f'Under value {x10} are 10% of all cases of the distirbution')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"hide_input": false, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": true | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment