Skip to content

Instantly share code, notes, and snippets.

@Createdd
Last active April 4, 2020 15:01
Show Gist options
  • Save Createdd/c86103e45422cab83c808cdb0a885365 to your computer and use it in GitHub Desktop.
Save Createdd/c86103e45422cab83c808cdb0a885365 to your computer and use it in GitHub Desktop.
Exercisesheet Statistics on Variability and Boxplots
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Measures of Variability and Boxplots"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.554474Z",
"start_time": "2020-04-04T14:34:00.430557Z"
}
},
"outputs": [],
"source": [
"import pandas as pd \n",
"import numpy as np \n",
"import matplotlib.pyplot as plt \n",
"import seaborn as sns \n",
"\n",
"import statsmodels\n",
"from scipy import stats"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.561297Z",
"start_time": "2020-04-04T14:34:01.556468Z"
}
},
"outputs": [],
"source": [
"# Display floats with 4 decimal places: %precision covers plain Python floats,\n",
"# np.set_printoptions covers NumPy array output.\n",
"%precision 4\n",
"np.set_printoptions(precision=4)\n",
"\n",
"# Quick manual check of the settings (uncomment to try):\n",
"# 4.123457\n",
"# np.array([1.123456789])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Travel distances over 11 days"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.568171Z",
"start_time": "2020-04-04T14:34:01.564806Z"
}
},
"outputs": [],
"source": [
"# Observed travel distances, one value per day (11 values).\n",
"data = np.array([39, 35, 25, 37, 15, 36, 50, 52, 37, 51, 39])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## a) measures of variation for the given data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.578324Z",
"start_time": "2020-04-04T14:34:01.572201Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The first measure can be the variance, which is 111.2397. The second measure can be the standard deviation, which can be 10.547\n"
]
}
],
"source": [
"variance = np.var(data).round(4)\n",
"std = np.std(data).round(4)\n",
"\n",
"print(f'The first measure can be the variance, which is {variance}. The second measure can be the standard deviation, which can be {std}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## b) boxplot of given data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.752446Z",
"start_time": "2020-04-04T14:34:01.580479Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAKQklEQVR4nO3dX4id+V3H8c93M9Wm/qHupIQlK0QdoRTBVVapVKSWXRmr+AdEFOvmolAFzcZFtNYbFRT0QutuLoT1XxOs1qIWpYSs2Xahd9WsjWbtLjhqhA7b3W3W2squK5P8vJhnZMzfScyZ75mZ1wuGmXPOk3m+8+M873nmyZmkxhgBYPvd1T0AwF4lwABNBBigiQADNBFggCYLt7LxgQMHxuHDh2c0CsDuc+DAgTzxxBNPjDGWr3zslgJ8+PDhnD179s5NBrAHVNWBa93vEgRAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNbun/hIPd6vjx41lZWdny9qurq0mSQ4cOzWqkubW0tJSjR492j7ErCDAkWVlZyblnns2lN9y9pe33vfIfSZLPvra3DqF9r7zcPcKusreePXADl95wd1598zu3tO3+504lyZa33y02vm7uDNeAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCPAOcvz48Rw/frx7DNhTZnncLczkszITKysr3SPAnjPL484ZMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCbbEuCLFy/m4YcfzsWLF7djdwA7wrYE+MSJEzl//nxOnjy5HbsD2BFmHuCLFy/m9OnTGWPk9OnTzoIBJguz3sGJEydy+fLlJMmlS5dy8uTJPPLII7Pe7a60urqaV199NceOHeseZddZWVnJXf89useYe3f91xeysvLFPfUcXFlZyf79+2fyuW96BlxV76mqs1V19qWXXrrlHTz55JNZW1tLkqytreXMmTO3PiXALnTTM+AxxuNJHk+S+++//5ZPER544IGcOnUqa2trWVhYyIMPPngbY5Ikhw4dSpI8+uijzZPsPseOHcvT//JC9xhz7/LrvzJLX3twTz0HZ3m2P/NrwEeOHMldd63vZt++fXnooYdmvUuAHWHmAV5cXMzy8nKqKsvLy1lcXJz1LgF2hJn/JVyyfhZ84cIFZ78Am2xLgBcXF/PYY49tx64Adgy/igzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKDJQvcAbN3S0lL3CLDnzPK4E+Ad5OjRo90jwJ4zy+POJQiAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQ
ADNFnoHgDmxb5XXs7+505tcduLSbLl7XeLfa+8nORg9xi7hgBDkqWlpVvafnV1LUly6NBei9HBW14rrk+AIcnRo0e7R2APcg0YoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0KTGGFvfuOqlJP92m/s6kORzt/lnt4sZ7wwz3jk7YU4z3tjnkmSMsXzlA7cU4P+Pqjo7xrh/W3Z2m8x4Z5jxztkJc5rx9rkEAdBEgAGabGeAH9/Gfd0uM94ZZrxzdsKcZrxN23YNGID/yyUIgCYCDNBkJgGuqj+oqher6plN9/1yVa1W1bnp7Z2z2PcW5/vqqnqqqj5dVf9YVcem+++uqjNV9U/T+6/qmvEmc87TWr6+qv6mqv5+mvFXpvu/pqo+WVUrVfWnVfUlczjjB6rqXzet431dM26adV9VfaqqPjrdnpt1vMGMc7WOVXWhqs5Ps5yd7purY3vDrM6AP5DkqhcdJ3n/GOO+6e3UjPa9FWtJfnaM8ZYkb03yU1X1liS/kORjY4yvT/Kx6Xan682ZzM9avpbkHWOMb0xyX5Llqnprkt+YZlxK8u9J3j2HMybJz21ax3N9I/6vY0me3XR7ntZxw5UzJvO3jt85zbLx2t95O7aTzCjAY4xPJHl5Fp/7ThhjPD/G+Lvp4y9m/cl0KMn3JzkxbXYiyQ/0TLjuBnPOjbHuP6ebr5veRpJ3JPmz6f7WtbzBjHOlqu5N8j1Jfm+6XZmjdUyunnEHmatje8N2XwP+6ar6h+kSxVz8CFBVh5N8U5JPJjk4xnh+euizSQ42jXWVK+ZM5mgtpx9JzyV5McmZJP+c5PNjjLVpk8+k+RvHlTOOMTbW8demdXx/VX1p44hJ8ttJfj7J5en2YuZsHXP1jBvmaR1Hkr+uqqer6j3TfXN5bG9ngH8nyddl/UfA55P85jbu+5qq6suT/HmSnxljfGHzY2P99XlzcZZ0jTnnai3HGJfGGPcluTfJtyZ5c+c813LljFX1DUnel/VZvyXJ3Une2zVfVX1vkhfHGE93zXAzN5hxbtZx8u1jjG9O8t1Zv2z3HZsfnKdje9sCPMZ4YToILif53awfqG2q6nVZj9oHxxh/Md39QlXdMz1+T9bPllpda855W8sNY4zPJ3kqybcleWNVLUwP3ZtktW2wTTbNuDxd4hljjNeS/GF61/FtSb6vqi4k+VDWLz08mvlax6tmrKo/mrN1zBhjdXr/YpKPTPPM3bGdbGOAN774yQ8meeZ6227DLJXk95M8O8b4rU0P/VWSI9PHR5L85XbPttn15pyztXxTVb1x+nh/kgezfq36qSQ/NG3WupbXmfG5TQdkZf2aYNs6jjHeN8a4d4xxOMmPJPn4GOPHMkfreJ0Z3zVP61hVX1ZVX7HxcZLvmuaZq2N7w8LNN7l1VfUnSd6e5EBVfSbJLyV5+/TylJHkQpKfmMW+t+htSX48yfnpumCS/GKSX0/y4ap6d9b/2c0fbppvw/Xm/NE5Wst7kpyoqn1Z/4b+4THGR6vq00k+VFW/muRTWf9GMm8zfryq3pSkkpxL8pONM17PezM/63g9H5yjdTyY5CPr3wuykOSPxxinq+pvM1/HdhK/igzQxm/CATQRYIAmAgzQRIABmggwQBMBBmgiwABN/gdFveJCrlKx4gAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.boxplot(data);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Boxplot of frequency table"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.780578Z",
"start_time": "2020-04-04T14:34:01.755433Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>range</th>\n",
" <th>value</th>\n",
" <th>freq_A</th>\n",
" <th>freq_B</th>\n",
" <th>freq_C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[1, 6]</td>\n",
" <td>3.5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[6, 11]</td>\n",
" <td>8.5</td>\n",
" <td>10</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>[11, 16]</td>\n",
" <td>13.5</td>\n",
" <td>26</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>[16, 21]</td>\n",
" <td>18.5</td>\n",
" <td>28</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[21, 26]</td>\n",
" <td>23.5</td>\n",
" <td>25</td>\n",
" <td>20</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>[26, 31]</td>\n",
" <td>28.5</td>\n",
" <td>11</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>[31, 36]</td>\n",
" <td>33.5</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" range value freq_A freq_B freq_C\n",
"0 [1, 6] 3.5 0 3 0\n",
"1 [6, 11] 8.5 10 5 4\n",
"2 [11, 16] 13.5 26 24 24\n",
"3 [16, 21] 18.5 28 40 40\n",
"4 [21, 26] 23.5 25 20 20\n",
"5 [26, 31] 28.5 11 4 4\n",
"6 [31, 36] 33.5 0 4 8"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_arr = np.array([\n",
"[np.array([1, 6]), 0, 3, 0],\n",
"[np.array([6, 11]), 10, 5, 4],\n",
"[np.array([11, 16]), 26, 24, 24],\n",
"[np.array([16, 21]), 28, 40, 40],\n",
"[np.array([21, 26]), 25, 20, 20],\n",
"[np.array([26, 31]), 11, 4, 4],\n",
"[np.array([31, 36]), 0, 4, 8]])\n",
"data = pd.DataFrame(data_arr, columns=['range', 'freq_A', 'freq_B', 'freq_C'])\n",
"x = np.arange(3.5, 38.5, 5)\n",
"data.insert(1,'value', x)\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.787975Z",
"start_time": "2020-04-04T14:34:01.783497Z"
}
},
"outputs": [],
"source": [
"array_a = data.freq_A.to_list()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.961771Z",
"start_time": "2020-04-04T14:34:01.793444Z"
},
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAIT0lEQVR4nO3dT4jndR3H8dfb3aLVCpKVRSZrsgk6dKhYOkl4KcqLdZE82ckONUy3oktegoiKZA6BkWDQH4L+efBQh6BO4a5Imlr9ECUHXdcWUtktST8d5rcwLrtrOzO/39vfbx6Py/zmO38+n8985vfc33zmz9YYIwDM31XdEwA4qAQYoIkAAzQRYIAmAgzQ5PCVvPLRo0fH6urqjKYCsJxOnjz5whjjuguvX1GAV1dXc+LEif2bFcABUFVPX+y6IwiAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaXNH/CcfrbW5uZjKZdE+j1dbWVpJkZWWleSYcNGtra1lfX++exp4I8B5MJpM8/OjjefXqa7un0ubQ2X8lSZ77j08l5ufQ2TPdU9gX7jV79OrV1+bcB2/pnkabI088kCQH+mPA/J3/vFt0zoABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZrMJcCbm5vZ3Nycx1AA+2qW/To8k/d6gclkMo9hAPbdLPvlCAKgiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoMnheQyytbWVc+fOZWNjYx7Dzc1kMslVr4zuacCBc9W/X8xk8tJcmjKZTHLkyJGZvO83fARcVXdW1YmqOnH69OmZTALgIHrDR8BjjHuS3JMkx48f39XDvZWVlSTJ3XffvZs3f9Pa2NjIySdPdU8DDpzX3vbOrN14bC5NmeWjbGfAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigyeF5DLK2tjaPYQD23Sz7NZcAr6+vz2MYgH03y345ggBoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNBBigiQADNBFggCYCDNBEgAGaCDBAk8PdE1h0h86eyZEnHuieRptDZ/+ZJAf6Y8D8HTp7Jsmx7mnsmQDvwdraWvcU2m1t/TdJsrKy+HcGFsmxpbj/CfAerK+vd08BWGDOgAGaCDBAEwEGaCLAAE0EGKCJAAM0EWCAJgIM0ESAAZoIMEATAQZoIsAATQQYoIkAAzQRYIAmAgzQRIABmggwQBMBBmgiwABNaozx/79y1ekkT+9yrKNJXtjl276ZLeu6kuVdm3UtnkVf23vHGNddePGKArwXVXVijHF8Lo
PN0bKuK1netVnX4lnWtTmCAGgiwABN5hnge+Y41jwt67qS5V2bdS2epVzb3M6AAXg9RxAATQQYoMnMA1xVn6qqv1bVpKq+Ouvx5qmqnqqqR6rq4ao60T2f3aqqe6vq+ap6dMe1a6vqd1X19+nTd3XOcbcusba7qmprum8PV9UtnXPcjaq6oap+X1WPVdVfqmpjen2h9+0y61r4PbuYmZ4BV9WhJH9L8okkzyR5MMntY4zHZjboHFXVU0mOjzEW+QfEU1UfT/Jykh+NMT40vfatJGfGGN+c/sP5rjHGVzrnuRuXWNtdSV4eY3y7c257UVXXJ7l+jPFQVb0jyckkn0ny+Szwvl1mXbdlwffsYmb9CPhjSSZjjCfHGK8k+VmSW2c8JldojPGHJGcuuHxrkvumt+/L9p1g4VxibQtvjPHsGOOh6e2XkjyeZCULvm+XWddSmnWAV5L8Y8fzz2S5PpgjyW+r6mRV3dk9mX12bIzx7PT2c0mOdU5mBr5UVX+eHlEs1JfpF6qq1SQfSfKnLNG+XbCuZIn27DzfhNubm8YYH03y6SRfnH65u3TG9jnVMv284veTvD/Jh5M8m+Q7vdPZvap6e5JfJPnyGOPFnS9b5H27yLqWZs92mnWAt5LcsOP5d0+vLYUxxtb06fNJfpXtI5dlcWp6Hnf+XO755vnsmzHGqTHGq2OM15L8IAu6b1X1lmxH6sdjjF9OLy/8vl1sXcuyZxeadYAfTPKBqnpfVb01yeeS3D/jMeeiqq6ZfpMgVXVNkk8mefTyb7VQ7k9yx/T2HUl+0ziXfXU+UFOfzQLuW1VVkh8meXyM8d0dL1rofbvUupZhzy5m5r8JN/1xke8lOZTk3jHGN2Y64JxU1Y3ZftSbJIeT/GRR11ZVP01yc7b/5N+pJF9P8uskP0/ynmz/CdLbxhgL982sS6zt5mx/KTuSPJXkCzvOTRdCVd2U5I9JHkny2vTy17J9Xrqw+3aZdd2eBd+zi/GryABNfBMOoIkAAzQRYIAmAgzQRIABmggwQBMBBmjyP5fi3RYCi2GrAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.boxplot(array_a);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing body sizes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:01.983329Z",
"start_time": "2020-04-04T14:34:01.967763Z"
}
},
"outputs": [],
"source": [
"data = pd.DataFrame({'species A: body size [mm]': [99.0, 103.0, 106.0, 93.0, 98.0],\n",
" 'species B: body size [cm]': [9.6, 10.2, 10.1, 9.7, 11.6]})\n",
"data = data.T\n",
"data.iloc[0] = data.iloc[0] / 10\n",
"data = data.rename({'species A: body size [mm]': 'species A: body size [cm]'}, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.015783Z",
"start_time": "2020-04-04T14:34:01.987230Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The variance of specias A is 0.1976; The variance of specias B is 0.5144\n"
]
}
],
"source": [
"var_a = np.var(data.iloc[0]).round(4)\n",
"var_b = np.var(data.iloc[1]).round(4)\n",
"description = data.T.describe()\n",
"\n",
"# print(f'{description}')\n",
"print(f'The variance of specias A is {var_a}; The variance of specias B is {var_b}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Change of average heights"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.028845Z",
"start_time": "2020-04-04T14:34:02.019997Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The new average weight is 73.1429 and the change is 1.1583 %\n"
]
}
],
"source": [
"new = (74*28 -24)/28 \n",
"change = 1 - new / 74 \n",
"print(f'The new average weight is {round(new,4)} and the change is {round(change * 100, 4)} %')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hours of listening to music, grouped into classes"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:35:19.812848Z",
"start_time": "2020-04-04T14:35:19.800637Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2000</th>\n",
" <th>2019</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 , 1</th>\n",
" <td>5</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 , 2</th>\n",
" <td>3</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 , 3</th>\n",
" <td>10</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 , 4</th>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 , 5</th>\n",
" <td>13</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 , 6</th>\n",
" <td>18</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 , 7</th>\n",
" <td>21</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 , 8</th>\n",
" <td>27</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 , 9</th>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 , 10</th>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 2000 2019\n",
"0 , 1 5 35\n",
"1 , 2 3 24\n",
"2 , 3 10 13\n",
"3 , 4 9 8\n",
"4 , 5 13 9\n",
"5 , 6 18 4\n",
"6 , 7 21 2\n",
"7 , 8 27 0\n",
"8 , 9 10 0\n",
"9 , 10 5 2"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\n",
"'0 , 1': [5 ,35],\n",
"'1 , 2': [3 ,24],\n",
"'2 , 3': [10 ,13],\n",
"'3 , 4': [9, 8],\n",
"'4 , 5': [13, 9],\n",
"'5 , 6': [18, 4],\n",
"'6 , 7': [21, 2],\n",
"'7 , 8': [27, 0],\n",
"'8 , 9': [10, 0],\n",
"'9 , 10': [5, 2]})\n",
"df = df.T\n",
"df.columns= ['2000', '2019']\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## a) mean and median for each year"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.063673Z",
"start_time": "2020-04-04T14:34:02.051695Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The mean for 2000 is 12.1, for 2019 is 9.7. The median for 2000 is 10.0, for 2019 is 6.0\n"
]
}
],
"source": [
"mean00 = np.mean(df['2000'])\n",
"mean19 = np.mean(df['2019'])\n",
"\n",
"median00 = np.median(df['2000'])\n",
"median19 = np.median(df['2019'])\n",
"\n",
"print(f'The mean for 2000 is {mean00}, for 2019 is {mean19}. The median for 2000 is {median00}, for 2019 is {median19}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## b) modal class for each year"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.078828Z",
"start_time": "2020-04-04T14:34:02.069376Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The modal for 2000 is 5, and for 2019 is 0\n"
]
}
],
"source": [
"mode00 = stats.mode(df['2000'])[0][0]\n",
"mode19 = stats.mode(df['2019'])[0][0]\n",
"\n",
"print(f'The modal for 2000 is {mode00}, and for 2019 is {mode19}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## c) standard deviation for each year"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.091141Z",
"start_time": "2020-04-04T14:34:02.082595Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The standard deviation for 2000 is 7.341, and for 2019 is 10.9458\n"
]
}
],
"source": [
"std00 = np.std(df['2000']).round(4)\n",
"std19 = np.std(df['2019']).round(4)\n",
"\n",
"print(f'The standard deviation for 2000 is {std00}, and for 2019 is {std19}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## d) compare with boxplots"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.617822Z",
"start_time": "2020-04-04T14:34:02.095825Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAGpCAYAAACwHSBdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAARuUlEQVR4nO3df6zd933X8dc7drc63aomTjDVbTtT7qRSTaNjhg2pTBNqi4mQOhCqNmmqCxLdJLh1O5BWJiQGotJAMHAv0lBQN2xpo6tot1UoCku7aSsSlNolI2HJxqGkWq/SJLW7tsFZi50Pf5yvJdfyz/i+zw/fx0OKfP09557P55PvPefp8/0ef11jjADAbrtr2RMA4M4kMAC0EBgAWggMAC0EBoAW+2/lzvfdd984fPhw01QAWDdnzpz50hjj/qvddkuBOXz4cE6fPr07swJg7VXV5691m0NkALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANDilv49GG7d9vZ2ZrPZsqdxXTs7O0mSjY2NJc9kdWxubmZra2vZ04C1JjDNZrNZHn38iVy8+95lT+Wa9p3/SpLki1/345Ak+86fW/YU4I7gFWUBLt59b154wwPLnsY1HXjyoSRZ6Tku0qX/H8DtcQ4GgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWuyZwGxvb2d7e3vZ0wBuk+fy+ti/7Aksymw2W/YUgF3gubw+9sw7GAAWS2AAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0GL/siewKDs7O3nhhRdy/PjxhY47m81y1zfGQsfk9tz1R1/NbPa1hf+scHNms1kOHDiw7GlwE274Dqaq3l1Vp6vq9HPPPbeIOQFwB7jhO5gxxoNJHkySI0eOrO0fxTc2NpIkJ06cWOi4x48fz5nPPbPQMbk9L778ldl8/aGF/6xwc7yzXB/OwQDQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGgxf5lT2BRNjc3lz0FYBd4Lq+PPROYra2tZU8B2AWey+vDITIAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtNi/7AnsBfvOn8uBJx9a9jSuad/5s0my0nNcpH3nzyU5tOxpwNoTmGabm5vLnsIN7excSJJsbHhRnTu0FvsNVp3ANNva2lr2FACWwjkYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGhRY4ybv3PVc0k+f5tj3pfkS7f5GKvCWlbTnbKWO2UdibWsqt1Yy3eMMe6/2g23FJjdUFWnxxhHFjpoE2tZTXfKWu6UdSTWsqq61+IQGQAtBAaAFssIzINLGLOLtaymO2Utd8o6EmtZVa1rWfg5GFhFVfXaJKeSHEoykjw4xjhRVfcm+e
Ukh5M8leQdY4wvV1UlOZHkgSTnk7xrjPHZ6bGOJfkH00P/kzHGyUWuBVaFwECSqnp1klePMT5bVd+e5EySH0ryriTnxhg/U1XvT3LPGOMnq+qBJFuZB+b7kpwYY3zfFKTTSY5kHqozSb53jPHlxa8Klss5GEgyxnj60juQMcbXkjyRZCPJ25NcegdyMvPoZNp+asz91ySvmiL1l5I8MsY4N0XlkSRHF7gUWBkCA1eoqsNJvifJp5McGmM8Pd30xcwPoSXz+PzBZd/2hWnbtbbDniMwcJmq+rYkH03y3jHGVy+/bcyPJzumDDdJYGBSVS/LPC6/OMb42LT5menQ16XzNM9O23eSvPayb3/NtO1a22HPERhIMn0q7ENJnhhj/OxlN308ybHp62NJfu2y7e+sue9P8pXpUNp/SvK2qrqnqu5J8rZpG+w5PkUGSarqzUk+leSxJC9Om38q8/MwH0nyusyvw/eOMca5KUj/OvMT+OeT/I0xxunpsf7m9L1J8oExxi8sbCGwQgQGgBYOkQHQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAt9t/Kne+7775x+PDhpqkAsG7OnDnzpTHG/Ve77ZYCc/jw4Zw+fXp3ZgXA2quqz1/rNofIAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQItbulz/Ktre3s5sNlv2NK5pZ2cnSbKxsbHkmSzW5uZmtra2lj0NYInWPjCz2SyPPv5ELt5977KnclX7zn8lSfLFr6/9/+qbtu/8uWVPAVgBd8Sr3sW7780Lb3hg2dO4qgNPPpQkKzu/DpfWDOxtzsEA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoMVCA7O9vZ3t7e1FDgk08XzmRvYvcrDZbLbI4YBGns/ciENkALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQHYg86ePZv3vOc9OXv2bNsYAgOwB508eTKPPfZYTp061TaGwADsMWfPns3DDz+cMUYefvjhtncx+1se9Rp2dnbywgsv5Pjx47v2mLPZLHd9Y+za43H77vqjr2Y2+9qu7mdWz2w2y4EDB5Y9DV6CkydP5sUXX0ySXLx4MadOncr73ve+XR/nhu9gqurdVXW6qk4/99xzuz4BABbrE5/4RC5cuJAkuXDhQh555JGWcW74DmaM8WCSB5PkyJEjt/VWYWNjI0ly4sSJ23mYb3L8+PGc+dwzu/Z43L4XX/7KbL7+0K7uZ1aPd6jr6y1veUseeuihXLhwIfv3789b3/rWlnGcgwHYY44dO5a77pq//O/bty/vfOc7W8YRGIA95uDBgzl69GiqKkePHs3BgwdbxlnoSX4AVsOxY8fy1FNPtb17SQQGYE86ePBgPvjBD7aO4RAZAC0EBoAWAgNAC4EBoIXAANBCYABoITAAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBi/yIH29zcXORwQCPPZ25koYHZ2tpa5HBAI89nbsQhMgBaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC0EBoAWAgNAC4EBoIXAANBCYABoIT
AAtBAYAFoIDAAtBAaAFgIDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC02L/sCeyGfefP5cCTDy17Gle17/zZJFnZ+XXYd/5ckkPLngawZGsfmM3NzWVP4bp2di4kSTY29tIL7qGV3y9Av7UPzNbW1rKnAMBVOAcDQAuBAaCFwADQQmAAaCEwALQQGABaCAwALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0EBgAWggMAC1qjHHzd656Lsnnb3PM+5J86TYfY1VYy2q6U9Zyp6wjsZZVtRtr+Y4xxv1Xu+GWArMbqur0GOPIQgdtYi2r6U5Zy52yjsRaVlX3WhwiA6CFwADQYhmBeXAJY3axltV0p6zlTllHYi2rqnUtCz8HA8De4BAZAC0EBoAWCw1MVR2tqt+rqllVvX+RY++2qnqqqh6rqker6vSy53Mrqurnq+rZqnr8sm33VtUjVfW/pl/vWeYcb8Y11vHTVbUz7ZdHq+qBZc7xZlXVa6vqN6vqd6vqf1bV8Wn7Wu2X66xj7fZLVb28qv5bVf3OtJZ/NG3/E1X16el17Jer6luWPdcbuc5a/l1V/Z/L9subdnXcRZ2Dqap9SX4/yVuTfCHJZ5L8yBjjdxcygV1WVU8lOTLGWLu/cFVVP5Dk+SSnxhjfNW37Z0nOjTF+Zor/PWOMn1zmPG/kGuv46STPjzH++TLndquq6tVJXj3G+GxVfXuSM0l+KMm7skb75TrreEfWbL9UVSV5xRjj+ap6WZL/nOR4kp9I8rExxoer6t8k+Z0xxs8tc643cp21/HiS/zjG+A8d4y7yHcyfSzIbY3xujPGNJB9O8vYFjs9kjPHbSc5dsfntSU5OX5/M/EVhpV1jHWtpjPH0GOOz09dfS/JEko2s2X65zjrWzph7fvrty6b/RpK/mOTSC/LK75PkumtptcjAbCT5g8t+/4Ws6Q/eZCT59ao6U1XvXvZkdsGhMcbT09dfTHJomZO5TX+nqv7HdAhtpQ8pXU1VHU7yPUk+nTXeL1esI1nD/VJV+6rq0STPJnkkyf9O8odjjAvTXdbmdezKtYwxLu2XD0z75V9W1bfu5phO8r90bx5j/JkkfznJ354O19wRxvy46bp+fv3nkvzJJG9K8nSSf7Hc6dyaqvq2JB9N8t4xxlcvv22d9stV1rGW+2WMcXGM8aYkr8n8KMwbljyll+zKtVTVdyX5+5mv6c8muTfJrh5+XWRgdpK89rLfv2batpbGGDvTr88m+ZXMf/jW2TPT8fNLx9GfXfJ8XpIxxjPTE+nFJP82a7RfpmPjH03yi2OMj02b126/XG0d67xfkmSM8YdJfjPJn0/yqqraP920dq9jl63l6HRIc4wxvp7kF7LL+2WRgflMku+cPoHxLUl+OMnHFzj+rqmqV0wnMFNVr0jytiSPX/+7Vt7Hkxybvj6W5NeWOJeX7NKL8eSvZk32y3QS9kNJnhhj/OxlN63VfrnWOtZxv1TV/VX1qunrA5l/QOmJzF+c//p0t5XfJ8k11/LkZX94qczPJe3qflno3+SfPpr4r5LsS/LzY4wPLGzwXVRVr8/8XUuS7E/yS+u0lqr690l+MPNLdT+T5B8m+dUkH0nyusz/SYZ3jDFW+gT6Ndbxg5kfhhlJnkryY5edw1hZVfXmJJ9K8liSF6fNP5X5+Yu12S/XWcePZM32S1V9d+Yn8fdl/ofxj4wx/vH0/P9w5oeU/nuSH53eAays66zlN5Lcn6SSPJrkxy/7MMDtj+tSMQB0cJIfgBYCA0ALgQGghcAA0EJgAGghMJBbv5pxVb2hqv5LVX29qv7eFY91vKoenx7nvctYD6wCgYG5C0n+7hjjjUm+P/PL/7wxyfuTfHKM8Z1JPjn9PplfZPM9Sb7p6sDT5Tf+VuZ/I/pPJ/krVbW5mCXAahEYyK1fzXiM8ewY4zNJ/t8VD/Wnknx6jHF+uiDibyX5awtYAqwcgY
Er3ObVjB9P8heq6mBV3Z3kgXzzNfhgz9h/47vA3nHlVYDnl2iaG2OMqrrupS/GGE9U1T9N8utJ/m/ml9+42DhlWFnewcBkt65mPMb40Bjje8cYP5Dky5n/S66w5wgMZHevZlxVf2z69XWZn3/5pd2dLawHF7uE3PrVjKvqjyc5neSV0/2fT/LG6bDap5IczPwDAD8xxvjkQhcDK0JgAGjhEBkALQQGgBYCA0ALgQGghcAA0EJgAGghMAC0+P8bQY0+qFOt1QAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 504x504 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"f, ax = plt.subplots(2, 1, figsize=(7, 7), sharex=True)\n",
"\n",
"sns.boxplot(df['2000'], ax=ax[0])\n",
"sns.boxplot(df['2019'], ax=ax[1]);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## e) values above 25% all values are"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.634406Z",
"start_time": "2020-04-04T14:34:02.622902Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The first qunatile (25%) for 2000 is 6.0, and for 2019 is 2.0\n"
]
}
],
"source": [
"fquant00 = df['2000'].quantile(0.25)\n",
"fquant19 = df['2019'].quantile(0.25)\n",
"\n",
"print(f'The first qunatile (25%) for 2000 is {fquant00}, and for 2019 is {fquant19}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# BMI of 50 people"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.869654Z",
"start_time": "2020-04-04T14:34:02.853131Z"
},
"scrolled": false
},
"outputs": [],
"source": [
"df = pd.DataFrame({'16;20': [18, 3],\n",
" '20;24': [22, 21],\n",
" '24;28': [26, 19],\n",
" '28;32': [30, 5],\n",
" '32;36': [34, 2], })\n",
"df = df.T\n",
"df.columns = ['midpoint', 'freq']\n",
"df['multiplied'] = df.midpoint * df.freq\n",
"# df.append(df.sum().rename('Total'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## a) mean of the grouped data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.898010Z",
"start_time": "2020-04-04T14:34:02.879961Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The mean is 24.56\n"
]
}
],
"source": [
"mean = sum(df.multiplied) / sum(df.freq)\n",
"print(f'The mean is {mean}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## b) standard deviation of the grouped data"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.913939Z",
"start_time": "2020-04-04T14:34:02.901070Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The standard deviation is 3.5562\n"
]
}
],
"source": [
"freq_list = np.array([])\n",
"for i, num in enumerate(df.midpoint):\n",
" freq_list = np.append(freq_list, np.repeat(num, df.freq[i]))\n",
"\n",
"freq_list\n",
"new = pd.DataFrame(freq_list)\n",
"std = np.std(freq_list).round(4)\n",
"print(f'The standard deviation is {std}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## c) 40% quantile (value above which 60% of the data lie)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.924903Z",
"start_time": "2020-04-04T14:34:02.918062Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Above value 22.0 lay 60% of the distirbution mass\n"
]
}
],
"source": [
"np.std(freq_list)\n",
"quant40 = np.quantile(freq_list, [0.40])\n",
"print(f'Above value {quant40[0]} lay 60% of the distirbution mass')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Frequency table of life of bacteria"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-04T14:34:02.957383Z",
"start_time": "2020-04-04T14:34:02.942876Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Under value 75.0 are 10% of all cases of the distirbution\n"
]
}
],
"source": [
"midpoints = np.array([100, 350/2, 450 /2, 550])\n",
"freqs = np.array([30, 40, 30, 50])\n",
"d = 100 \n",
"N = sum(freqs)\n",
"rel_freqs = freqs/N \n",
"cum_freqs = np.cumsum(rel_freqs)*N\n",
"\n",
"x10 = 50 + d/freqs[1] * (N* rel_freqs[1] - cum_freqs[0])\n",
"print(f'Under value {x10} are 10% of all cases of the distirbution')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment