Skip to content

Instantly share code, notes, and snippets.

@catslovedata
Created January 2, 2023 21:04
Show Gist options
  • Save catslovedata/f86a9783b6f6d08317e724e67303ce30 to your computer and use it in GitHub Desktop.
Save catslovedata/f86a9783b6f6d08317e724e67303ce30 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import numpy, scipy and matplotlib, and set some constants"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from scipy.stats import norm, skewnorm\n",
"from matplotlib import pyplot as plt\n",
"\n",
"RANDOM_SEED = 128 # Seed for reproducible sampling. Set RANDOM_SEED = None to draw different values on each run.\n",
"NUM_VALUES = 10000 # Number of values to generate\n",
"MEAN, ST_DEV = 12, 2 # Mean and std dev to use in the normal distribution\n",
"NUM_BINS = 20 # Number of bins to group the data into"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate and bin data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def bin_and_summarise(data, num_bins):\n",
"    \"\"\"Bin `data` into a histogram and compute its summary statistics.\n",
"\n",
"    Args:\n",
"        data: Array of sampled values.\n",
"        num_bins: Number of bins passed to np.histogram.\n",
"\n",
"    Returns:\n",
"        A tuple (binned, left_edges, counts, stats) where\n",
"        binned is the raw np.histogram result (counts, bin_edges),\n",
"        left_edges is bin_edges without its last entry (the left boundary of each bin),\n",
"        counts is the number of values that fell in each bin, and\n",
"        stats is (min, q1, median, q3, max, mean).\n",
"    \"\"\"\n",
"    binned = np.histogram(data, bins=num_bins)\n",
"    counts, bin_edges = binned\n",
"    # One vectorised call yields all three quartile boundaries.\n",
"    q1, median, q3 = np.quantile(data, [0.25, 0.5, 0.75])\n",
"    stats = (np.min(data), q1, median, q3, np.max(data), np.mean(data))\n",
"    # Omit the last boundary so that we have just the left boundaries.\n",
"    return binned, bin_edges[:-1], counts, stats\n",
"\n",
"\n",
"# NOTE: the same RANDOM_SEED is passed to every rvs call below.\n",
"\n",
"# Normal distribution\n",
"data1 = norm.rvs(loc=MEAN, scale=ST_DEV, size=NUM_VALUES, random_state=RANDOM_SEED)\n",
"data_binned1, x_values1, y_values1, data1_stats = bin_and_summarise(data1, NUM_BINS)\n",
"data1_min, data1_q1, data1_median, data1_q3, data1_max, data1_mean = data1_stats\n",
"\n",
"# Positive skew (alpha = 5)\n",
"data2 = skewnorm.rvs(5, loc=MEAN, scale=ST_DEV, size=NUM_VALUES, random_state=RANDOM_SEED)\n",
"data_binned2, x_values2, y_values2, data2_stats = bin_and_summarise(data2, NUM_BINS)\n",
"data2_min, data2_q1, data2_median, data2_q3, data2_max, data2_mean = data2_stats\n",
"\n",
"# Negative skew (alpha = -5)\n",
"data3 = skewnorm.rvs(-5, loc=MEAN, scale=ST_DEV, size=NUM_VALUES, random_state=RANDOM_SEED)\n",
"data_binned3, x_values3, y_values3, data3_stats = bin_and_summarise(data3, NUM_BINS)\n",
"data3_min, data3_q1, data3_median, data3_q3, data3_max, data3_mean = data3_stats"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example histogram for normally distributed data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAASsklEQVR4nO3dbYxc53ne8f9VspYtp6qlcCnxTSETME4kwfHLhlWSNnXMBCJaV9QXFTTqmkgIEBXUODHiOmIMJMgHAkIcJHWASgUhK6QTVyqruhVRVLYFAqlRwJKykq1IlKyICV1yTUrcxHXqJoBsMnc/zFE9Xs5yd2d2d5Z6/j9gMefc5zlz7iV3rzn7zMyZVBWSpDb8nXE3IElaOYa+JDXE0Jekhhj6ktQQQ1+SGrJ23A3MZ926dbV169ZxtyFJV5Snn376L6pqYnZ91Yf+1q1bmZqaGncbknRFSfK/BtWd3pGkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIas+nfkSqvV2VfOD73vxhvWL2En0sLNe6af5MEk55M8P2DbR5NUknV9tQNJTiZ5KcltffX3JHmu2/Z7SbJ034YkaSEWMr1zGNg1u5hkC/BzwOm+2k3AHuDmbp/7kqzpNt8P7Ae2d1+X3KckaXnNO71TVV9MsnXApt8FPgY82lfbDTxcVa8Bp5KcBHYk+RpwTVV9CSDJp4E7gMdG6l4a0ShTNNKVaKgncpPcDny9qp6dtWkTcKZvfbqrbeqWZ9fnuv/9SaaSTM3MzAzToiRpgEWHfpKrgY8Dvz5o84BaXaY+UFUdqqrJqpqcmLjkctCSpCEN8+qdHwK2Ac92z8VuBp5JsoPeGfyWvrGbgbNdffOAuiRpBS36TL+qnquq9VW1taq20gv0d1fVK8AxYE+Sq5Jso/eE7VNVdQ74VpJbu1ftfIjvfS5AkrQCFvKSzYeALwFvTzKdZN9cY6vqBHAUeAH4HHB3VV3sNt8FPACcBP4Mn8SVpBW3kFfvfGCe7VtnrR8EDg4YNwXcssj+JElLyMswSFJDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGDPPJWdKq4oebSwvnmb4kNcTQl6SGGPqS1BBDX5IaYuhLUkPmffVOkgeB9wPnq+qWrvYJ4J8B3wb+DPj5qvpmt+0AsA+4CHy4qj7f1d8DHAbeAvx34Jeqqpb4+5GuCKO+4mjjDeuXqBO1ZiFn+oeBXbNqjwO3VNU7gD8FDgAkuQnYA9zc7XNfkjXdPvcD+4Ht3dfs+5QkLbN5Q7+qvgh8Y1btC1V1oVt9AtjcLe8GHq6q16rqFHAS2JFkA3BNVX2pO7v/NHDHEn0PkqQFWoo5/V8AHuuWNwFn+rZNd7VN3fLs+kBJ9ieZSjI1MzOzBC1KkmDE0E/yceAC8JnXSwOG1WXqA1XVoaqarKrJiYmJUVqUJPUZ+jIMSfbSe4J3Z98TstPAlr5hm4GzXX3zgLokaQUNdaafZBfwq8DtVfU3fZuOAXuSXJVkG70nbJ+qqnPAt5LcmiTAh4BHR+xdkrRIC3nJ5kPAe4F1SaaB36D3ap2rgMd7Gc4TVfWvqupEkqPAC/Smfe6uqovdXd3Fd1+y+RjffR5AkrRC5g39qvrAgPKnLjP+IHBwQH0KuGVR3UmSlpTvyJWkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkPmDf0kDyY5n+T5vtp1SR5P8nJ3e23ftgNJTiZ5KcltffX3JHmu2/Z7SbL0344k6XIWcqZ/GNg1q3YPcLyqtgPHu3WS3ATsAW7u9rkvyZpun/uB/cD27mv2fUqSltm8oV9VXwS+Mau8GzjSLR
8B7uirP1xVr1XVKeAksCPJBuCaqvpSVRXw6b59JEkrZNg5/eur6hxAd7u+q28CzvSNm+5qm7rl2fWBkuxPMpVkamZmZsgWJUmzLfUTuYPm6esy9YGq6lBVTVbV5MTExJI1J0mtGzb0X+2mbOhuz3f1aWBL37jNwNmuvnlAXZK0goYN/WPA3m55L/BoX31PkquSbKP3hO1T3RTQt5Lc2r1q50N9+0iSVsja+QYkeQh4L7AuyTTwG8C9wNEk+4DTwJ0AVXUiyVHgBeACcHdVXezu6i56rwR6C/BY9yVJWkHzhn5VfWCOTTvnGH8QODigPgXcsqjuJElLynfkSlJDDH1Jasi80zuSVp+zr5yff9AcNt6wfv5BesPyTF+SGuKZvsZulLNWSYvjmb4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNGSn0k3wkyYkkzyd5KMmbk1yX5PEkL3e31/aNP5DkZJKXktw2evuSpMUYOvSTbAI+DExW1S3AGmAPcA9wvKq2A8e7dZLc1G2/GdgF3JdkzWjtS5IWY9TpnbXAW5KsBa4GzgK7gSPd9iPAHd3ybuDhqnqtqk4BJ4EdIx5fkrQIQ4d+VX0d+G3gNHAO+Kuq+gJwfVWd68acA17/FOZNwJm+u5juapdIsj/JVJKpmZmZYVuUJM0yyvTOtfTO3rcBG4G3Jvng5XYZUKtBA6vqUFVNVtXkxMTEsC1KkmYZZXrnZ4FTVTVTVd8BPgv8JPBqkg0A3e3rn3o9DWzp238zvekgSdIKGSX0TwO3Jrk6SYCdwIvAMWBvN2Yv8Gi3fAzYk+SqJNuA7cBTIxxfkrRIa4fdsaqeTPII8AxwAfgycAj4PuBokn30Hhju7MafSHIUeKEbf3dVXRyxf0nSIqRq4LT6qjE5OVlTU1PjbkPL6Owr5+cfpCWz8Yb18w/SFS/J01U1ObvuO3IlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0Z+pOzJF2ZRvnQGj+A5crnmb4kNcQzfS0JP/JQujJ4pi9JDRkp9JO8LckjSb6a5MUkP5HkuiSPJ3m5u722b/yBJCeTvJTkttHblyQtxqhn+p8EPldVPwL8GPAicA9wvKq2A8e7dZLcBOwBbgZ2AfclWTPi8SVJizB06Ce5Bvhp4FMAVfXtqvomsBs40g07AtzRLe8GHq6q16rqFHAS2DHs8SVJizfKmf4PAjPA7yf5cpIHkrwVuL6qzgF0t6+/xmsTcKZv/+mudokk+5NMJZmamZkZoUVJUr9RQn8t8G7g/qp6F/DXdFM5c8iAWg0aWFWHqmqyqiYnJiZGaFGS1G+U0J8GpqvqyW79EXoPAq8m2QDQ3Z7vG7+lb//NwNkRji9JWqShQ7+qXgHOJHl7V9oJvAAcA/Z2tb3Ao93yMWBPkquSbAO2A08Ne3xJ0uKN+uasXwQ+k+RNwJ8DP0/vgeRokn3AaeBOgKo6keQovQeGC8DdVXVxxONLkhZhpNCvqq8AkwM27Zxj/EHg4CjHlCQNz3fkSlJDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpISOHfpI1Sb6c5L9169cleTzJy93ttX1jDyQ5meSlJLeNemxJ0uIsxZn+LwEv9q3fAxyvqu3A8W6dJDcBe4CbgV3AfUnWLMHxJUkLNFLoJ9kM/FPggb7ybuBIt3wEuKOv/nBVvVZVp4CTwI5Rji9JWpy1I+7/b4GPAX+vr3Z9VZ0DqKpzSdZ39U3AE33jprvaJZLsB/YD3HjjjSO2qIU6+8r5cbcgaZkNfaaf5P3A+ap6eqG7DK
jVoIFVdaiqJqtqcmJiYtgWJUmzjHKm/1PA7Un+CfBm4Jokfwi8mmRDd5a/AXj99HEa2NK3/2bg7AjHlyQt0tChX1UHgAMASd4LfLSqPpjkE8Be4N7u9tFul2PAf0jyO8BGYDvw1NCdS1pxo0wBbrxh/fyDtOxGndMf5F7gaJJ9wGngToCqOpHkKPACcAG4u6ouLsPxJUlzWJLQr6o/Av6oW/5LYOcc4w4CB5fimJKkxfMduZLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JasjQH4yeZAvwaeAG4G+BQ1X1ySTXAf8R2Ap8DfjnVfW/u30OAPuAi8CHq+rzI3WvS5x95fy4W5C0io1ypn8B+JWq+lHgVuDuJDcB9wDHq2o7cLxbp9u2B7gZ2AXcl2TNKM1LkhZn6NCvqnNV9Uy3/C3gRWATsBs40g07AtzRLe8GHq6q16rqFHAS2DHs8SVJizf09E6/JFuBdwFPAtdX1TnoPTAkWd8N2wQ80bfbdFcbdH/7gf0AN95441K0KGnMRpl63HjD+vkHaUFGfiI3yfcB/xn45ar6P5cbOqBWgwZW1aGqmqyqyYmJiVFblCR1Rgr9JH+XXuB/pqo+25VfTbKh274BeP3hfRrY0rf7ZuDsKMeXJC3O0KGfJMCngBer6nf6Nh0D9nbLe4FH++p7klyVZBuwHXhq2ONLkhZvlDn9nwL+JfBckq90tV8D7gWOJtkHnAbuBKiqE0mOAi/Qe+XP3VV1cYTjS5IWaejQr6r/yeB5eoCdc+xzEDg47DElSaPxHbmS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDVkSS64JknLadTPifCCbd9l6K9CfhCKpOXi9I4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXEd+QuA99RK2m1MvQlveGNciL2Rrtuz4qHfpJdwCeBNcADVXXvSvcgSQv1RnvAWNHQT7IG+HfAzwHTwB8nOVZVLyzH8ZxmkaTvtdJn+juAk1X15wBJHgZ2A8sS+pI0Tqvxr4SVDv1NwJm+9WngH8welGQ/sL9b/b9JXlqB3hZrHfAX425igex1edjr8rDXpfEDg4orHfoZUKtLClWHgEPL387wkkxV1eS4+1gIe10e9ro87HV5rfTr9KeBLX3rm4GzK9yDJDVrpUP/j4HtSbYleROwBzi2wj1IUrNWdHqnqi4k+dfA5+m9ZPPBqjqxkj0soVU9/TSLvS4Pe10e9rqMUnXJlLok6Q3Ka+9IUkMMfUlqiKE/hCRvS/JIkq8meTHJT4y7p7kk+UiSE0meT/JQkjePu6fXJXkwyfkkz/fVrkvyeJKXu9trx9nj6+bo9RPdz8CfJPkvSd42xhb/v0G99m37aJJKsm4cvc02V69JfjHJS93P7m+Nq7++fgb9/78zyRNJvpJkKsmOcfa4UIb+cD4JfK6qfgT4MeDFMfczUJJNwIeByaq6hd6T53vG29X3OAzsmlW7BzheVduB4936anCYS3t9HLilqt4B/ClwYKWbmsNhLu2VJFvoXQLl9Eo3dBmHmdVrkp+h9079d1TVzcBvj6Gv2Q5z6b/pbwG/WVXvBH69W1/1DP1FSnIN8NPApwCq6ttV9c2xNnV5a4G3JFkLXM0qel9EVX0R+Mas8m7gSLd8BLhjJXuay6Beq+oLVXWhW32C3vtOxm6Of1eA3wU+xoA3RI7LHL3eBdxbVa91Y8Z+Ea05+izgmm7577OKfrcux9BfvB8EZoDfT/LlJA8keeu4mxqkqr5O7yzpNHAO+Kuq+sJ4u5rX9VV1DqC7XX2XKRzsF4DHxt3EXJLcDny9qp4ddy8L8MPAP0ryZJL/keTHx93QHH4Z+ESSM/R+z1bLX3qXZegv3lrg3cD9VfUu4K9ZPVMQ36
ObD98NbAM2Am9N8sHxdvXGk+TjwAXgM+PuZZAkVwMfpzcFcSVYC1wL3Ar8G+BokkGXcBm3u4CPVNUW4CN0f/2vdob+4k0D01X1ZLf+CL0HgdXoZ4FTVTVTVd8BPgv85Jh7ms+rSTYAdLdj/9P+cpLsBd4P/ItavW96+SF6D/zPJvkavWmoZ5LcMNau5jYNfLZ6ngL+lt6FzVabvfR+pwD+E72rCK96hv4iVdUrwJkkb+9KO1m9l4Y+Ddya5OruTGknq/RJ5z7H6P0y0d0+OsZeLqv7QKBfBW6vqr8Zdz9zqarnqmp9VW2tqq30QvXd3c/yavRfgfcBJPlh4E2szitZngX+cbf8PuDlMfaycFXl1yK/gHcCU8Cf0PsBvXbcPV2m198Evgo8D/wBcNW4e+rr7SF6zzV8h14Q7QO+n96rdl7ubq8bd5+X6fUkvUuFf6X7+vfj7nOuXmdt/xqwbtx9Xubf9U3AH3Y/s88A71ulff5D4GngWeBJ4D3j7nMhX16GQZIa4vSOJDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kN+X9pi+awsI/hcgAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"\n",
"plt.bar(x=x_values1, height=y_values1, align='edge', width=(x_values1[1]-x_values1[0]), facecolor='#E5E7E9')\n",
"plt.show()\n",
"\n",
"# This is equivalent to the hist() function of pyplot, e.g.:\n",
"# plt.hist(data1, bins=NUM_BINS, density=False)\n",
"# plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot 3 histograms in a grid - one for each distribution"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 720x540 with 3 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, (ax1, ax2, ax3) = plt.subplots(3, sharex='col', sharey='col')\n",
"fig.set_size_inches(10,7.5)\n",
"\n",
"ax1.bar(x=x_values1, height=y_values1, align='edge', width=(x_values1[1]-x_values1[0]), facecolor='#E5E7E9')\n",
"ax2.bar(x=x_values2, height=y_values2, align='edge', width=(x_values2[1]-x_values2[0]), facecolor='#E5E7E9')\n",
"ax3.bar(x=x_values3, height=y_values3, align='edge', width=(x_values3[1]-x_values3[0]), facecolor='#E5E7E9')\n",
"\n",
"ax1.title.set_text('Histogram plot for normally distributed data')\n",
"ax2.title.set_text('Histogram plot for data with positive skew')\n",
"ax3.title.set_text('Histogram plot for data with negative skew')\n",
"\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot 3 histograms in a grid with markers for mean, median and quartiles"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 720x540 with 3 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, (ax1, ax2, ax3) = plt.subplots(3, sharex='col', sharey='col')\n",
"fig.set_size_inches(10,7.5)\n",
"\n",
"ax1.bar(x=x_values1, height=y_values1, align='edge', width=(x_values1[1]-x_values1[0]), facecolor='#E5E7E9')\n",
"ax2.bar(x=x_values2, height=y_values2, align='edge', width=(x_values2[1]-x_values2[0]), facecolor='#E5E7E9')\n",
"ax3.bar(x=x_values3, height=y_values3, align='edge', width=(x_values3[1]-x_values3[0]), facecolor='#E5E7E9')\n",
"\n",
"ax1.title.set_text('Histogram plot for normally distributed data')\n",
"ax2.title.set_text('Histogram plot for data with positive skew')\n",
"ax3.title.set_text('Histogram plot for data with negative skew')\n",
"\n",
"max_y_data = plt.gca().get_ylim()[1]\n",
"min_y = [max_y_data*0.35, max_y_data*0.35, max_y_data*0.35, 0]\n",
"max_y = [max_y_data, max_y_data, max_y_data, max_y_data*0.65]\n",
"colors =['#8E44AD', '#F39C12', '#2874A6', '#7F8C8D']\n",
"\n",
"ax1.vlines([data1_q1, data1_median, data1_q3, data1_mean], ymin=min_y, ymax=max_y, color=colors, alpha=0.75)\n",
"ax2.vlines([data2_q1, data2_median, data2_q3, data2_mean], ymin=min_y, ymax=max_y, color=colors, alpha=0.75)\n",
"ax3.vlines([data3_q1, data3_median, data3_q3, data3_mean], ymin=min_y, ymax=max_y, color=colors, alpha=0.75)\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "newenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "65e74bcb0609f7f067e47051b00bfa25ce7004419ec87f30c427e7be065571b6"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment