{{ message }}

Instantly share code, notes, and snippets.

# bstancil/random_generated_payments.ipynb Secret

Last active Mar 12, 2018
Payments generated with probability distribution functions
 { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "## MODEL PARAMETERS\n", "SAMPLE_SIZE = 20000 # Number of samples\n", "EXPENSE_COUNT = 8 # Number of expenses you can sample from. Keep below 20.\n", "MAX_INCLUDED = 8 # Number of expenses you are allowed to choose from the sample.\n", "DIST = 'gamma' # Distribution to use for generating random data." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "## Import various stuff\n", "import itertools\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generated 160000 random values...\n" ] } ], "source": [ "from scipy.stats import exponpow\n", "from scipy.stats import recipinvgauss\n", "from scipy.stats import gengamma\n", "from scipy.stats import exponweib\n", "from scipy.stats import beta\n", "from scipy.stats import gamma\n", "\n", "vars_to_generate = SAMPLE_SIZE * EXPENSE_COUNT\n", "\n", "if DIST == 'exponpow':\n", " b = 0.5\n", " loc = 1021\n", " scale = 14460\n", " r = exponpow.rvs(b, size=vars_to_generate)\n", "\n", "elif DIST == 'recipinvgauss':\n", " mu = 737550\n", " loc = 1021\n", " scale = 9320\n", " r = recipinvgauss.rvs(mu, size=vars_to_generate)\n", "\n", "elif DIST == 'gengamma':\n", " a = 1.16\n", " c = 0.58\n", " loc = 1021\n", " scale = 4991\n", " r = gengamma.rvs(a, c, size=vars_to_generate)\n", "\n", "elif DIST == 'exponweib':\n", " a = 1.76\n", " c = 0.47\n", " loc = 1021\n", " scale = 2782\n", " r = exponweib.rvs(a, c, size=vars_to_generate)\n", " \n", "elif DIST == 'beta':\n", " a = 0.59\n", " b = 1015\n", " loc = 1021\n", " scale = 11907999\n", " r = beta.rvs(a, b, size=vars_to_generate)\n", "\n", "elif DIST == 'gamma':\n", " a = 0.59\n", " loc = 1021\n", " scale = 15099\n", " r = gamma.rvs(a, size=vars_to_generate)\n", "\n", "r = (r * 
scale + loc)\n", "print 'Generated %i random values...' % len(r)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "
01234567
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
04053.3978842568.63774710040.9630825739.9767959293.1744841099.0428882499.95356510715.697627
14284.45908324184.62592010886.7591484094.23765819636.19526310684.03570811044.7473281680.279604
21082.2807782814.82283118863.12213737539.0522011937.3186871101.37779210387.26959617978.688672
39116.7313915967.7635368914.17798211857.71755611911.7520048480.7249365092.2287184644.603398
42099.9593744130.0319605175.9561471140.4013324851.86900927743.94319119664.5870001820.357459
\n", "\n", "\n", "
abs_deltaOne in
Percentile
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0.000010.254846100000.000000
0.000100.40570510000.000000
0.000200.7670795000.000000
0.001004.5985851000.000000
0.0050026.090394200.000000
0.0100050.719541100.000000
0.02000113.95050850.000000
0.03000195.19771833.333333
0.04000285.83240125.000000
0.05000409.84777620.000000
0.100007030.80925310.000000
0.2000025563.0173565.000000
0.3000037591.0040963.333333
0.4000047031.2208292.500000
0.5000055282.5991632.000000
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "" ], "text/plain": [ " abs_delta One in\n", "Percentile \n", "0.00001 0.254846 100000.000000\n", "0.00010 0.405705 10000.000000\n", "0.00020 0.767079 5000.000000\n", "0.00100 4.598585 1000.000000\n", "0.00500 26.090394 200.000000\n", "0.01000 50.719541 100.000000\n", "0.02000 113.950508 50.000000\n", "0.03000 195.197718 33.333333\n", "0.04000 285.832401 25.000000\n", "0.05000 409.847776 20.000000\n", "0.10000 7030.809253 10.000000\n", "0.20000 25563.017356 5.000000\n", "0.30000 37591.004096 3.333333\n", "0.40000 47031.220829 2.500000\n", "0.50000 55282.599163 2.000000" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Spend difference for best results difference by percentile\n", "\n", "percentiles = [0.00001,0.0001,0.0002,0.001,0.005,0.01,0.02,0.03,0.04,0.05,0.1,0.2,0.3,0.4,0.5]\n", "\n", "percent_df = pd.DataFrame(results['abs_delta'].quantile(percentiles))\n", "percent_df.index.name = 'Percentile'\n", "percent_df['One in'] = 1/percent_df.index\n", "percent_df" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "
abs_deltaOne in
Percentile
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0.0000010.9549231000000.000000
0.00001012.435548100000.000000
0.000100123.08354110000.000000
0.0010001255.6624891000.000000
0.0050006209.230351200.000000
0.01000012018.254107100.000000
0.02000021532.83317250.000000
0.03000028762.93328033.333333
0.04000034321.94636925.000000
0.05000038815.43897420.000000
0.10000053822.45958810.000000
0.20000070327.5988775.000000
0.30000080876.0683573.333333
0.40000088952.4136942.500000
0.50000095769.4722772.000000
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "" ], "text/plain": [ " abs_delta One in\n", "Percentile \n", "0.000001 0.954923 1000000.000000\n", "0.000010 12.435548 100000.000000\n", "0.000100 123.083541 10000.000000\n", "0.001000 1255.662489 1000.000000\n", "0.005000 6209.230351 200.000000\n", "0.010000 12018.254107 100.000000\n", "0.020000 21532.833172 50.000000\n", "0.030000 28762.933280 33.333333\n", "0.040000 34321.946369 25.000000\n", "0.050000 38815.438974 20.000000\n", "0.100000 53822.459588 10.000000\n", "0.200000 70327.598877 5.000000\n", "0.300000 80876.068357 3.333333\n", "0.400000 88952.413694 2.500000\n", "0.500000 95769.472277 2.000000" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Spend difference for all results difference by percentile\n", "\n", "percentiles = [0.000001,0.00001,0.0001,0.001,0.005,0.01,0.02,0.03,0.04,0.05,0.1,0.2,0.3,0.4,0.5]\n", "\n", "percent_df = pd.DataFrame(all_sums['abs_delta'].quantile(percentiles))\n", "percent_df['One in'] = 1/percent_df.index\n", "percent_df.index.name = 'Percentile'\n", "percent_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython2", "version": "2.7.14" } }, "nbformat": 4, "nbformat_minor": 2 }