Created
June 17, 2021 15:24
-
-
Save ricardoV94/70d0e0b4eac0a9aadc5210d0e6c37b87 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 96, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import time\n", | |
"\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import scipy.stats as st\n", | |
"from scipy.special import expit\n", | |
"\n", | |
"import theano.tensor as at\n", | |
"import pymc3 as pm" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def benchmark_model(rows):\n", | |
"\n", | |
" # Generate data\n", | |
" n_groups = 4\n", | |
" n_features_per_group = (50, 45, 55, 50)\n", | |
" n_features_total = sum(n_features_per_group)\n", | |
"\n", | |
" group_idxs = []\n", | |
" for n_index, n in enumerate(n_features_per_group):\n", | |
" group_idxs.extend([n_index]*n)\n", | |
" group_idxs = np.array(group_idxs)\n", | |
"\n", | |
" betas_spread_group_true = st.halfnorm().rvs(n_groups)\n", | |
" betas_features_true = [\n", | |
" st.norm(0, beta_spread_group).rvs(n_features_group)\n", | |
" for (beta_spread_group, n_features_group) in zip(\n", | |
" betas_spread_group_true, \n", | |
" n_features_per_group,\n", | |
" )\n", | |
" ]\n", | |
" betas_features_true_flat = np.array([b for group in betas_features_true for b in group])\n", | |
"\n", | |
" x = np.random.randn(rows, n_features_total)\n", | |
" prob_y = expit(betas_features_true_flat @ x.T)\n", | |
" y = st.bernoulli(prob_y).rvs()\n", | |
" \n", | |
" # Create model\n", | |
" with pm.Model(check_bounds=False) as m:\n", | |
" betas_spread_group = pm.HalfNormal('betas_spread_group', 1, shape=n_groups)\n", | |
" betas_features = pm.Normal('betas_features', 0, betas_spread_group[group_idxs], shape=n_features_total)\n", | |
" logit = betas_features @ x.T\n", | |
" like = pm.Bernoulli('like', logit_p=logit, observed=y)\n", | |
"\n", | |
" # Timeit\n", | |
" start = time.time()\n", | |
" print(f'{rows=}')\n", | |
" with m:\n", | |
" trace = pm.sample(cores=1, chains=1, compute_convergence_checks=False, return_inferencedata=False)\n", | |
" end = time.time()\n", | |
" \n", | |
" return end - start" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 90, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Auto-assigning NUTS sampler...\n", | |
"Initializing NUTS using jitter+adapt_diag...\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"rows=100\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Sequential sampling (1 chains in 1 job)\n", | |
"NUTS: [betas_features, betas_spread_group]\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" <style>\n", | |
" /* Turns off some styling */\n", | |
" progress {\n", | |
" /* gets rid of default border in Firefox and Opera. */\n", | |
" border: none;\n", | |
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n", | |
" background-size: auto;\n", | |
" }\n", | |
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n", | |
" background: #F44336;\n", | |
" }\n", | |
" </style>\n", | |
" <progress value='2000' class='' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" 100.00% [2000/2000 00:18<00:00 Sampling chain 0, 0 divergences]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 18 seconds.\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"rows=50000\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Auto-assigning NUTS sampler...\n", | |
"Initializing NUTS using jitter+adapt_diag...\n", | |
"Sequential sampling (1 chains in 1 job)\n", | |
"NUTS: [betas_features, betas_spread_group]\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" <style>\n", | |
" /* Turns off some styling */\n", | |
" progress {\n", | |
" /* gets rid of default border in Firefox and Opera. */\n", | |
" border: none;\n", | |
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n", | |
" background-size: auto;\n", | |
" }\n", | |
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n", | |
" background: #F44336;\n", | |
" }\n", | |
" </style>\n", | |
" <progress value='2000' class='' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" 100.00% [2000/2000 09:28<00:00 Sampling chain 0, 0 divergences]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 568 seconds.\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"rows=400000\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Auto-assigning NUTS sampler...\n", | |
"Initializing NUTS using jitter+adapt_diag...\n", | |
"Sequential sampling (1 chains in 1 job)\n", | |
"NUTS: [betas_features, betas_spread_group]\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" <style>\n", | |
" /* Turns off some styling */\n", | |
" progress {\n", | |
" /* gets rid of default border in Firefox and Opera. */\n", | |
" border: none;\n", | |
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n", | |
" background-size: auto;\n", | |
" }\n", | |
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n", | |
" background: #F44336;\n", | |
" }\n", | |
" </style>\n", | |
" <progress value='2000' class='' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" 100.00% [2000/2000 1:31:54<00:00 Sampling chain 0, 0 divergences]\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 5514 seconds.\n" | |
] | |
} | |
], | |
"source": [ | |
"duration = [benchmark_model(nrows) for nrows in (100, 50_000, 400_000)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[19.17148232460022, 571.013610124588, 5535.505532503128]" | |
] | |
}, | |
"execution_count": 91, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"duration" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 125, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>nrows</th>\n", | |
" <th>seconds</th>\n", | |
" <th>minutes</th>\n", | |
" <th>seconds per row</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>100</td>\n", | |
" <td>19.171482</td>\n", | |
" <td>0.319525</td>\n", | |
" <td>0.191715</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>50000</td>\n", | |
" <td>571.013610</td>\n", | |
" <td>9.516894</td>\n", | |
" <td>0.011420</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>400000</td>\n", | |
" <td>5535.505533</td>\n", | |
" <td>92.258426</td>\n", | |
" <td>0.013839</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" nrows seconds minutes seconds per row\n", | |
"0 100 19.171482 0.319525 0.191715\n", | |
"1 50000 571.013610 9.516894 0.011420\n", | |
"2 400000 5535.505533 92.258426 0.013839" | |
] | |
}, | |
"execution_count": 125, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data = [\n", | |
" dict(nrows=nrows, seconds=seconds) \n", | |
" for nrows, seconds in zip((100, 50_000, 400_000), duration)\n", | |
"]\n", | |
"df = pd.DataFrame(data)\n", | |
"df['minutes'] = df['seconds'] / 60\n", | |
"df['seconds per row'] = df['seconds'] / df['nrows']\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The watermark extension is already loaded. To reload it, use:\n", | |
" %reload_ext watermark\n", | |
"numpy : 1.20.3\n", | |
"scipy : 1.6.3\n", | |
"pymc3 : 3.11.2\n", | |
"matplotlib: 3.4.2\n", | |
"theano : 1.1.2\n", | |
"pandas : 1.2.4\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"%load_ext watermark\n", | |
"%watermark --iversions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 124, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Last updated: 2021-06-17T17:18:01.999297+02:00\n", | |
"\n", | |
"Python implementation: CPython\n", | |
"Python version : 3.8.5\n", | |
"IPython version : 7.24.1\n", | |
"\n", | |
"Compiler : GCC 9.3.0\n", | |
"OS : Linux\n", | |
"Release : 5.4.0-74-generic\n", | |
"Machine : x86_64\n", | |
"Processor : x86_64\n", | |
"CPU cores : 8\n", | |
"Architecture: 64bit\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"%watermark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"hide_input": false, | |
"kernelspec": { | |
"display_name": "pymc-labs", | |
"language": "python", | |
"name": "pymc-labs" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment