Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ricardoV94/70d0e0b4eac0a9aadc5210d0e6c37b87 to your computer and use it in GitHub Desktop.
Save ricardoV94/70d0e0b4eac0a9aadc5210d0e6c37b87 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scipy.stats as st\n",
"from scipy.special import expit\n",
"\n",
"import theano.tensor as at\n",
"import pymc3 as pm"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"def benchmark_model(rows):\n",
"\n",
" # Generate data\n",
" n_groups = 4\n",
" n_features_per_group = (50, 45, 55, 50)\n",
" n_features_total = sum(n_features_per_group)\n",
"\n",
" group_idxs = []\n",
" for n_index, n in enumerate(n_features_per_group):\n",
" group_idxs.extend([n_index]*n)\n",
" group_idxs = np.array(group_idxs)\n",
"\n",
" betas_spread_group_true = st.halfnorm().rvs(n_groups)\n",
" betas_features_true = [\n",
" st.norm(0, beta_spread_group).rvs(n_features_group)\n",
" for (beta_spread_group, n_features_group) in zip(\n",
" betas_spread_group_true, \n",
" n_features_per_group,\n",
" )\n",
" ]\n",
" betas_features_true_flat = np.array([b for group in betas_features_true for b in group])\n",
"\n",
" x = np.random.randn(rows, n_features_total)\n",
" prob_y = expit(betas_features_true_flat @ x.T)\n",
" y = st.bernoulli(prob_y).rvs()\n",
" \n",
" # Create model\n",
" with pm.Model(check_bounds=False) as m:\n",
" betas_spread_group = pm.HalfNormal('betas_spread_group', 1, shape=n_groups)\n",
" betas_features = pm.Normal('betas_features', 0, betas_spread_group[group_idxs], shape=n_features_total)\n",
" logit = betas_features @ x.T\n",
" like = pm.Bernoulli('like', logit_p=logit, observed=y)\n",
"\n",
" # Timeit\n",
" start = time.time()\n",
" print(f'{rows=}')\n",
" with m:\n",
" trace = pm.sample(cores=1, chains=1, compute_convergence_checks=False, return_inferencedata=False)\n",
" end = time.time()\n",
" \n",
" return end - start"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Auto-assigning NUTS sampler...\n",
"Initializing NUTS using jitter+adapt_diag...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"rows=100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sequential sampling (1 chains in 1 job)\n",
"NUTS: [betas_features, betas_spread_group]\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
" </style>\n",
" <progress value='2000' class='' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 100.00% [2000/2000 00:18<00:00 Sampling chain 0, 0 divergences]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 18 seconds.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"rows=50000\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Auto-assigning NUTS sampler...\n",
"Initializing NUTS using jitter+adapt_diag...\n",
"Sequential sampling (1 chains in 1 job)\n",
"NUTS: [betas_features, betas_spread_group]\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
" </style>\n",
" <progress value='2000' class='' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 100.00% [2000/2000 09:28<00:00 Sampling chain 0, 0 divergences]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 568 seconds.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"rows=400000\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Auto-assigning NUTS sampler...\n",
"Initializing NUTS using jitter+adapt_diag...\n",
"Sequential sampling (1 chains in 1 job)\n",
"NUTS: [betas_features, betas_spread_group]\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
" </style>\n",
" <progress value='2000' class='' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 100.00% [2000/2000 1:31:54<00:00 Sampling chain 0, 0 divergences]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 5514 seconds.\n"
]
}
],
"source": [
"duration = [benchmark_model(nrows) for nrows in (100, 50_000, 400_000)]"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[19.17148232460022, 571.013610124588, 5535.505532503128]"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duration"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nrows</th>\n",
" <th>seconds</th>\n",
" <th>minutes</th>\n",
" <th>seconds per row</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100</td>\n",
" <td>19.171482</td>\n",
" <td>0.319525</td>\n",
" <td>0.191715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>50000</td>\n",
" <td>571.013610</td>\n",
" <td>9.516894</td>\n",
" <td>0.011420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>400000</td>\n",
" <td>5535.505533</td>\n",
" <td>92.258426</td>\n",
" <td>0.013839</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nrows seconds minutes seconds per row\n",
"0 100 19.171482 0.319525 0.191715\n",
"1 50000 571.013610 9.516894 0.011420\n",
"2 400000 5535.505533 92.258426 0.013839"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = [\n",
" dict(nrows=nrows, seconds=seconds) \n",
" for nrows, seconds in zip((100, 50_000, 400_000), duration)\n",
"]\n",
"df = pd.DataFrame(data)\n",
"df['minutes'] = df['seconds'] / 60\n",
"df['seconds per row'] = df['seconds'] / df['nrows']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The watermark extension is already loaded. To reload it, use:\n",
" %reload_ext watermark\n",
"numpy : 1.20.3\n",
"scipy : 1.6.3\n",
"pymc3 : 3.11.2\n",
"matplotlib: 3.4.2\n",
"theano : 1.1.2\n",
"pandas : 1.2.4\n",
"\n"
]
}
],
"source": [
"%load_ext watermark\n",
"%watermark --iversions"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Last updated: 2021-06-17T17:18:01.999297+02:00\n",
"\n",
"Python implementation: CPython\n",
"Python version : 3.8.5\n",
"IPython version : 7.24.1\n",
"\n",
"Compiler : GCC 9.3.0\n",
"OS : Linux\n",
"Release : 5.4.0-74-generic\n",
"Machine : x86_64\n",
"Processor : x86_64\n",
"CPU cores : 8\n",
"Architecture: 64bit\n",
"\n"
]
}
],
"source": [
"%watermark"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "pymc-labs",
"language": "python",
"name": "pymc-labs"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment