@willirath
Created October 12, 2018 17:03
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Monte Carlo Estimate of $\\pi$\n",
"\n",
"<img src=\"http://dask.readthedocs.io/en/latest/_images/dask_horizontal.svg\" \n",
" width=\"50%\" \n",
" align=top\n",
" alt=\"Dask logo\">\n",
"<img src=\"https://upload.wikimedia.org/wikipedia/commons/b/ba/Monte-Carlo01.gif\" \n",
" width=\"30%\" \n",
" align=top\n",
" alt=\"PI monte-carlo estimate\">\n",
" \n",
"Using [Dask's adaptivity](http://docs.dask.org/en/latest/setup/adaptive.html), we'll show that it is possible to scale the available resources to meet almost identical wall times irrespective of the acutal work load:\n",
"\n",
"- Estimating $\\pi$ from 16 GB of random data is done in 17 seconds using 3 workers (with 2 cores each).\n",
"- Estimating $\\pi$ from 512 GB of random data is done in 19 seconds using 142 workers (with 2 cores each).\n",
"- Estimating $\\pi$ from 1024 GB of random data is done in 21 seconds using 273 workers (with 2 cores each)."
]
},
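{
"cell_type": "markdown",
"metadata": {},
"source": [
"The estimator used below: for points $(x, y)$ drawn uniformly from the unit square $[0, 1]^2$, the probability of landing inside the quarter circle $x^2 + y^2 < 1$ equals its area, $\\pi / 4$. Hence\n",
"\n",
"$$\\pi \\approx 4 \\cdot \\frac{N_\\mathrm{in}}{N},$$\n",
"\n",
"where $N_\\mathrm{in}$ counts the points with $x^2 + y^2 < 1$ among all $N$ points; the statistical error shrinks like $1 / \\sqrt{N}$."
]
},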
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from dask_kubernetes import KubeCluster\n",
"cluster = KubeCluster(n_workers=1)"
]
},
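{
"cell_type": "markdown",
"metadata": {},
"source": [
"No Kubernetes cluster at hand? A minimal sketch of a local alternative (assuming a recent `dask.distributed`; it adapts local worker processes instead of pods, so the `cluster.pods()` call further down won't exist and would need adjusting):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Local alternative to the KubeCluster above -- uncomment to use instead.\n",
"# from dask.distributed import LocalCluster\n",
"# cluster = LocalCluster(n_workers=1)"
]
},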
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# check Adaptive? for help on adapt's kwargs.\n",
"from dask.distributed import Adaptive"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"cluster.adapt(minimum=1, maximum=400,\n",
" target_duration=\"20s\", # more realistic than the default \"5s\"?\n",
" wait_count=10, # 10 seconds before killing an idle worker\n",
" scale_factor=1.2); # scale slower than doubling (default)"
]
},
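{
"cell_type": "markdown",
"metadata": {},
"source": [
"Roughly, the adaptive controller periodically asks the scheduler how many workers it would take to finish the queued work within `target_duration`, and scales towards that number within `[minimum, maximum]`. This is why the bigger problems below land on proportionally more pods while wall times stay near 20 seconds."
]
},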
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://10.23.27.5:37004\n",
" <li><b>Dashboard: </b><a href='/user/willirath/proxy/8787/status' target='_blank'>/user/willirath/proxy/8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>0</li>\n",
" <li><b>Cores: </b>0</li>\n",
" <li><b>Memory: </b>0 B</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://10.23.27.5:37004' processes=0 cores=0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dask.distributed import Client\n",
"c = Client(cluster)\n",
"c"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(Check the dash board to see the cluster scale up and down!)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da\n",
"import numpy as np\n",
"from time import time\n",
"\n",
"def calc_pi_mc(size):\n",
" xy = da.random.uniform(0, 1, size=(size / 8 / 2, 2), chunks=(0.25e9 / 8, 2))\n",
" \n",
" in_circle = ((xy ** 2).sum(axis=-1) < 1)\n",
" pi = 4 * in_circle.mean()\n",
"\n",
" start = time()\n",
" pi = pi.compute()\n",
" end = time()\n",
" \n",
" num_pods = len(cluster.pods())\n",
" \n",
" print(\"Size of data:\", xy.nbytes / 1e9, \"GB\")\n",
" print(\"Monte-Carlo pi:\", pi)\n",
" print(\"Numpys pi:\", np.pi)\n",
" print(\"Delta:\", abs(pi - np.pi))\n",
" print(\"Duration: {:.2f} seconds with {} pods\".format(end-start, num_pods))\n",
" print()"
]
},
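{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check of the estimator (not part of the original timing runs), the same computation with plain NumPy on a small in-memory sample; with $10^7$ samples the result should match $\\pi$ to roughly three decimal places:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Small single-machine check of the estimator, no cluster involved.\n",
"xy_small = np.random.uniform(0, 1, size=(10_000_000, 2))\n",
"print(4 * ((xy_small ** 2).sum(axis=-1) < 1).mean())"
]
},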
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Size of data: 1.0 GB\n",
"Monte-Carlo pi: 3.141738048\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 0.0001453944102070004\n",
"Duration: 4.68 seconds with 1 pods\n",
"\n",
"Size of data: 2.0 GB\n",
"Monte-Carlo pi: 3.1416384\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 4.574641020704817e-05\n",
"Duration: 5.31 seconds with 1 pods\n",
"\n",
"Size of data: 4.0 GB\n",
"Monte-Carlo pi: 3.141615792\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 2.3138410206957616e-05\n",
"Duration: 7.91 seconds with 2 pods\n",
"\n",
"Size of data: 8.0 GB\n",
"Monte-Carlo pi: 3.141654136\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 6.148241020698109e-05\n",
"Duration: 10.73 seconds with 3 pods\n",
"\n",
"Size of data: 16.0 GB\n",
"Monte-Carlo pi: 3.141506724\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 8.592958979303233e-05\n",
"Duration: 17.35 seconds with 3 pods\n",
"\n",
"Size of data: 32.0 GB\n",
"Monte-Carlo pi: 3.141638062\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 4.5408410207059546e-05\n",
"Duration: 12.77 seconds with 12 pods\n",
"\n",
"Size of data: 64.0 GB\n",
"Monte-Carlo pi: 3.141572989\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 1.9664589792967035e-05\n",
"Duration: 19.20 seconds with 15 pods\n",
"\n",
"Size of data: 128.0 GB\n",
"Monte-Carlo pi: 3.141593464\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 8.104102069417252e-07\n",
"Duration: 17.55 seconds with 36 pods\n",
"\n",
"Size of data: 256.0 GB\n",
"Monte-Carlo pi: 3.14161230525\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 1.9651660206676524e-05\n",
"Duration: 18.69 seconds with 68 pods\n",
"\n",
"Size of data: 512.0 GB\n",
"Monte-Carlo pi: 3.14158963425\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 3.019339793297604e-06\n",
"Duration: 18.71 seconds with 142 pods\n",
"\n",
"Size of data: 1024.0 GB\n",
"Monte-Carlo pi: 3.1415884875\n",
"Numpys pi: 3.141592653589793\n",
"Delta: 4.166089793145034e-06\n",
"Duration: 20.80 seconds with 273 pods\n",
"\n"
]
}
],
"source": [
"from time import sleep\n",
"\n",
"for size in [1e9 * 2 ** n for n in range(11)]:\n",
" \n",
" calc_pi_mc(size)\n",
" sleep(10) # allow for some scale-down time"
]
}
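,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When done, release the resources (a minimal sketch; both the `Client` and the `KubeCluster` provide a `close()` method):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Shut down the client and the cluster to free the pods.\n",
"c.close()\n",
"cluster.close()"
]
}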
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}