willirath/elastic_monte_carlo_estimate_of_pi.ipynb

## elastic_monte_carlo_estimate_of_pi.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              elastic_monte_carlo_estimate_of_pi.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gistfile1.txt
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Monte Carlo Estimate of $\\pi$\n",
    "\n",
    "<img src=\"http://dask.readthedocs.io/en/latest/_images/dask_horizontal.svg\" \n",
    "     width=\"50%\" \n",
    "     align=top\n",
    "     alt=\"Dask logo\">\n",
    "<img src=\"https://upload.wikimedia.org/wikipedia/commons/b/ba/Monte-Carlo01.gif\" \n",
    "     width=\"30%\" \n",
    "     align=top\n",
    "     alt=\"PI monte-carlo estimate\">\n",
    "     \n",
    "Using [Dask's adaptivity](http://docs.dask.org/en/latest/setup/adaptive.html), we'll show that it is possible to scale the available resources to meet almost identical wall times irrespective of the actual workload:\n",
    "\n",
    "- Estimating $\\pi$ from 16 GB of random data is done in 17 seconds using 3 workers (with 2 cores each).\n",
    "- Estimating $\\pi$ from 512 GB of random data is done in 19 seconds using 142 workers (with 2 cores each).\n",
    "- Estimating $\\pi$ from 1024 GB of random data is done in 21 seconds using 273 workers (with 2 cores each)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask_kubernetes import KubeCluster\n",
    "cluster = KubeCluster(n_workers=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check Adaptive? for help on adapt's kwargs.\n",
    "from dask.distributed import Adaptive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "cluster.adapt(minimum=1, maximum=400,\n",
    "              target_duration=\"20s\",  # more realistic than the default \"5s\"?\n",
    "              wait_count=10,  # 10 seconds before killing an idle worker\n",
    "              scale_factor=1.2);  # scale slower than doubling (default)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table style=\"border: 2px solid white;\">\n",
       "<tr>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3>Client</h3>\n",
       "<ul>\n",
       "  <li><b>Scheduler: </b>tcp://10.23.27.5:37004\n",
       "  <li><b>Dashboard: </b><a href='/user/willirath/proxy/8787/status' target='_blank'>/user/willirath/proxy/8787/status</a>\n",
       "</ul>\n",
       "</td>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3>Cluster</h3>\n",
       "<ul>\n",
       "  <li><b>Workers: </b>0</li>\n",
       "  <li><b>Cores: </b>0</li>\n",
       "  <li><b>Memory: </b>0 B</li>\n",
       "</ul>\n",
       "</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<Client: scheduler='tcp://10.23.27.5:37004' processes=0 cores=0>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from dask.distributed import Client\n",
    "c = Client(cluster)\n",
    "c"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(Check the dashboard to see the cluster scale up and down!)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dask.array as da\n",
    "import numpy as np\n",
    "from time import time\n",
    "\n",
    "def calc_pi_mc(size):\n",
    "    \"\"\"Estimate pi by Monte Carlo sampling and print the result and timing.\n",
    "\n",
    "    Draws uniform random (x, y) points in the unit square and uses the\n",
    "    fraction falling inside the unit quarter circle to estimate pi.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    size : int or float\n",
    "        Approximate number of bytes of random data to generate, assuming\n",
    "        8-byte floats and two coordinates per point.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The summary (data size, estimate, error, duration, pod count) is\n",
    "        printed rather than returned.\n",
    "    \"\"\"\n",
    "    # Array and chunk dimensions must be integers; callers pass floats\n",
    "    # such as 1e9, so convert explicitly instead of handing floats to dask.\n",
    "    num_points = int(size) // (8 * 2)\n",
    "    chunk_rows = int(0.25e9) // 8\n",
    "    xy = da.random.uniform(0, 1, size=(num_points, 2), chunks=(chunk_rows, 2))\n",
    "    \n",
    "    # A point lies inside the quarter circle when x**2 + y**2 < 1.\n",
    "    in_circle = ((xy ** 2).sum(axis=-1) < 1)\n",
    "    pi = 4 * in_circle.mean()\n",
    "\n",
    "    # Only the actual (lazy) computation is timed, not the graph construction.\n",
    "    start = time()\n",
    "    pi = pi.compute()\n",
    "    end = time()\n",
    "    \n",
    "    # Pod count is sampled after the computation has finished.\n",
    "    num_pods = len(cluster.pods())\n",
    "    \n",
    "    print(\"Size of data:\", xy.nbytes / 1e9, \"GB\")\n",
    "    print(\"Monte-Carlo pi:\", pi)\n",
    "    print(\"Numpys pi:\", np.pi)\n",
    "    print(\"Delta:\", abs(pi - np.pi))\n",
    "    print(\"Duration: {:.2f} seconds with {} pods\".format(end-start, num_pods))\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of data: 1.0 GB\n",
      "Monte-Carlo pi: 3.141738048\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 0.0001453944102070004\n",
      "Duration: 4.68 seconds with 1 pods\n",
      "\n",
      "Size of data: 2.0 GB\n",
      "Monte-Carlo pi: 3.1416384\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 4.574641020704817e-05\n",
      "Duration: 5.31 seconds with 1 pods\n",
      "\n",
      "Size of data: 4.0 GB\n",
      "Monte-Carlo pi: 3.141615792\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 2.3138410206957616e-05\n",
      "Duration: 7.91 seconds with 2 pods\n",
      "\n",
      "Size of data: 8.0 GB\n",
      "Monte-Carlo pi: 3.141654136\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 6.148241020698109e-05\n",
      "Duration: 10.73 seconds with 3 pods\n",
      "\n",
      "Size of data: 16.0 GB\n",
      "Monte-Carlo pi: 3.141506724\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 8.592958979303233e-05\n",
      "Duration: 17.35 seconds with 3 pods\n",
      "\n",
      "Size of data: 32.0 GB\n",
      "Monte-Carlo pi: 3.141638062\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 4.5408410207059546e-05\n",
      "Duration: 12.77 seconds with 12 pods\n",
      "\n",
      "Size of data: 64.0 GB\n",
      "Monte-Carlo pi: 3.141572989\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 1.9664589792967035e-05\n",
      "Duration: 19.20 seconds with 15 pods\n",
      "\n",
      "Size of data: 128.0 GB\n",
      "Monte-Carlo pi: 3.141593464\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 8.104102069417252e-07\n",
      "Duration: 17.55 seconds with 36 pods\n",
      "\n",
      "Size of data: 256.0 GB\n",
      "Monte-Carlo pi: 3.14161230525\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 1.9651660206676524e-05\n",
      "Duration: 18.69 seconds with 68 pods\n",
      "\n",
      "Size of data: 512.0 GB\n",
      "Monte-Carlo pi: 3.14158963425\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 3.019339793297604e-06\n",
      "Duration: 18.71 seconds with 142 pods\n",
      "\n",
      "Size of data: 1024.0 GB\n",
      "Monte-Carlo pi: 3.1415884875\n",
      "Numpys pi: 3.141592653589793\n",
      "Delta: 4.166089793145034e-06\n",
      "Duration: 20.80 seconds with 273 pods\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from time import sleep\n",
    "\n",
    "for size in [1e9 * 2 ** n for n in range(11)]:\n",
    "    \n",
    "    calc_pi_mc(size)\n",
    "    sleep(10)  # allow for some scale-down time"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Monte Carlo Estimate of $\\pi$\n",
	"\n",
	"<img src=\"http://dask.readthedocs.io/en/latest/_images/dask_horizontal.svg\" \n",
	" width=\"50%\" \n",
	" align=top\n",
	" alt=\"Dask logo\">\n",
	"<img src=\"https://upload.wikimedia.org/wikipedia/commons/b/ba/Monte-Carlo01.gif\" \n",
	" width=\"30%\" \n",
	" align=top\n",
	" alt=\"PI monte-carlo estimate\">\n",
	" \n",
	"Using [Dask's adaptivity](http://docs.dask.org/en/latest/setup/adaptive.html), we'll show that it is possible to scale the available resources to meet almost identical wall times irrespective of the actual workload:\n",
	"\n",
	"- Estimating $\\pi$ from 16 GB of random data is done in 17 seconds using 3 workers (with 2 cores each).\n",
	"- Estimating $\\pi$ from 512 GB of random data is done in 19 seconds using 142 workers (with 2 cores each).\n",
	"- Estimating $\\pi$ from 1024 GB of random data is done in 21 seconds using 273 workers (with 2 cores each)."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"from dask_kubernetes import KubeCluster\n",
	"cluster = KubeCluster(n_workers=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# check Adaptive? for help on adapt's kwargs.\n",
	"from dask.distributed import Adaptive"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"cluster.adapt(minimum=1, maximum=400,\n",
	" target_duration=\"20s\", # more realistic than the default \"5s\"?\n",
	" wait_count=10, # 10 seconds before killing an idle worker\n",
	" scale_factor=1.2); # scale slower than doubling (default)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<table style=\"border: 2px solid white;\">\n",
	"<tr>\n",
	"<td style=\"vertical-align: top; border: 0px solid white\">\n",
	"<h3>Client</h3>\n",
	"<ul>\n",
	" <li><b>Scheduler: </b>tcp://10.23.27.5:37004\n",
	" <li><b>Dashboard: </b><a href='/user/willirath/proxy/8787/status' target='_blank'>/user/willirath/proxy/8787/status</a>\n",
	"</ul>\n",
	"</td>\n",
	"<td style=\"vertical-align: top; border: 0px solid white\">\n",
	"<h3>Cluster</h3>\n",
	"<ul>\n",
	" <li><b>Workers: </b>0</li>\n",
	" <li><b>Cores: </b>0</li>\n",
	" <li><b>Memory: </b>0 B</li>\n",
	"</ul>\n",
	"</td>\n",
	"</tr>\n",
	"</table>"
	],
	"text/plain": [
	"<Client: scheduler='tcp://10.23.27.5:37004' processes=0 cores=0>"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from dask.distributed import Client\n",
	"c = Client(cluster)\n",
	"c"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"(Check the dashboard to see the cluster scale up and down!)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"import dask.array as da\n",
	"import numpy as np\n",
	"from time import time\n",
	"\n",
	"def calc_pi_mc(size):\n",
	"    \"\"\"Estimate pi by Monte Carlo sampling and print the result and timing.\n",
	"\n",
	"    Draws uniform random (x, y) points in the unit square and uses the\n",
	"    fraction falling inside the unit quarter circle to estimate pi.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    size : int or float\n",
	"        Approximate number of bytes of random data to generate, assuming\n",
	"        8-byte floats and two coordinates per point.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    None\n",
	"        The summary (data size, estimate, error, duration, pod count) is\n",
	"        printed rather than returned.\n",
	"    \"\"\"\n",
	"    # Array and chunk dimensions must be integers; callers pass floats\n",
	"    # such as 1e9, so convert explicitly instead of handing floats to dask.\n",
	"    num_points = int(size) // (8 * 2)\n",
	"    chunk_rows = int(0.25e9) // 8\n",
	"    xy = da.random.uniform(0, 1, size=(num_points, 2), chunks=(chunk_rows, 2))\n",
	"\n",
	"    # A point lies inside the quarter circle when x**2 + y**2 < 1.\n",
	"    in_circle = ((xy ** 2).sum(axis=-1) < 1)\n",
	"    pi = 4 * in_circle.mean()\n",
	"\n",
	"    # Only the actual (lazy) computation is timed, not the graph construction.\n",
	"    start = time()\n",
	"    pi = pi.compute()\n",
	"    end = time()\n",
	"\n",
	"    # Pod count is sampled after the computation has finished.\n",
	"    num_pods = len(cluster.pods())\n",
	"\n",
	"    print(\"Size of data:\", xy.nbytes / 1e9, \"GB\")\n",
	"    print(\"Monte-Carlo pi:\", pi)\n",
	"    print(\"Numpys pi:\", np.pi)\n",
	"    print(\"Delta:\", abs(pi - np.pi))\n",
	"    print(\"Duration: {:.2f} seconds with {} pods\".format(end-start, num_pods))\n",
	"    print()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Size of data: 1.0 GB\n",
	"Monte-Carlo pi: 3.141738048\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 0.0001453944102070004\n",
	"Duration: 4.68 seconds with 1 pods\n",
	"\n",
	"Size of data: 2.0 GB\n",
	"Monte-Carlo pi: 3.1416384\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 4.574641020704817e-05\n",
	"Duration: 5.31 seconds with 1 pods\n",
	"\n",
	"Size of data: 4.0 GB\n",
	"Monte-Carlo pi: 3.141615792\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 2.3138410206957616e-05\n",
	"Duration: 7.91 seconds with 2 pods\n",
	"\n",
	"Size of data: 8.0 GB\n",
	"Monte-Carlo pi: 3.141654136\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 6.148241020698109e-05\n",
	"Duration: 10.73 seconds with 3 pods\n",
	"\n",
	"Size of data: 16.0 GB\n",
	"Monte-Carlo pi: 3.141506724\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 8.592958979303233e-05\n",
	"Duration: 17.35 seconds with 3 pods\n",
	"\n",
	"Size of data: 32.0 GB\n",
	"Monte-Carlo pi: 3.141638062\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 4.5408410207059546e-05\n",
	"Duration: 12.77 seconds with 12 pods\n",
	"\n",
	"Size of data: 64.0 GB\n",
	"Monte-Carlo pi: 3.141572989\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 1.9664589792967035e-05\n",
	"Duration: 19.20 seconds with 15 pods\n",
	"\n",
	"Size of data: 128.0 GB\n",
	"Monte-Carlo pi: 3.141593464\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 8.104102069417252e-07\n",
	"Duration: 17.55 seconds with 36 pods\n",
	"\n",
	"Size of data: 256.0 GB\n",
	"Monte-Carlo pi: 3.14161230525\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 1.9651660206676524e-05\n",
	"Duration: 18.69 seconds with 68 pods\n",
	"\n",
	"Size of data: 512.0 GB\n",
	"Monte-Carlo pi: 3.14158963425\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 3.019339793297604e-06\n",
	"Duration: 18.71 seconds with 142 pods\n",
	"\n",
	"Size of data: 1024.0 GB\n",
	"Monte-Carlo pi: 3.1415884875\n",
	"Numpys pi: 3.141592653589793\n",
	"Delta: 4.166089793145034e-06\n",
	"Duration: 20.80 seconds with 273 pods\n",
	"\n"
	]
	}
	],
	"source": [
	"from time import sleep\n",
	"\n",
	"for size in [1e9 * 2 ** n for n in range(11)]:\n",
	" \n",
	" calc_pi_mc(size)\n",
	" sleep(10) # allow for some scale-down time"
	]
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python [default]",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}