AparaV/SIRSimulationsTestSuite.ipynb

## SIRSimulationsTestSuite.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created `%t` as an alias for `%timeit`.\n",
      "Created `%%t` as an alias for `%%timeit`.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import timeit\n",
    "\n",
    "%alias_magic t timeit\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Travel and infect kernel\n",
    "Speeding up the travel and infection kernel. One of the slowest parts is the binomial simulation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def version1(n, p, m):\n",
    "    infected_possibility = np.random.binomial(n, p, m)\n",
    "    infected_possibility[infected_possibility > 0] = 1\n",
    "    return np.sum(infected_possibility)\n",
    "\n",
    "def version2(n, p, m):\n",
    "    infected_possibility = np.random.geometric(p, m)\n",
    "    infected_possibility[infected_possibility <= n] = 1\n",
    "    infected_possibility[infected_possibility > n] = 0\n",
    "    return np.sum(infected_possibility)\n",
    "\n",
    "def version3(n, p, m):\n",
    "    prob = 1 - (1 - p)**n\n",
    "    return np.random.binomial(m, prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Version 1 3258.600\n",
      "Version 2 3255.250\n",
      "Version 3 3254.650\n"
     ]
    }
   ],
   "source": [
    "n = 10\n",
    "p = 0.1\n",
    "m = 5000\n",
    "num_sim = 100\n",
    "x1 = []\n",
    "x2 = []\n",
    "x3 = []\n",
    "\n",
    "for i in range(num_sim):\n",
    "    x1.append(version1(n, p, m))\n",
    "    x2.append(version2(n, p, m))\n",
    "    x3.append(version3(n, p, m))\n",
    "\n",
    "print(\"Version 1 {:.3f}\".format(np.mean(x1)))\n",
    "print(\"Version 2 {:.3f}\".format(np.mean(x2)))\n",
    "print(\"Version 3 {:.3f}\".format(np.mean(x3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "806 µs ± 42.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t version1(n, p, m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.12 ms ± 47.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t version2(n, p, m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9.45 µs ± 318 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t version3(n, p, m)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Recover kernel\n",
    "The other bottleneck is the recover kernel. Especially np.random.choice and counting it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recover1(m, SIR):\n",
    "    x = np.random.choice([0, 1, 2], m, p=SIR)\n",
    "    recovered = len(x[x == 0])\n",
    "    dead = len(x[x == 2])\n",
    "    return recovered, dead\n",
    "\n",
    "def recover2(m, SIR):\n",
    "    x = np.random.choice([0, 1, 2], m, p=SIR)\n",
    "    unique, counts = np.unique(x, return_counts=True)\n",
    "    if 0 in unique:\n",
    "        recovered = counts[0]\n",
    "    else:\n",
    "        recovered = 0\n",
    "    if 2 not in unique:\n",
    "        dead = 0\n",
    "    elif 0 in unique and 1 in unique:\n",
    "        dead = counts[2]\n",
    "    elif (0 not in unique and 1 in unique) or (0 in unique and 1 not in unique):\n",
    "        dead = counts[1]\n",
    "    else:\n",
    "        dead = counts[0]\n",
    "    return recovered, dead\n",
    "\n",
    "def recover3(m, SIR):\n",
    "    x = np.random.choice([0, 1, 2], m, p=SIR)\n",
    "    freq = Counter(x)\n",
    "    recovered = freq[0]\n",
    "    dead =freq[2]\n",
    "    return recovered, dead\n",
    "\n",
    "def recover4(m, SIR):\n",
    "    x = np.random.uniform(0, 1, m)\n",
    "    recovered = len(np.asarray(x < SIR[0]).nonzero()[0])\n",
    "    dead = len(np.asarray(x <= SIR[2]).nonzero()[0])\n",
    "    return recovered, max(0, dead)\n",
    "\n",
    "def recover5(m, SIR):\n",
    "    x = np.random.multinomial(m, SIR)\n",
    "    recovered = x[0]\n",
    "    dead = x[2]\n",
    "    return recovered, dead"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "m = 5000\n",
    "p_rec = 0.99\n",
    "p_die = 7.540044190323758e-05\n",
    "p_stay = 1 - p_rec - p_die\n",
    "SIR = [p_rec, p_stay, p_die]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Version 1 4950.080, 0.360\n",
      "Version 4 4950.880, 0.430\n",
      "Version 5 4949.280, 0.280\n"
     ]
    }
   ],
   "source": [
    "num_sim = 100\n",
    "\n",
    "r1 = []\n",
    "d1 = []\n",
    "\n",
    "r4 = []\n",
    "d4 = []\n",
    "\n",
    "r5 = []\n",
    "d5 = []\n",
    "\n",
    "for i in range(num_sim):\n",
    "    r, d = recover1(m, SIR)\n",
    "    r1.append(r)\n",
    "    d1.append(d)\n",
    "    \n",
    "    r, d = recover4(m, SIR)\n",
    "    r4.append(r)\n",
    "    d4.append(d)\n",
    "    \n",
    "    r, d = recover5(m, SIR)\n",
    "    r5.append(r)\n",
    "    d5.append(d)\n",
    "\n",
    "print(\"Version 1 {:.3f}, {:.3f}\".format(np.mean(r1), np.mean(d1)))\n",
    "print(\"Version 4 {:.3f}, {:.3f}\".format(np.mean(r4), np.mean(d4)))\n",
    "print(\"Version 5 {:.3f}, {:.3f}\".format(np.mean(r5), np.mean(d5)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "545 µs ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t recover1(m, SIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "812 µs ± 16.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t recover2(m, SIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.66 ms ± 145 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t recover3(m, SIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "275 µs ± 10.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t recover4(m, SIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9.6 µs ± 762 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%t recover5(m, SIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Created `%t` as an alias for `%timeit`.\n",
	"Created `%%t` as an alias for `%%timeit`.\n"
	]
	}
	],
	"source": [
	"import numpy as np\n",
	"import timeit\n",
	"\n",
	"%alias_magic t timeit\n",
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Travel and infect kernel\n",
	"Speeding up the travel and infection kernel. One of the slowest parts is the binomial simulation"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"def version1(n, p, m):\n",
	" infected_possibility = np.random.binomial(n, p, m)\n",
	" infected_possibility[infected_possibility > 0] = 1\n",
	" return np.sum(infected_possibility)\n",
	"\n",
	"def version2(n, p, m):\n",
	" infected_possibility = np.random.geometric(p, m)\n",
	" infected_possibility[infected_possibility <= n] = 1\n",
	" infected_possibility[infected_possibility > n] = 0\n",
	" return np.sum(infected_possibility)\n",
	"\n",
	"def version3(n, p, m):\n",
	" prob = 1 - (1 - p)**n\n",
	" return np.random.binomial(m, prob)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Version 1 3258.600\n",
	"Version 2 3255.250\n",
	"Version 3 3254.650\n"
	]
	}
	],
	"source": [
	"n = 10\n",
	"p = 0.1\n",
	"m = 5000\n",
	"num_sim = 100\n",
	"x1 = []\n",
	"x2 = []\n",
	"x3 = []\n",
	"\n",
	"for i in range(num_sim):\n",
	" x1.append(version1(n, p, m))\n",
	" x2.append(version2(n, p, m))\n",
	" x3.append(version3(n, p, m))\n",
	"\n",
	"print(\"Version 1 {:.3f}\".format(np.mean(x1)))\n",
	"print(\"Version 2 {:.3f}\".format(np.mean(x2)))\n",
	"print(\"Version 3 {:.3f}\".format(np.mean(x3)))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"806 µs ± 42.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
	]
	}
	],
	"source": [
	"%t version1(n, p, m)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1.12 ms ± 47.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
	]
	}
	],
	"source": [
	"%t version2(n, p, m)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"9.45 µs ± 318 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
	]
	}
	],
	"source": [
	"%t version3(n, p, m)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Recover kernel\n",
	"The other bottleneck is the recover kernel. Especially np.random.choice and counting it"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"from collections import Counter\n",
	"import random"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"def recover1(m, SIR):\n",
	" x = np.random.choice([0, 1, 2], m, p=SIR)\n",
	" recovered = len(x[x == 0])\n",
	" dead = len(x[x == 2])\n",
	" return recovered, dead\n",
	"\n",
	"def recover2(m, SIR):\n",
	" x = np.random.choice([0, 1, 2], m, p=SIR)\n",
	" unique, counts = np.unique(x, return_counts=True)\n",
	" if 0 in unique:\n",
	" recovered = counts[0]\n",
	" else:\n",
	" recovered = 0\n",
	" if 2 not in unique:\n",
	" dead = 0\n",
	" elif 0 in unique and 1 in unique:\n",
	" dead = counts[2]\n",
	" elif (0 not in unique and 1 in unique) or (0 in unique and 1 not in unique):\n",
	" dead = counts[1]\n",
	" else:\n",
	" dead = counts[0]\n",
	" return recovered, dead\n",
	"\n",
	"def recover3(m, SIR):\n",
	" x = np.random.choice([0, 1, 2], m, p=SIR)\n",
	" freq = Counter(x)\n",
	" recovered = freq[0]\n",
	" dead =freq[2]\n",
	" return recovered, dead\n",
	"\n",
	"def recover4(m, SIR):\n",
	" x = np.random.uniform(0, 1, m)\n",
	" recovered = len(np.asarray(x < SIR[0]).nonzero()[0])\n",
	" dead = len(np.asarray(x <= SIR[2]).nonzero()[0])\n",
	" return recovered, max(0, dead)\n",
	"\n",
	"def recover5(m, SIR):\n",
	" x = np.random.multinomial(m, SIR)\n",
	" recovered = x[0]\n",
	" dead = x[2]\n",
	" return recovered, dead"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"m = 5000\n",
	"p_rec = 0.99\n",
	"p_die = 7.540044190323758e-05\n",
	"p_stay = 1 - p_rec - p_die\n",
	"SIR = [p_rec, p_stay, p_die]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Version 1 4950.080, 0.360\n",
	"Version 4 4950.880, 0.430\n",
	"Version 5 4949.280, 0.280\n"
	]
	}
	],
	"source": [
	"num_sim = 100\n",
	"\n",
	"r1 = []\n",
	"d1 = []\n",
	"\n",
	"r4 = []\n",
	"d4 = []\n",
	"\n",
	"r5 = []\n",
	"d5 = []\n",
	"\n",
	"for i in range(num_sim):\n",
	" r, d = recover1(m, SIR)\n",
	" r1.append(r)\n",
	" d1.append(d)\n",
	" \n",
	" r, d = recover4(m, SIR)\n",
	" r4.append(r)\n",
	" d4.append(d)\n",
	" \n",
	" r, d = recover5(m, SIR)\n",
	" r5.append(r)\n",
	" d5.append(d)\n",
	"\n",
	"print(\"Version 1 {:.3f}, {:.3f}\".format(np.mean(r1), np.mean(d1)))\n",
	"print(\"Version 4 {:.3f}, {:.3f}\".format(np.mean(r4), np.mean(d4)))\n",
	"print(\"Version 5 {:.3f}, {:.3f}\".format(np.mean(r5), np.mean(d5)))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"545 µs ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
	]
	}
	],
	"source": [
	"%t recover1(m, SIR)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"812 µs ± 16.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
	]
	}
	],
	"source": [
	"%t recover2(m, SIR)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"3.66 ms ± 145 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%t recover3(m, SIR)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"275 µs ± 10.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
	]
	}
	],
	"source": [
	"%t recover4(m, SIR)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"9.6 µs ± 762 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
	]
	}
	],
	"source": [
	"%t recover5(m, SIR)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}