ayushdg/Numba_cuda.ipynb

## Numba_cuda.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numba import cuda\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.ones(100) + 1\n",
    "b = np.zeros(100)\n",
    "\n",
    "ca = cuda.to_device(a)\n",
    "cb = cuda.to_device(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "@cuda.jit\n",
    "def exp(ca,cb):\n",
    "    \"\"\"Record which thread touched each element and the step it used.\n",
    "\n",
    "    Every thread starts at its flat index ``cuda.grid(1)`` and advances by\n",
    "    ``cuda.gridsize(1)`` until it reaches ``ca.size``.\n",
    "\n",
    "    ca: array overwritten with the flat index of the writing thread.\n",
    "    cb: array overwritten with the value of ``cuda.gridsize(1)``.\n",
    "    \"\"\"\n",
    "    tid = cuda.grid(1)\n",
    "    step = cuda.gridsize(1)\n",
    "    idx = tid\n",
    "    while idx < ca.size:\n",
    "        ca[idx] = tid\n",
    "        cb[idx] = step\n",
    "        idx += step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Numba's launch configuration is kernel[blocks_per_grid, threads_per_block].\n",
    "# The total is 3 * 32 = 96 threads.\n",
    "tpb = 32  # threads per block\n",
    "bpg = 3   # blocks per grid\n",
    "exp[bpg, tpb](ca, cb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.\n",
      " 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.\n",
      " 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.\n",
      " 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.\n",
      " 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.\n",
      " 90. 91. 92. 93. 94. 95.  0.  1.  2.  3.]\n"
     ]
    }
   ],
   "source": [
    "print(ca.copy_to_host())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
      " 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
      " 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
      " 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
      " 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
      " 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.]\n"
     ]
    }
   ],
   "source": [
    "print(cb.copy_to_host())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "@cuda.reduce\n",
    "def sumreduce(a, b):\n",
    "    \"\"\"Binary operator handed to ``cuda.reduce``: returns ``a + b``.\"\"\"\n",
    "    total = a + b\n",
    "    return total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.ones(100) + 1\n",
    "b = np.zeros(100)\n",
    "\n",
    "ca = cuda.to_device(a)\n",
    "cb = cuda.to_device(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = np.zeros(1)\n",
    "res2 = cuda.to_device(res)\n",
    "@cuda.jit\n",
    "def sumsqr(a):\n",
    "    \"\"\"Square every element of ``a`` in place.\n",
    "\n",
    "    Each thread starts at ``cuda.grid(1)`` and advances by\n",
    "    ``cuda.gridsize(1)`` until it reaches ``a.size``.\n",
    "    \"\"\"\n",
    "    start = cuda.grid(1)\n",
    "    step = cuda.gridsize(1)\n",
    "    idx = start\n",
    "    while idx < a.size:\n",
    "        value = a[idx]\n",
    "        a[idx] = value * value\n",
    "        idx += step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Launch as kernel[blocks_per_grid, threads_per_block]: 3 blocks of 32 threads.\n",
    "sumsqr[3, 32](ca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
       "       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
       "       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
       "       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
       "       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
       "       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ca.copy_to_host()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "400.0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sumreduce(ca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "@cuda.jit(device=True)\n",
    "def sigdiffsq(a_slice,b_slice):\n",
    "    \"\"\"Device function: sum of squared element-wise differences.\n",
    "\n",
    "    Walks ``a_slice.size`` positions and reads ``b_slice`` at each of them.\n",
    "    \"\"\"\n",
    "    acc = 0\n",
    "    for k in range(a_slice.size):\n",
    "        diff = a_slice[k] - b_slice[k]\n",
    "        acc += diff**2\n",
    "    return acc\n",
    "\n",
    "@cuda.jit\n",
    "def window_sigdiffsqr(a,b,result,window_size):\n",
    "    \"\"\"Fill ``result[i]`` with ``sigdiffsq`` over the window starting at ``i``.\n",
    "\n",
    "    The window is the slice ``[i, i + window_size)`` of both ``a`` and ``b``.\n",
    "    Each thread starts at ``cuda.grid(1)`` and advances by\n",
    "    ``cuda.gridsize(1)`` until it reaches ``result.size``.\n",
    "    \"\"\"\n",
    "    first = cuda.grid(1)\n",
    "    step = cuda.gridsize(1)\n",
    "    for w in range(first, result.size, step):\n",
    "        stop = w + window_size\n",
    "        result[w] = sigdiffsq(a[w:stop], b[w:stop])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.ones(100) + 1\n",
    "b = np.zeros(100)\n",
    "window_size = 5\n",
    "result = np.zeros(a.size-window_size+1)\n",
    "\n",
    "ca = cuda.to_device(a)\n",
    "cb = cuda.to_device(b)\n",
    "cres = cuda.to_device(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "window_sigdiffsqr[32,32](ca,cb,cres,window_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
       "       20., 20., 20., 20., 20.])"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cres.copy_to_host()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cudf\n",
    "df = cudf.DataFrame()\n",
    "n_elem = 100_000_000\n",
    "\n",
    "# Tile the 10-value pattern with NumPy rather than first materialising a\n",
    "# 100-million-element Python list.\n",
    "pattern = np.array([1,2,3,4,5,4,3,2,1,0], dtype=np.int64)\n",
    "df['a'] = np.tile(pattern, n_elem//10)\n",
    "df['b'] = np.zeros(n_elem)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "window_size = 100\n",
    "result = cudf.Series(np.zeros(len(df)-window_size+1))\n",
    "\n",
    "ca = df['a'].to_gpu_array()\n",
    "cb = df['b'].to_gpu_array()\n",
    "cres = result.to_gpu_array()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 236 ms, sys: 8 ms, total: 244 ms\n",
      "Wall time: 244 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Kernel launches return before the GPU has finished, so wait for the\n",
    "# device inside the timed cell; otherwise kernel execution is not measured.\n",
    "window_sigdiffsqr[32,128](ca,cb,cres,window_size)\n",
    "cuda.synchronize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([850., 850., 850., ..., 850., 850., 850.])"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cres.copy_to_host()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For more examples, see:\n",
    "- https://numba.pydata.org/numba-doc/latest/cuda/kernels.html\n",
    "- https://numba.pydata.org/numba-doc/dev/cuda/examples.html\n",
    "- https://github.com/numba/numba-examples/tree/master/examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numba"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"from numba import cuda\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"a = np.ones(100) + 1\n",
	"b = np.zeros(100)\n",
	"\n",
	"ca = cuda.to_device(a)\n",
	"cb = cuda.to_device(b)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [],
	"source": [
	"@cuda.jit\n",
	"def exp(ca,cb):\n",
	"    \"\"\"Record which thread touched each element and the step it used.\n",
	"\n",
	"    Every thread starts at its flat index ``cuda.grid(1)`` and advances by\n",
	"    ``cuda.gridsize(1)`` until it reaches ``ca.size``.\n",
	"\n",
	"    ca: array overwritten with the flat index of the writing thread.\n",
	"    cb: array overwritten with the value of ``cuda.gridsize(1)``.\n",
	"    \"\"\"\n",
	"    ee = cuda.grid(1)\n",
	"    stride = cuda.gridsize(1)\n",
	"    for i in range(ee,ca.size,stride):\n",
	"        ca[i] = ee\n",
	"        cb[i] = stride"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Numba's launch configuration is kernel[blocks_per_grid, threads_per_block].\n",
	"# The total is 3 * 32 = 96 threads.\n",
	"tpb = 32  # threads per block\n",
	"bpg = 3   # blocks per grid\n",
	"exp[bpg, tpb](ca, cb)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17.\n",
	" 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.\n",
	" 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.\n",
	" 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.\n",
	" 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.\n",
	" 90. 91. 92. 93. 94. 95. 0. 1. 2. 3.]\n"
	]
	}
	],
	"source": [
	"print(ca.copy_to_host())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
	" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
	" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
	" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
	" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
	" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.]\n"
	]
	}
	],
	"source": [
	"print(cb.copy_to_host())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [],
	"source": [
	"@cuda.reduce\n",
	"def sumreduce(a, b):\n",
	" return a + b"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [],
	"source": [
	"a = np.ones(100) + 1\n",
	"b = np.zeros(100)\n",
	"\n",
	"ca = cuda.to_device(a)\n",
	"cb = cuda.to_device(b)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {},
	"outputs": [],
	"source": [
	"res = np.zeros(1)\n",
	"res2 = cuda.to_device(res)\n",
	"@cuda.jit\n",
	"def sumsqr(a):\n",
	"    \"\"\"Square every element of ``a`` in place.\n",
	"\n",
	"    Each thread starts at ``cuda.grid(1)`` and advances by\n",
	"    ``cuda.gridsize(1)`` until it reaches ``a.size``.\n",
	"    \"\"\"\n",
	"    pos = cuda.grid(1)\n",
	"    size = cuda.gridsize(1)\n",
	"    for i in range(pos,a.size,size):\n",
	"        a[i] = a[i] * a[i]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 33,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Launch as kernel[blocks_per_grid, threads_per_block]: 3 blocks of 32 threads.\n",
	"sumsqr[3, 32](ca)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 35,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
	" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
	" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
	" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
	" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
	" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.])"
	]
	},
	"execution_count": 35,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ca.copy_to_host()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 34,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"400.0"
	]
	},
	"execution_count": 34,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sumreduce(ca)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 36,
	"metadata": {},
	"outputs": [],
	"source": [
	"@cuda.jit(device=True)\n",
	"def sigdiffsq(a_slice,b_slice):\n",
	"    \"\"\"Device function: sum of squared element-wise differences.\n",
	"\n",
	"    Walks ``a_slice.size`` positions and reads ``b_slice`` at each of them.\n",
	"    \"\"\"\n",
	"    sig = 0\n",
	"    for i in range(a_slice.size):\n",
	"        sig = sig + (a_slice[i] - b_slice[i])**2\n",
	"    return sig\n",
	"\n",
	"@cuda.jit\n",
	"def window_sigdiffsqr(a,b,result,window_size):\n",
	"    \"\"\"Fill ``result[i]`` with ``sigdiffsq`` over the window starting at ``i``.\n",
	"\n",
	"    The window is the slice ``[i, i + window_size)`` of both ``a`` and ``b``.\n",
	"    Each thread starts at ``cuda.grid(1)`` and advances by\n",
	"    ``cuda.gridsize(1)`` until it reaches ``result.size``.\n",
	"    \"\"\"\n",
	"    pos = cuda.grid(1)\n",
	"    stride = cuda.gridsize(1)\n",
	"    for i in range(pos,result.size,stride):\n",
	"        result[i] = sigdiffsq(a[i:i+window_size],b[i:i+window_size])\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 37,
	"metadata": {},
	"outputs": [],
	"source": [
	"a = np.ones(100) + 1\n",
	"b = np.zeros(100)\n",
	"window_size = 5\n",
	"result = np.zeros(a.size-window_size+1)\n",
	"\n",
	"ca = cuda.to_device(a)\n",
	"cb = cuda.to_device(b)\n",
	"cres = cuda.to_device(result)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 38,
	"metadata": {},
	"outputs": [],
	"source": [
	"window_sigdiffsqr[32,32](ca,cb,cres,window_size)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 39,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
	" 20., 20., 20., 20., 20.])"
	]
	},
	"execution_count": 39,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"cres.copy_to_host()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 40,
	"metadata": {},
	"outputs": [],
	"source": [
	"import cudf\n",
	"df = cudf.DataFrame()\n",
	"n_elem = 100_000_000\n",
	"\n",
	"# Tile the 10-value pattern with NumPy rather than first materialising a\n",
	"# 100-million-element Python list.\n",
	"pattern = np.array([1,2,3,4,5,4,3,2,1,0], dtype=np.int64)\n",
	"df['a'] = np.tile(pattern, n_elem//10)\n",
	"df['b'] = np.zeros(n_elem)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 41,
	"metadata": {},
	"outputs": [],
	"source": [
	"window_size = 100\n",
	"result = cudf.Series(np.zeros(len(df)-window_size+1))\n",
	"\n",
	"ca = df['a'].to_gpu_array()\n",
	"cb = df['b'].to_gpu_array()\n",
	"cres = result.to_gpu_array()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 42,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 236 ms, sys: 8 ms, total: 244 ms\n",
	"Wall time: 244 ms\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"# Kernel launches return before the GPU has finished, so wait for the\n",
	"# device inside the timed cell; otherwise kernel execution is not measured.\n",
	"window_sigdiffsqr[32,128](ca,cb,cres,window_size)\n",
	"cuda.synchronize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 43,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([850., 850., 850., ..., 850., 850., 850.])"
	]
	},
	"execution_count": 43,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"cres.copy_to_host()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"For more examples, see:\n",
	"- https://numba.pydata.org/numba-doc/latest/cuda/kernels.html\n",
	"- https://numba.pydata.org/numba-doc/dev/cuda/examples.html\n",
	"- https://github.com/numba/numba-examples/tree/master/examples"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}