Skip to content

Instantly share code, notes, and snippets.

@ayushdg
Created July 29, 2019 04:42
Show Gist options
  • Save ayushdg/f8ce4e8e68a23a10aa42072f4e891b5d to your computer and use it in GitHub Desktop.
Save ayushdg/f8ce4e8e68a23a10aa42072f4e891b5d to your computer and use it in GitHub Desktop.
Small Numba examples
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import numba"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from numba import cuda\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Host-side inputs: 100 float64 twos and 100 float64 zeros.\n",
"a = np.full(100, 2.0)\n",
"b = np.zeros(100)\n",
"\n",
"# Device-side copies that the kernels read from and write to.\n",
"ca, cb = cuda.to_device(a), cuda.to_device(b)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"@cuda.jit\n",
"def exp(ca, cb):\n",
"    \"\"\"Record each thread's grid index in ``ca`` and the grid size in ``cb``.\n",
"\n",
"    Every thread starts at its own absolute index and advances by the total\n",
"    number of launched threads, so all of ``ca`` is covered even when it is\n",
"    longer than the grid. ``cb`` is indexed with the same positions, so it\n",
"    must be at least as long as ``ca``.\n",
"    \"\"\"\n",
"    thread_idx = cuda.grid(1)     # absolute index of this thread in the 1D grid\n",
"    n_threads = cuda.gridsize(1)  # total number of threads in the launch\n",
"    for i in range(thread_idx, ca.size, n_threads):\n",
"        ca[i] = thread_idx\n",
"        cb[i] = n_threads"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Launch configuration is kernel[blocks_per_grid, threads_per_block].\n",
"tpb = 32  # threads per block (one full warp)\n",
"bpg = 3   # blocks per grid -> 3 * 32 = 96 threads in total\n",
"exp[bpg, tpb](ca, cb)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17.\n",
" 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.\n",
" 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.\n",
" 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.\n",
" 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.\n",
" 90. 91. 92. 93. 94. 95. 0. 1. 2. 3.]\n"
]
}
],
"source": [
"# Each element holds the grid index of the thread that wrote it. With 96 threads\n",
"# and 100 elements, threads 0-3 loop a second time and also write elements 96-99.\n",
"print(ca.copy_to_host())"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n",
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.]\n"
]
}
],
"source": [
"# Every element holds the grid size seen by the kernel: 32 * 3 = 96 threads.\n",
"print(cb.copy_to_host())"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"@cuda.reduce\n",
"def sumreduce(lhs, rhs):\n",
"    \"\"\"Binary operation for the GPU reduction: add two partial results.\"\"\"\n",
"    return lhs + rhs"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Start again from fresh inputs: the device arrays were overwritten by the `exp` kernel.\n",
"a = np.full(100, 2.0)\n",
"b = np.zeros(100)\n",
"\n",
"ca, cb = cuda.to_device(a), cuda.to_device(b)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"@cuda.jit\n",
"def sumsqr(a):\n",
"    \"\"\"Square every element of ``a`` in place.\n",
"\n",
"    Despite the name, no summation happens here; the total is obtained\n",
"    separately by passing the squared array to ``sumreduce``.\n",
"    \"\"\"\n",
"    start = cuda.grid(1)       # absolute index of this thread in the 1D grid\n",
"    stride = cuda.gridsize(1)  # total number of threads in the launch\n",
"    for i in range(start, a.size, stride):\n",
"        a[i] = a[i] * a[i]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# kernel[blocks_per_grid, threads_per_block]: 3 blocks of 32 threads each.\n",
"sumsqr[3, 32](ca)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n",
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# After `sumsqr`, every 2.0 on the device has been squared in place to 4.0.\n",
"ca.copy_to_host()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"400.0"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reduce the squared device array to a scalar: 100 elements * 4.0 = 400.0.\n",
"sumreduce(ca)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"@cuda.jit(device=True)\n",
"def sigdiffsq(a_slice, b_slice):\n",
"    \"\"\"Return the sum of squared element-wise differences of two slices.\n",
"\n",
"    Iterates over ``a_slice.size`` elements, so ``b_slice`` must be at\n",
"    least as long as ``a_slice``.\n",
"    \"\"\"\n",
"    sig = 0\n",
"    for i in range(a_slice.size):\n",
"        sig = sig + (a_slice[i] - b_slice[i]) ** 2\n",
"    return sig\n",
"\n",
"\n",
"@cuda.jit\n",
"def window_sigdiffsqr(a, b, result, window_size):\n",
"    \"\"\"Fill ``result[i]`` with ``sigdiffsq`` of the windows of ``a`` and ``b`` starting at ``i``.\n",
"\n",
"    ``result.size`` determines how many windows are computed; size it as\n",
"    ``a.size - window_size + 1`` so every window lies fully inside the inputs.\n",
"    \"\"\"\n",
"    pos = cuda.grid(1)         # absolute index of this thread in the 1D grid\n",
"    stride = cuda.gridsize(1)  # total number of threads in the launch\n",
"    for i in range(pos, result.size, stride):\n",
"        result[i] = sigdiffsq(a[i:i + window_size], b[i:i + window_size])\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"a = np.full(100, 2.0)\n",
"b = np.zeros(100)\n",
"window_size = 5\n",
"# One output slot per start position that still fits a full window.\n",
"result = np.zeros(a.size - window_size + 1)\n",
"\n",
"# Copy inputs and the output buffer to the device.\n",
"ca, cb, cres = (cuda.to_device(host_arr) for host_arr in (a, b, result))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# 32 blocks of 32 threads; the kernel's strided loop bounds itself by result.size,\n",
"# so surplus threads simply do no work.\n",
"window_sigdiffsqr[32, 32](ca, cb, cres, window_size)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n",
" 20., 20., 20., 20., 20.])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each window spans 5 elements with (2 - 0)**2 = 4, so all 96 windows sum to 20.\n",
"cres.copy_to_host()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"import cudf\n",
"\n",
"n_elem = 100_000_000\n",
"# Repeating 10-element pattern, tiled with NumPy so that no 100M-element\n",
"# Python list has to be materialised on the host.\n",
"pattern = np.array([1, 2, 3, 4, 5, 4, 3, 2, 1, 0], dtype=np.int64)\n",
"\n",
"df = cudf.DataFrame()\n",
"df['a'] = np.tile(pattern, n_elem // 10)\n",
"df['b'] = np.zeros(n_elem)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"window_size = 100\n",
"# One result slot per start position that still fits a full window.\n",
"result = cudf.Series(np.zeros(len(df) - window_size + 1))\n",
"\n",
"# Numba device arrays for the two columns and the output buffer.\n",
"ca, cb = (df[col].to_gpu_array() for col in ('a', 'b'))\n",
"cres = result.to_gpu_array()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 236 ms, sys: 8 ms, total: 244 ms\n",
"Wall time: 244 ms\n"
]
}
],
"source": [
"%%time\n",
"# 32 blocks of 128 threads. NOTE: the first launch with new argument types\n",
"# also triggers JIT compilation, which this timing includes.\n",
"window_sigdiffsqr[32, 128](ca, cb, cres, window_size)\n",
"# Kernel launches are asynchronous; wait for completion so the timing covers the GPU work.\n",
"cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([850., 850., 850., ..., 850., 850., 850.])"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Any 100-element window covers 10 full periods of the pattern, whose squares\n",
"# sum to 1+4+9+16+25+16+9+4+1+0 = 85, hence 10 * 85 = 850 everywhere.\n",
"cres.copy_to_host()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For more examples, see:\n",
"\n",
"- http://numba.pydata.org/numba-doc/latest/cuda/kernels.html\n",
"- https://numba.pydata.org/numba-doc/dev/cuda/examples.html\n",
"- https://github.com/numba/numba-examples/tree/master/examples"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment