Created
July 29, 2019 04:42
-
-
Save ayushdg/f8ce4e8e68a23a10aa42072f4e891b5d to your computer and use it in GitHub Desktop.
Small Numba examples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numba" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from numba import cuda\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"a = np.ones(100) + 1\n", | |
"b = np.zeros(100)\n", | |
"\n", | |
"ca = cuda.to_device(a)\n", | |
"cb = cuda.to_device(b)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@cuda.jit\n", | |
"def exp(ca,cb):\n", | |
" ee = cuda.grid(1)\n", | |
" stride = cuda.gridsize(1)\n", | |
" for i in range(ee,ca.size,stride):\n", | |
" ca[i] = ee\n", | |
" cb[i] = stride" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tpb = 32\n", | |
"bpg = 3\n", | |
"exp[tpb,bpg](ca,cb)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17.\n", | |
" 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.\n", | |
" 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.\n", | |
" 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.\n", | |
" 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.\n", | |
" 90. 91. 92. 93. 94. 95. 0. 1. 2. 3.]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(ca.copy_to_host())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n", | |
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n", | |
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n", | |
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n", | |
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.\n", | |
" 96. 96. 96. 96. 96. 96. 96. 96. 96. 96.]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(cb.copy_to_host())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@cuda.reduce\n", | |
"def sumreduce(a, b):\n", | |
" return a + b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"a = np.ones(100) + 1\n", | |
"b = np.zeros(100)\n", | |
"\n", | |
"ca = cuda.to_device(a)\n", | |
"cb = cuda.to_device(b)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"res = np.zeros(1)\n", | |
"res2 = cuda.to_device(res)\n", | |
"@cuda.jit\n", | |
"def sumsqr(a):\n", | |
" pos = cuda.grid(1)\n", | |
" size = cuda.gridsize(1)\n", | |
" for i in range(pos,a.size,size):\n", | |
" a[i] = a[i] * a[i]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sumsqr[32,3](ca)\n", | |
"#sumreduce(ca)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n", | |
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n", | |
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n", | |
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n", | |
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,\n", | |
" 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.])" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ca.copy_to_host()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"400.0" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sumreduce(ca)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@cuda.jit(device=True)\n", | |
"def sigdiffsq(a_slice,b_slice):\n", | |
" sig = 0\n", | |
" for i in range(a_slice.size):\n", | |
" sig = sig + (a_slice[i] - b_slice[i])**2\n", | |
" return sig\n", | |
"\n", | |
"@cuda.jit\n", | |
"def window_sigdiffsqr(a,b,result,window_size):\n", | |
" pos = cuda.grid(1)\n", | |
" stride = cuda.gridsize(1)\n", | |
" for i in range(pos,result.size,stride):\n", | |
" result[i] = sigdiffsq(a[i:i+window_size],b[i:i+window_size])\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"a = np.ones(100) + 1\n", | |
"b = np.zeros(100)\n", | |
"window_size = 5\n", | |
"result = np.zeros(a.size-window_size+1)\n", | |
"\n", | |
"ca = cuda.to_device(a)\n", | |
"cb = cuda.to_device(b)\n", | |
"cres = cuda.to_device(result)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"window_sigdiffsqr[32,32](ca,cb,cres,window_size)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,\n", | |
" 20., 20., 20., 20., 20.])" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cres.copy_to_host()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import cudf\n", | |
"df = cudf.DataFrame()\n", | |
"n_elem = 100_000_000\n", | |
"\n", | |
"df['a'] = [1,2,3,4,5,4,3,2,1,0]*(n_elem//10)\n", | |
"df['b'] = np.zeros(n_elem)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"window_size = 100\n", | |
"result = cudf.Series(np.zeros(len(df)-window_size+1))\n", | |
"\n", | |
"ca = df['a'].to_gpu_array()\n", | |
"cb = df['b'].to_gpu_array()\n", | |
"cres = result.to_gpu_array()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 236 ms, sys: 8 ms, total: 244 ms\n", | |
"Wall time: 244 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"window_sigdiffsqr[32,128](ca,cb,cres,window_size)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([850., 850., 850., ..., 850., 850., 850.])" | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cres.copy_to_host()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"For more examples look into: \n", | |
"- http://numba.pydata.org/numba-doc/latest/cuda/kernels.html\n", | |
"- https://numba.pydata.org/numba-doc/dev/cuda/examples.html\n", | |
"- https://github.com/numba/numba-examples/tree/master/examples" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment