Last active
October 16, 2019 10:31
-
-
Save brunodantas/f5e6d0d1e8ba84766da707a3f00f9b74 to your computer and use it in GitHub Desktop.
matrix multiplication: numba cuda vs numpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Matrix multiplication" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Numba GPU" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"from numba import cuda\n", | |
"@cuda.jit('void( float64 [ : , : ] , float64 [ : , : ] , float64 [ : , : ] , int32 )')\n", | |
"def cu_matmul(a , b, c , n) :\n", | |
" x, y = cuda.grid (2)\n", | |
" if (x >= n) or (y >= n) :\n", | |
" return\n", | |
" c[x, y] = 0\n", | |
" for i in range(n) :\n", | |
" c[x, y] += a[x, i ] * b[ i , y]\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"14.7 ms ± 163 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"device = cuda.get_current_device()\n", | |
"tpb = device.WARP_SIZE\n", | |
"n = 320\n", | |
"bpg = (n+tpb-1)//tpb\n", | |
"grid_dim = (bpg, bpg)\n", | |
"block_dim = (tpb , tpb)\n", | |
"A = np.random.random((n, n ) ).astype (np. float64 )\n", | |
"B = np.random.random((n, n ) ).astype (np. float64 )\n", | |
"C = np.empty((n, n) , dtype=np.float64 )\n", | |
"dev_A = cuda.to_device(A)\n", | |
"dev_B = cuda.to_device(B)\n", | |
"dev_C = cuda.to_device(C, copy=False )\n", | |
"result_cuda = %timeit -o cu_matmul[grid_dim , block_dim](dev_A, dev_B, dev_C, n)\n", | |
"dev_C. copy_to_host(C)\n", | |
"assert (np. allclose (np. dot(A, B) , C))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## NumPy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"525 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit C = np.dot(A, B)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment