Skip to content

Instantly share code, notes, and snippets.

@brunodantas
Last active October 16, 2019 10:31
Show Gist options
  • Save brunodantas/f5e6d0d1e8ba84766da707a3f00f9b74 to your computer and use it in GitHub Desktop.
Save brunodantas/f5e6d0d1e8ba84766da707a3f00f9b74 to your computer and use it in GitHub Desktop.
matrix multiplication: numba cuda vs numpy
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
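"# Matrix multiplication\n",
"\n",
"Benchmarks a naive Numba CUDA matrix-multiplication kernel against `np.dot` on the same square `float64` matrices."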
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Numba GPU"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from numba import cuda\n",
"@cuda.jit('void( float64 [ : , : ] , float64 [ : , : ] , float64 [ : , : ] , int32 )')\n",
"def cu_matmul(a, b, c, n):\n",
"    \"\"\"Naive CUDA kernel computing c = a @ b, one output element per thread.\n",
"\n",
"    a, b: input matrices, read at a[x, i] and b[i, y] for 0 <= x, y, i < n.\n",
"    c: output matrix; c[x, y] is overwritten by every in-range thread.\n",
"    n: extent of each indexed dimension (assumes all three arrays are at\n",
"       least n x n -- confirm at the launch site).\n",
"    \"\"\"\n",
"    x, y = cuda.grid(2)\n",
"    # The launch grid may be larger than n x n, so surplus threads exit early.\n",
"    if x >= n or y >= n:\n",
"        return\n",
"    # Accumulate in a local variable and store once, instead of doing a\n",
"    # read-modify-write on c[x, y] for every term of the dot product.\n",
"    acc = 0.0\n",
"    for i in range(n):\n",
"        acc += a[x, i] * b[i, y]\n",
"    c[x, y] = acc"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"14.7 ms ± 163 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"device = cuda.get_current_device()\n",
"# Threads per block along each axis; each block is tpb x tpb threads.\n",
"tpb = device.WARP_SIZE\n",
"n = 320\n",
"# Ceiling division so the grid covers all n rows/columns even when n is not a multiple of tpb.\n",
"bpg = (n + tpb - 1) // tpb\n",
"grid_dim = (bpg, bpg)\n",
"block_dim = (tpb, tpb)\n",
"# Fixed seed so the benchmark inputs are identical on every run.\n",
"rng = np.random.RandomState(0)\n",
"A = rng.random_sample((n, n)).astype(np.float64)\n",
"B = rng.random_sample((n, n)).astype(np.float64)\n",
"C = np.empty((n, n), dtype=np.float64)\n",
"dev_A = cuda.to_device(A)\n",
"dev_B = cuda.to_device(B)\n",
"# copy=False: allocate device storage shaped like C without uploading its (uninitialized) contents.\n",
"dev_C = cuda.to_device(C, copy=False)\n",
"# Kernel launches return before the GPU has finished, so synchronize inside the\n",
"# timed statement to measure execution time rather than launch overhead.\n",
"result_cuda = %timeit -o cu_matmul[grid_dim, block_dim](dev_A, dev_B, dev_C, n); cuda.synchronize()\n",
"dev_C.copy_to_host(C)\n",
"# Same tolerances as np.allclose, but reports the mismatch details on failure.\n",
"np.testing.assert_allclose(C, np.dot(A, B), rtol=1e-5, atol=1e-8)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NumPy"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"525 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"# Capture the timing with -o, as the GPU cell does, so the two results can be compared;\n",
"# the product itself is discarded (an assignment inside %timeit would not update C anyway).\n",
"result_numpy = %timeit -o np.dot(A, B)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment