Created
November 12, 2018 15:05
-
-
Save sklam/0e750e0dea7571c68e94d99006ae8533 to your computer and use it in GitHub Desktop.
Getting PTXAS info from a numba cuda kernel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from numba import cuda, intp\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@cuda.jit\n", | |
"def foo(x):\n", | |
" tid =cuda.grid(1)\n", | |
" a = cuda.shared.array(1000, dtype=intp)\n", | |
" a[tid] = x[x.size - tid - 1]\n", | |
" x[x.size - tid - 1] = a[tid]\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"a = np.empty(10000)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"foo(a)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ptxas info : 28 bytes gmem\n", | |
"ptxas info : Compiling entry function '_ZN6cudapy8__main__7foo$241E5ArrayIdLi1E1C7mutable7alignedE' for 'sm_61'\n", | |
"ptxas info : Function properties for _ZN6cudapy8__main__7foo$241E5ArrayIdLi1E1C7mutable7alignedE\n", | |
"ptxas . 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", | |
"ptxas info : Used 10 registers, 376 bytes cmem[0]\n", | |
"info : 40 bytes gmem\n", | |
"info : Function properties for '_ZN6cudapy8__main__7foo$241E5ArrayIdLi1E1C7mutable7alignedE':\n", | |
"info : used 10 registers, 0 stack, 8000 bytes smem, 376 bytes cmem[0], 0 bytes lmem\n" | |
] | |
} | |
], | |
"source": [ | |
"kern = foo.specialize(a) # get a CUDAKernel object specialized to the given arguments\n", | |
"print(kern._func.get_info()) # get ptxas info" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In my opinion, 'tid' needs 4 registers if it is 'int32' type. So which variables need the rest 6 registers?