Last active
April 3, 2019 21:32
-
-
Save Lokno/60a0eefbe1c641966710fbb839d5661c to your computer and use it in GitHub Desktop.
Script that compiles a CUDA application to report the attributes of a kernel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiles a CUDA application to report the attributes of a kernel
#
# usage: cuda_kernel_query.py <file> <entry name> [<define list>]
#
# file        - name of the CUDA source file containing the kernel of interest
# entry name  - name of the kernel to examine
# define list - comma-delimited list of preprocessor defines for compilation
#
# If BLOCK_SIZE is defined in the file, this value is used in the occupancy
# calculation; otherwise the blockSize returned by
# cudaOccupancyMaxPotentialBlockSize() is used instead.
import os
import subprocess
import sys
from pathlib import Path
# CUDA/C++ source template for the generated query program.
#
# 'FILE_NAME' and 'KERNEL_NAME' are placeholder tokens that the script replaces
# with the kernel's source file and entry name before compiling.
#
# The literal is raw (r"""...""") so that C escape sequences such as \n reach
# the generated file exactly as written.  In a non-raw Python string they are
# converted to real line breaks, which splits the CUDA_CHECK macro and every
# printf string literal across two lines and makes the generated file
# impossible to compile.
cuda_template = r"""
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Evaluates the call exactly once; on failure reports the error and exits.
#define CUDA_CHECK(call) { cudaError_t status_ = (call); if( status_ != cudaSuccess ) { fprintf(stderr, "ERROR: %s in %s at line %d\n", cudaGetErrorString(status_), __FILE__, __LINE__); exit(EXIT_FAILURE); } }

#include "FILE_NAME"

int main()
{
    int minGridSize;
    int maxPotentialBlockSize;
    int blockSize;
    int numBlocks;

    cudaDeviceProp deviceProp;
    cudaFuncAttributes attrib;

    CUDA_CHECK(cudaFuncGetAttributes(&attrib, KERNEL_NAME))
    CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, 0))
    CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&minGridSize,&maxPotentialBlockSize,KERNEL_NAME,0,deviceProp.maxThreadsPerBlock))

    // Prefer the block size chosen by the kernel source when it defines one.
#ifdef BLOCK_SIZE
    blockSize = BLOCK_SIZE;
#else
    blockSize = maxPotentialBlockSize;
#endif

    CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks,KERNEL_NAME,blockSize,0))

    int maxWarps = deviceProp.maxThreadsPerMultiProcessor / deviceProp.warpSize;

    printf("Device Info\n");
    printf("-----------\n");
    printf("deviceProp.maxThreadsPerBlock: %d\n", deviceProp.maxThreadsPerBlock);
    printf("deviceProp.maxThreadsPerMultiProcessor: %d\n", deviceProp.maxThreadsPerMultiProcessor );
    printf("deviceProp.warpSize: %d\n", deviceProp.warpSize);
    printf("maxWarps: %d\n\n", maxWarps);

    printf("Kernal KERNEL_NAME in file FILE_NAME\n\n");
    printf("minGridSize: %d\n", minGridSize);
    printf("MaxPotentialBlockSize: %d\n", maxPotentialBlockSize);
    printf("MaxActiveBlocksPerMultiprocessor: %d\n", numBlocks);
#ifdef BLOCK_SIZE
    printf("BLOCK_SIZE: %d\n", blockSize);
#endif
    // Active warps per multiprocessor relative to the device maximum.
    printf("Occupancy: %f%%\n\n", (double)(numBlocks * blockSize / deviceProp.warpSize) / maxWarps * 100.0);

    printf("cudaFuncAttributes\n");
    printf("------------------\n");
    printf(" binaryVersion: %.1f\n", attrib.binaryVersion / 10.0f);
    printf(" cacheModeCA: %d\n", attrib.cacheModeCA);
    printf(" constSizeBytes: %zu\n", attrib.constSizeBytes);
    printf(" localSizeBytes: %zu\n", attrib.localSizeBytes);
    printf(" maxDynamicSharedSizeBytes: %d\n", attrib.maxDynamicSharedSizeBytes);
    printf(" maxThreadsPerBlock: %d\n", attrib.maxThreadsPerBlock);
    printf(" numRegs: %d\n", attrib.numRegs);
    printf(" preferredShmemCarveout: %d\n", attrib.preferredShmemCarveout);
    printf(" ptxVersion: %d\n", attrib.ptxVersion);
    printf(" sharedSizeBytes: %zu\n", attrib.sharedSizeBytes);

    return 0;
}
"""
# Name of the generated CUDA translation unit, written to the current directory.
QUERY_SOURCE = "kernel_query.cu"
# Name of the executable nvcc builds from QUERY_SOURCE.
QUERY_BINARY = "kquery"
# Target architecture handed to nvcc's -gencode option.
GENCODE = "arch=compute_35,code=sm_35"


def build_define_flags(define_list):
    """Turn a comma-delimited define list into a list of nvcc -D flags.

    Surrounding whitespace is stripped and empty entries (e.g. from a
    trailing comma) are dropped so that no bare "-D" reaches the compiler.
    """
    names = (part.strip() for part in define_list.split(","))
    return ["-D%s" % name for name in names if name]


def render_query_source(template, source_file, kernel_name):
    """Fill the FILE_NAME and KERNEL_NAME placeholders of *template*."""
    filled = template.replace("FILE_NAME", source_file)
    return filled.replace("KERNEL_NAME", kernel_name)


def main(argv):
    """Generate, compile and run the kernel query program.

    argv follows sys.argv: [script, file, entry name, optional define list].
    Returns the process exit status: 2 for a usage error, 1 when the input
    file is invalid or nvcc cannot be started, nvcc's own status (or 1) when
    compilation fails, otherwise the status of the query executable.
    """
    if len(argv) not in (3, 4):
        script = argv[0] if argv else "cuda_kernel_query.py"
        print(" usage: %s <file> <entry name> [<define list>]" % script,
              file=sys.stderr)
        return 2

    source_file, kernel_name = argv[1], argv[2]
    source_path = Path(source_file)
    if not source_path.exists():
        print("Error: %s does not exist" % source_file, file=sys.stderr)
        return 1
    if not source_path.is_file():
        print("Error: %s is not a file" % source_file, file=sys.stderr)
        return 1

    define_flags = build_define_flags(argv[3]) if len(argv) == 4 else []

    # Remove any old executable so a stale build is never run after a
    # failed compilation.
    binary_path = Path(os.curdir) / QUERY_BINARY
    if binary_path.exists():
        binary_path.unlink()

    with open(QUERY_SOURCE, "w") as handle:
        handle.write(render_query_source(cuda_template, source_file, kernel_name))

    # Pass the arguments as a list (no shell) so the user-supplied defines
    # cannot be interpreted as shell syntax.
    compile_cmd = ["nvcc", "-ccbin", "g++"]
    compile_cmd += define_flags
    compile_cmd += ["-m64", "-gencode", GENCODE, "-o", QUERY_BINARY, QUERY_SOURCE]
    print(" ".join(compile_cmd))

    try:
        compiled = subprocess.run(compile_cmd)
    except FileNotFoundError:
        print("Error: nvcc was not found on PATH", file=sys.stderr)
        return 1

    if compiled.returncode != 0 or not binary_path.exists():
        print("Error: compilation of %s failed" % QUERY_SOURCE, file=sys.stderr)
        return compiled.returncode or 1

    # Run the freshly built query program and hand its status to the caller.
    return subprocess.run([os.path.join(os.curdir, QUERY_BINARY)]).returncode


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment