Skip to content

Instantly share code, notes, and snippets.

@Lokno
Last active April 3, 2019 21:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lokno/60a0eefbe1c641966710fbb839d5661c to your computer and use it in GitHub Desktop.
Save Lokno/60a0eefbe1c641966710fbb839d5661c to your computer and use it in GitHub Desktop.
Script that compiles a CUDA application to report the attributes of a kernel
# Compiles a CUDA application to report the attributes of a kernel
#
# usage: cuda_kernel_query.py <file> <entry name> [<define list>]
#
# file - name of the CUDA source file containing the kernel of interest
# entry name - name of the kernel to examine
# define list - comma-delimited list of preprocessor defines for compilation
#
# If BLOCK_SIZE is defined in the file, this value is used in the occupancy
# calculation, otherwise the blockSize returned by
# cudaOccupancyMaxPotentialBlockSize() is used instead.
import os
import subprocess
import sys
from pathlib import Path
# CUDA/C++ source of the generated query program.
#
# Two placeholders are substituted textually before the file is written:
#   FILE_NAME   - path of the user's CUDA source file (it is #include'd)
#   KERNEL_NAME - name of the kernel whose attributes are reported
#
# This MUST be a raw string: the C string literals below contain "\n" escape
# sequences that have to reach the .cu file as backslash-n. In a normal Python
# string they would turn into real newlines and split every C string literal
# (and the one-line CUDA_CHECK macro) across lines, breaking compilation.
cuda_template = r"""
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#define CUDA_CHECK(err) { cudaError_t cuda_check_status_ = (err); if( cuda_check_status_ != cudaSuccess ) { fprintf(stderr, "ERROR: %s in %s at line %d\n", cudaGetErrorString(cuda_check_status_), __FILE__, __LINE__); exit(EXIT_FAILURE); } }
#include "FILE_NAME"
int main()
{
    int minGridSize;
    int maxPotentialBlockSize;
    int blockSize;
    int numBlocks;
    cudaDeviceProp deviceProp;
    cudaFuncAttributes attrib;

    CUDA_CHECK(cudaFuncGetAttributes(&attrib, KERNEL_NAME))
    CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, 0))
    CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&minGridSize,&maxPotentialBlockSize,KERNEL_NAME,0,deviceProp.maxThreadsPerBlock))

    // Prefer the block size defined by the included source, if any.
#ifdef BLOCK_SIZE
    blockSize = BLOCK_SIZE;
#else
    blockSize = maxPotentialBlockSize;
#endif

    CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks,KERNEL_NAME,blockSize,0))

    int maxWarps = deviceProp.maxThreadsPerMultiProcessor / deviceProp.warpSize;

    printf("Device Info\n");
    printf("-----------\n");
    printf("deviceProp.maxThreadsPerBlock: %d\n", deviceProp.maxThreadsPerBlock);
    printf("deviceProp.maxThreadsPerMultiProcessor: %d\n", deviceProp.maxThreadsPerMultiProcessor );
    printf("deviceProp.warpSize: %d\n", deviceProp.warpSize);
    printf("maxWarps: %d\n\n", maxWarps);

    printf("Kernel KERNEL_NAME in file FILE_NAME\n\n");
    printf("minGridSize: %d\n", minGridSize);
    printf("MaxPotentialBlockSize: %d\n", maxPotentialBlockSize);
    printf("MaxActiveBlocksPerMultiprocessor: %d\n", numBlocks);
#ifdef BLOCK_SIZE
    printf("BLOCK_SIZE: %d\n", blockSize);
#endif
    // Occupancy = active warps per multiprocessor / maximum warps per multiprocessor.
    printf("Occupancy: %f%%\n\n", (double)(numBlocks * blockSize / deviceProp.warpSize) / maxWarps * 100.0);

    printf("cudaFuncAttributes\n");
    printf("------------------\n");
    printf(" binaryVersion: %.1f\n", attrib.binaryVersion / 10.0f);
    printf(" cacheModeCA: %d\n", attrib.cacheModeCA);
    printf(" constSizeBytes: %zu\n", attrib.constSizeBytes);
    printf(" localSizeBytes: %zu\n", attrib.localSizeBytes);
    printf(" maxDynamicSharedSizeBytes: %d\n", attrib.maxDynamicSharedSizeBytes);
    printf(" maxThreadsPerBlock: %d\n", attrib.maxThreadsPerBlock);
    printf(" numRegs: %d\n", attrib.numRegs);
    printf(" preferredShmemCarveout: %d\n", attrib.preferredShmemCarveout);
    printf(" ptxVersion: %d\n", attrib.ptxVersion);
    printf(" sharedSizeBytes: %zu\n", attrib.sharedSizeBytes);
    return 0;
}
"""
def build_define_flags(define_arg):
    """Convert a comma-delimited define list into nvcc ``-D`` flags.

    Empty entries (for example from a doubled or trailing comma) are skipped
    so that a bare ``-D`` is never handed to the compiler.
    """
    names = (item.strip() for item in define_arg.split(','))
    return ['-D%s' % name for name in names if name]


def build_nvcc_command(define_flags):
    """Return the nvcc argument list that builds kernel_query.cu into kquery.

    The command is a list rather than a shell string so that defines
    containing spaces or shell metacharacters are passed through verbatim.
    """
    return (['nvcc', '-ccbin', 'g++'] + list(define_flags) +
            ['-m64', '-gencode', 'arch=compute_35,code=sm_35',
             '-o', 'kquery', 'kernel_query.cu'])


def main(argv):
    """Generate, compile and run the kernel query program.

    argv follows the sys.argv layout:
    ``[prog, <file>, <entry name>, [<define list>]]``.

    Returns the process exit status: 0 after printing usage or on success,
    -1 when the input file is missing / not a regular file or nvcc cannot be
    started, otherwise the non-zero status of nvcc or of the query program.
    """
    if len(argv) not in (3, 4):
        print(" usage: %s <file> <entry name> [<define list>]" % argv[0])
        return 0

    source_name, kernel_name = argv[1], argv[2]
    source_path = Path(source_name)
    if not source_path.exists():
        print("Error: %s does not exist" % source_name, file=sys.stderr)
        return -1
    if not source_path.is_file():
        print("Error: %s is not a file" % source_name, file=sys.stderr)
        return -1

    define_flags = build_define_flags(argv[3]) if len(argv) == 4 else []

    # Remove any old executable so that its presence after the build can only
    # mean that this build produced it.
    executable = Path("./kquery")
    if executable.exists():
        executable.unlink()

    # cuda_template is the module-level CUDA source template with the
    # FILE_NAME / KERNEL_NAME placeholders.
    program = cuda_template.replace('FILE_NAME', source_name)
    program = program.replace('KERNEL_NAME', kernel_name)
    with open('kernel_query.cu', 'w') as f:
        f.write(program)

    command = build_nvcc_command(define_flags)
    # Flush so the echoed command appears before the compiler's own output.
    print(' '.join(command), flush=True)
    try:
        build_status = subprocess.call(command)
    except OSError as err:
        # Typically nvcc is not installed or not on PATH.
        print("Error: could not run nvcc: %s" % err, file=sys.stderr)
        return -1

    if build_status != 0 or not executable.exists():
        return build_status or -1

    return subprocess.call(["./kquery"])


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment