Skip to content

Instantly share code, notes, and snippets.

@Robadob

Robadob/Makefile Secret

Last active January 5, 2022 13:49
Show Gist options
  • Save Robadob/77ef11e6e8fdd72d6431218da4ff7da4 to your computer and use it in GitHub Desktop.
Save Robadob/77ef11e6e8fdd72d6431218da4ff7da4 to your computer and use it in GitHub Desktop.
NVRTC Simple MemoryLeak
/**
* This file demonstrates a memory leak present in nvrtcCompileProgram()
* With the nvrtcCompileProgram() call in compile_kernel2() commented out, the memory footprint is static
* With that call left in place, the memory footprint grows as the source is compiled multiple times
* This was originally developed using Visual Studio 2019 (need to additionally link against nvrtc.lib)
* But has since been tested under GCC on Linux too (the Makefile used is included)
*/
#include <array>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <stdexcept>
#include <string>
#include <vector>
#include "nvrtc.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// If MSVC earlier than VS 2019
#if defined(_MSC_VER) && _MSC_VER < 1920
#include <filesystem>
using std::tr2::sys::exists;
using std::tr2::sys::path;
#else
// VS2019 requires this macro, as building pre c++17 cant use std::filesystem
#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
#include <experimental/filesystem>
using std::experimental::filesystem::v1::exists;
using std::experimental::filesystem::v1::path;
#endif
// Name under which the source below is registered with NVRTC.
const char* kernel_src_name = "my_program1";
// CUDA source handed to NVRTC on every compile: one function template and two kernels.
// Written as a raw string literal, so the text between the delimiters is the exact source
// (including the single leading spaces and the trailing newline).
const char* kernel_src = R"CUDA(template<typename T>
T pointless_func(T x) {
 return x;
}

__global__
void my_kernel1(float const* indata, float* outdata) {
 outdata[0] = indata[0] + 1;
 outdata[0] -= 1;
}

__global__
void my_kernel2(float const* indata, float* outdata) {
 for( int i=0; i<100; ++i ) {
 outdata[0] = pointless_func(indata[0]);
 }
}
)CUDA";
/**
* Find the cuda include directory.
* Throws exceptions if it can not be found.
* @return the path to the CUDA include directory.
*/
std::string getCUDAIncludeDir() {
// Define an array of environment variables to check in order
std::array<const std::string, 2> ENV_VARS{ "CUDA_PATH", "CUDA_HOME" };
std::string cuda_include_dir_str = "";
for (const auto& env_var : ENV_VARS) {
std::string env_value = std::getenv(env_var.c_str()) ? std::getenv(env_var.c_str()) : "";
if (!env_value.empty()) {
path check_path = path(env_value) / "include/";
// Use try catch to suppress file permission exceptions etc
try {
if (exists(check_path)) {
cuda_include_dir_str = check_path.string();
break;
}
}
catch (...) {}
// Throw if the value is not empty, but it does not exist. Outside the try catch excplicityly.
fprintf(stderr, "Error environment variable %s (%s) does not contain a valid CUDA include directory", env_var.c_str(), env_value.c_str());
throw std::exception();
}
}
// If none of the search enviornmental variables were useful, throw an exception.
if (cuda_include_dir_str.empty()) {
fprintf(stderr, "Error could not find CUDA include directory. Please specify using the CUDA_PATH environment variable");
throw std::exception();
}
return cuda_include_dir_str;
}
void compile_kernel2() {
nvrtcProgram nvrtc_program;
// Create
nvrtcResult create_result = nvrtcCreateProgram(
&nvrtc_program, kernel_src, kernel_src_name, 0, nullptr, nullptr);
if (create_result !=NVRTC_SUCCESS)
fprintf(stderr, "nvrtcCreateProgram() failed\n");
// Compile
std::vector<const char*> options_c(2);
options_c[0] = "--device-as-default-execution-space";
static const std::string cuda_include_dir = std::string("-I") + getCUDAIncludeDir();
options_c[1] = cuda_include_dir.c_str();
nvrtcResult compile_result = NVRTC_SUCCESS;
compile_result = nvrtcCompileProgram(nvrtc_program, (int)options_c.size(), options_c.data()); // Commenting out this line leads to static memory footprint
if (compile_result != NVRTC_SUCCESS) {
fprintf(stderr, "nvrtcCompileProgram() failed\n");
}
// Destroy
nvrtcResult destroy_result = nvrtcDestroyProgram(&nvrtc_program);
if (destroy_result != NVRTC_SUCCESS)
fprintf(stderr, "nvrtcDestroyProgram() failed\n");
}
int main()
{
cudaFree(nullptr); // Init context
printf("Compiling 10:\n");
for (int i = 0; i < 10; ++i) {
compile_kernel2();
}
printf("10 Complete! (check memory usage, hit enter)\n");
getchar();
printf("Compiling 100:\n");
for (int i = 0; i < 100; ++i) {
compile_kernel2();
}
printf("100 Complete! (check memory usage, hit enter)\n");
getchar();
printf("Compiling 1000:\n");
for (int i = 0; i < 1000; ++i) {
compile_kernel2();
}
printf("1000 Complete! (check memory usage, hit enter)\n");
getchar();
// printf("Compiling 10000:\n");
// for (int i = 0; i < 10000; ++i) {
// compile_kernel2();
// }
// printf("10000 Complete! (check memory usage, hit enter)\n");
// getchar();
return 0;
}
# Change the EXAMPLE variable to build a different source module (e.g. hello/example1/example4)
EXAMPLE=kernel
# Makefile variables
# Add extra targets to OBJ with space separator (e.g. if there is a source file random.cu then add random.o to OBJ)
# Add any additional dependencies (header files) to DEPS. e.g. if there is a header file random.h required by your source modules then add this to DEPS.
CC=gcc
# Host compiler flags; the rules below forward each one through nvcc with -Xcompiler
CFLAGS= -g -O3 --std=c++14
NVCC=nvcc
NVCC_FLAGS= -gencode arch=compute_70,code=compute_70 --std=c++14 -lstdc++fs -lnvrtc
OBJ=$(EXAMPLE).o
DEPS=kernel.cu
# Build rule for object files ($@ is left hand side of rule, $< is first item from the right hand side of rule)
# Recipe lines must start with a TAB character
%.o : %.cu $(DEPS)
	$(NVCC) -c -o $@ $< $(NVCC_FLAGS) $(addprefix -Xcompiler ,$(CFLAGS))
# Make example ($^ is all items from right hand side of the rule)
$(EXAMPLE) : $(OBJ)
	$(NVCC) -o $@ $^ $(NVCC_FLAGS) $(addprefix -Xcompiler ,$(CFLAGS))
# PHONY prevents make from doing something with a filename called clean
.PHONY : clean
clean:
	rm -rf $(EXAMPLE) $(OBJ)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment