Robadob/Makefile Secret

## kernel.cu
/**
 * This file demonstrates a memory leak present in nvrtcCompileProgram().
 * With the nvrtcCompileProgram() call in compile_kernel2() commented out, the memory footprint is static.
 * With that call left in, the memory footprint grows as the source is compiled multiple times.
 * This was originally developed using Visual Studio 2019 (which additionally needs to link against nvrtc.lib),
 * but it has since been tested under GCC on Linux too (the makefile used is included).
 */
#include <array>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <string>
#include <vector>

#include "nvrtc.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// If MSVC earlier than VS 2019
#if defined(_MSC_VER) && _MSC_VER < 1920
#include <filesystem>
using std::tr2::sys::exists;
using std::tr2::sys::path;
#else
// VS2019 requires this macro, as builds targeting pre-C++17 can't use std::filesystem
#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
#include <experimental/filesystem>
using std::experimental::filesystem::v1::exists;
using std::experimental::filesystem::v1::path;
#endif

// Program name handed to nvrtcCreateProgram() together with kernel_src.
const char* kernel_src_name =
"my_program1";
// CUDA source text that compile_kernel2() compiles at runtime.
// It declares a template helper (pointless_func) and two __global__ kernels
// (my_kernel1, my_kernel2). The adjacent string literals below are concatenated
// by the compiler into a single source string; note that one statement of
// my_kernel2 is split across two literals.
const char* kernel_src =
"template<typename T>\n"
"T pointless_func(T x) {\n"
"	return x;\n"
"}\n"
"\n"
"__global__\n"
"void my_kernel1(float const* indata, float* outdata) {\n"
"    outdata[0] = indata[0] + 1;\n"
"    outdata[0] -= 1;\n"
"}\n"
"\n"
"__global__\n"
"void my_kernel2(float const* indata, float* outdata) {\n"
"    for( int i=0; i<100; ++i ) {\n"
"        outdata[0] = "
"pointless_func(indata[0]);\n"
"    }\n"
"}\n";
/**
 * Find the cuda include directory.
 * Throws exceptions if it can not be found.
 * @return the path to the CUDA include directory.
 */
std::string getCUDAIncludeDir() {
    // Define an array of environment variables to check in order
    std::array<const std::string, 2> ENV_VARS{ "CUDA_PATH", "CUDA_HOME" };
    std::string cuda_include_dir_str = "";
    for (const auto& env_var : ENV_VARS) {
        std::string env_value = std::getenv(env_var.c_str()) ? std::getenv(env_var.c_str()) : "";
        if (!env_value.empty()) {
            path check_path = path(env_value) / "include/";
            // Use try catch to suppress file permission exceptions etc
            try {
                if (exists(check_path)) {
                    cuda_include_dir_str = check_path.string();
                    break;
                }
            }
            catch (...) {}
            // Throw if the value is not empty, but it does not exist. Outside the try catch excplicityly.
            fprintf(stderr, "Error environment variable %s (%s) does not contain a valid CUDA include directory", env_var.c_str(), env_value.c_str());
            throw std::exception();
        }
    }
    // If none of the search enviornmental variables were useful, throw an exception.
    if (cuda_include_dir_str.empty()) {
        fprintf(stderr, "Error could not find CUDA include directory. Please specify using the CUDA_PATH environment variable");
        throw std::exception();
    }
    return cuda_include_dir_str;
}
void compile_kernel2() {
    nvrtcProgram nvrtc_program;
    // Create
    nvrtcResult create_result = nvrtcCreateProgram(
        &nvrtc_program, kernel_src, kernel_src_name, 0, nullptr, nullptr);
    if (create_result !=NVRTC_SUCCESS)
        fprintf(stderr, "nvrtcCreateProgram() failed\n");
    // Compile
    std::vector<const char*> options_c(2);
    options_c[0] = "--device-as-default-execution-space";
    static const std::string cuda_include_dir = std::string("-I") + getCUDAIncludeDir();
    options_c[1] = cuda_include_dir.c_str();
    nvrtcResult compile_result = NVRTC_SUCCESS;
    compile_result = nvrtcCompileProgram(nvrtc_program, (int)options_c.size(), options_c.data());  // Commenting out this line leads to static memory footprint
    if (compile_result != NVRTC_SUCCESS) {
        fprintf(stderr, "nvrtcCompileProgram() failed\n");
    }
    // Destroy
    nvrtcResult destroy_result = nvrtcDestroyProgram(&nvrtc_program);
    if (destroy_result != NVRTC_SUCCESS)
        fprintf(stderr, "nvrtcDestroyProgram() failed\n");
}

/**
 * Entry point of the reproduction.
 *
 * Runs compile_kernel2() in batches of 10, 100 and 1000 compilations. After
 * each batch a message is printed and the program waits on getchar(), so the
 * process's memory usage can be inspected before the next batch starts.
 *
 * @return 0 always.
 */
int main()
{
    cudaFree(nullptr); // Init context

    // One row per batch: how many compilations to run and the messages printed around them
    struct Batch {
        int runs;
        const char* banner;
        const char* done;
    };
    const Batch batches[] = {
        { 10,   "Compiling 10:\n",   "10 Complete! (check memory usage, hit enter)\n" },
        { 100,  "Compiling 100:\n",  "100 Complete! (check memory usage, hit enter)\n" },
        { 1000, "Compiling 1000:\n", "1000 Complete! (check memory usage, hit enter)\n" },
        // Disabled larger batch:
        // { 10000, "Compiling 10000:\n", "10000 Complete! (check memory usage, hit enter)\n" },
    };

    for (const Batch& batch : batches) {
        printf("%s", batch.banner);
        for (int remaining = batch.runs; remaining > 0; --remaining) {
            compile_kernel2();
        }
        printf("%s", batch.done);
        // Pause until the user presses enter
        getchar();
    }

    return 0;
}

## Makefile
# Change the EXAMPLE variable to build a different source module (e.g. hello/example1/example4)
EXAMPLE=kernel

# Makefile variables
# Add extra targets to OBJ with a space separator (e.g. if there is a source file random.cu then add random.o to OBJ)
# Add any additional dependencies (header files) to DEPS, e.g. if there is a header file random.h required by your source modules then add this to DEPS.
# CFLAGS holds the host compiler flags; the rules below forward each of them through nvcc with -Xcompiler.
CC=gcc
CFLAGS= -g -O3 --std=c++14
NVCC=nvcc
NVCC_FLAGS= -gencode arch=compute_70,code=compute_70 --std=c++14 -lstdc++fs -lnvrtc
OBJ=$(EXAMPLE).o
# The pattern rule already depends on the matching .cu file, so only headers belong here
DEPS=

# Build rule for object files ($@ is left hand side of rule, $< is first item from the right hand side of rule)
%.o : %.cu $(DEPS)
	$(NVCC) -c -o $@ $< $(NVCC_FLAGS) $(addprefix -Xcompiler ,$(CFLAGS))

# Make example ($^ is all items from right hand side of the rule)
$(EXAMPLE) : $(OBJ)
	$(NVCC) -o $@ $^ $(NVCC_FLAGS) $(addprefix -Xcompiler ,$(CFLAGS))

# PHONY prevents make from doing something with a filename called clean
.PHONY : clean
clean:
	rm -rf $(EXAMPLE) $(OBJ)
	/**
	* This file demonstrates a memory leak present in nvrtcCompileProgram().
	* With the nvrtcCompileProgram() call in compile_kernel2() commented out, the memory footprint is static.
	* With that call left in, the memory footprint grows as the source is compiled multiple times.
	* This was originally developed using Visual Studio 2019 (which additionally needs to link against nvrtc.lib),
	* but it has since been tested under GCC on Linux too (the makefile used is included).
	*/
	#include <array>
	#include <cassert>
	#include <vector>
	#include <cstdlib>

	#include "nvrtc.h"
	#include "cuda_runtime.h"
	#include "device_launch_parameters.h"
	// If MSVC earlier than VS 2019
	#if defined(_MSC_VER) && _MSC_VER < 1920
	#include <filesystem>
	using std::tr2::sys::exists;
	using std::tr2::sys::path;
	#else
	// VS2019 requires this macro, as builds targeting pre-C++17 can't use std::filesystem
	#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
	#include <experimental/filesystem>
	using std::experimental::filesystem::v1::exists;
	using std::experimental::filesystem::v1::path;
	#endif

	// Program name handed to nvrtcCreateProgram() together with kernel_src.
	const char* kernel_src_name =
	"my_program1";
	// CUDA source text that compile_kernel2() compiles at runtime.
	// It declares a template helper (pointless_func) and two __global__ kernels
	// (my_kernel1, my_kernel2). The adjacent string literals below are concatenated
	// by the compiler into a single source string; note that one statement of
	// my_kernel2 is split across two literals.
	const char* kernel_src =
	"template<typename T>\n"
	"T pointless_func(T x) {\n"
	" return x;\n"
	"}\n"
	"\n"
	"__global__\n"
	"void my_kernel1(float const* indata, float* outdata) {\n"
	" outdata[0] = indata[0] + 1;\n"
	" outdata[0] -= 1;\n"
	"}\n"
	"\n"
	"__global__\n"
	"void my_kernel2(float const* indata, float* outdata) {\n"
	" for( int i=0; i<100; ++i ) {\n"
	" outdata[0] = "
	"pointless_func(indata[0]);\n"
	" }\n"
	"}\n";
	/**
	* Find the cuda include directory.
	* Throws exceptions if it can not be found.
	* @return the path to the CUDA include directory.
	*/
	std::string getCUDAIncludeDir() {
	// Define an array of environment variables to check in order
	std::array<const std::string, 2> ENV_VARS{ "CUDA_PATH", "CUDA_HOME" };
	std::string cuda_include_dir_str = "";
	for (const auto& env_var : ENV_VARS) {
	std::string env_value = std::getenv(env_var.c_str()) ? std::getenv(env_var.c_str()) : "";
	if (!env_value.empty()) {
	path check_path = path(env_value) / "include/";
	// Use try catch to suppress file permission exceptions etc
	try {
	if (exists(check_path)) {
	cuda_include_dir_str = check_path.string();
	break;
	}
	}
	catch (...) {}
	// Throw if the value is not empty, but it does not exist. Outside the try catch excplicityly.
	fprintf(stderr, "Error environment variable %s (%s) does not contain a valid CUDA include directory", env_var.c_str(), env_value.c_str());
	throw std::exception();
	}
	}
	// If none of the search enviornmental variables were useful, throw an exception.
	if (cuda_include_dir_str.empty()) {
	fprintf(stderr, "Error could not find CUDA include directory. Please specify using the CUDA_PATH environment variable");
	throw std::exception();
	}
	return cuda_include_dir_str;
	}
	void compile_kernel2() {
	nvrtcProgram nvrtc_program;
	// Create
	nvrtcResult create_result = nvrtcCreateProgram(
	&nvrtc_program, kernel_src, kernel_src_name, 0, nullptr, nullptr);
	if (create_result !=NVRTC_SUCCESS)
	fprintf(stderr, "nvrtcCreateProgram() failed\n");
	// Compile
	std::vector<const char*> options_c(2);
	options_c[0] = "--device-as-default-execution-space";
	static const std::string cuda_include_dir = std::string("-I") + getCUDAIncludeDir();
	options_c[1] = cuda_include_dir.c_str();
	nvrtcResult compile_result = NVRTC_SUCCESS;
	compile_result = nvrtcCompileProgram(nvrtc_program, (int)options_c.size(), options_c.data()); // Commenting out this line leads to static memory footprint
	if (compile_result != NVRTC_SUCCESS) {
	fprintf(stderr, "nvrtcCompileProgram() failed\n");
	}
	// Destroy
	nvrtcResult destroy_result = nvrtcDestroyProgram(&nvrtc_program);
	if (destroy_result != NVRTC_SUCCESS)
	fprintf(stderr, "nvrtcDestroyProgram() failed\n");
	}

	/**
	 * Entry point of the reproduction.
	 *
	 * Runs compile_kernel2() in batches of 10, 100 and 1000 compilations. After
	 * each batch a message is printed and the program waits on getchar(), so the
	 * process's memory usage can be inspected before the next batch starts.
	 *
	 * @return 0 always.
	 */
	int main()
	{
	    cudaFree(nullptr); // Init context

	    // One row per batch: how many compilations to run and the messages printed around them
	    struct Batch {
	        int runs;
	        const char* banner;
	        const char* done;
	    };
	    const Batch batches[] = {
	        { 10,   "Compiling 10:\n",   "10 Complete! (check memory usage, hit enter)\n" },
	        { 100,  "Compiling 100:\n",  "100 Complete! (check memory usage, hit enter)\n" },
	        { 1000, "Compiling 1000:\n", "1000 Complete! (check memory usage, hit enter)\n" },
	        // Disabled larger batch:
	        // { 10000, "Compiling 10000:\n", "10000 Complete! (check memory usage, hit enter)\n" },
	    };

	    for (const Batch& batch : batches) {
	        printf("%s", batch.banner);
	        for (int remaining = batch.runs; remaining > 0; --remaining) {
	            compile_kernel2();
	        }
	        printf("%s", batch.done);
	        // Pause until the user presses enter
	        getchar();
	    }

	    return 0;
	}
	# Change the EXAMPLE variable to build a different source module (e.g. hello/example1/example4)
	EXAMPLE=kernel

	# Makefile variables
	# Add extra targets to OBJ with a space separator (e.g. if there is a source file random.cu then add random.o to OBJ)
	# Add any additional dependencies (header files) to DEPS, e.g. if there is a header file random.h required by your source modules then add this to DEPS.
	# CFLAGS holds the host compiler flags; the rules below forward each of them through nvcc with -Xcompiler.
	CC=gcc
	CFLAGS= -g -O3 --std=c++14
	NVCC=nvcc
	NVCC_FLAGS= -gencode arch=compute_70,code=compute_70 --std=c++14 -lstdc++fs -lnvrtc
	OBJ=$(EXAMPLE).o
	# The pattern rule already depends on the matching .cu file, so only headers belong here
	DEPS=

	# Build rule for object files ($@ is left hand side of rule, $< is first item from the right hand side of rule)
	%.o : %.cu $(DEPS)
		$(NVCC) -c -o $@ $< $(NVCC_FLAGS) $(addprefix -Xcompiler ,$(CFLAGS))

	# Make example ($^ is all items from right hand side of the rule)
	$(EXAMPLE) : $(OBJ)
		$(NVCC) -o $@ $^ $(NVCC_FLAGS) $(addprefix -Xcompiler ,$(CFLAGS))

	# PHONY prevents make from doing something with a filename called clean
	.PHONY : clean
	clean:
		rm -rf $(EXAMPLE) $(OBJ)