Ext3h/leaktest.cpp

## leaktest.cpp
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>

#include <memory>
#include <vector>
#include <list>
#include <future>
#include <atomic>

#include <cassert>

#include <cuda.h>
#include <builtin_types.h>

// Checks the status returned by a CUDA driver API call.
// CUDA_SUCCESS passes through silently; any other value hits __debugbreak().
void CUcheck(CUresult result)
{
	const bool succeeded = (result == CUDA_SUCCESS);
	if (succeeded)
	{
		return;
	}
	__debugbreak();
}

// Scoped holder for a device's primary context: the context is retained in
// the constructor and released again in the destructor.
class CudaDeviceContext {
	CUdevice device_;
	CUcontext ctx_;

public:
	// Neither default-constructible nor movable.
	CudaDeviceContext() = delete;
	CudaDeviceContext(CudaDeviceContext&&) = delete;

	// Requests CU_CTX_SCHED_BLOCKING_SYNC and CU_CTX_LMEM_RESIZE_TO_MAX on the
	// primary context of `device`, then retains that context into ctx_.
	CudaDeviceContext(CUdevice device) : device_(device)
	{
		const unsigned int flags = CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_LMEM_RESIZE_TO_MAX;
		CUcheck(cuDevicePrimaryCtxSetFlags(device_, flags));
		CUcheck(cuDevicePrimaryCtxRetain(&ctx_, device_));
	}

	// Drops the reference taken in the constructor.
	~CudaDeviceContext()
	{
		CUcheck(cuDevicePrimaryCtxRelease(device_));
	}

	// Implicit access to the retained context handle.
	operator CUcontext() const
	{
		return ctx_;
	}
};

// Scope guard that pushes a context with cuCtxPushCurrent on construction and
// pops it with cuCtxPopCurrent on destruction.
class CudaContextLock {
	CUcontext ctx_;

public:
	// Neither default-constructible nor movable.
	CudaContextLock() = delete;
	CudaContextLock(CudaContextLock&&) = delete;

	// Pushes `context` and remembers it for the check in the destructor.
	CudaContextLock(CUcontext context) : ctx_(context)
	{
		CUcheck(cuCtxPushCurrent(ctx_));
	}

	// Pops the current context; in builds with assertions enabled, verifies
	// that the popped context is the one this guard pushed.
	~CudaContextLock()
	{
		CUcontext context;
		CUcheck(cuCtxPopCurrent(&context));
		assert(context == ctx_);
	}
};

// Owns a new[]-allocated byte buffer that is registered with CUDA through
// cuMemHostRegister (CU_MEMHOSTREGISTER_DEVICEMAP) for the object's lifetime.
class CudaSharedAllocation
{
	uint8_t* data_ = nullptr;

public:
	// Neither default-constructible nor movable.
	CudaSharedAllocation() = delete;
	CudaSharedAllocation(CudaSharedAllocation&&) = delete;

	// Allocates `size` bytes and registers the buffer with the driver.
	CudaSharedAllocation(std::size_t size) : data_(new uint8_t[size])
	{
		CUcheck(cuMemHostRegister(data_, size, CU_MEMHOSTREGISTER_DEVICEMAP));
	}

	// Unregisters the buffer before the memory itself is freed.
	~CudaSharedAllocation()
	{
		CUcheck(cuMemHostUnregister(data_));
		delete[] data_;
	}

	// Implicit access to the start of the buffer.
	operator uint8_t* ()
	{
		return data_;
	}
};

class LeakTest
{
public:
	void run()
	{
		const size_t numWorkers = 4;
		running = true;
		CUcheck(cuInit(0));
		std::list<CudaDeviceContext> allDevices;
		{
			int numDevices = -1;
			CUcheck(cuDeviceGetCount(&numDevices));
			for (size_t i = 0; i < numDevices; i++)
			{
				CUdevice device = 0;
				CUcheck(cuDeviceGet(&device, i));
				allDevices.emplace_back(device);
			}
		}
		std::vector<std::future<void>> workers;

		// Allocator test
		for (const auto& device : allDevices)
		{
			for (size_t i = 0; i < numWorkers; i++)
			{
				workers.emplace_back(std::async(std::launch::async, [this, &device]() -> void {
					CudaContextLock lock(device);
					while (running)
					{
						std::list<CudaSharedAllocation> allocations;
						for (size_t j = 0; j < 8; j++)
						{
							allocations.emplace_back(16 * 1024 * 1024);
						}
					}
				}));
			}
		}

		for (auto& task : workers)
		{
			task.get();
		}
	}
	void stop()
	{
		running = false;
	}
private:
	std::atomic_bool running = { false };
};


// File-local test instance; the console control handler calls stop() on it
// and main() calls run() on it.
static LeakTest test;

// Console control handler: asks the global test to stop and reports the
// event as handled. The control type itself is not inspected.
BOOL WINAPI HandlerRoutine(_In_ DWORD dwCtrlType)
{
	test.stop();
	return TRUE;
}

int main()
{
	SetConsoleCtrlHandler(HandlerRoutine, true);
	test.run();
	return 0;
}
	#define WIN32_LEAN_AND_MEAN
	#include <Windows.h>

	#include <memory>
	#include <vector>
	#include <list>
	#include <future>
	#include <atomic>

	#include <cassert>

	#include <cuda.h>
	#include <builtin_types.h>

	// Checks the status returned by a CUDA driver API call.
	// CUDA_SUCCESS passes through silently; any other value hits __debugbreak().
	void CUcheck(CUresult result)
	{
		const bool succeeded = (result == CUDA_SUCCESS);
		if (succeeded)
		{
			return;
		}
		__debugbreak();
	}

	// Scoped holder for a device's primary context: the context is retained in
	// the constructor and released again in the destructor.
	class CudaDeviceContext {
	public:
		// Requests CU_CTX_SCHED_BLOCKING_SYNC and CU_CTX_LMEM_RESIZE_TO_MAX on the
		// primary context of `device`, then retains that context into ctx_.
		CudaDeviceContext(CUdevice device) : device_(device)
		{
			// The two flags are combined with a plain bitwise OR (the stray
			// backslash before the pipe was an escaping artifact, not C++).
			CUcheck(cuDevicePrimaryCtxSetFlags(device_, CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_LMEM_RESIZE_TO_MAX));
			CUcheck(cuDevicePrimaryCtxRetain(&ctx_, device_));
		}
		// Drops the reference taken in the constructor.
		~CudaDeviceContext()
		{
			CUcheck(cuDevicePrimaryCtxRelease(device_));
		}
		// Neither default-constructible nor movable.
		CudaDeviceContext() = delete;
		CudaDeviceContext(CudaDeviceContext&&) = delete;
		// Implicit access to the retained context handle.
		operator CUcontext() const {
			return ctx_;
		}
	private:
		CUdevice device_;
		CUcontext ctx_;
	};

	// Scope guard that pushes a context with cuCtxPushCurrent on construction and
	// pops it with cuCtxPopCurrent on destruction.
	class CudaContextLock {
		CUcontext ctx_;

	public:
		// Neither default-constructible nor movable.
		CudaContextLock() = delete;
		CudaContextLock(CudaContextLock&&) = delete;

		// Pushes `context` and remembers it for the check in the destructor.
		CudaContextLock(CUcontext context) : ctx_(context)
		{
			CUcheck(cuCtxPushCurrent(ctx_));
		}

		// Pops the current context; in builds with assertions enabled, verifies
		// that the popped context is the one this guard pushed.
		~CudaContextLock()
		{
			CUcontext context;
			CUcheck(cuCtxPopCurrent(&context));
			assert(context == ctx_);
		}
	};

	// Owns a new[]-allocated byte buffer that is registered with CUDA through
	// cuMemHostRegister (CU_MEMHOSTREGISTER_DEVICEMAP) for the object's lifetime.
	class CudaSharedAllocation
	{
		uint8_t* data_ = nullptr;

	public:
		// Neither default-constructible nor movable.
		CudaSharedAllocation() = delete;
		CudaSharedAllocation(CudaSharedAllocation&&) = delete;

		// Allocates `size` bytes and registers the buffer with the driver.
		CudaSharedAllocation(std::size_t size) : data_(new uint8_t[size])
		{
			CUcheck(cuMemHostRegister(data_, size, CU_MEMHOSTREGISTER_DEVICEMAP));
		}

		// Unregisters the buffer before the memory itself is freed.
		~CudaSharedAllocation()
		{
			CUcheck(cuMemHostUnregister(data_));
			delete[] data_;
		}

		// Implicit access to the start of the buffer.
		operator uint8_t* ()
		{
			return data_;
		}
	};

	class LeakTest
	{
	public:
	void run()
	{
	const size_t numWorkers = 4;
	running = true;
	CUcheck(cuInit(0));
	std::list<CudaDeviceContext> allDevices;
	{
	int numDevices = -1;
	CUcheck(cuDeviceGetCount(&numDevices));
	for (size_t i = 0; i < numDevices; i++)
	{
	CUdevice device = 0;
	CUcheck(cuDeviceGet(&device, i));
	allDevices.emplace_back(device);
	}
	}
	std::vector<std::future<void>> workers;

	// Allocator test
	for (const auto& device : allDevices)
	{
	for (size_t i = 0; i < numWorkers; i++)
	{
	workers.emplace_back(std::async(std::launch::async, [this, &device]() -> void {
	CudaContextLock lock(device);
	while (running)
	{
	std::list<CudaSharedAllocation> allocations;
	for (size_t j = 0; j < 8; j++)
	{
	allocations.emplace_back(16 * 1024 * 1024);
	}
	}
	}));
	}
	}

	for (auto& task : workers)
	{
	task.get();
	}
	}
	void stop()
	{
	running = false;
	}
	private:
	std::atomic_bool running = { false };
	};



	// File-local test instance; the console control handler calls stop() on it
	// and main() calls run() on it.
	static LeakTest test;

	// Console control handler: asks the global test to stop and reports the
	// event as handled. The control type itself is not inspected.
	BOOL WINAPI HandlerRoutine(_In_ DWORD dwCtrlType)
	{
		test.stop();
		return TRUE;
	}

	int main()
	{
	SetConsoleCtrlHandler(HandlerRoutine, true);
	test.run();
	return 0;
	}