Skip to content

Instantly share code, notes, and snippets.

@Ext3h
Last active October 29, 2019 10:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ext3h/b037506884826f5a50e96e6f82647576 to your computer and use it in GitHub Desktop.
Save Ext3h/b037506884826f5a50e96e6f82647576 to your computer and use it in GitHub Desktop.
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <memory>
#include <vector>
#include <list>
#include <future>
#include <atomic>
#include <cassert>
#include <cuda.h>
#include <builtin_types.h>
// Checks the status code of a CUDA driver API call.
//
// result: value returned by the driver call. CUDA_SUCCESS is a no-op; any
//         other value triggers __debugbreak(). Nothing aborts or throws
//         after the break, so the caller continues if execution is resumed.
void CUcheck(CUresult result)
{
    if (result == CUDA_SUCCESS)
    {
        return;
    }
    __debugbreak();
}
class CudaDeviceContext {
public:
CudaDeviceContext(CUdevice device) : device_(device)
{
CUcheck(cuDevicePrimaryCtxSetFlags(device_, CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_LMEM_RESIZE_TO_MAX));
CUcheck(cuDevicePrimaryCtxRetain(&ctx_, device_));
}
~CudaDeviceContext()
{
CUcheck(cuDevicePrimaryCtxRelease(device_));
}
CudaDeviceContext() = delete;
CudaDeviceContext(CudaDeviceContext&&) = delete;
operator CUcontext() const {
return ctx_;
}
private:
CUdevice device_;
CUcontext ctx_;
};
class CudaContextLock {
public:
CudaContextLock(CUcontext context) : ctx_(context)
{
CUcheck(cuCtxPushCurrent(ctx_));
}
~CudaContextLock()
{
CUcontext context;
CUcheck(cuCtxPopCurrent(&context));
assert(context == ctx_);
}
CudaContextLock() = delete;
CudaContextLock(CudaContextLock&&) = delete;
private:
CUcontext ctx_;
};
class CudaSharedAllocation
{
public:
CudaSharedAllocation(std::size_t size) : data_(new uint8_t[size])
{
CUcheck(cuMemHostRegister(data_, size, CU_MEMHOSTREGISTER_DEVICEMAP));
}
~CudaSharedAllocation()
{
CUcheck(cuMemHostUnregister(data_));
delete[] data_;
}
operator uint8_t* () {
return data_;
}
CudaSharedAllocation() = delete;
CudaSharedAllocation(CudaSharedAllocation&&) = delete;
private:
uint8_t* data_ = nullptr;
};
// Stress test that registers and unregisters host memory with the CUDA
// driver from several threads per device until stop() is called.
class LeakTest
{
public:
    // Initializes the driver, retains the primary context of every device and
    // starts numWorkers asynchronous tasks per device. Each task makes the
    // device's context current and, while `running` is set, repeatedly builds
    // and destroys a list of CudaSharedAllocation objects. Blocks until every
    // task has finished, which only happens after stop() has been called.
    void run()
    {
        const size_t numWorkers = 4;
        const size_t allocationsPerRound = 8;
        const std::size_t allocationBytes = 16 * 1024 * 1024;

        running = true;
        CUcheck(cuInit(0));

        // std::list constructs elements in place and never relocates them;
        // CudaDeviceContext is not movable, and the workers below keep
        // references to the elements.
        std::list<CudaDeviceContext> allDevices;
        {
            int numDevices = -1;
            CUcheck(cuDeviceGetCount(&numDevices));
            // Signed index on purpose: the count is an int that stays at -1
            // if the query fails, and an unsigned index would turn that into
            // a practically endless loop. It also matches the int ordinal
            // that cuDeviceGet takes.
            for (int i = 0; i < numDevices; i++)
            {
                CUdevice device = 0;
                CUcheck(cuDeviceGet(&device, i));
                allDevices.emplace_back(device);
            }
        }

        std::vector<std::future<void>> workers;
        // Allocator test
        for (const auto& device : allDevices)
        {
            for (size_t i = 0; i < numWorkers; i++)
            {
                // `device` is captured by reference; this is safe because all
                // tasks are joined below, before allDevices is destroyed.
                workers.emplace_back(std::async(std::launch::async, [this, &device, allocationsPerRound, allocationBytes]() -> void {
                    CudaContextLock lock(device);
                    while (running)
                    {
                        // The list goes out of scope at the end of every
                        // iteration, so each round frees what it allocated.
                        std::list<CudaSharedAllocation> allocations;
                        for (size_t j = 0; j < allocationsPerRound; j++)
                        {
                            allocations.emplace_back(allocationBytes);
                        }
                    }
                }));
            }
        }

        // Wait for every worker to observe `running == false` and exit.
        for (auto& task : workers)
        {
            task.get();
        }
    }

    // Asks all workers to leave their loop after the current round. Only
    // writes the atomic flag.
    void stop()
    {
        running = false;
    }

private:
    std::atomic_bool running = { false };  // loop condition shared by all workers
};
// Single file-local test instance; main() runs it and the console control
// handler stops it.
static LeakTest test;
// Console control handler registered by main(). Asks the global LeakTest to
// stop and reports the event as handled.
//
// dwCtrlType: the control signal received; not inspected, every signal is
//             treated the same way.
// Returns TRUE unconditionally.
BOOL WINAPI HandlerRoutine(_In_ DWORD dwCtrlType)
{
    test.stop();
    return TRUE;
}
int main()
{
SetConsoleCtrlHandler(HandlerRoutine, true);
test.run();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment