Skip to content

Instantly share code, notes, and snippets.

@native-m
Created July 9, 2019 19:24
Show Gist options
  • Save native-m/facbe0f797f16f7c55cb46b512610a80 to your computer and use it in GitHub Desktop.
Save native-m/facbe0f797f16f7c55cb46b512610a80 to your computer and use it in GitHub Desktop.
Performing async tasks on different GPUs using DirectX 11
#include <Windows.h>
#include <d3d11.h>
#include <iostream>
#include <vector>
#include <thread>
#include <mutex>
#include <d3dcompiler.h>
#pragma comment(lib, "dxgi.lib")
#pragma comment(lib, "d3d11.lib")
#pragma comment(lib, "d3dcompiler.lib")
// Stringifies its argument with the preprocessor `#` operator so that HLSL
// source can be written inline below without quoting every line.
#define MAKE_SHADER(x) #x
// Input operands: two ten-element float arrays. The task in main() uploads
// them as the initial contents of the GPU buffers bound to t0 and t1.
float dataA[10] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f };
float dataB[10] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f };
// HLSL compute shader source, stored as a C string. Each thread group holds a
// single thread (numthreads(1,1,1)); the thread with dispatch ID i.x writes
// bufA[i.x] + bufB[i.x] into bufC[i.x].
const char computeShader[] = MAKE_SHADER(
Buffer<float> bufA : register(t0);
Buffer<float> bufB : register(t1);
RWBuffer<float> bufC : register(u0);
[numthreads(1,1,1)]
void CSMain(uint3 i : SV_DispatchThreadID)
{
bufC[i.x] = bufA[i.x] + bufB[i.x];
}
);
// Mutex guarding console output written from the worker threads.
std::mutex mut;
// Forward declarations of the helper functions defined after main().
HRESULT CreateBuffer(ID3D11Device* device, UINT sz, void* pInitData, ID3D11Buffer** bufOut);
HRESULT CreateBufferSrv(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11ShaderResourceView ** srvOut);
HRESULT CreateBufferUav(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11UnorderedAccessView** uavOut);
ID3D11Buffer* CopyBuffer(ID3D11Device* device, ID3D11DeviceContext* ctx, ID3D11Buffer* buf);
HRESULT CompileShader(const char* shader, SIZE_T sz, ID3D10Blob** bytecode, ID3D10Blob** errMsg);
int main()
{
HRESULT hr;
IDXGIFactory* factory;
IDXGIAdapter* adapter;
ID3D11Device* dev0 = nullptr;
ID3D11DeviceContext* ctx0 = nullptr;
ID3D11Device* dev1 = nullptr;
ID3D11DeviceContext* ctx1 = nullptr;
std::vector<IDXGIAdapter*> adapters;
static const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0,
D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1 };
D3D_FEATURE_LEVEL supported;
float result0[10] = { 0.0f };
float result1[10] = { 0.0f };
if (FAILED(hr = CreateDXGIFactory(__uuidof(factory), (void**)&factory)))
{
std::cout << "Failed to create DXGI factory" << std::endl;
return hr;
}
// Enumerate all adapters and store it into std::vector
for (int i = 0; factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND; i++)
adapters.push_back(adapter);
// Create d3d11 on each device
if (FAILED(hr = D3D11CreateDevice(
adapters[0],
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
0,
lvl,
7,
D3D11_SDK_VERSION,
&dev0,
&supported,
&ctx0)))
{
std::cout << "Failed to create d3d device0" << std::endl;
return hr;
}
if (FAILED(hr = D3D11CreateDevice(
adapters[1],
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
0,
lvl,
7,
D3D11_SDK_VERSION,
&dev1,
&supported,
&ctx1)))
{
std::cout << "Failed to create d3d device1" << std::endl;
return hr;
}
// perform a simple GPGPU program that add ten numbers
auto task = [&](ID3D11Device* dev, ID3D11DeviceContext* ctx, float* result, size_t sz, HRESULT* hr) {
ID3D11Buffer* bufA;
ID3D11Buffer* bufB;
ID3D11Buffer* bufC;
ID3D11ShaderResourceView* bufASrv;
ID3D11ShaderResourceView* bufBSrv;
ID3D11UnorderedAccessView* bufCUav;
ID3D11ComputeShader* cs = nullptr;
ID3D10Blob* bytecode;
ID3D10Blob* errMsg;
IDXGIDevice* dxgiDev;
IDXGIAdapter* adapter;
DXGI_ADAPTER_DESC adesc;
// Get device information
dev->QueryInterface(__uuidof(dxgiDev), (void**)&dxgiDev);
dxgiDev->GetAdapter(&adapter);
adapter->GetDesc(&adesc);
mut.lock();
std::wcout << "Performing async task on device: " << adesc.Description << std::endl;
mut.unlock();
// Create buffer and its view
CreateBuffer(dev, sizeof(dataA), dataA, &bufA);
CreateBuffer(dev, sizeof(dataB), dataB, &bufB);
CreateBuffer(dev, sz, nullptr, &bufC);
CreateBufferSrv(dev, DXGI_FORMAT_R32_FLOAT, 4, bufA, &bufASrv);
CreateBufferSrv(dev, DXGI_FORMAT_R32_FLOAT, 4, bufB, &bufBSrv);
CreateBufferUav(dev, DXGI_FORMAT_R32_FLOAT, 4, bufC, &bufCUav);
// Compile & create shader
if (FAILED(CompileShader(computeShader, sizeof(computeShader), &bytecode, &errMsg)))
{
std::cout << "Failed to compile compute shader" << std::endl;
if (errMsg)
{
std::cout << (char*)errMsg->GetBufferPointer() << std::endl;
errMsg->Release();
*hr = -1;
return;
}
}
dev->CreateComputeShader(
bytecode->GetBufferPointer(),
bytecode->GetBufferSize(),
nullptr,
&cs);
bytecode->Release();
ID3D11ShaderResourceView* srvs[2] = { bufASrv, bufBSrv };
// Perform action on GPU
ctx->CSSetShader(cs, nullptr, 0);
ctx->CSSetShaderResources(0, 2, srvs);
ctx->CSSetUnorderedAccessViews(0, 1, &bufCUav, nullptr);
ctx->Dispatch(10, 1, 1);
ctx->CSSetShader(nullptr, nullptr, 0);
ctx->CSSetShaderResources(0, 0, nullptr);
ctx->CSSetUnorderedAccessViews(0, 0, nullptr, nullptr);
// Copy result from GPU
ID3D11Buffer* cpy = CopyBuffer(dev, ctx, bufC);
D3D11_MAPPED_SUBRESOURCE copyback;
ctx->Map(cpy, 0, D3D11_MAP_READ, 0, &copyback);
memcpy_s(result, sz, copyback.pData, sz);
ctx->Unmap(cpy, 0);
// we dont need these again
cpy->Release();
if (bufA)
{
bufA->Release();
if (bufASrv)
bufASrv->Release();
}
if (bufB)
{
bufB->Release();
if (bufBSrv)
bufBSrv->Release();
}
if (bufC)
{
bufC->Release();
if (bufCUav)
bufCUav->Release();
}
if (cs)
cs->Release();
return;
};
// perform async task
std::thread gpu0(task, dev0, ctx0, (float*)result0, (size_t)sizeof(result0), &hr);
std::thread gpu1(task, dev1, ctx1, (float*)result1, (size_t)sizeof(result1), &hr);
// wait until the job done
gpu0.join();
gpu1.join();
std::cout << "Done performing tasks" << std::endl;
// print outputs
std::cout << "Output from GPU0: { ";
for (int i = 0; i < 10; i++)
std::cout << result0[i] << ((i < 9) ? ", " : " }");
std::cout << std::endl;
std::cout << "Output from GPU1: { ";
for (int i = 0; i < 10; i++)
std::cout << result1[i] << ((i < 9) ? ", " : " }");
std::cout << std::endl;
std::cout << "Releasing shits..." << std::endl;
// release shits
if (dev0)
dev0->Release();
if (ctx0)
ctx0->Release();
if(dev1)
dev1->Release();
if(ctx1)
ctx1->Release();
return 0;
}
// Creates a default-usage GPU buffer of `sz` bytes that may be bound as a
// UAV, SRV, index buffer or vertex buffer, and that allows raw views.
//   device    - device that creates the buffer
//   sz        - buffer size in bytes
//   pInitData - optional initial contents; pass nullptr for no initial data
//   bufOut    - receives the created buffer
// Returns the HRESULT from ID3D11Device::CreateBuffer.
HRESULT CreateBuffer(ID3D11Device * device, UINT sz, void * pInitData, ID3D11Buffer ** bufOut)
{
    D3D11_BUFFER_DESC description;
    ZeroMemory(&description, sizeof(description));
    description.ByteWidth = sz;
    description.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
    description.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER;

    // The init descriptor is passed only when the caller supplied data.
    D3D11_SUBRESOURCE_DATA initial;
    ZeroMemory(&initial, sizeof(initial));
    initial.pSysMem = pInitData;
    const D3D11_SUBRESOURCE_DATA* initialPtr = pInitData ? &initial : nullptr;

    return device->CreateBuffer(&description, initialPtr, bufOut);
}
// Creates a typed shader resource view covering the whole of `buf`.
//   device    - device that owns `buf`
//   bufFormat - element format of the view
//   szStride  - size of one element in bytes; must be non-zero
//   buf       - buffer to view
//   srvOut    - receives the created view
// Returns E_INVALIDARG if `device` or `buf` is null or `szStride` is zero,
// otherwise the HRESULT from ID3D11Device::CreateShaderResourceView.
HRESULT CreateBufferSrv(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11ShaderResourceView ** srvOut)
{
    // A zero stride would divide by zero below; null pointers would be dereferenced.
    if (!device || !buf || szStride == 0)
        return E_INVALIDARG;

    D3D11_BUFFER_DESC bufDesc;
    buf->GetDesc(&bufDesc);

    D3D11_SHADER_RESOURCE_VIEW_DESC desc;
    ZeroMemory(&desc, sizeof(desc));
    desc.Format = bufFormat;
    desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
    desc.Buffer.FirstElement = 0;
    // The element count is the buffer size divided by the element size.
    desc.Buffer.NumElements = bufDesc.ByteWidth / szStride;
    return device->CreateShaderResourceView(buf, &desc, srvOut);
}
// Creates a typed unordered access view covering the whole of `buf`.
//   device    - device that owns `buf`
//   bufFormat - element format of the view
//   szStride  - size of one element in bytes; must be non-zero
//   buf       - buffer to view
//   uavOut    - receives the created view
// Returns E_INVALIDARG if `device` or `buf` is null or `szStride` is zero,
// otherwise the HRESULT from ID3D11Device::CreateUnorderedAccessView.
HRESULT CreateBufferUav(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11UnorderedAccessView ** uavOut)
{
    // A zero stride would divide by zero below; null pointers would be dereferenced.
    if (!device || !buf || szStride == 0)
        return E_INVALIDARG;

    D3D11_BUFFER_DESC bufDesc;
    buf->GetDesc(&bufDesc);

    D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
    ZeroMemory(&desc, sizeof(desc));
    desc.Format = bufFormat;
    desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
    desc.Buffer.FirstElement = 0;
    // The element count is the buffer size divided by the element size.
    desc.Buffer.NumElements = bufDesc.ByteWidth / szStride;
    return device->CreateUnorderedAccessView(buf, &desc, uavOut);
}
// Creates a CPU-readable staging buffer with the same description as `buf`
// and records a copy of `buf` into it on `ctx`.
//   device - device used to create the staging buffer
//   ctx    - context on which the copy is issued
//   buf    - source buffer
// Returns the staging buffer, which the caller must Release(), or nullptr
// if the staging buffer could not be created.
ID3D11Buffer* CopyBuffer(ID3D11Device * device, ID3D11DeviceContext * ctx, ID3D11Buffer * buf)
{
    // Start from the source description, then turn it into a staging resource.
    D3D11_BUFFER_DESC stagingDesc;
    buf->GetDesc(&stagingDesc);
    stagingDesc.Usage = D3D11_USAGE_STAGING;
    stagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    stagingDesc.BindFlags = 0;
    stagingDesc.MiscFlags = 0;

    ID3D11Buffer* staging = nullptr;
    if (FAILED(device->CreateBuffer(&stagingDesc, nullptr, &staging)))
        return staging;

    ctx->CopyResource(staging, buf);
    return staging;
}
// Compiles HLSL source as a cs_5_0 compute shader with entry point "CSMain".
//   shader   - HLSL source text
//   sz       - size of the source in bytes
//   bytecode - receives the compiled bytecode blob
//   errMsg   - receives the compiler message blob
// Returns the HRESULT from D3DCompile.
HRESULT CompileShader(const char * shader, SIZE_T sz, ID3D10Blob** bytecode, ID3D10Blob** errMsg)
{
    const char* const sourceName = "ComputeShader";
    const char* const entryPoint = "CSMain";
    const char* const targetProfile = "cs_5_0";
    return D3DCompile(shader, sz, sourceName, nullptr, nullptr, entryPoint, targetProfile, 0, 0, bytecode, errMsg);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment