Skip to content

Instantly share code, notes, and snippets.

@cshenton
Last active March 25, 2024 01:04
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cshenton/3ba217ee409648927889b56562a534e8 to your computer and use it in GitHub Desktop.
Save cshenton/3ba217ee409648927889b56562a534e8 to your computer and use it in GitHub Desktop.
Seeing how fast a d3d11 swapchain can go.
#include <assert.h>
#include <stdio.h>
#include <time.h>
#define COBJMACROS
#include <windows.h>
#include <d3d11_1.h>
#include <d3dcompiler.h>
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define TITLE "D3D11 Speedtest"
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// CPU-side image of the data copied into the D3D11 constant buffer each frame.
struct Constants {
    int frame; // frame counter value uploaded for the current draw
};
typedef struct Constants Constants;
/*
 * D3D11 swapchain speed test.
 *
 * Creates a borderless maximized window, a D3D11.1 device and a 100x100
 * flip-model swapchain with tearing allowed, then draws one triangle per
 * frame as fast as possible with sync interval 0. Roughly every 250 ms the
 * measured frame rate is printed to stdout.
 *
 * Shaders are compiled at startup from "tri.hlsl" (entry points "vert" and
 * "pix"), which must be readable from the current working directory.
 *
 * Returns 0 when Escape is pressed, or 1 if mapping the constant buffer or
 * presenting a frame fails. Setup failures trip an assert.
 */
int main(void)
{
    HRESULT hr;

    // Window: the positional initializer sets only lpfnWndProc and lpszClassName.
    WNDCLASSA wnd_class = { 0, DefWindowProcA, 0, 0, 0, 0, 0, 0, 0, TITLE };
    RegisterClassA(&wnd_class);
    DWORD style = WS_POPUP | WS_MAXIMIZE | WS_VISIBLE;
    HWND window = CreateWindowExA(0, TITLE, TITLE, style, 0, 0, 0, 0, NULL, NULL, NULL, NULL);
    assert(window);

    // Base device and immediate context; only feature level 11_1 is requested.
    D3D_FEATURE_LEVEL feature_levels[] = { D3D_FEATURE_LEVEL_11_1 };
    ID3D11Device* base_device;
    ID3D11DeviceContext* base_ctx;
    hr = D3D11CreateDevice(
        NULL, D3D_DRIVER_TYPE_HARDWARE,
        NULL, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
        feature_levels, ARRAYSIZE(feature_levels),
        D3D11_SDK_VERSION, &base_device,
        NULL, &base_ctx);
    assert(SUCCEEDED(hr));

    // Upgrade to the *1 interfaces.
    ID3D11Device1* device;
    hr = ID3D11Device_QueryInterface(base_device, &IID_ID3D11Device1, (void**)(&device));
    assert(SUCCEEDED(hr));
    ID3D11DeviceContext1* ctx;
    hr = ID3D11DeviceContext_QueryInterface(base_ctx, &IID_ID3D11DeviceContext1, (void**)&ctx);
    assert(SUCCEEDED(hr));

    // Walk device -> adapter -> factory to reach CreateSwapChainForHwnd.
    IDXGIDevice1* dxgi_dev;
    hr = ID3D11Device_QueryInterface(device, &IID_IDXGIDevice1, (void**)&dxgi_dev);
    assert(SUCCEEDED(hr));
    IDXGIAdapter1* adapter;
    hr = IDXGIDevice_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter);
    assert(SUCCEEDED(hr));
    IDXGIFactory2* factory;
    hr = IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory2, (void**)&factory);
    assert(SUCCEEDED(hr));

    // Swapchain: fixed 100x100 back buffers stretched to the window.
    DXGI_SWAP_CHAIN_DESC1 swapchain_desc = {
        .Width = 100,
        .Height = 100,
        .Format = DXGI_FORMAT_B8G8R8A8_UNORM,
        .SampleDesc = {.Count = 1},
        .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
        .BufferCount = 2,
        .Scaling = DXGI_SCALING_STRETCH,
        .Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING,
        .SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD,
        .AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED,
    };
    IDXGISwapChain1* swapchain;
    hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown*)device, window,
                                              &swapchain_desc, NULL, NULL, &swapchain);
    assert(SUCCEEDED(hr));

    // Render target view: the UNORM back buffer is viewed through an sRGB format.
    ID3D11Texture2D* framebuffer;
    hr = IDXGISwapChain1_GetBuffer(swapchain, 0, &IID_ID3D11Texture2D, (void**)&framebuffer);
    assert(SUCCEEDED(hr));
    D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {
        .Format = DXGI_FORMAT_B8G8R8A8_UNORM_SRGB,
        .ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D,
    };
    ID3D11RenderTargetView* rtv;
    hr = ID3D11Device_CreateRenderTargetView(device, (ID3D11Resource*)framebuffer, &rtv_desc, &rtv);
    assert(SUCCEEDED(hr));

    // Depth buffer: start from the back buffer's description so the sizes match.
    D3D11_TEXTURE2D_DESC depth_tex_desc;
    ID3D11Texture2D_GetDesc(framebuffer, &depth_tex_desc);
    depth_tex_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
    depth_tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
    ID3D11Texture2D* depth_tex;
    hr = ID3D11Device_CreateTexture2D(device, &depth_tex_desc, NULL, &depth_tex);
    assert(SUCCEEDED(hr));
    ID3D11DepthStencilView* dsv;
    hr = ID3D11Device_CreateDepthStencilView(device, (ID3D11Resource*)depth_tex, NULL, &dsv);
    assert(SUCCEEDED(hr));

    // Vertex shader.
    ID3DBlob* vert_src;
    hr = D3DCompileFromFile(L"tri.hlsl", NULL, NULL, "vert", "vs_5_0", 0, 0, &vert_src, NULL);
    assert(SUCCEEDED(hr));
    ID3D11VertexShader* vert_shd;
    hr = ID3D11Device_CreateVertexShader(
        device,
        ID3D10Blob_GetBufferPointer(vert_src),
        ID3D10Blob_GetBufferSize(vert_src),
        NULL,
        &vert_shd);
    assert(SUCCEEDED(hr));

    // Pixel shader.
    ID3DBlob* pix_src;
    hr = D3DCompileFromFile(L"tri.hlsl", NULL, NULL, "pix", "ps_5_0", 0, 0, &pix_src, NULL);
    assert(SUCCEEDED(hr));
    ID3D11PixelShader* pix_shd;
    hr = ID3D11Device_CreatePixelShader(
        device,
        ID3D10Blob_GetBufferPointer(pix_src),
        ID3D10Blob_GetBufferSize(pix_src),
        NULL,
        &pix_shd);
    assert(SUCCEEDED(hr));

    // Constant buffer: byte width is sizeof(Constants) rounded up to a multiple of 16.
    D3D11_BUFFER_DESC cbuffer_desc = {
        .ByteWidth = (sizeof(Constants) + 0xf) & 0xfffffff0,
        .Usage = D3D11_USAGE_DYNAMIC,
        .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
        .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
    };
    ID3D11Buffer* cbuffer;
    hr = ID3D11Device_CreateBuffer(device, &cbuffer_desc, NULL, &cbuffer);
    assert(SUCCEEDED(hr));

    // Rasterizer state.
    D3D11_RASTERIZER_DESC rasterizer_desc = {
        .FillMode = D3D11_FILL_SOLID,
        .CullMode = D3D11_CULL_BACK,
    };
    ID3D11RasterizerState* raster_state;
    hr = ID3D11Device_CreateRasterizerState(device, &rasterizer_desc, &raster_state);
    assert(SUCCEEDED(hr));

    // Depth-stencil state.
    D3D11_DEPTH_STENCIL_DESC depth_stencil_desc = {
        .DepthEnable = TRUE,
        .DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL,
        .DepthFunc = D3D11_COMPARISON_LESS,
    };
    ID3D11DepthStencilState* depth_stencil_state;
    hr = ID3D11Device_CreateDepthStencilState(device, &depth_stencil_desc, &depth_stencil_state);
    assert(SUCCEEDED(hr));

    // Per-frame constants derived from the back buffer size.
    float clear_color[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
    float width = (float)depth_tex_desc.Width;
    float height = (float)depth_tex_desc.Height;
    D3D11_VIEWPORT viewport = { 0.0f, 0.0f, width, height, 0.0f, 1.0f };
    printf("%f, %f\n", width, height);

    // Timing state. `frame` counts frames since the last FPS printout and is
    // also the value uploaded to the shader, so it restarts at 0 after each report.
    int frame = 0;
    LARGE_INTEGER start_time, end_time;
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency(&frequency);
    QueryPerformanceCounter(&start_time);
    SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);

    for (;;) {
        // Drain pending window messages; Escape ends the program.
        MSG msg;
        while (PeekMessageA(&msg, NULL, 0, 0, PM_REMOVE)) {
            if (msg.message == WM_KEYDOWN && msg.wParam == VK_ESCAPE) {
                return 0;
            }
            DispatchMessageA(&msg);
        }

        // Upload this frame's constants. Do not touch mapped.pData unless Map succeeded.
        D3D11_MAPPED_SUBRESOURCE mapped = { 0 };
        Constants c = { .frame = frame++ };
        hr = ID3D11DeviceContext_Map(ctx, (ID3D11Resource*)cbuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
        if (FAILED(hr)) {
            fprintf(stderr, "Map failed: 0x%08lx\n", (unsigned long)hr);
            return 1;
        }
        memcpy(mapped.pData, &c, sizeof(c));
        ID3D11DeviceContext_Unmap(ctx, (ID3D11Resource*)cbuffer, 0);

        ID3D11DeviceContext_ClearRenderTargetView(ctx, rtv, clear_color);
        ID3D11DeviceContext_ClearDepthStencilView(ctx, dsv, D3D11_CLEAR_DEPTH, 1.0f, 0);

        // The full pipeline state is re-bound every frame.
        ID3D11DeviceContext_IASetPrimitiveTopology(ctx, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
        ID3D11DeviceContext_VSSetShader(ctx, vert_shd, NULL, 0);
        ID3D11DeviceContext_VSSetConstantBuffers(ctx, 0, 1, &cbuffer);
        ID3D11DeviceContext_RSSetViewports(ctx, 1, &viewport);
        ID3D11DeviceContext_RSSetState(ctx, raster_state);
        ID3D11DeviceContext_PSSetShader(ctx, pix_shd, NULL, 0);
        ID3D11DeviceContext_OMSetRenderTargets(ctx, 1, &rtv, dsv);
        ID3D11DeviceContext_OMSetDepthStencilState(ctx, depth_stencil_state, 0);
        ID3D11DeviceContext_OMSetBlendState(ctx, NULL, NULL, 0xffffffff);

        // No vertex buffer is bound: three vertices are drawn by index only.
        ID3D11DeviceContext_Draw(ctx, 3, 0);

        // Sync interval 0 with tearing allowed. Stop on failure instead of
        // spinning forever on a dead swapchain.
        hr = IDXGISwapChain1_Present(swapchain, 0, DXGI_PRESENT_ALLOW_TEARING);
        if (FAILED(hr)) {
            fprintf(stderr, "Present failed: 0x%08lx\n", (unsigned long)hr);
            return 1;
        }

        // Report the frame rate once more than 250 ms have elapsed.
        QueryPerformanceCounter(&end_time);
        long long elapsed_us = 1000000 * (end_time.QuadPart - start_time.QuadPart) / frequency.QuadPart;
        if (elapsed_us > 250000) {
            printf("%f FPS\n", (double)frame / ((double)elapsed_us) * 1000000.0 );
            frame = 0;
            start_time = end_time;
        }
    }
}
// Shader constants, bound to constant-buffer register b0.
cbuffer Constants : register(b0)
{
// Integer counter read by vert() to seed the triangle color hash.
int frame;
};
// Value returned by vert() and taken as the input of pix().
struct PixelData {
// Vertex position, written by vert() as (x, y, 0, 1).
float4 position : SV_POSITION;
// RGBA color written by vert(); pix() returns it unchanged.
float4 color : COL;
};
// Scrambles a 32-bit unsigned integer through six add/xor/shift rounds
// and returns the mixed value.
uint hash(uint a)
{
    uint mixed = a;
    mixed = (mixed << 12) + (mixed + 0x7ed55d16);
    mixed = (mixed >> 19) ^ (mixed ^ 0xc761c23c);
    mixed = (mixed << 5) + (mixed + 0x165667b1);
    mixed = (mixed << 9) ^ (mixed + 0xd3a2646c);
    mixed = (mixed << 3) + (mixed + 0xfd7046c5);
    mixed = (mixed >> 16) ^ (mixed ^ 0xb55a4f09);
    return mixed;
}
// Vertex shader: selects one of three hard-coded triangle corners by vertex ID
// and gives every vertex the same color, derived from hashing the bitwise
// complement of `frame`.
PixelData vert(uint v: SV_VertexID)
{
    // Bits 16-23, 8-15 and 0-7 of the hash become red, green and blue.
    uint bits = hash(~uint(frame));
    float red = ((bits >> 16) & 0xff) / 255.0;
    float green = ((bits >> 8) & 0xff) / 255.0;
    float blue = ((bits) & 0xff) / 255.0;

    float2 corners[3] = { float2(0.5, -0.5), float2(-0.5, -0.5), float2(0.0, 0.5) };

    PixelData result;
    result.position = float4(corners[v], 0.0, 1.0);
    result.color = float4(red, green, blue, 1.0);
    return result;
}
// Pixel shader: outputs the color carried in PixelData without modification.
float4 pix(PixelData pd) : SV_TARGET
{
    float4 shaded = pd.color;
    return shaded;
}
@cshenton
Copy link
Author

cshenton commented Mar 24, 2024

I am compiling with zig cc -o tri.exe tri.c -ld3d11 -ld3dcompiler_47 -Werror. It hits 19402 FPS (that is just one random printout; it obviously varies in practice). It is worth noting that if we leave the swapchain resolution at the default (which is 1440p on my monitor), the performance is more like 9k FPS.

There's some swapover point where the tearing is no longer visible and the framerate passes the ~9.5k mark.

@krayfaus
Copy link

Compiled with cl.exe .\d3d11_speedtest.c /link user32.lib d3dcompiler.lib d3d11.lib dxguid.lib. Couldn't get past 8000 FPS here, potato PC.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment