Skip to content

Instantly share code, notes, and snippets.

@mmozeiko
Last active January 3, 2021 01:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmozeiko/074158229d3101824a33120e2ca00481 to your computer and use it in GitHub Desktop.
Save mmozeiko/074158229d3101824a33120e2ca00481 to your computer and use it in GitHub Desktop.
d3d11 hook + cofm shader replacement
  1. Compile the dxgi.c and d3d11.c files - this produces dxgi.dll and d3d11.dll, which allow dumping and replacing shaders.
cl.exe /nologo /O2 /W3 /MT dxgi.c /link /DLL /OUT:dxgi.dll /INCREMENTAL:NO kernel32.lib user32.lib
cl.exe /nologo /O2 /W3 /MT d3d11.c /link /DLL /OUT:d3d11.dll /INCREMENTAL:NO kernel32.lib user32.lib d3dcompiler.lib dxguid.lib shlwapi.lib
  2. Put the dxgi.dll and d3d11.dll files next to ChildrenOfMorta.exe.

  3. Create a d3d11_shaders folder next to ChildrenOfMorta.exe and put ps_b1c05ceb9ca8a14c.hlsl into this folder. This assumes exe file version 2019.2.15.46892, product version 2019.2.15.14464812; in game it shows version 1.1.70.2 (768117).

Folder structure should look like this:

ChildrenOfMorta/
├── ChildrenOfMorta_Data/
│   └── ...
├── d3d11_shaders/
│   └── ps_b1c05ceb9ca8a14c.hlsl
├── ChildrenOfMorta.exe
├── d3d11.dll
├── dxgi.dll
└── ...

This particular shader replaces the rendering used mainly in the intro cutscene when a new game starts. Pixels will jitter much less when the camera pans around.

  4. Now run the exe.
// d3d11.c - proxy d3d11.dll: exports D3D11CreateDevice / D3D11CreateDeviceAndSwapChain,
// forwards them to the original d3d11.dll and hooks shader creation on the returned device.

// COBJMACROS enables the C-style COM helper macros used below
// (ID3D11Device_GetFeatureLevel, ID3D10Blob_GetBufferPointer, ...).
#define COBJMACROS
#define WIN32_LEAN_AND_MEAN
// While the SDK headers are being parsed, the two entry points are renamed to *Import,
// so that this file can define its own functions with the real names further down.
#define D3D11CreateDevice D3D11CreateDeviceImport
#define D3D11CreateDeviceAndSwapChain D3D11CreateDeviceAndSwapChainImport
#include <windows.h>
#include <shlwapi.h>
#include <d3d11_4.h>
#include <d3dcommon.h>
#include <d3dcompiler.h>
#undef D3D11CreateDevice
#undef D3D11CreateDeviceAndSwapChain
// Export the two functions defined in this file under their undecorated names.
#if _WIN64
#pragma comment(linker, "/export:D3D11CreateDevice")
#pragma comment(linker, "/export:D3D11CreateDeviceAndSwapChain")
#else
// 32-bit WINAPI functions get decorated names (_Name@<argument bytes>): 10 arguments -> @40,
// 12 arguments -> @48. The export aliases map the plain names to the decorated symbols.
#pragma comment(linker, "/export:D3D11CreateDevice=_D3D11CreateDevice@40")
#pragma comment(linker, "/export:D3D11CreateDeviceAndSwapChain=_D3D11CreateDeviceAndSwapChain@48")
#endif
#include <stdint.h>
// Nonzero when shaders should be dumped to disk; set once in DllMain from the
// D3D11_DUMP_SHADERS environment variable.
static BOOL dump;
// Path of the "d3d11_shaders" folder located next to this DLL (built in DllMain).
static WCHAR d3d11_shaders[MAX_PATH];
// Handle of the original d3d11.dll and its two creation entry points (resolved in DllMain).
static HMODULE d3d11;
static PFN_D3D11_CREATE_DEVICE d3d11_CreateDevice;
static PFN_D3D11_CREATE_DEVICE_AND_SWAP_CHAIN d3d11_CreateDeviceAndSwapChain;
// Original ID3D11Device vtable entries, saved by SetupHooks before the slots are
// overwritten with the *Hook functions. d3d11_CreateComputeShader doubles as the
// "hooks already installed" flag in SetupHooks.
static HRESULT (WINAPI *d3d11_CreateComputeShader)(ID3D11Device*, const void*, SIZE_T, ID3D11ClassLinkage*, ID3D11ComputeShader**);
static HRESULT (WINAPI* d3d11_CreateVertexShader)(ID3D11Device*, const void*, SIZE_T, ID3D11ClassLinkage*, ID3D11VertexShader**);
static HRESULT (WINAPI* d3d11_CreateHullShader)(ID3D11Device*, const void*, SIZE_T, ID3D11ClassLinkage*, ID3D11HullShader**);
static HRESULT (WINAPI* d3d11_CreateDomainShader)(ID3D11Device*, const void*, SIZE_T, ID3D11ClassLinkage*, ID3D11DomainShader**);
static HRESULT (WINAPI* d3d11_CreateGeometryShader)(ID3D11Device*, const void*, SIZE_T, ID3D11ClassLinkage*, ID3D11GeometryShader**);
static HRESULT (WINAPI* d3d11_CreateGeometryShaderWithStreamOutput)(ID3D11Device*, const void*, SIZE_T, const D3D11_SO_DECLARATION_ENTRY*, UINT, const UINT*, UINT, UINT, ID3D11ClassLinkage*, ID3D11GeometryShader**);
static HRESULT (WINAPI *d3d11_CreatePixelShader)(ID3D11Device*, const void*, SIZE_T, ID3D11ClassLinkage*, ID3D11PixelShader**);
// If `cond` is false: show `msg` in a message box titled "FATAL ERROR" and terminate the process.
#define CHECK_FATAL(cond, msg) do { if (!(cond)) { MessageBoxA(NULL, msg, "FATAL ERROR", 0); ExitProcess(1); } } while (0)
// Number of elements in a true array (not a pointer). 0[arr] is the same expression as arr[0].
#define COUNTOF(arr) (sizeof(arr)/sizeof(0[arr]))
// 64-bit FNV-1a hash of the `size` bytes starting at `data`.
// Each byte is XORed into the running hash, which is then multiplied by the FNV prime.
// With size == 0 the offset basis is returned and `data` is never read.
static uint64_t FNV1a(const uint8_t* data, size_t size)
{
    const uint64_t offset_basis = 14695981039346656037ULL;
    const uint64_t prime = 1099511628211ULL;

    uint64_t hash = offset_basis;
    while (size-- > 0)
    {
        hash = (hash ^ *data++) * prime;
    }
    return hash;
}
// Writes `size` bytes from `data` to "<d3d11_shaders>\<type>_<id as 16 hex digits>.<ext>",
// overwriting any existing file. The d3d11_shaders folder is created first if needed.
// Failure to create or fully write the file is fatal (message box + process exit).
static void ShaderDump(const char* type, const char* ext, uint64_t id, const void* data, DWORD size)
{
    WCHAR filename[1024];
    DWORD bytes_done = 0;
    HANDLE file;

    // Return value is not checked: a real problem shows up when the file is created below.
    CreateDirectoryW(d3d11_shaders, NULL);

    // %S in the wide-character format consumes the narrow `type` / `ext` strings.
    wsprintfW(filename, L"%s\\%S_%016I64x.%S", d3d11_shaders, type, id, ext);

    file = CreateFileW(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    // TODO: report error
    CHECK_FATAL(file != INVALID_HANDLE_VALUE, "Error creating output file!");

    // TODO: report error
    CHECK_FATAL(WriteFile(file, data, size, &bytes_done, NULL) && bytes_done == size, "Error writing to file!");

    CloseHandle(file);
}
// Disassembles shader bytecode with D3DDisassemble and stores the listing through
// ShaderDump as "<type>_<id>.asm". A disassembly failure is ignored silently.
static void ShaderDisassemble(const char* type, uint64_t id, const void* bytecode, DWORD bytecode_size)
{
    const UINT disasm_flags = D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_OFFSET;
    ID3DBlob* listing;

    if (FAILED(D3DDisassemble(bytecode, bytecode_size, disasm_flags, "", &listing)))
    {
        return;
    }

    // yes, ID3D10Blob is same as ID3DBlob
    ShaderDump(type, "asm", id, ID3D10Blob_GetBufferPointer(listing), (DWORD)ID3D10Blob_GetBufferSize(listing));
    ID3D10Blob_Release(listing);
}
// Common pre-processing for every hooked Create*Shader call.
//
// Hashes the incoming bytecode with FNV1a to get an id, optionally dumps the bytecode and
// its disassembly (when `dump` is set), and, if "<d3d11_shaders>\<type>_<id>.hlsl" exists,
// compiles that file (entry point "main") and redirects *bytecode / *bytecode_size to the
// compiled result.
//
//   device         device the shader is created on; its feature level selects the target profile
//   type           profile prefix ("vs", "ps", "cs", ...), also used in the file names
//   bytecode       in: caller's bytecode; out: replacement bytecode if a replacement compiled
//   bytecode_size  in/out: size in bytes matching *bytecode
//
// On any failure the in/out arguments are left untouched, so the original shader is used.
// Compiler messages, if any, are written to "<type>_<id>.hlsl.txt" in the same folder.
static void ShaderHook(ID3D11Device* device, const char* type, const void** bytecode, SIZE_T* bytecode_size)
{
    // Nothing to hash or replace; let the real Create*Shader deal with the bad arguments
    // instead of crashing inside the hook.
    if (*bytecode == NULL || *bytecode_size == 0)
    {
        return;
    }

    uint64_t id = FNV1a(*bytecode, *bytecode_size);
    if (dump)
    {
        ShaderDump(type, "bin", id, *bytecode, (DWORD)*bytecode_size);
        ShaderDisassemble(type, id, *bytecode, (DWORD)*bytecode_size);
    }

    WCHAR hlsl[1024];
    wsprintfW(hlsl, L"%s\\%S_%016I64x.hlsl", d3d11_shaders, type, id);
    if (GetFileAttributesW(hlsl) == INVALID_FILE_ATTRIBUTES)
    {
        // no replacement source for this shader
        return;
    }

    // Pick the shader model from the device feature level. Everything from 11_0 upwards
    // (11_1 and the 12_x levels a D3D11 device can also report) compiles as model 5_0.
    D3D_FEATURE_LEVEL level = ID3D11Device_GetFeatureLevel(device);
    int version;
    if (level >= D3D_FEATURE_LEVEL_11_0)
    {
        version = 50;
    }
    else if (level == D3D_FEATURE_LEVEL_10_1)
    {
        version = 41;
    }
    else if (level == D3D_FEATURE_LEVEL_10_0)
    {
        version = 40;
    }
    else
    {
        // TODO: report error
        return;
    }

    // e.g. "ps_5_0"
    char target[16];
    wsprintfA(target, "%s_%u_%u", type, version / 10, version % 10);

    ID3DBlob* code = NULL;
    ID3DBlob* error = NULL;
    HRESULT hr = D3DCompileFromFile(hlsl, NULL, NULL, "main", target, D3DCOMPILE_OPTIMIZATION_LEVEL2 | D3DCOMPILE_WARNINGS_ARE_ERRORS, 0, &code, &error);

    if (error != NULL)
    {
        // Best effort: store the compiler output next to the source file.
        WCHAR txt[1024];
        wsprintfW(txt, L"%s\\%S_%016I64x.hlsl.txt", d3d11_shaders, type, id);

        const void* error_data = ID3D10Blob_GetBufferPointer(error);
        DWORD error_size = (DWORD)ID3D10Blob_GetBufferSize(error);

        HANDLE f = CreateFileW(txt, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
        if (f != INVALID_HANDLE_VALUE)
        {
            DWORD written;
            BOOL ok = WriteFile(f, error_data, error_size, &written, NULL);
            if (!ok || written != error_size)
            {
                // TODO: report error
            }
            CloseHandle(f);
        }
        ID3D10Blob_Release(error);
    }

    if (SUCCEEDED(hr) && code != NULL)
    {
        // do not release code blob, small memory "leak", but whatever
        // (the caller still reads the returned pointer after this function returns)
        *bytecode = ID3D10Blob_GetBufferPointer(code);
        *bytecode_size = ID3D10Blob_GetBufferSize(code);
    }
}
// Stand-in for ID3D11Device::CreateComputeShader (vtable slot 18, see SetupHooks):
// runs the bytecode through ShaderHook as "cs", then calls the saved original.
static HRESULT WINAPI ComputeShaderHook(ID3D11Device* device, const void* bytecode, SIZE_T length, ID3D11ClassLinkage* linkage, ID3D11ComputeShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "cs", &code, &code_size);
    return d3d11_CreateComputeShader(device, code, code_size, linkage, shader);
}
// Stand-in for ID3D11Device::CreateVertexShader (vtable slot 12, see SetupHooks):
// runs the bytecode through ShaderHook as "vs", then calls the saved original.
static HRESULT WINAPI VertexShaderHook(ID3D11Device* device, const void* bytecode, SIZE_T length, ID3D11ClassLinkage* linkage, ID3D11VertexShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "vs", &code, &code_size);
    return d3d11_CreateVertexShader(device, code, code_size, linkage, shader);
}
// Stand-in for ID3D11Device::CreateHullShader (vtable slot 16, see SetupHooks):
// runs the bytecode through ShaderHook as "hs", then calls the saved original.
static HRESULT WINAPI HullShaderHook(ID3D11Device* device, const void* bytecode, SIZE_T length, ID3D11ClassLinkage* linkage, ID3D11HullShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "hs", &code, &code_size);
    return d3d11_CreateHullShader(device, code, code_size, linkage, shader);
}
// Stand-in for ID3D11Device::CreateDomainShader (vtable slot 17, see SetupHooks):
// runs the bytecode through ShaderHook as "ds", then calls the saved original.
static HRESULT WINAPI DomainShaderHook(ID3D11Device* device, const void* bytecode, SIZE_T length, ID3D11ClassLinkage* linkage, ID3D11DomainShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "ds", &code, &code_size);
    return d3d11_CreateDomainShader(device, code, code_size, linkage, shader);
}
// Stand-in for ID3D11Device::CreateGeometryShader (vtable slot 13, see SetupHooks):
// runs the bytecode through ShaderHook as "gs", then calls the saved original.
static HRESULT WINAPI GeometryShaderHook(ID3D11Device* device, const void* bytecode, SIZE_T length, ID3D11ClassLinkage* linkage, ID3D11GeometryShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "gs", &code, &code_size);
    return d3d11_CreateGeometryShader(device, code, code_size, linkage, shader);
}
// Stand-in for ID3D11Device::CreateGeometryShaderWithStreamOutput (vtable slot 14, see
// SetupHooks): runs the bytecode through ShaderHook as "gs", then calls the saved original
// with the stream-output arguments passed through unchanged.
static HRESULT WINAPI GeometryShaderWithStreamOutputHook(ID3D11Device* device, const void* bytecode, SIZE_T length, const D3D11_SO_DECLARATION_ENTRY* entries, UINT entry_count, const UINT* strides, UINT stride_count, UINT stream, ID3D11ClassLinkage* linkage, ID3D11GeometryShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "gs", &code, &code_size);
    return d3d11_CreateGeometryShaderWithStreamOutput(device, code, code_size, entries, entry_count, strides, stride_count, stream, linkage, shader);
}
// Stand-in for ID3D11Device::CreatePixelShader (vtable slot 15, see SetupHooks):
// runs the bytecode through ShaderHook as "ps", then calls the saved original.
static HRESULT WINAPI PixelShaderHook(ID3D11Device* device, const void* bytecode, SIZE_T length, ID3D11ClassLinkage* linkage, ID3D11PixelShader** shader)
{
    const void* code = bytecode;
    SIZE_T code_size = length;

    ShaderHook(device, "ps", &code, &code_size);
    return d3d11_CreatePixelShader(device, code, code_size, linkage, shader);
}
// Installs the shader-creation hooks by patching the vtable of `device` in place.
//
// The original function pointers are saved in the d3d11_Create*Shader globals and the
// slots are replaced with the matching *Hook functions. Does nothing when `device` is NULL
// or when the hooks were already installed (d3d11_CreateComputeShader is set).
// NOTE(review): patching only once assumes later devices share this vtable - confirm.
static void SetupHooks(ID3D11Device* device)
{
    if (device == NULL) return;
    if (d3d11_CreateComputeShader) return;

    void** vt = (void**)device->lpVtbl;

    // Only the slots up to the highest patched index (18) need to be writable; a smaller
    // range avoids touching pages past the end of the vtable.
    const SIZE_T vt_bytes = (18 + 1) * sizeof(void*);

    DWORD old;
    if (VirtualProtect(vt, vt_bytes, PAGE_READWRITE, &old))
    {
        #define HOOK(index, original, hook) \
            do { \
                *((void**)&original) = vt[index]; \
                vt[index] = (void*)&hook; \
            } while (0)

        // Slot numbers are positions in the ID3D11Device vtable.
        HOOK(18, d3d11_CreateComputeShader, ComputeShaderHook);
        HOOK(12, d3d11_CreateVertexShader, VertexShaderHook);
        HOOK(16, d3d11_CreateHullShader, HullShaderHook);
        HOOK(17, d3d11_CreateDomainShader, DomainShaderHook);
        HOOK(13, d3d11_CreateGeometryShader, GeometryShaderHook);
        HOOK(14, d3d11_CreateGeometryShaderWithStreamOutput, GeometryShaderWithStreamOutputHook);
        HOOK(15, d3d11_CreatePixelShader, PixelShaderHook);

        #undef HOOK

        // VirtualProtect fails when lpflOldProtect is NULL, so a dummy receiver is required
        // for the original protection to actually be restored.
        DWORD ignored;
        VirtualProtect(vt, vt_bytes, old, &ignored);
    }
    else
    {
        // TODO: report error
    }
}
// Exported replacement for D3D11CreateDevice.
//
// Forwards all arguments to the original d3d11.dll entry point (resolved in DllMain) and,
// when a device was actually created, installs the shader hooks on it.
// Returns the HRESULT of the original call unchanged.
HRESULT WINAPI D3D11CreateDevice(
    IDXGIAdapter* adapter,
    D3D_DRIVER_TYPE driver,
    HMODULE software,
    UINT flags,
    const D3D_FEATURE_LEVEL* level,
    UINT levels,
    UINT version,
    ID3D11Device** device,
    D3D_FEATURE_LEVEL* created_level,
    ID3D11DeviceContext** context)
{
    HRESULT hr = d3d11_CreateDevice(adapter, driver, software, flags, level, levels, version, device, created_level, context);

    // `device` may legitimately be NULL (caller only probes the feature level), and on
    // failure *device is not a usable object - only hook a real device.
    if (SUCCEEDED(hr) && device != NULL && *device != NULL)
    {
        SetupHooks(*device);
    }
    return hr;
}
// Exported replacement for D3D11CreateDeviceAndSwapChain.
//
// Forwards all arguments to the original d3d11.dll entry point (resolved in DllMain) and,
// when a device was actually created, installs the shader hooks on it.
// Returns the HRESULT of the original call unchanged.
HRESULT WINAPI D3D11CreateDeviceAndSwapChain(
    IDXGIAdapter* adapter,
    D3D_DRIVER_TYPE driver,
    HMODULE software,
    UINT flags,
    const D3D_FEATURE_LEVEL* level,
    UINT levels,
    UINT version,
    const DXGI_SWAP_CHAIN_DESC* swap_chain_desc,
    IDXGISwapChain** swap_chain,
    ID3D11Device** device,
    D3D_FEATURE_LEVEL* created_level,
    ID3D11DeviceContext** context)
{
    HRESULT hr = d3d11_CreateDeviceAndSwapChain(adapter, driver, software, flags, level, levels, version, swap_chain_desc, swap_chain, device, created_level, context);

    // `device` may legitimately be NULL (caller only probes the feature level), and on
    // failure *device is not a usable object - only hook a real device.
    if (SUCCEEDED(hr) && device != NULL && *device != NULL)
    {
        SetupHooks(*device);
    }
    return hr;
}
// Entry point of the proxy d3d11.dll.
//
// On process attach:
//   - builds the path of the "d3d11_shaders" folder next to this DLL,
//   - loads the original d3d11.dll from the system directory and resolves the two
//     creation entry points that the exported wrappers forward to,
//   - enables shader dumping when the D3D11_DUMP_SHADERS environment variable exists.
// Any failure is fatal (message box + process exit). Always returns TRUE.
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    (void)reserved;

    if (reason == DLL_PROCESS_ATTACH)
    {
        // A return value equal to the buffer size means the path was truncated.
        DWORD ok = GetModuleFileNameW(instance, d3d11_shaders, COUNTOF(d3d11_shaders));
        CHECK_FATAL(ok != 0 && ok < COUNTOF(d3d11_shaders), "Failed to get path to my dll with GetModuleFileNameW");

        PathRemoveFileSpecW(d3d11_shaders);
        CHECK_FATAL(PathAppendW(d3d11_shaders, L"d3d11_shaders"), "Path to d3d11_shaders folder is too long");

        // Ask Windows for the system directory instead of assuming C:\Windows\System32.
        WCHAR system_dll[MAX_PATH];
        UINT system_len = GetSystemDirectoryW(system_dll, COUNTOF(system_dll));
        CHECK_FATAL(system_len != 0 && system_len < COUNTOF(system_dll), "Failed to get system directory");
        CHECK_FATAL(PathAppendW(system_dll, L"d3d11.dll"), "Failed to get system directory");

        d3d11 = LoadLibraryW(system_dll);
        CHECK_FATAL(d3d11, "Failed to load original d3d11.dll");

        *((FARPROC*)&d3d11_CreateDevice) = GetProcAddress(d3d11, "D3D11CreateDevice");
        CHECK_FATAL(d3d11_CreateDevice, "Failed to get address of D3D11CreateDevice");

        *((FARPROC*)&d3d11_CreateDeviceAndSwapChain) = GetProcAddress(d3d11, "D3D11CreateDeviceAndSwapChain");
        CHECK_FATAL(d3d11_CreateDeviceAndSwapChain, "Failed to get address of D3D11CreateDeviceAndSwapChain");

        // Dumping is enabled by the mere presence of the variable; its value is not read.
        // The last-error code is cleared first so a stale ERROR_ENVVAR_NOT_FOUND from an
        // earlier call cannot be mistaken for the result of this one.
        SetLastError(ERROR_SUCCESS);
        DWORD needed = GetEnvironmentVariableA("D3D11_DUMP_SHADERS", NULL, 0);
        dump = (needed != 0) || (GetLastError() != ERROR_ENVVAR_NOT_FOUND);

        DisableThreadLibraryCalls(instance);
    }
    return TRUE;
}
// dxgi.c - proxy dxgi.dll: exports the CreateDXGIFactory* functions, forwards them to the
// original dxgi.dll, and loads d3d11.dll by bare name from its DllMain.

#define COBJMACROS
#define WIN32_LEAN_AND_MEAN
// While the SDK headers are being parsed, the factory functions are renamed to *Import,
// so that this file can define its own functions with the real names further down.
#define CreateDXGIFactory CreateDXGIFactoryImport
#define CreateDXGIFactory1 CreateDXGIFactory1Import
#define CreateDXGIFactory2 CreateDXGIFactory2Import
#include <windows.h>
#include <dxgi.h>
#undef CreateDXGIFactory
#undef CreateDXGIFactory1
#undef CreateDXGIFactory2
// Export the functions defined in this file under their undecorated names.
#if _WIN64
#pragma comment(linker, "/export:CreateDXGIFactory")
#pragma comment(linker, "/export:CreateDXGIFactory1")
#pragma comment(linker, "/export:CreateDXGIFactory2")
#else
// 32-bit WINAPI functions get decorated names (_Name@<argument bytes>): 2 arguments -> @8,
// 3 arguments -> @12. The export aliases map the plain names to the decorated symbols.
#pragma comment(linker, "/export:CreateDXGIFactory=_CreateDXGIFactory@8")
#pragma comment(linker, "/export:CreateDXGIFactory1=_CreateDXGIFactory1@8")
#pragma comment(linker, "/export:CreateDXGIFactory2=_CreateDXGIFactory2@12")
#endif
// Handle of the original dxgi.dll (loaded in DllMain).
static HMODULE dxgi;
// Function types of the three forwarded entry points.
typedef HRESULT WINAPI fn_CreateDXGIFactory(REFIID riid, void** factory);
typedef HRESULT WINAPI fn_CreateDXGIFactory1(REFIID riid, void** factory);
typedef HRESULT WINAPI fn_CreateDXGIFactory2(UINT flags, REFIID riid, void** factory);
// Addresses of the original functions, resolved with GetProcAddress in DllMain.
static fn_CreateDXGIFactory* dxgi_CreateDXGIFactory;
static fn_CreateDXGIFactory1* dxgi_CreateDXGIFactory1;
static fn_CreateDXGIFactory2* dxgi_CreateDXGIFactory2;
// If `cond` is false: show `msg` in a message box titled "FATAL ERROR" and terminate the process.
#define CHECK_FATAL(cond, msg) do { if (!(cond)) { MessageBoxA(NULL, msg, "FATAL ERROR", 0); ExitProcess(1); } } while (0)
// Exported CreateDXGIFactory: pure pass-through to the original dxgi.dll function
// whose address was resolved in DllMain.
HRESULT WINAPI CreateDXGIFactory(REFIID riid, void** factory)
{
    HRESULT result = (*dxgi_CreateDXGIFactory)(riid, factory);
    return result;
}
// Exported CreateDXGIFactory1: pure pass-through to the original dxgi.dll function
// whose address was resolved in DllMain.
HRESULT WINAPI CreateDXGIFactory1(REFIID riid, void** factory)
{
    HRESULT result = (*dxgi_CreateDXGIFactory1)(riid, factory);
    return result;
}
// Exported CreateDXGIFactory2: pure pass-through to the original dxgi.dll function
// whose address was resolved in DllMain.
HRESULT WINAPI CreateDXGIFactory2(UINT flags, REFIID riid, void** factory)
{
    HRESULT result = (*dxgi_CreateDXGIFactory2)(flags, riid, factory);
    return result;
}
// Entry point of the proxy dxgi.dll.
//
// On process attach: loads d3d11.dll by bare name, then loads the original dxgi.dll from
// the system directory and resolves the three factory functions that the exported wrappers
// forward to. Any failure is fatal (message box + process exit). Always returns TRUE.
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    (void)reserved;

    if (reason == DLL_PROCESS_ATTACH)
    {
        // force loading d3d11 from current folder first
        HMODULE d3d11 = LoadLibraryA("d3d11.dll");
        CHECK_FATAL(d3d11, "Failed to load d3d11");

        // Ask Windows for the system directory instead of assuming C:\Windows\System32.
        static const char dll_name[] = "\\dxgi.dll";
        char system_dll[MAX_PATH];
        UINT system_len = GetSystemDirectoryA(system_dll, MAX_PATH);
        // sizeof(dll_name) counts the terminating NUL, so this checks the full result fits.
        CHECK_FATAL(system_len != 0 && system_len + sizeof(dll_name) <= MAX_PATH, "Failed to get system directory");
        lstrcpyA(system_dll + system_len, dll_name);

        dxgi = LoadLibraryA(system_dll);
        CHECK_FATAL(dxgi, "Failed to load original dxgi.dll");

        dxgi_CreateDXGIFactory = (fn_CreateDXGIFactory*)GetProcAddress(dxgi, "CreateDXGIFactory");
        CHECK_FATAL(dxgi_CreateDXGIFactory, "Failed to get address of CreateDXGIFactory");

        dxgi_CreateDXGIFactory1 = (fn_CreateDXGIFactory1*)GetProcAddress(dxgi, "CreateDXGIFactory1");
        CHECK_FATAL(dxgi_CreateDXGIFactory1, "Failed to get address of CreateDXGIFactory1");

        dxgi_CreateDXGIFactory2 = (fn_CreateDXGIFactory2*)GetProcAddress(dxgi, "CreateDXGIFactory2");
        CHECK_FATAL(dxgi_CreateDXGIFactory2, "Failed to get address of CreateDXGIFactory2");

        DisableThreadLibraryCalls(instance);
    }
    return TRUE;
}
// ps_b1c05ceb9ca8a14c.hlsl - replacement pixel shader.

// Source texture in slot t0; read with Load in BilinearSample and with Sample in the
// disabled (#else) path of main.
Texture2D<float4> t0 : register(t0);
// Sampler in slot s0; only referenced by the disabled (#else) path of main.
SamplerState s0_s : register(s0);
// Constant buffer b0 exposed as 7 raw float4 registers.
// main reads cb0[5].xyz, cb0[6].x and cb0[6].y.
cbuffer cb0 : register(b0)
{
float4 cb0[7];
}
// Manual bilinear filter over t0 built from four Load calls (mip 0).
//   uv   - texture coordinate, multiplied by `size` to get texel units
//   size - texture dimensions in texels
// replace this function with t.Sample(s, uv) if you have sampler with "linear" filtering mode
float4 BilinearSample(float2 uv, float2 size)
{
    // shift by half a texel so the integer part names the first of the four texels
    float2 texel = uv * size - 0.5;
    int2 base = floor(texel);
    float2 weight = frac(texel);

    // horizontal blends for the two rows, then a vertical blend between them
    float4 row0 = lerp(t0.Load(int3(base + int2(0, 0), 0)),
                       t0.Load(int3(base + int2(1, 0), 0)), weight.x);
    float4 row1 = lerp(t0.Load(int3(base + int2(0, 1), 0)),
                       t0.Load(int3(base + int2(1, 1), 0)), weight.x);

    return lerp(row0, row1, weight.y);
}
// Pixel shader entry point.
// Fetches a colour from t0 at v0.xy (texel-snapped filtered lookup in the enabled #if 1
// path, plain Sample in the #else path), then tints, blends and desaturates it using the
// vertex colour v4 and the constants cb0[5] / cb0[6]. v1, v2 and v3 are not read here.
// The register-style names (r0, r1, o0) and statement order are kept as they are.
void main(
float4 v0 : TEXCOORD0,
float4 v1 : SV_POSITION0,
float4 v2 : TEXCOORD1,
float4 v3 : TEXCOORD2,
float4 v4 : COLOR0,
out float4 o0 : SV_Target0)
{
float4 r0,r1;
// bitmask, uiDest and fDest are declared but never used in this function.
uint4 bitmask, uiDest;
float4 fDest;
#if 1
float2 size;
t0.GetDimensions(size.x, size.y);
// work in texel units
float2 uv = v0.xy * size;
// how many texels uv changes per screen pixel
float2 duv = fwidth(uv);
// Hold the coordinate at a fixed offset inside the texel and blend to the neighbouring
// texel only within a band that is duv wide: the saturate term ramps 0 -> 1 while
// frac(uv) goes from 0.5 - duv to 0.5.
// NOTE(review): this places the switch between texels around the texel centre rather
// than the texel edge - confirm the offset is intended.
uv = floor(uv) + 0.5 + saturate((frac(uv) - 0.5 + duv)/duv);
// back to normalized coordinates
uv /= size;
r0 = BilinearSample(uv, size);
#else
r0.xyzw = t0.Sample(s0_s, v0.xy).xyzw;
#endif
// r1 = cb0[5].xyz - (texture colour * vertex colour)
r1.xyz = -r0.xyz * v4.xyz + cb0[5].xyz;
// r0 = texture colour * vertex colour
r0.xyzw = v4.xyzw * r0.xyzw;
// move rgb towards cb0[5].xyz by the factor cb0[6].x
r0.xyz = cb0[6].xxx * r1.xyz + r0.xyz;
// alpha is written out before r0.w is reused as a scratch value below
o0.w = r0.w;
// weighted sum of rgb (weights look like luma coefficients)
r0.w = dot(r0.xyz, float3(0.300000012,0.589999974,0.109999999));
r1.xyz = r0.www + -r0.xyz;
// move rgb towards that grey value by the factor cb0[6].y
o0.xyz = cb0[6].yyy * r1.xyz + r0.xyz;
return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment