Skip to content

Instantly share code, notes, and snippets.

@Joshua-Ashton
Created January 23, 2019 00:33
Show Gist options
  • Save Joshua-Ashton/f9db53d72873858ed304574c4c453b24 to your computer and use it in GitHub Desktop.
Save Joshua-Ashton/f9db53d72873858ed304574c4c453b24 to your computer and use it in GitHub Desktop.
From c66337200467d7df4e2499f78b07ab389b17fe0d Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@nonagon.games>
Date: Wed, 23 Jan 2019 00:24:02 +0000
Subject: [PATCH] [d3d9, dx9asm] SHORT2 and UBYTE4 fixup.
---
src/d3d9/d3d9_constant_buffer.h | 16 ++-
src/d3d9/d3d9_renderer.cpp | 27 ++++-
src/d3d9/d3d9_shaders.h | 4 +-
src/d3d9/d3d9_vertexdeclaration.h | 4 +
src/dx9asm/dx9asm_register_map.cpp | 11 ++-
src/dx9asm/dx9asm_register_mapping.h | 1 +
src/dx9asm/dx9asm_unique_operations.cpp | 7 ++
src/dx9asm/dxbc_bytecode.h | 11 +++
src/dx9asm/dxbc_chunks.cpp | 126 ++++++++++++++++++++++--
9 files changed, 187 insertions(+), 20 deletions(-)
diff --git a/src/d3d9/d3d9_constant_buffer.h b/src/d3d9/d3d9_constant_buffer.h
index 53575de..5b6fe51 100644
--- a/src/d3d9/d3d9_constant_buffer.h
+++ b/src/d3d9/d3d9_constant_buffer.h
@@ -19,6 +19,7 @@ namespace dxup {
std::array<Vector<float, 4>, 256> floatConstants;
std::array<Vector<int, 4>, 16> intConstants;
std::array<int, 16> boolConstants;
+ uint32_t mask = 0; // Two flag bits per vertex input (integer -> float fix-up); zero-initialised so the first dirty comparison reads a defined value.
};
template <bool Pixel>
@@ -30,7 +31,7 @@ namespace dxup {
: m_device{ device }
, m_context{ context } {
D3D11_BUFFER_DESC cbDesc;
- cbDesc.ByteWidth = sizeof(D3D9ShaderConstants::floatConstants) + sizeof(D3D9ShaderConstants::intConstants) + (4 * sizeof(D3D9ShaderConstants::boolConstants)); // TODO make bool constants a bitfield.
+ cbDesc.ByteWidth = 4 * sizeof(uint32_t) + sizeof(D3D9ShaderConstants::floatConstants) + sizeof(D3D9ShaderConstants::intConstants) + (4 * sizeof(D3D9ShaderConstants::boolConstants)); // TODO make bool constants a bitfield.
cbDesc.Usage = D3D11_USAGE_DYNAMIC;
cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
@@ -50,10 +51,17 @@ namespace dxup {
// This can probably be consolidated into a single one.
uint8_t* data = (uint8_t*)res.pData;
- std::memcpy(data, constants.floatConstants.data(), sizeof(constants.floatConstants));
- std::memcpy(data + sizeof(constants.floatConstants), constants.intConstants.data(), sizeof(constants.intConstants));
- int* boolData = (int*)(data + sizeof(constants.floatConstants) + sizeof(constants.intConstants));
+ uint32_t* bitfield = (uint32_t*)data; // The first 16 bytes of the mapped buffer hold the conversion mask.
+ bitfield[0] = constants.mask; // The same mask value is written to all four 32-bit components.
+ bitfield[1] = constants.mask;
+ bitfield[2] = constants.mask;
+ bitfield[3] = constants.mask;
+
+ std::memcpy(data + 4 * sizeof(uint32_t), constants.floatConstants.data(), sizeof(constants.floatConstants)); // Float constants follow the 16-byte mask slot.
+ std::memcpy(data + 4 * sizeof(uint32_t) + sizeof(constants.floatConstants), constants.intConstants.data(), sizeof(constants.intConstants)); // Int constants follow the float constants.
+
+ int* boolData = (int*)(data + 4 * sizeof(uint32_t) + sizeof(constants.floatConstants) + sizeof(constants.intConstants)); // Bool constants come last.
for (uint32_t i = 0; i < constants.boolConstants.size(); i++) {
for (uint32_t j = 0; j < 4; j++)
boolData[i * 4 + j] = constants.boolConstants[i];
diff --git a/src/d3d9/d3d9_renderer.cpp b/src/d3d9/d3d9_renderer.cpp
index bd1d984..0c73d2c 100644
--- a/src/d3d9/d3d9_renderer.cpp
+++ b/src/d3d9/d3d9_renderer.cpp
@@ -321,6 +321,27 @@ namespace dxup {
auto& elements = m_state->vertexDecl->GetD3D11Descs();
auto* vertexShdrBytecode = m_state->vertexShader->GetTranslation();
+ auto& vertexInputs = m_state->vertexShader->GetTranslation()->getVertexInputs();
+
+ uint32_t mask = 0; // Two bits per shader vertex input: bit 2n = SHORT2/SHORT4 element, bit 2n+1 = UBYTE4 element.
+ const auto& d3d9Descs = m_state->vertexDecl->GetD3D9Descs();
+ for (const auto& vertexInput : vertexInputs) {
+ for (const auto& d3d9Desc : d3d9Descs) {
+ if (vertexInput.dclInfo.usage != d3d9Desc.Usage || vertexInput.dclInfo.usageIndex != d3d9Desc.UsageIndex)
+ continue; // Declaration element does not feed this shader input.
+
+ if (d3d9Desc.Type == D3DDECLTYPE_SHORT2 || d3d9Desc.Type == D3DDECLTYPE_SHORT4)
+ mask |= 1u << (vertexInput.id * 2); // Unsigned literal: the shift is done in unsigned arithmetic.
+ else if (d3d9Desc.Type == D3DDECLTYPE_UBYTE4)
+ mask |= 1u << (vertexInput.id * 2 + 1); // Reaches bit 31 for input 15, which a signed int shift must not touch.
+ }
+ }
+
+ if (m_state->vsConstants.mask != mask) // Only flag the vertex constants dirty when the mask actually changed.
+ m_state->dirtyFlags |= dirtyFlags::vsConstants;
+
+ m_state->vsConstants.mask = mask;
+
ID3D11InputLayout* layout = m_state->vertexShader->GetLinkedInput(m_state->vertexDecl.ptr());
if (layout == nullptr) {
@@ -628,15 +649,15 @@ namespace dxup {
if (m_state->dirtyFlags & dirtyFlags::indexBuffer)
updateIndexBuffer();
+ if (m_state->dirtyFlags & dirtyFlags::vertexDecl || m_state->dirtyFlags & dirtyFlags::vertexShader) // NOTE(review): presumably moved ahead of the constant uploads because this update can raise dirtyFlags::vsConstants - confirm.
+ updateVertexShaderAndInputLayout();
+
if (m_state->dirtyFlags & dirtyFlags::vsConstants)
updateVertexConstants();
if (m_state->dirtyFlags & dirtyFlags::psConstants)
updatePixelConstants();
- if (m_state->dirtyFlags & dirtyFlags::vertexDecl || m_state->dirtyFlags & dirtyFlags::vertexShader)
- updateVertexShaderAndInputLayout();
-
if (m_state->dirtySamplers != 0)
updateSamplers();
diff --git a/src/d3d9/d3d9_shaders.h b/src/d3d9/d3d9_shaders.h
index 39777e7..b64fe43 100644
--- a/src/d3d9/d3d9_shaders.h
+++ b/src/d3d9/d3d9_shaders.h
@@ -63,7 +63,7 @@ namespace dxup {
return E_NOINTERFACE;
}
- const dx9asm::ShaderBytecode* GetTranslation() const {
+ dx9asm::ShaderBytecode* GetTranslation() {
return m_translation;
}
@@ -93,7 +93,7 @@ namespace dxup {
std::vector<InputLink> m_inputLinks;
std::vector<uint32_t> m_dx9asm;
Com<D3D11Shader> m_shader;
- const dx9asm::ShaderBytecode* m_translation;
+ dx9asm::ShaderBytecode* m_translation;
};
using Direct3DVertexShader9 = Direct3DShader9<ID3D11VertexShader, IDirect3DVertexShader9>;
diff --git a/src/d3d9/d3d9_vertexdeclaration.h b/src/d3d9/d3d9_vertexdeclaration.h
index b07e302..5be0273 100644
--- a/src/d3d9/d3d9_vertexdeclaration.h
+++ b/src/d3d9/d3d9_vertexdeclaration.h
@@ -48,6 +48,10 @@ namespace dxup {
return m_d3d11Descs;
}
+ const std::vector<D3DVERTEXELEMENT9>& GetD3D9Descs() const { // Read-only view of the stored D3D9 vertex elements.
+ return m_d3d9Descs;
+ }
+
private:
std::vector<D3D11_INPUT_ELEMENT_DESC> m_d3d11Descs;
std::vector<D3DVERTEXELEMENT9> m_d3d9Descs;
diff --git a/src/dx9asm/dx9asm_register_map.cpp b/src/dx9asm/dx9asm_register_map.cpp
index 199e763..a008f8b 100644
--- a/src/dx9asm/dx9asm_register_map.cpp
+++ b/src/dx9asm/dx9asm_register_map.cpp
@@ -98,7 +98,12 @@ namespace dxup {
dxbcType = D3D10_SB_OPERAND_TYPE_TEMP; break;
case D3DSPR_INPUT: {
- dxbcType = D3D10_SB_OPERAND_TYPE_INPUT;
+ if (translator.getShaderType() == ShaderType::Vertex) { // Vertex shader inputs are mapped to a temp register rather than the input register.
+ dxbcType = D3D10_SB_OPERAND_TYPE_TEMP;
+ newMapping.inputTemp = true; // Marks the mapping as a temp that stands in for a vertex input.
+ }
+ else
+ dxbcType = D3D10_SB_OPERAND_TYPE_INPUT; // Other shader types keep the direct input mapping.
if (translator.getMajorVersion() != 3 && translator.getShaderType() == ShaderType::Pixel) {
newMapping.dclInfo.type = UsageType::Input;
@@ -113,6 +118,7 @@ namespace dxup {
const uint32_t constantBufferIndex = 0;
uint32_t constId = newMapping.dx9Id;
+ constId += 1; // Constant slot 0 is reserved for the SHORT2/SHORT4/UBYTE4 vertex-input conversion mask, so every D3D9 constant index shifts up by one.
if (regType == D3DSPR_CONSTINT)
constId += 256;
else if (regType == D3DSPR_CONSTBOOL)
@@ -209,6 +215,9 @@ namespace dxup {
bool transient = io && translator.isTransient(newMapping.dclInfo.type == UsageType::Input);
bool generateId = translator.shouldGenerateId(transient);
+ if (newMapping.inputTemp) // We deal with this later in dcl territory.
+ transient = false;
+
if (dxbcType == D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER)
generateId = false;
diff --git a/src/dx9asm/dx9asm_register_mapping.h b/src/dx9asm/dx9asm_register_mapping.h
index 7b4ee99..29ae107 100644
--- a/src/dx9asm/dx9asm_register_mapping.h
+++ b/src/dx9asm/dx9asm_register_mapping.h
@@ -28,6 +28,7 @@ namespace dxup {
DXBCOperand relativeOperand;
bool hasRelativeOperand = false;
+ bool inputTemp = false; // True when this mapping is a temp register standing in for a vertex shader input.
uint32_t readMask = 0;
uint32_t writeMask = 0;
diff --git a/src/dx9asm/dx9asm_unique_operations.cpp b/src/dx9asm/dx9asm_unique_operations.cpp
index a181a1c..63a80fa 100644
--- a/src/dx9asm/dx9asm_unique_operations.cpp
+++ b/src/dx9asm/dx9asm_unique_operations.cpp
@@ -47,6 +47,10 @@ namespace dxup {
mapping.dclInfo.usageIndex = usageToken->getUsageIndex();
if (dst->getRegType() == D3DSPR_INPUT) {
+ if (getShaderType() == ShaderType::Vertex) {
+ mapping.dxbcOperand.setRegisterType(D3D10_SB_OPERAND_TYPE_TEMP);
+ mapping.inputTemp = true;
+ }
mapping.dclInfo.type = UsageType::Input;
if (getMajorVersion() != 3 && getShaderType() == ShaderType::Pixel)
@@ -102,6 +106,9 @@ namespace dxup {
bool transient = io && isTransient(mapping.dclInfo.type == UsageType::Input);
bool generateId = shouldGenerateId(transient);
+ if (mapping.inputTemp) // We deal with this later in dcl territory.
+ transient = false;
+
getRegisterMap().addRegisterMapping(transient, generateId, mapping);
return true;
diff --git a/src/dx9asm/dxbc_bytecode.h b/src/dx9asm/dxbc_bytecode.h
index e59885e..74f97c5 100644
--- a/src/dx9asm/dxbc_bytecode.h
+++ b/src/dx9asm/dxbc_bytecode.h
@@ -1,6 +1,7 @@
#pragma once
#include "dxbc_header.h"
+#include "dx9asm_register_mapping.h"
#include <vector>
namespace dxup {
@@ -9,6 +10,11 @@ namespace dxup {
class ShaderCodeTranslator;
+ struct VertexInput { // One record per vertex-shader input that is routed through a temp register.
+ uint32_t id; // Ordinal of the input among the inputTemp mappings; selects its bit pair in the conversion mask.
+ DclInfo dclInfo; // Usage / usage index copied from the register mapping; matched against the vertex declaration.
+ };
+
class ShaderBytecode {
public:
ShaderBytecode(ShaderCodeTranslator& shdrCode);
@@ -28,8 +34,13 @@ namespace dxup {
inline std::vector<uint32_t>& getBytecodeVector() {
return m_bytecode;
}
+
+ inline std::vector<VertexInput>& getVertexInputs() { // Mutable access to the recorded vertex inputs.
+ return m_vertexInputs;
+ }
private:
std::vector<uint32_t> m_bytecode;
+ std::vector<VertexInput> m_vertexInputs;
};
}
diff --git a/src/dx9asm/dxbc_chunks.cpp b/src/dx9asm/dxbc_chunks.cpp
index fd633ba..05df997 100644
--- a/src/dx9asm/dxbc_chunks.cpp
+++ b/src/dx9asm/dxbc_chunks.cpp
@@ -200,7 +200,7 @@ namespace dxup {
resourceBindingDescOffset = 0;
resourceBindingCount = bindingCount;
-
+
// Just one for now...
constantBufferDescOffset = this->getChunkSize(bytecode);
if (constantBufferCount != 0) {
@@ -337,7 +337,7 @@ namespace dxup {
uint32_t opcode = 0;
uint32_t interpMode = UINT32_MAX;
-
+
if (shdrCode.getShaderType() == ShaderType::Vertex) {
if (Input)
opcode = hasSiv ? D3D10_SB_OPCODE_DCL_INPUT_SIV : D3D10_SB_OPCODE_DCL_INPUT;
@@ -356,11 +356,15 @@ namespace dxup {
uint32_t lengthOffset = hasSiv ? 1 : 0;
DXBCOperand operand = mapping.dxbcOperand;
-
- if (!Input)
+
+ if (!Input) {
operand.setSwizzleOrWritemask(mapping.writeMask);
- else
+ operand.setRegisterType(D3D10_SB_OPERAND_TYPE_OUTPUT);
+ }
+ else {
operand.setSwizzleOrWritemask(mapping.readMask);
+ operand.setRegisterType(D3D10_SB_OPERAND_TYPE_INPUT);
+ }
DXBCOperation{ opcode, false, UINT32_MAX, lengthOffset, interpMode }
@@ -383,12 +387,16 @@ namespace dxup {
// Temps
uint32_t tempCount = shdrCode.getRegisterMap().getTotalTempCount();
+
+ if (shdrCode.getShaderType() == ShaderType::Vertex)
+ tempCount += 2; // Two extra internal temps for the vertex-input fix-up: one holds the extracted format flags, one holds the itof/utof result.
+
if (tempCount > 0)
{
DXBCOperation{ D3D10_SB_OPCODE_DCL_TEMPS, false, 2 }.push(obj);
obj.push_back(tempCount); // Followed by DWORD count of temps. Not an operand!
}
-
+
// Samplers
{
for (uint32_t i = 0; i < 16; i++) {
@@ -439,7 +447,7 @@ namespace dxup {
uint32_t cbufferCount = 0;
if (shdrCode.isIndirectMarked())
- cbufferCount = 256 + 16;
+ cbufferCount = 1 + 256 + 16 + 16;
else
cbufferCount = shdrCode.getRegisterMap().getDXBCTypeCount(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER);
@@ -452,6 +460,104 @@ namespace dxup {
}
}
+ if (shdrCode.getShaderType() == ShaderType::Vertex) {
+ // For every vertex input routed through a temp, emit code that copies the input register into that temp, replacing it with an integer-to-float conversion when the mask in constant slot 0 has the matching flag bit set.
+ uint32_t mappingCount = 0; // Ordinal of the current inputTemp mapping; recorded as VertexInput::id.
+ for (auto& mapping : shdrCode.getRegisterMap().getRegisterMappings()) {
+ if (mapping.inputTemp) {
+ DXBCOperand rVxSrc = mapping.dxbcOperand; // The stand-in temp, read form.
+ rVxSrc.setSwizzleOrWritemask(noSwizzle);
+ DXBCOperand rVxDst = mapping.dxbcOperand; // The stand-in temp, write form.
+ rVxDst.setSwizzleOrWritemask(writeAll);
+
+ DXBCOperand vXSrc = rVxSrc; // Same operand retargeted at the input register.
+ vXSrc.setRegisterType(D3D10_SB_OPERAND_TYPE_INPUT);
+ DXBCOperand vXDst = rVxSrc; // NOTE(review): vXDst is not referenced again in this block.
+ vXDst.setRegisterType(D3D10_SB_OPERAND_TYPE_INPUT);
+
+ VertexInput input; // Recorded in the bytecode object so callers can read it back via getVertexInputs().
+ input.id = mappingCount;
+ input.dclInfo = mapping.dclInfo;
+ bytecode.getVertexInputs().push_back(input);
+
+ uint32_t tempCount = shdrCode.getRegisterMap().getTotalTempCount(); // NOTE(review): loop-invariant; presumably the index of the first of the two extra internal temps - confirm.
+
+ DXBCOperand rFormatSrc = DXBCOperand{D3D10_SB_OPERAND_TYPE_TEMP, 1} // Temp that holds the extracted flag bits.
+ .setSwizzleOrWritemask(noSwizzle)
+ .setData(&tempCount, 1);
+ DXBCOperand rFormatDst = rFormatSrc;
+ rFormatDst.setSwizzleOrWritemask(writeAll);
+
+ tempCount++; // Next index: the conversion scratch temp.
+
+ DXBCOperand rConversionTempSrc = DXBCOperand{D3D10_SB_OPERAND_TYPE_TEMP, 1} // Temp that receives the itof/utof result.
+ .setSwizzleOrWritemask(noSwizzle)
+ .setData(&tempCount, 1);
+
+ DXBCOperand rConversionTempDst = rConversionTempSrc;
+ rConversionTempDst.setSwizzleOrWritemask(writeAll);
+
+ if (mappingCount % 2 == 0) { // One extraction serves two inputs: x/y for this one, z/w for the next (odd) one.
+ uint32_t doubleCount = mappingCount * 2; // First mask bit belonging to this input.
+ uint32_t data[2] = { 0, 0 }; // Indices for the constant-buffer operand; presumably buffer 0, element 0 - confirm.
+
+ DXBCOperation{ D3D11_SB_OPCODE_UBFE, false } // Unsigned bit-field extract of the flag bits from the mask.
+ .appendOperand(rFormatDst)
+ .appendOperand(DXBCOperand{1, 1, 1, 1}) // Presumably an immediate: field width of 1 bit per component.
+ .appendOperand(DXBCOperand{doubleCount, doubleCount + 1, doubleCount + 2, doubleCount + 3}) // Presumably an immediate: bit offsets 2n .. 2n+3.
+ .appendOperand(DXBCOperand{ D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 2 }.setSwizzleOrWritemask(ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+ ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_X, D3D10_SB_4_COMPONENT_X, D3D10_SB_4_COMPONENT_X, D3D10_SB_4_COMPONENT_X)).setData(data, 2))
+ .push(obj);
+ }
+
+ // TODO! Consolidate me!
+ // Itof Pass
+ DXBCOperation{ D3D10_SB_OPCODE_ITOF, false } // Signed int -> float of the raw input into the scratch temp.
+ .appendOperand(rConversionTempDst)
+ .appendOperand(vXSrc)
+ .push(obj);
+
+ if (mappingCount % 2 == 0) { // Even input: first flag of the pair is in x.
+ rFormatSrc.setSwizzleOrWritemask(ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+ ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_X, D3D10_SB_4_COMPONENT_X, D3D10_SB_4_COMPONENT_X, D3D10_SB_4_COMPONENT_X));
+ } else { // Odd input: first flag of the pair is in z.
+ rFormatSrc.setSwizzleOrWritemask(ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+ ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_Z, D3D10_SB_4_COMPONENT_Z, D3D10_SB_4_COMPONENT_Z, D3D10_SB_4_COMPONENT_Z));
+ }
+
+ DXBCOperation{ D3D10_SB_OPCODE_MOVC, false } // Conditional move: temp = flag ? itof result : raw input.
+ .appendOperand(rVxDst)
+ .appendOperand(rFormatSrc)
+ .appendOperand(rConversionTempSrc)
+ .appendOperand(vXSrc)
+ .push(obj);
+
+ // Utof Pass
+ DXBCOperation{ D3D10_SB_OPCODE_UTOF, false } // Unsigned int -> float of the raw input into the scratch temp.
+ .appendOperand(rConversionTempDst)
+ .appendOperand(vXSrc)
+ .push(obj);
+
+ if (mappingCount % 2 == 0) { // Even input: second flag of the pair is in y.
+ rFormatSrc.setSwizzleOrWritemask(ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+ ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_Y, D3D10_SB_4_COMPONENT_Y, D3D10_SB_4_COMPONENT_Y, D3D10_SB_4_COMPONENT_Y));
+ } else { // Odd input: second flag of the pair is in w.
+ rFormatSrc.setSwizzleOrWritemask(ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+ ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_W, D3D10_SB_4_COMPONENT_W, D3D10_SB_4_COMPONENT_W, D3D10_SB_4_COMPONENT_W));
+ }
+
+ DXBCOperation{ D3D10_SB_OPCODE_MOVC, false } // Conditional move: temp = flag ? utof result : value left by the previous movc.
+ .appendOperand(rVxDst)
+ .appendOperand(rFormatSrc)
+ .appendOperand(rConversionTempSrc)
+ .appendOperand(rVxSrc)
+ .push(obj);
+
+ mappingCount++;
+ }
+ }
+ }
+
}
void pushInternal(ShaderBytecode& bytecode, ShaderCodeTranslator& shdrCode) override {
@@ -459,7 +565,7 @@ namespace dxup {
PlaceholderPtr<uint32_t> headerChunkSize{ "[SHEX] Chunk Header - Chunk Data Size", &((ChunkHeader*)nextPtr(obj))->size };
ChunkHeader{ fourcc("SHEX") }.push(obj); // [PUSH] Chunk Header
-
+
obj.push_back(ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(shdrCode.getShaderType() == ShaderType::Vertex ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER, 5, 0)); // [PUSH] DXBC Version Token - VerTok
PlaceholderPtr<uint32_t> dwordCount{ "[SHEX] Dword Count", nextPtr(obj) };
@@ -526,7 +632,7 @@ namespace dxup {
if (mapping.dclInfo.usage == transMapping.d3d9Usage && mapping.dclInfo.usageIndex == transMapping.d3d9UsageIndex)
baseMask |= isInput(ChunkType) ? mapping.readMask : mapping.writeMask;
});
-
+
baseMask = DECODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(baseMask);
element.mask = 0xFFu;
uint32_t rwMask = baseMask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
@@ -608,4 +714,4 @@ namespace dxup {
}
}
-}
\ No newline at end of file
+}
--
2.20.1.windows.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment