Last active
November 24, 2018 10:05
-
-
Save Triang3l/2449fbe65527125d35599561cd3cf698 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void DxbcShaderTranslator::CompletePixelShader_DepthTo24Bit() { | |
// Unpack the depth format. | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
system_constants_used_ |= 1ull << kSysConst_Flags_Index; | |
shader_code_.push_back(EncodeVectorSelectOperand( | |
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); | |
shader_code_.push_back(cbuffer_index_system_constants_); | |
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); | |
shader_code_.push_back(kSysConst_Flags_Vec); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(kSysFlag_DepthFloat24); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// Convert according to the format. | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | | |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( | |
D3D10_SB_INSTRUCTION_TEST_NONZERO) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.dynamic_flow_control_count; | |
// *************************************************************************** | |
// 20e4 conversion begins here. | |
// CFloat24 from d3dref9.dll. | |
// *************************************************************************** | |
// Assuming the depth is already clamped to [0, 2) (in all places, the depth | |
// is written with the saturate flag set). | |
// Calculate the denormalized value if the number is too small to be | |
// represented as normalized 20e4 into Y. | |
// y = f32 & 0x7FFFFF | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(0x7FFFFF); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// y = (f32 & 0x7FFFFF) | 0x800000 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(0x800000); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// z = f32 >> 23 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(23); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// z = 113 - (f32 >> 23) | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(113); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) | | |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); | |
shader_code_.push_back( | |
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(D3D10_SB_OPERAND_MODIFIER_NEG)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.int_instruction_count; | |
// y = ((f32 & 0x7FFFFF) | 0x800000) >> (113 - (f32 >> 23)) | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// Check if the number is too small to be represented as normalized 20e4. | |
// z = f32 < 0x38800000 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(0x38800000); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// Bias the exponent. | |
// f32 += 0xC8000000 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(0xC8000000u); | |
++stat_.instruction_count; | |
++stat_.int_instruction_count; | |
// Replace the number in f32 with a denormalized one if needed. | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.movc_instruction_count; | |
// Build the 20e4 number. | |
// y = f32 >> 3 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(3); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// y = (f32 >> 3) & 1 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(1); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// f24 = f32 + 3 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(3); | |
++stat_.instruction_count; | |
++stat_.int_instruction_count; | |
// f24 = f32 + 3 + ((f32 >> 3) & 1) | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.int_instruction_count; | |
// f24 = (f32 + 3 + ((f32 >> 3) & 1)) >> 3 | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(3); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// f24 = ((f32 + 3 + ((f32 >> 3) & 1)) >> 3) & 0xFFFFFF | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(0xFFFFFF); | |
++stat_.instruction_count; | |
++stat_.uint_instruction_count; | |
// *************************************************************************** | |
// 20e4 conversion ends here. | |
// *************************************************************************** | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); | |
++stat_.instruction_count; | |
// *************************************************************************** | |
// Unorm24 conversion begins here. | |
// *************************************************************************** | |
// Multiply by float(0xFFFFFF). | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); | |
shader_code_.push_back(0x4B7FFFFF); | |
++stat_.instruction_count; | |
++stat_.float_instruction_count; | |
// Round to the nearest integer. This is the correct way of rounding, rounding | |
// towards zero gives 0xFF instead of 0x100 in clear shaders in, for instance, | |
// Halo 3. | |
// https://docs.microsoft.com/en-us/windows/desktop/direct3d10/d3d10-graphics-programming-guide-resources-data-conversion | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NE) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.float_instruction_count; | |
// Convert to fixed-point. | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); | |
shader_code_.push_back( | |
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
shader_code_.push_back( | |
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); | |
shader_code_.push_back(system_temp_depth_); | |
++stat_.instruction_count; | |
++stat_.conversion_instruction_count; | |
// *************************************************************************** | |
// Unorm24 conversion ends here. | |
// *************************************************************************** | |
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | | |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); | |
++stat_.instruction_count; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment