Skip to content

Instantly share code, notes, and snippets.

View Triang3l's full-sized avatar
🚀
Never say never!

Triang3l Triang3l

🚀
Never say never!
View GitHub Profile
@Triang3l
Triang3l / VECTOR_CONVERT_I2F.cc
Created December 10, 2020 18:40
Xenia — exact unsigned-integer-to-float conversion for 0x80000000–0xFFFFFFFF
struct VECTOR_CONVERT_I2F
: Sequence<VECTOR_CONVERT_I2F,
I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// flags = ARITHMETIC_UNSIGNED
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
#if 1
// 0.5 ULP precision rounding to nearest even (the only rounding mode on
// AltiVec) for negatives.
// TODO(Triang3l): Ignore the current rounding mode for positives as well
#include "texture_load.hlsli"
uint4 XeDXT3AAs1111TwoBlocksRowToBGRA4(uint2 halfblocks) {
// Only 16 bits of half-blocks are used. X contains pixels 0123, Y - 4567 (in
// the image, halfblocks.y is halfblocks.x + 8).
// In the row, X contains pixels 01, Y - 23, Z - 45, W - 67.
uint4 row = (((halfblocks.xxyy >> uint2(0u, 8u).xyxy) & 1u) << 8u) |
(((halfblocks.xxyy >> uint2(4u, 12u).xyxy) & 1u) << 24u) |
(((halfblocks.xxyy >> uint2(1u, 9u).xyxy) & 1u) << 4u) |
(((halfblocks.xxyy >> uint2(5u, 13u).xyxy) & 1u) << 20u) |
// Check if the target format is 7e3 and the conversion is needed (this is
// pretty long, better to branch here).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(flags_temp);
++stat_.instruction_count;
@Triang3l
Triang3l / TiledOffset3D.cc
Last active March 6, 2019 20:52
3D texture tiling for Xenia, reverse-engineered from XGAddress(Volume?)TiledOffset from XGRAPHICS::TileVolume
static uint32_t TiledOffset3D(uint32_t x, uint32_t y, uint32_t z,
uint32_t pitch_h, uint32_t pitch_v,
uint32_t log2_bpp) {
// Reconstructed from disassembly of XGRAPHICS::TileVolume.
uint32_t macro_outer =
((y >> 4) + (z >> 2) * (pitch_v >> 4)) * (pitch_h >> 5);
uint32_t macro =
((((x >> 5) + macro_outer) << (log2_bpp + 6)) & 0xFFFFFFF) << 1;
uint32_t micro = (((x & 7) + ((y & 6) << 2)) << (log2_bpp + 6)) >> 6;
uint32_t offset_outer = ((y >> 3) + (z >> 2)) & 1;
G> 00000004 Generated vertex shader (948b) - hash 260BFB61343A4703:
/* 0.0 */ exec
/* 17 */ vfetch_full r1, r0.x, vf1, DataFormat=FMT_32_32_32_32_FLOAT, Stride=20, Signed=true, NumFormat=integer, PrefetchCount=8
/* 18 */ vfetch_mini r10.yxwz, Offset=4, DataFormat=FMT_16_16_16_16_FLOAT, Signed=true, NumFormat=integer
/* 19 */ vfetch_mini r9.yxwz, Offset=6, DataFormat=FMT_16_16_16_16
/* 20 */ vfetch_full r8.yxwz, r0.x, vf1, Offset=8, DataFormat=FMT_16_16_16_16, Stride=20, PrefetchCount=8
/* 21 */ vfetch_mini r4.yxwz, Offset=10, DataFormat=FMT_16_16_16_16
/* 22 */ vfetch_mini r3.yxwz, Offset=12, DataFormat=FMT_16_16_16_16_FLOAT, Signed=true, NumFormat=integer
/* 0.1 */ exec
/* 23 */ vfetch_mini r7.yx__, Offset=14, DataFormat=FMT_16_16_FLOAT, Signed=true, NumFormat=integer
G> 00000004 Generated vertex shader (5148b) - hash 3FC2506B01BB8A38:
/* 0.0 */ exec
/* 35 */ add r0.__z_, r0.xxxx, c229.xxxx
+ rcp r0._y__, c78.x
/* 36 */ mul r0._y__, r0.zzzz, r0.yyyy
/* 37 */ floors r0._y__, r0.y
/* 38 */ vfetch_full r2.xywz, r0.y, vf1, DataFormat=FMT_32_32_32_32_FLOAT, Stride=20, Signed=true, NumFormat=integer, PrefetchCount=8
/* 39 */ vfetch_mini r1.wyzx, Offset=4, DataFormat=FMT_16_16_16_16_FLOAT, Signed=true, NumFormat=integer
/* 40 */ vfetch_mini r3.xyw_, Offset=6, DataFormat=FMT_16_16_16_16
/* 0.1 */ exec
G> 00000004 Generated vertex shader (612b) - hash C61C15F57B81399E:
/* 0.0 */ exec
/* 6 */ vfetch_full r2.y0x1, r0.x, vf0, DataFormat=FMT_16_16_FLOAT, Stride=4, Signed=true, NumFormat=integer, PrefetchCount=4
/* 7 */ vfetch_mini r3.y0x1, Offset=1, DataFormat=FMT_16_16_FLOAT, Signed=true, NumFormat=integer
/* 8 */ vfetch_mini r4.y0x1, Offset=2, DataFormat=FMT_16_16_FLOAT, Signed=true, NumFormat=integer
/* 9 */ vfetch_mini r5.y0x1, Offset=3, DataFormat=FMT_16_16_FLOAT, Signed=true, NumFormat=integer
/* 10 */ serialize
add r0.x__w, c20.yyyy, -r0.yyzz
/* 11 */ mul r0, r0.xyxy, r0.wwzz
/* 0.1 */ exec // PredicateClean=false
G> 00000004 Generated vertex shader (696b) - hash 06B25D80222ECA41:
/* 0.0 */ exec // PredicateClean=false
/* 6 */ vfetch_full r2.xyz1, r1.x, vf0, DataFormat=FMT_32_32_32_FLOAT, Stride=9, Signed=true, NumFormat=integer, PrefetchCount=6
/* 7 */ vfetch_mini r3.xyz1, Offset=3, DataFormat=FMT_32_32_32_FLOAT, Signed=true, NumFormat=integer
/* 8 */ vfetch_full r4.xyz1, r1.x, vf0, Offset=6, DataFormat=FMT_32_32_32_FLOAT, Stride=9, Signed=true, NumFormat=integer
/* 9 */ serialize
add r5, r1.yyyy, -c8
/* 10 */ setp_eq r6.x___, r5.y
/* 11 */ (p0) max r0.xyz_, r0.zxyy, r0.zxyy
/* 0.1 */ exec // PredicateClean=false
G> 00000004 Generated vertex shader (2592b) - hash 95451A57451AAEF9:
/* 0.0 */ exec
/* 28 */ add r0.___w, r1.xxxx, c131.xxxx
/* 29 */ vfetch_full r2.__yx, r0.w, vf2, DataFormat=FMT_16_16, Stride=3, NumFormat=integer, PrefetchCount=3
/* 30 */ vfetch_mini r2.yx__, Offset=1, DataFormat=FMT_16_16, NumFormat=integer
/* 31 */ vfetch_mini r1.__yx, Offset=2, DataFormat=FMT_16_16, NumFormat=integer
/* 32 */ serialize
vfetch_full r4.xyz1, r2.z, vf0, DataFormat=FMT_32_32_32_FLOAT, Stride=8, Signed=true, NumFormat=integer, PrefetchCount=8
/* 33 */ vfetch_mini r12.xy0_, Offset=3, DataFormat=FMT_32_32_FLOAT, Signed=true, NumFormat=integer
/* 0.1 */ exec
void DxbcShaderTranslator::CompletePixelShader_DepthTo24Bit() {
// Unpack the depth format.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(system_temp_depth_);
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));