Last active
November 19, 2016 21:08
-
-
Save dwilliamson/2837611 to your computer and use it in GitHub Desktop.
Compile-time shader assembler from way back (D3D9, vs/ps 1.x). Compiles down to simply writing calculated opcodes to an array.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// COMPILE-TIME SHADER ASSEMBLER | |
#ifndef _INCLUDED_ASSEMBLER_H | |
#define _INCLUDED_ASSEMBLER_H | |
#ifndef _INCLUDED_D3D_H | |
#include "D3D.h" | |
#endif | |
// Where do you NOT want to go today? | |
#undef min | |
#undef max | |
// inline.... | |
#define SA_INLINE __forceinline | |
// shADER aSSEMBLER | |
namespace sha | |
{ | |
// Shorthand for "const DWORD"; used for by-value parameters and return values
// throughout the assembler.
typedef DWORD const CDWORD;
// Two kinds of register modification:
//
// Masking: destination registers (o*, rn)
// ---
// Selects which components of the register are written when moving from source to dest.
// mov r0.x, r1 -- only writes .x
// mov r0.xw, r1 -- only writes .x and .w
//
// Swizzling: source registers (vn, cn, rn)
// ---
// Selects which source component feeds each destination component.
// mov r0, r1.xywz -- moves r1.w into r0.z and r1.z into r0.w
// mov r0, r1.wxyz -- rotates all components right by 1
// mov r0, r1.xxxx -- moves r1.x into all (unmasked) components of r0
// Individual register components | |
enum Component | |
{ | |
x = D3DSP_WRITEMASK_0, | |
y = D3DSP_WRITEMASK_1, | |
z = D3DSP_WRITEMASK_2, | |
w = D3DSP_WRITEMASK_3, | |
all = x | y | z | w | |
}; | |
// Destination parameter modifiers; OR one into a destination operand.
enum DestMod
{
    saturate = D3DSPDM_SATURATE
};
// Source parameter modifiers | |
enum SrcMod | |
{ | |
neg = D3DSPSM_NEG, // -r0 | |
bias = D3DSPSM_BIAS, // r0_bias | |
biasneg = D3DSPSM_BIASNEG, // ??? | |
sign = D3DSPSM_SIGN, // ??? | |
signneg = D3DSPSM_SIGNNEG, // ??? | |
comp = D3DSPSM_COMP, // 1-r0 | |
x2 = D3DSPSM_X2, // r0_x2 (ps.1.4) | |
bx2 = D3DSPSM_SIGN, // r0_bx2 | |
dz = D3DSPSM_DZ, // ps.1.4 | |
dw = D3DSPSM_DW // ps.1.4 | |
}; | |
// Converts a single-component D3DSP_WRITEMASK_n value into the matching
// swizzle index (x -> 0, y -> 1, z -> 2, w -> 3) without branching.
//
// The mask bits are read from bit 16 upwards. Each set bit contributes its own
// index to the sum, so the result is only meaningful when exactly one
// component bit is set (e.g. passing `all` yields 0 + 1 + 2 + 3 = 6).
SA_INLINE CDWORD MaskToSwizzle(CDWORD mask)
{
    CDWORD bits = mask >> 16;

    // The x bit would contribute index 0, so it drops out of the sum.
    CDWORD from_y = ((bits >> 1) & 1) * 1;
    CDWORD from_z = ((bits >> 2) & 1) * 2;
    CDWORD from_w = ((bits >> 3) & 1) * 3;

    return (from_y + from_z + from_w);
}
struct ComponentStruct | |
{ | |
SA_INLINE ComponentStruct(CDWORD _op) : | |
op(_op), x(op | sha::x), y(op | sha::y), z(op | sha::z), w(op | sha::w) { } | |
SA_INLINE operator CDWORD (void) const { return (op); } | |
CDWORD op; | |
CDWORD x; | |
CDWORD y; | |
CDWORD z; | |
CDWORD w; | |
}; | |
// Class for mapping registers and their indices to a register op | |
// This is quite cool because it allows any number of any registers (eg. GF3/4 R8500 differences) | |
template <int REG> | |
struct RegHerring | |
{ | |
SA_INLINE ComponentStruct operator [] (CDWORD index) const { return (REG | index); } | |
SA_INLINE operator CDWORD (void) const { return (REG); } | |
}; | |
// Builds a source swizzle code from four component selectors: a feeds the
// first destination component, b the second, c the third, d the fourth.
// Bit of a cheat, because it relies on the bit layout behind D3DVS_X_X and
// friends, which theoretically *could* change in a later version of DX.
//
// The high bit is set in the result so MakeSource() can tell that the user
// supplied a swizzle explicitly.
SA_INLINE CDWORD s(const Component a, const Component b, const Component c, const Component d)
{
    CDWORD sel0 = MaskToSwizzle(a) << (D3DVS_SWIZZLE_SHIFT + 0);
    CDWORD sel1 = MaskToSwizzle(b) << (D3DVS_SWIZZLE_SHIFT + 2);
    CDWORD sel2 = MaskToSwizzle(c) << (D3DVS_SWIZZLE_SHIFT + 4);
    CDWORD sel3 = MaskToSwizzle(d) << (D3DVS_SWIZZLE_SHIFT + 6);

    return (sel0 | sel1 | sel2 | sel3 | 0x80000000);
}

// Shorthand overloads for partially specified swizzles: the last selector
// given is repeated for the remaining components.
SA_INLINE CDWORD s(const Component a)
{
    return s(a, a, a, a);
}

SA_INLINE CDWORD s(const Component a, const Component b)
{
    return s(a, b, b, b);
}

SA_INLINE CDWORD s(const Component a, const Component b, Component c)
{
    return s(a, b, c, c);
}
// Register files available to shader code; index with [] to pick a register
// (r[0], c[12], oT[1], ...) or use bare where no index is needed (oPos).
// Both vertex shader and pixel shader files are listed here (t is the texture
// register file).
//
// Each object is explicitly value-initialised: RegHerring has no user-provided
// constructor, and some compilers reject a const object of such a type that is
// declared without an initialiser.
static const RegHerring<D3DSPR_TEMP>      r    = RegHerring<D3DSPR_TEMP>();
static const RegHerring<D3DSPR_INPUT>     v    = RegHerring<D3DSPR_INPUT>();
static const RegHerring<D3DSPR_CONST>     c    = RegHerring<D3DSPR_CONST>();
static const RegHerring<D3DSPR_ADDR>      a    = RegHerring<D3DSPR_ADDR>();
static const RegHerring<D3DSPR_TEXTURE>   t    = RegHerring<D3DSPR_TEXTURE>();
static const RegHerring<D3DSPR_ATTROUT>   oD   = RegHerring<D3DSPR_ATTROUT>();
static const RegHerring<D3DSPR_RASTOUT>   oPos = RegHerring<D3DSPR_RASTOUT>();
static const RegHerring<D3DSPR_TEXCRDOUT> oT   = RegHerring<D3DSPR_TEXCRDOUT>();
// Finalises a destination operand token: if the caller set no write-mask bits
// the mask defaults to all components, and the high bit is always set.
SA_INLINE CDWORD MakeDest(CDWORD d)
{
    CDWORD masked = (d & all) ? d : (d | all);
    return (masked | 0x80000000);
}
// Finalises a source operand token. A set high bit means the user already
// applied a swizzle via s(), so the token is passed through untouched;
// otherwise the "no swizzle" code is ORed in along with the high bit.
SA_INLINE CDWORD MakeSource(CDWORD d)
{
    if (d & 0x80000000)
    {
        return d;
    }

    return (d | D3DVS_NOSWIZZLE | 0x80000000);
}
extern DWORD do_coissue; | |
SA_INLINE void coissue(void) | |
{ | |
do_coissue = D3DSI_COISSUE; | |
} | |
// Relative-addressing flag for register indices. It can be added to the index
// (c[addr0 + 4]); adding it is the same as a bitwise OR.
static const int addr0 = D3DVS_ADDRMODE_RELATIVE;
// =========================================================================================== | |
// Functions which automate the tedious process of constructing opcodes | |
extern DWORD nb_instructions; | |
SA_INLINE void opcode0(DWORD *& ptr, CDWORD opcode) | |
{ | |
*ptr++ = opcode | do_coissue; | |
nb_instructions++; | |
do_coissue = 0; | |
} | |
// Emits an instruction whose single operand is a destination.
// Writes two tokens at ptr and advances ptr past them. The pending co-issue
// flag is merged into the opcode token and then cleared; nb_instructions is bumped.
SA_INLINE void opcode1_dst(DWORD *& ptr, CDWORD opcode, CDWORD dst)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);

    nb_instructions++;
    do_coissue = 0;
}
// Emits an instruction whose single operand is a source.
// Writes two tokens at ptr and advances ptr past them. The pending co-issue
// flag is merged into the opcode token and then cleared; nb_instructions is bumped.
SA_INLINE void opcode1_src(DWORD *& ptr, CDWORD opcode, CDWORD src)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeSource(src);

    nb_instructions++;
    do_coissue = 0;
}
// Emits an instruction with a destination and one source.
// Writes three tokens at ptr and advances ptr past them. The pending co-issue
// flag is merged into the opcode token and then cleared; nb_instructions is bumped.
SA_INLINE void opcode2(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src);

    nb_instructions++;
    do_coissue = 0;
}
// Emits an instruction with a destination and two sources.
// Writes four tokens at ptr and advances ptr past them. The pending co-issue
// flag is merged into the opcode token and then cleared; nb_instructions is bumped.
SA_INLINE void opcode3(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src_a);
    *ptr++ = MakeSource(src_b);

    nb_instructions++;
    do_coissue = 0;
}
// Emits an instruction with a destination and three sources.
// Writes five tokens at ptr and advances ptr past them. The pending co-issue
// flag is merged into the opcode token and then cleared; nb_instructions is bumped.
SA_INLINE void opcode4(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src_a);
    *ptr++ = MakeSource(src_b);
    *ptr++ = MakeSource(src_c);

    nb_instructions++;
    do_coissue = 0;
}
// Emits an instruction with a destination and four sources.
// Writes six tokens at ptr and advances ptr past them. The pending co-issue
// flag is merged into the opcode token and then cleared; nb_instructions is bumped.
// Note that all four trailing operands are run through MakeSource().
SA_INLINE void opcode5(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c, CDWORD src_d)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src_a);
    *ptr++ = MakeSource(src_b);
    *ptr++ = MakeSource(src_c);
    *ptr++ = MakeSource(src_d);

    nb_instructions++;
    do_coissue = 0;
}
// =========================================================================================== | |
// Within namespace sha | |
extern DWORD* sptr; | |
SA_INLINE void BeginVS(DWORD * ptr) | |
{ | |
// All shaders begin with a version number | |
sptr = ptr; | |
nb_instructions = 0; | |
*sptr++ = D3DVS_VERSION(1, 1); | |
} | |
// Finishes the vertex shader: writes the end token at the current position
// and clears the write cursor.
SA_INLINE void EndVS(void)
{
    *sptr = D3DVS_END();
    sptr = 0;
}
// Starts assembling a pixel shader into the caller-supplied buffer.
// Resets the instruction count and writes the ps.1.1 version token. The
// buffer size is not known here, so the caller must provide enough room.
SA_INLINE void BeginPS(DWORD* ptr)
{
    sptr = ptr;
    nb_instructions = 0;

    *sptr = D3DPS_VERSION(1, 1);
    ++sptr;
}
// Finishes the pixel shader: writes the end token at the current position
// and clears the write cursor.
SA_INLINE void EndPS(void)
{
    *sptr = D3DPS_END();
    sptr = 0;
}
// Defines one wrapper `name(dst, src_a, src_b)` that emits `op` with `mod`
// ORed into the destination operand.
#define MATHOP_3_SINGLE(name, op, mod)                                              \
    __forceinline void name(CDWORD dst, CDWORD src_a, CDWORD src_b)                 \
    {                                                                               \
        opcode3(sptr, op, dst | (mod), src_a, src_b);                               \
    }

// Same as above for instructions taking three sources.
#define MATHOP_4_SINGLE(name, op, mod)                                              \
    __forceinline void name(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)   \
    {                                                                               \
        opcode4(sptr, op, dst | (mod), src_a, src_b, src_c);                        \
    }

// Defines the whole family for one arithmetic instruction: name, name_x2,
// name_x4, name_sat, name_x2_sat and name_x4_sat. `c` is the total operand
// count (3 or 4) and selects which MATHOP_n_SINGLE is used. The _x2/_x4
// variants place 1 or 2 in the destination shift field; _sat adds
// D3DSPDM_SATURATE.
#define MATHOP(name, c, op)                                                                     \
    MATHOP_##c##_SINGLE(name, op, 0)                                                            \
    MATHOP_##c##_SINGLE(name##_##x2, op, (1 << D3DSP_DSTSHIFT_SHIFT))                           \
    MATHOP_##c##_SINGLE(name##_##x4, op, (2 << D3DSP_DSTSHIFT_SHIFT))                           \
    MATHOP_##c##_SINGLE(name##_##sat, op, D3DSPDM_SATURATE)                                     \
    MATHOP_##c##_SINGLE(name##_##x2_sat, op, ((1 << D3DSP_DSTSHIFT_SHIFT) | D3DSPDM_SATURATE))  \
    MATHOP_##c##_SINGLE(name##_##x4_sat, op, ((2 << D3DSP_DSTSHIFT_SHIFT) | D3DSPDM_SATURATE))

// Arithmetic instructions (min/max are why the Windows macros of the same
// name have to be #undef'd before this point)
MATHOP(mul, 3, D3DSIO_MUL)
MATHOP(dp3, 3, D3DSIO_DP3)
MATHOP(dp4, 3, D3DSIO_DP4)
MATHOP(min, 3, D3DSIO_MIN)
MATHOP(max, 3, D3DSIO_MAX)
MATHOP(add, 3, D3DSIO_ADD)
MATHOP(sub, 3, D3DSIO_SUB)
MATHOP(mad, 4, D3DSIO_MAD)
// nop -- appends D3DSIO_NOP to the shader being assembled
SA_INLINE void nop(void)
{
    opcode0(sptr, D3DSIO_NOP);
}
// mov dst, src -- appends D3DSIO_MOV to the shader being assembled
SA_INLINE void mov(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_MOV, dst, src);
}
// rcp dst, src -- appends D3DSIO_RCP to the shader being assembled
SA_INLINE void rcp(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_RCP, dst, src);
}
// rsq dst, src -- appends D3DSIO_RSQ to the shader being assembled
SA_INLINE void rsq(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_RSQ, dst, src);
}
// slt dst, src_a, src_b -- appends D3DSIO_SLT to the shader being assembled
SA_INLINE void slt(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_SLT, dst, src_a, src_b);
}
// sge dst, src_a, src_b -- appends D3DSIO_SGE to the shader being assembled
SA_INLINE void sge(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_SGE, dst, src_a, src_b);
}
// exp dst, src -- appends D3DSIO_EXP to the shader being assembled
SA_INLINE void exp(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_EXP, dst, src);
}
// log dst, src -- appends D3DSIO_LOG to the shader being assembled
SA_INLINE void log(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_LOG, dst, src);
}
// lit dst, src -- appends D3DSIO_LIT to the shader being assembled
SA_INLINE void lit(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_LIT, dst, src);
}
// dst dst, src_a, src_b -- appends D3DSIO_DST to the shader being assembled.
// The first parameter shares the function's name; inside the body `dst`
// refers to the parameter.
SA_INLINE void dst(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_DST, dst, src_a, src_b);
}
// lrp dst, src_a, src_b, src_c -- appends D3DSIO_LRP to the shader being assembled
SA_INLINE void lrp(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
    opcode4(sptr, D3DSIO_LRP, dst, src_a, src_b, src_c);
}
// frc dst, src -- appends D3DSIO_FRC to the shader being assembled
SA_INLINE void frc(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_FRC, dst, src);
}
// m4x4 dst, src_a, src_b -- appends D3DSIO_M4x4 to the shader being assembled
SA_INLINE void m4x4(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_M4x4, dst, src_a, src_b);
}
// m4x3 dst, src_a, src_b -- appends D3DSIO_M4x3 to the shader being assembled
SA_INLINE void m4x3(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_M4x3, dst, src_a, src_b);
}
// m3x4 dst, src_a, src_b -- appends D3DSIO_M3x4 to the shader being assembled
SA_INLINE void m3x4(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_M3x4, dst, src_a, src_b);
}
// m3x3 dst, src_a, src_b -- appends D3DSIO_M3x3 to the shader being assembled
SA_INLINE void m3x3(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_M3x3, dst, src_a, src_b);
}
// m3x2 dst, src_a, src_b -- appends D3DSIO_M3x2 to the shader being assembled
SA_INLINE void m3x2(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_M3x2, dst, src_a, src_b);
}
// texcoord dst, src -- appends D3DSIO_TEXCOORD to the shader being assembled
SA_INLINE void texcoord(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXCOORD, dst, src);
}
// texkill dst -- appends D3DSIO_TEXKILL; the operand is encoded as a destination
SA_INLINE void texkill(CDWORD dst)
{
    opcode1_dst(sptr, D3DSIO_TEXKILL, dst);
}
// tex dst -- appends D3DSIO_TEX; the operand is encoded as a destination
SA_INLINE void tex(CDWORD dst)
{
    opcode1_dst(sptr, D3DSIO_TEX, dst);
}
// texbem dst, src -- appends D3DSIO_TEXBEM to the shader being assembled
SA_INLINE void texbem(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXBEM, dst, src);
}
// texbeml dst, src -- appends D3DSIO_TEXBEML to the shader being assembled
SA_INLINE void texbeml(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXBEML, dst, src);
}
// texreg2ar dst, src -- appends D3DSIO_TEXREG2AR to the shader being assembled
SA_INLINE void texreg2ar(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXREG2AR, dst, src);
}
// texreg2gb dst, src -- appends D3DSIO_TEXREG2GB to the shader being assembled
SA_INLINE void texreg2gb(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXREG2GB, dst, src);
}
// texm3x2pad dst, src -- appends D3DSIO_TEXM3x2PAD to the shader being assembled
SA_INLINE void texm3x2pad(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x2PAD, dst, src);
}
// texm3x2tex dst, src -- appends D3DSIO_TEXM3x2TEX to the shader being assembled
SA_INLINE void texm3x2tex(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x2TEX, dst, src);
}
// texm3x3pad dst, src -- appends D3DSIO_TEXM3x3PAD to the shader being assembled
SA_INLINE void texm3x3pad(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x3PAD, dst, src);
}
// texm3x3tex dst, src -- appends D3DSIO_TEXM3x3TEX to the shader being assembled
SA_INLINE void texm3x3tex(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x3TEX, dst, src);
}
// texm3x3spec dst, src_a, src_b -- appends D3DSIO_TEXM3x3SPEC to the shader being assembled
SA_INLINE void texm3x3spec(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    opcode3(sptr, D3DSIO_TEXM3x3SPEC, dst, src_a, src_b);
}
// texm3x3vspec dst, src -- appends D3DSIO_TEXM3x3VSPEC to the shader being assembled
SA_INLINE void texm3x3vspec(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x3VSPEC, dst, src);
}
// cnd dst, src_a, src_b, src_c -- appends D3DSIO_CND to the shader being assembled.
//
// cnd takes three source operands. This wrapper used to forward src_d in
// place of src_c, silently dropping the third operand; it now forwards
// src_a, src_b and src_c.
//
// src_d is not part of the instruction. The parameter is kept (now optional
// and ignored) only so existing five-argument calls still compile.
SA_INLINE void cnd(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c, CDWORD src_d = 0)
{
    (void)src_d;
    opcode4(sptr, D3DSIO_CND, dst, src_a, src_b, src_c);
}
// def dst, src_a, src_b, src_c, src_d -- appends D3DSIO_DEF, defining the
// constant register dst from four literal values.
//
// src_a..src_d are the raw 32-bit patterns of the four values and are written
// to the token stream unmodified. They must NOT go through MakeSource(): that
// ORs swizzle bits and the high bit into any value whose top bit is clear,
// which corrupts the constant (every non-negative float, for instance).
//
// The opcode and destination tokens are emitted via opcode1_dst(), so the
// co-issue flag and nb_instructions are handled exactly as for any other
// instruction.
SA_INLINE void def(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c, CDWORD src_d)
{
    opcode1_dst(sptr, D3DSIO_DEF, dst);

    // Literal payload: four DWORDs straight after the destination token
    *sptr++ = src_a;
    *sptr++ = src_b;
    *sptr++ = src_c;
    *sptr++ = src_d;
}
// texreg2rgb dst, src -- appends D3DSIO_TEXREG2RGB to the shader being assembled
SA_INLINE void texreg2rgb(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXREG2RGB, dst, src);
}
// texdp3tex dst, src -- appends D3DSIO_TEXDP3TEX to the shader being assembled
SA_INLINE void texdp3tex(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXDP3TEX, dst, src);
}
// texm3x2depth dst, src -- appends D3DSIO_TEXM3x2DEPTH to the shader being assembled
SA_INLINE void texm3x2depth(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x2DEPTH, dst, src);
}
// texdp3 dst, src -- appends D3DSIO_TEXDP3 to the shader being assembled
SA_INLINE void texdp3(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXDP3, dst, src);
}
// texm3x3 dst, src -- appends D3DSIO_TEXM3x3 to the shader being assembled
SA_INLINE void texm3x3(CDWORD dst, CDWORD src)
{
    opcode2(sptr, D3DSIO_TEXM3x3, dst, src);
}
// cmp dst, src_a, src_b, src_c -- appends D3DSIO_CMP to the shader being assembled
SA_INLINE void cmp(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
    opcode4(sptr, D3DSIO_CMP, dst, src_a, src_b, src_c);
}
}; // End namespace sha | |
// Don't need it anymore | |
#undef SA_INLINE | |
#endif /* _INCLUDED_ASSEMBLER_H */ | |
void test(void) | |
{ | |
using namespace sha; | |
static const int VS_PROJECTION = 0; | |
static const int VS_LIGHTVIEWTRANSFORM = 17; | |
static const int VS_VIEWORIENTATION = 21; | |
// Stick this somewhere | |
static DWORD scratchpad[512]; | |
DWORD* ptr = scratchpad; | |
// transform into screen space | |
m4x4(ptr, oPos, v[0], c[VS_PROJECTION]); | |
// copy the normal map texcoord to t0 | |
mov(ptr, oT[0], v[3]); | |
// compute the relative light position in view space | |
m4x4(ptr, r[0], v[0], c[VS_LIGHTVIEWTRANSFORM]); | |
// generate the binormal to complete the tangent space basis | |
mov(ptr, r[1], v[1]); | |
mul(ptr, r[2], v[2] | s(y, z, x), r[1] | s(z, x, y)); | |
mad(ptr, r[3], v[2] | s(z, x, y), r[1] | s(y, z, x), r[2] | neg); | |
// store the basis in t1, t2, t3 from TSB in v2, r3, r1 | |
dp3(ptr, oT[1] | x, v[2], c[VS_VIEWORIENTATION]); | |
dp3(ptr, oT[1] | y, r[3], c[VS_VIEWORIENTATION]); | |
dp3(ptr, oT[1] | z, r[1], c[VS_VIEWORIENTATION]); | |
dp3(ptr, oT[2] | x, v[2], c[VS_VIEWORIENTATION + 1]); | |
dp3(ptr, oT[2] | y, r[3], c[VS_VIEWORIENTATION + 1]); | |
dp3(ptr, oT[2] | z, r[1], c[VS_VIEWORIENTATION + 1]); | |
dp3(ptr, oT[3] | x, v[2], c[VS_VIEWORIENTATION + 2]); | |
dp3(ptr, oT[3] | y, r[3], c[VS_VIEWORIENTATION + 2]); | |
dp3(ptr, oT[3] | z, r[1], c[VS_VIEWORIENTATION + 2]); | |
// store the light direction in t1.w, t2.w, t3.w | |
mov(ptr, oT[1] | w, r[0] | s(x)); | |
mov(ptr, oT[2] | w, r[0] | s(y)); | |
mov(ptr, oT[3] | w, r[0] | s(z)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment