Skip to content

Instantly share code, notes, and snippets.

@dwilliamson
Last active November 19, 2016 21:08
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dwilliamson/2837611 to your computer and use it in GitHub Desktop.
Save dwilliamson/2837611 to your computer and use it in GitHub Desktop.
Compile-time shader assembler from way back (D3D9, vs/ps 1.x). Compiles down to simply writing calculated opcodes to an array.
// COMPILE-TIME SHADER ASSEMBLER
#ifndef _INCLUDED_ASSEMBLER_H
#define _INCLUDED_ASSEMBLER_H
#ifndef _INCLUDED_D3D_H
#include "D3D.h"
#endif
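// Where do you NOT want to go today?
// Drop any min/max macros (presumably the ones the Windows headers define) so that
// the min() and max() instruction wrappers can be declared further down.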
#undef min
#undef max
// inline....
#define SA_INLINE __forceinline
// shADER aSSEMBLER
namespace sha
{
// Just got annoying typing "const DWORD" all over the place.
// Used for parameters and return values throughout; on a by-value return the const
// has no effect on callers, it is purely shorthand.
typedef const DWORD CDWORD;
// Two types of modification:
// Masking: destination, o*, rn
// ---
// Allows you to select which parts of the register get transferred from source to dest.
// mov r0.x, r1 -- only moves .x
// mov r0.xw, r1 -- only moves .xw
//
// Swizzling: source, vn, cn, rn
// ---
// Defines where parts move from source to dest.
// mov r0, r1.xywz -- moves r1.w into r0.z and r1.z into r0.w
// mov r0, r1.wxyz -- rotates all components right by 1
// mov r0, r1.xxxx -- moves r1.x into all (unmasked) components of r0
// Individual register components.
// Each enumerator is the matching D3DSP_WRITEMASK_n bit, so a component can be OR'd
// straight onto a destination register token (e.g. oT[1] | x) and is also what
// s(...) accepts to describe a swizzle.
enum Component
{
x = D3DSP_WRITEMASK_0,
y = D3DSP_WRITEMASK_1,
z = D3DSP_WRITEMASK_2,
w = D3DSP_WRITEMASK_3,
// Union of the four bits above; MakeDest tests against this to see whether a
// write mask has been supplied
all = x | y | z | w
};
// Destination parameter modifiers, OR'd onto a destination register token.
// The MATHOP-generated *_sat wrappers apply D3DSPDM_SATURATE directly.
enum DestMod { saturate = D3DSPDM_SATURATE };
// Source parameter modifiers, OR'd onto a source register token (e.g. r[2] | neg)
enum SrcMod
{
neg = D3DSPSM_NEG, // -r0
bias = D3DSPSM_BIAS, // r0_bias
biasneg = D3DSPSM_BIASNEG, // presumably -r0_bias -- TODO confirm against the D3D headers
sign = D3DSPSM_SIGN, // same value as bx2 below
signneg = D3DSPSM_SIGNNEG, // presumably -r0_bx2 -- TODO confirm against the D3D headers
comp = D3DSPSM_COMP, // 1-r0
x2 = D3DSPSM_X2, // r0_x2 (ps.1.4)
bx2 = D3DSPSM_SIGN, // r0_bx2 (alias of sign: both are D3DSPSM_SIGN)
dz = D3DSPSM_DZ, // ps.1.4
dw = D3DSPSM_DW // ps.1.4
};
// Maps a single-component write mask (x, y, z or w) to its 2-bit swizzle selector
// 0, 1, 2 or 3 using arithmetic only -- no branches, so it folds at compile time.
// The mask bits are expected at bit 16 and up. If more than one component bit is
// set the selectors are simply summed, which is not a meaningful swizzle index.
SA_INLINE CDWORD MaskToSwizzle(CDWORD mask)
{
    // Bring the four component bits down to bits 0..3
    CDWORD bits = (mask >> 16) & 0xF;

    // x (bit 0) selects 0 and therefore adds nothing; y, z, w add 1, 2, 3
    CDWORD from_y = ((bits >> 1) & 1) * 1;
    CDWORD from_z = ((bits >> 2) & 1) * 2;
    CDWORD from_w = ((bits >> 3) & 1) * 3;

    return (from_y + from_z + from_w);
}
// A register token bundled with four pre-masked variants of itself, so that both
// r[0] and r[0].x can be handed to an instruction wrapper.
struct ComponentStruct
{
    // Deliberately implicit: RegHerring::operator[] returns a plain DWORD expression
    // and relies on this conversion.
    SA_INLINE ComponentStruct(CDWORD token)
        : op(token)
        , x(token | sha::x)
        , y(token | sha::y)
        , z(token | sha::z)
        , w(token | sha::w)
    {
    }

    // Decays to the token exactly as it was passed in (no write mask added)
    SA_INLINE operator CDWORD (void) const
    {
        return op;
    }

    CDWORD op;  // token as given to the constructor
    CDWORD x;   // op with only the x write-mask bit added
    CDWORD y;   // op with only the y write-mask bit added
    CDWORD z;   // op with only the z write-mask bit added
    CDWORD w;   // op with only the w write-mask bit added
};
// Maps a register type plus an index onto a register token.
// REG is the register-type bits; indexing ORs the register number in, so any number
// of registers of any type can be addressed (eg. GF3/4 vs R8500 differences).
template <int REG>
struct RegHerring
{
    // r[3], c[17], ... -> register type combined with the register number
    SA_INLINE ComponentStruct operator [] (CDWORD index) const
    {
        return ComponentStruct(REG | index);
    }

    // Unindexed use (e.g. oPos) yields the bare register type, i.e. register 0
    SA_INLINE operator CDWORD (void) const
    {
        return REG;
    }
};
// Builds a source swizzle code from four components: a feeds .x, b feeds .y,
// c feeds .z and d feeds .w of the operand.
// Bit of a cheat: this relies on the 2-bits-per-selector layout starting at
// D3DVS_SWIZZLE_SHIFT (as used by D3DVS_X_X and friends), which theoretically
// *could* change in a later version of DX.
SA_INLINE CDWORD s(const Component a, const Component b, const Component c, const Component d)
{
    CDWORD sel_x = MaskToSwizzle(a) << (D3DVS_SWIZZLE_SHIFT + 0);
    CDWORD sel_y = MaskToSwizzle(b) << (D3DVS_SWIZZLE_SHIFT + 2);
    CDWORD sel_z = MaskToSwizzle(c) << (D3DVS_SWIZZLE_SHIFT + 4);
    CDWORD sel_w = MaskToSwizzle(d) << (D3DVS_SWIZZLE_SHIFT + 6);

    // The top bit doubles as the "swizzle already chosen" flag that MakeSource checks
    return (sel_x | sel_y | sel_z | sel_w | 0x80000000);
}

// Overloads taking a partially specified swizzle: the last component given is
// repeated to fill the remaining slots, so s(x) == s(x, x, x, x) and
// s(y, z, x) == s(y, z, x, x).
SA_INLINE CDWORD s(const Component a)
{
    return s(a, a, a, a);
}

SA_INLINE CDWORD s(const Component a, const Component b)
{
    return s(a, b, b, b);
}

SA_INLINE CDWORD s(const Component a, const Component b, const Component c)
{
    return s(a, b, c, c);
}
// Register banks, one object per D3DSPR_* register type.
// Index with [] to pick a register (c[17], oT[1]); an unindexed bank (oPos) converts
// to the bare register type. The list is not vertex-shader only: t wraps
// D3DSPR_TEXTURE, which the tex* wrappers below take as an operand.
// NOTE(review): these are 'static' objects in a header, so every translation unit
// that includes it gets its own copy; they hold no data, so this is harmless.
static const RegHerring<D3DSPR_TEMP> r;
static const RegHerring<D3DSPR_INPUT> v;
static const RegHerring<D3DSPR_CONST> c;
static const RegHerring<D3DSPR_ADDR> a;
static const RegHerring<D3DSPR_TEXTURE> t;
static const RegHerring<D3DSPR_ATTROUT> oD;
static const RegHerring<D3DSPR_RASTOUT> oPos;
static const RegHerring<D3DSPR_TEXCRDOUT> oT;
// Finalises a destination parameter token: the high bit is always set, and when the
// caller supplied no write mask at all the full xyzw mask is filled in.
SA_INLINE CDWORD MakeDest(CDWORD d)
{
    // At least one component bit present -> keep the caller's mask untouched
    if ((d & all) != 0)
        return (d | 0x80000000);

    // No mask given -> write every component
    return (d | all | 0x80000000);
}
// Finalises a source parameter token.
// s(...) marks its result with the high bit, so a token that already carries that
// bit is passed through untouched; otherwise the identity swizzle
// (D3DVS_NOSWIZZLE) and the high bit are OR'd in.
SA_INLINE CDWORD MakeSource(CDWORD d)
{
    if ((d & 0x80000000) != 0)
        return d;

    return (d | D3DVS_NOSWIZZLE | 0x80000000);
}
// Pending co-issue flag. Declared here only; the definition must live in exactly one
// source file. Every opcodeN helper ORs it into the opcode token it writes and then
// resets it to 0.
extern DWORD do_coissue;
// Arms the co-issue flag so that the next instruction emitted (and only that one)
// carries D3DSI_COISSUE in its opcode token.
SA_INLINE void coissue(void)
{
do_coissue = D3DSI_COISSUE;
}
// Relative-addressing flag (D3DVS_ADDRMODE_RELATIVE) for use inside a register index.
// Adding this to a register addressing is the same as a bitwise OR, because
// RegHerring::operator[] ORs the whole index expression into the register token.
static const int addr0 = D3DVS_ADDRMODE_RELATIVE;
// ===========================================================================================
// Functions which automate the tedious process of constructing opcodes
//
// Each opcodeN helper writes one instruction at ptr, advances ptr past it, bumps
// nb_instructions and clears the pending co-issue flag.

// Running count of instructions emitted since the last BeginVS/BeginPS
// (declaration only; defined elsewhere)
extern DWORD nb_instructions;

// Emits an instruction with no operands: a single opcode token.
SA_INLINE void opcode0(DWORD *& ptr, CDWORD opcode)
{
    ptr[0] = opcode | do_coissue;
    ptr += 1;

    ++nb_instructions;
    do_coissue = 0;
}
// Emits an instruction whose only operand is a destination register:
// opcode token + destination token (2 DWORDs). ptr is advanced past both.
SA_INLINE void opcode1_dst(DWORD *& ptr, CDWORD opcode, CDWORD dst)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);

    ++nb_instructions;
    do_coissue = 0;
}
// Emits an instruction whose only operand is a source register:
// opcode token + source token (2 DWORDs). ptr is advanced past both.
SA_INLINE void opcode1_src(DWORD *& ptr, CDWORD opcode, CDWORD src)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeSource(src);

    ++nb_instructions;
    do_coissue = 0;
}
// Emits "op dst, src": opcode token, destination token, one source token
// (3 DWORDs). ptr is advanced past all of them.
SA_INLINE void opcode2(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src);

    ++nb_instructions;
    do_coissue = 0;
}
// Emits "op dst, src_a, src_b": opcode token, destination token, two source tokens
// (4 DWORDs). ptr is advanced past all of them.
SA_INLINE void opcode3(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src_a, CDWORD src_b)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src_a);
    *ptr++ = MakeSource(src_b);

    ++nb_instructions;
    do_coissue = 0;
}
// Emits "op dst, src_a, src_b, src_c": opcode token, destination token, three source
// tokens (5 DWORDs). ptr is advanced past all of them.
SA_INLINE void opcode4(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src_a);
    *ptr++ = MakeSource(src_b);
    *ptr++ = MakeSource(src_c);

    ++nb_instructions;
    do_coissue = 0;
}
// Emits "op dst, src_a, src_b, src_c, src_d": opcode token, destination token, four
// source tokens (6 DWORDs). ptr is advanced past all of them.
// Every src_* goes through MakeSource, i.e. is treated as a register operand.
SA_INLINE void opcode5(DWORD *& ptr, CDWORD opcode, CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c, CDWORD src_d)
{
    *ptr++ = opcode | do_coissue;
    *ptr++ = MakeDest(dst);
    *ptr++ = MakeSource(src_a);
    *ptr++ = MakeSource(src_b);
    *ptr++ = MakeSource(src_c);
    *ptr++ = MakeSource(src_d);

    ++nb_instructions;
    do_coissue = 0;
}
// ===========================================================================================
// Within namespace sha

// Write cursor shared by every instruction wrapper below (declaration only; defined
// elsewhere). Set by BeginVS/BeginPS and cleared by EndVS/EndPS.
extern DWORD* sptr;

// Starts assembling a vertex shader into the caller's buffer: resets the
// instruction counter and writes the vs.1.1 version token that every shader
// begins with. The buffer must be large enough for everything emitted until EndVS.
SA_INLINE void BeginVS(DWORD * ptr)
{
    nb_instructions = 0;

    ptr[0] = D3DVS_VERSION(1, 1);
    sptr = ptr + 1;
}
// Finishes the current vertex shader: writes the end token and clears the write
// cursor, so a later emit without a new Begin* call goes through a null pointer.
SA_INLINE void EndVS(void)
{
    sptr[0] = D3DVS_END();
    sptr = 0;
}
// Starts assembling a pixel shader into the caller's buffer: resets the instruction
// counter and writes the ps.1.1 version token. The buffer must be large enough for
// everything emitted until EndPS.
SA_INLINE void BeginPS(DWORD* ptr)
{
    nb_instructions = 0;

    ptr[0] = D3DPS_VERSION(1, 1);
    sptr = ptr + 1;
}
// Finishes the current pixel shader: writes the end token and clears the write
// cursor, so a later emit without a new Begin* call goes through a null pointer.
SA_INLINE void EndPS(void)
{
    sptr[0] = D3DPS_END();
    sptr = 0;
}
// MATHOP_3_SINGLE(name, op, mod)
// Defines "void name(dst, src_a, src_b)" which emits opcode 'op' through opcode3 with
// 'mod' OR'd onto the destination token.
// NOTE(review): spelled __forceinline rather than SA_INLINE, unlike the rest of the file.
#define MATHOP_3_SINGLE(name, op, mod) \
__forceinline void name(CDWORD dst, CDWORD src_a, CDWORD src_b) \
{ \
opcode3(sptr, op, dst | mod, src_a, src_b); \
}
// MATHOP_4_SINGLE(name, op, mod)
// Same as MATHOP_3_SINGLE but for three sources, emitted through opcode4.
#define MATHOP_4_SINGLE(name, op, mod) \
__forceinline void name(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c) \
{ \
opcode4(sptr, op, dst | mod, src_a, src_b, src_c); \
}
// MATHOP(name, c, op)
// Stamps out six wrappers for one instruction: name, name_x2, name_x4, name_sat,
// name_x2_sat and name_x4_sat. 'c' (3 or 4) picks MATHOP_3_SINGLE or MATHOP_4_SINGLE.
// The _x2/_x4 variants put 1 or 2 into the destination shift field
// (D3DSP_DSTSHIFT_SHIFT); the _sat variants add D3DSPDM_SATURATE.
// NOTE(review): unlike SA_INLINE, these three macros are never #undef'd in the
// visible source, so they remain defined for whoever includes this header.
#define MATHOP(name, c, op) \
MATHOP_##c##_SINGLE(name, op, 0) \
MATHOP_##c##_SINGLE(name##_##x2, op, (1 << D3DSP_DSTSHIFT_SHIFT)) \
MATHOP_##c##_SINGLE(name##_##x4, op, (2 << D3DSP_DSTSHIFT_SHIFT)) \
MATHOP_##c##_SINGLE(name##_##sat, op, D3DSPDM_SATURATE) \
MATHOP_##c##_SINGLE(name##_##x2_sat, op, ((1 << D3DSP_DSTSHIFT_SHIFT) | D3DSPDM_SATURATE)) \
MATHOP_##c##_SINGLE(name##_##x4_sat, op, ((2 << D3DSP_DSTSHIFT_SHIFT) | D3DSPDM_SATURATE))
// Instantiations. Each line expands to six function definitions; the trailing ';'
// is just an empty declaration. min and max only work as names here because the
// macros of the same name were #undef'd at the top of the file.
MATHOP(mul, 3, D3DSIO_MUL);
MATHOP(dp3, 3, D3DSIO_DP3);
MATHOP(dp4, 3, D3DSIO_DP4);
MATHOP(min, 3, D3DSIO_MIN);
MATHOP(max, 3, D3DSIO_MAX);
MATHOP(add, 3, D3DSIO_ADD);
MATHOP(sub, 3, D3DSIO_SUB);
MATHOP(mad, 4, D3DSIO_MAD);
// Instruction wrappers without destination-modifier variants.
// Each one appends a single instruction at the global write cursor sptr, so they are
// only valid between BeginVS/BeginPS and the matching End call. The comment on each
// wrapper gives the operand form it emits.

// nop
SA_INLINE void nop(void)
{
opcode0(sptr, D3DSIO_NOP);
}
// mov dst, src
SA_INLINE void mov(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_MOV, dst, src);
}
// rcp dst, src
SA_INLINE void rcp(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_RCP, dst, src);
}
// rsq dst, src
SA_INLINE void rsq(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_RSQ, dst, src);
}
// slt dst, src_a, src_b
SA_INLINE void slt(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_SLT, dst, src_a, src_b);
}
// sge dst, src_a, src_b
SA_INLINE void sge(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_SGE, dst, src_a, src_b);
}
// exp dst, src
SA_INLINE void exp(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_EXP, dst, src);
}
// log dst, src
SA_INLINE void log(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_LOG, dst, src);
}
// lit dst, src
SA_INLINE void lit(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_LIT, dst, src);
}
// dst dst, src_a, src_b (the first parameter shares the function's name; inside the
// body 'dst' refers to the parameter)
SA_INLINE void dst(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_DST, dst, src_a, src_b);
}
// lrp dst, src_a, src_b, src_c
SA_INLINE void lrp(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
opcode4(sptr, D3DSIO_LRP, dst, src_a, src_b, src_c);
}
// frc dst, src
SA_INLINE void frc(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_FRC, dst, src);
}
// m4x4 dst, src_a, src_b
SA_INLINE void m4x4(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_M4x4, dst, src_a, src_b);
}
// m4x3 dst, src_a, src_b
SA_INLINE void m4x3(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_M4x3, dst, src_a, src_b);
}
// m3x4 dst, src_a, src_b
SA_INLINE void m3x4(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_M3x4, dst, src_a, src_b);
}
// m3x3 dst, src_a, src_b
SA_INLINE void m3x3(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_M3x3, dst, src_a, src_b);
}
// m3x2 dst, src_a, src_b
SA_INLINE void m3x2(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_M3x2, dst, src_a, src_b);
}
// Texture instruction wrappers. Like the wrappers above, each appends one
// instruction at the global write cursor sptr and is only valid between a Begin*
// call and the matching End*.

// texcoord dst, src
SA_INLINE void texcoord(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXCOORD, dst, src);
}
// texkill dst (the single operand is encoded as a destination token)
SA_INLINE void texkill(CDWORD dst)
{
opcode1_dst(sptr, D3DSIO_TEXKILL, dst);
}
// tex dst
SA_INLINE void tex(CDWORD dst)
{
opcode1_dst(sptr, D3DSIO_TEX, dst);
}
// texbem dst, src
SA_INLINE void texbem(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXBEM, dst, src);
}
// texbeml dst, src
SA_INLINE void texbeml(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXBEML, dst, src);
}
// texreg2ar dst, src
SA_INLINE void texreg2ar(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXREG2AR, dst, src);
}
// texreg2gb dst, src
SA_INLINE void texreg2gb(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXREG2GB, dst, src);
}
// texm3x2pad dst, src
SA_INLINE void texm3x2pad(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x2PAD, dst, src);
}
// texm3x2tex dst, src
SA_INLINE void texm3x2tex(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x2TEX, dst, src);
}
// texm3x3pad dst, src
SA_INLINE void texm3x3pad(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x3PAD, dst, src);
}
// texm3x3tex dst, src
SA_INLINE void texm3x3tex(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x3TEX, dst, src);
}
// texm3x3spec dst, src_a, src_b
SA_INLINE void texm3x3spec(CDWORD dst, CDWORD src_a, CDWORD src_b)
{
opcode3(sptr, D3DSIO_TEXM3x3SPEC, dst, src_a, src_b);
}
// texm3x3vspec dst, src
SA_INLINE void texm3x3vspec(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x3VSPEC, dst, src);
}
// cnd dst, src_a, src_b, src_c
// The instruction has a destination and three sources, all register operands.
// Appends one instruction at the global write cursor sptr.
SA_INLINE void cnd(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
    opcode4(sptr, D3DSIO_CND, dst, src_a, src_b, src_c);
}

// Legacy five-operand form, kept only so existing callers still compile.
// It has always ignored its fourth argument and emitted (src_a, src_b, src_d) as the
// three sources; that behaviour is preserved exactly. New code should call the
// four-parameter overload above.
SA_INLINE void cnd(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD /* src_c: ignored */, CDWORD src_d)
{
    opcode4(sptr, D3DSIO_CND, dst, src_a, src_b, src_d);
}
// def dst, src_a, src_b, src_c, src_d
// Defines a constant register. src_a..src_d are NOT register operands: they are the
// raw 32-bit patterns of the four constant values (x, y, z, w) and are written to
// the token stream verbatim.
//
// They must not be passed through MakeSource (as opcode5 would do): MakeSource ORs
// D3DVS_NOSWIZZLE | 0x80000000 into any value whose top bit is clear, which would
// turn e.g. 1.0f (0x3F800000) into 0xBFE40000.
//
// Appends one instruction (6 DWORDs) at the global write cursor sptr. The
// instruction count and co-issue flag are handled exactly as in the opcodeN helpers.
SA_INLINE void def(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c, CDWORD src_d)
{
    sptr[0] = D3DSIO_DEF | do_coissue;
    sptr[1] = MakeDest(dst);

    // Literal payload, copied bit-for-bit
    sptr[2] = src_a;
    sptr[3] = src_b;
    sptr[4] = src_c;
    sptr[5] = src_d;
    sptr += 6;

    nb_instructions++;
    do_coissue = 0;
}
// Remaining pixel-shader instruction wrappers. Each appends one instruction at the
// global write cursor sptr and is only valid between a Begin* call and the matching
// End*.

// texreg2rgb dst, src
SA_INLINE void texreg2rgb(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXREG2RGB, dst, src);
}
// texdp3tex dst, src
SA_INLINE void texdp3tex(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXDP3TEX, dst, src);
}
// texm3x2depth dst, src
SA_INLINE void texm3x2depth(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x2DEPTH, dst, src);
}
// texdp3 dst, src
SA_INLINE void texdp3(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXDP3, dst, src);
}
// texm3x3 dst, src
SA_INLINE void texm3x3(CDWORD dst, CDWORD src)
{
opcode2(sptr, D3DSIO_TEXM3x3, dst, src);
}
// cmp dst, src_a, src_b, src_c
SA_INLINE void cmp(CDWORD dst, CDWORD src_a, CDWORD src_b, CDWORD src_c)
{
opcode4(sptr, D3DSIO_CMP, dst, src_a, src_b, src_c);
}
}; // End namespace sha
// Don't need it anymore
#undef SA_INLINE
#endif /* _INCLUDED_ASSEMBLER_H */
void test(void)
{
using namespace sha;
static const int VS_PROJECTION = 0;
static const int VS_LIGHTVIEWTRANSFORM = 17;
static const int VS_VIEWORIENTATION = 21;
// Stick this somewhere
static DWORD scratchpad[512];
DWORD* ptr = scratchpad;
// transform into screen space
m4x4(ptr, oPos, v[0], c[VS_PROJECTION]);
// copy the normal map texcoord to t0
mov(ptr, oT[0], v[3]);
// compute the relative light position in view space
m4x4(ptr, r[0], v[0], c[VS_LIGHTVIEWTRANSFORM]);
// generate the binormal to complete the tangent space basis
mov(ptr, r[1], v[1]);
mul(ptr, r[2], v[2] | s(y, z, x), r[1] | s(z, x, y));
mad(ptr, r[3], v[2] | s(z, x, y), r[1] | s(y, z, x), r[2] | neg);
// store the basis in t1, t2, t3 from TSB in v2, r3, r1
dp3(ptr, oT[1] | x, v[2], c[VS_VIEWORIENTATION]);
dp3(ptr, oT[1] | y, r[3], c[VS_VIEWORIENTATION]);
dp3(ptr, oT[1] | z, r[1], c[VS_VIEWORIENTATION]);
dp3(ptr, oT[2] | x, v[2], c[VS_VIEWORIENTATION + 1]);
dp3(ptr, oT[2] | y, r[3], c[VS_VIEWORIENTATION + 1]);
dp3(ptr, oT[2] | z, r[1], c[VS_VIEWORIENTATION + 1]);
dp3(ptr, oT[3] | x, v[2], c[VS_VIEWORIENTATION + 2]);
dp3(ptr, oT[3] | y, r[3], c[VS_VIEWORIENTATION + 2]);
dp3(ptr, oT[3] | z, r[1], c[VS_VIEWORIENTATION + 2]);
// store the light direction in t1.w, t2.w, t3.w
mov(ptr, oT[1] | w, r[0] | s(x));
mov(ptr, oT[2] | w, r[0] | s(y));
mov(ptr, oT[3] | w, r[0] | s(z));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment