Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Shuffle for simple noise.
// ---- original code
// when declaring this as const, FPS drops from 30fps to 4fps on NV. wat?
// 8-entry lookup table. b() indexes it with a 3-bit value built from one bit
// each of p.x (index bit 2), p.y (index bit 1) and p.z (index bit 0).
int T[8] = int[8](0x15,0x38,0x32,0x2c,0x0d,0x13,0x07,0x2a);
// Returns bit number B of N (0 = least significant) as 0 or 1.
int b0(int N, int B)
{
    int shifted = N >> B; // move the requested bit down to position 0
    return shifted & 1;   // discard everything above it
}
// Looks up T using bit B of each component of p:
// p.x supplies index bit 2, p.y index bit 1, p.z index bit 0.
int b(ivec3 p, int B) {
    ivec3 bits = (p >> B) & 1; // bit B of x, y and z, each 0 or 1
    int index = (bits.x << 2) | (bits.y << 1) | bits.z;
    return T[index];
}
// Sums eight table lookups, one for each of bits 0..7. The component order
// fed to b() cycles xyz -> yzx -> zxy from one bit to the next.
int shuffle(ivec3 p) {
    ivec3 r0 = p;
    ivec3 r1 = p.yzx;
    ivec3 r2 = p.zxy;
    int lo = b(r0, 0) + b(r1, 1) + b(r2, 2) + b(r0, 3);
    int hi = b(r1, 4) + b(r2, 5) + b(r0, 6) + b(r1, 7);
    return lo + hi;
}
// ---- but lookups like this tend to generate bad code (and pathological behavior, see comment), so I recommended:
// Table lookup without array indexing: the eight 8-bit entries are packed
// four per int, and the selected one is extracted with a shift and a mask.
//   p - integer coordinate; bit B of x, y, z forms the 3-bit entry index
//       (x = index bit 2, y = index bit 1, z = index bit 0)
//   B - which bit of each component to use
// Returns the selected entry, in 0..255.
int b(ivec3 p, int B)
{
    int T0 = 0x2c323815; // entries 0-3, entry 0 in the low byte
    int T1 = 0x2a07130d; // entries 4-7, entry 4 in the low byte
    int T = (b0(p.x, B) != 0) ? T1 : T0; // select table by the x bit
    int ind = (b0(p.y, B) << 1) | b0(p.z, B); // byte position within the table
    return (T >> (8 * ind)) & 0xff;
}
// ---- which you can further simplify as follows:
// Picks one byte out of two packed 4-entry tables. Each component of p is
// only tested against zero: x selects the table, y and z select the byte.
int b(ivec3 p)
{
    int packed = 0x2c323815;   // table used when p.x == 0
    if (p.x != 0) {
        packed = 0x2a07130d;   // table used when p.x != 0
    }

    int shift = 0;
    if (p.y != 0) {
        shift += 16;
    }
    if (p.z != 0) {
        shift += 8;
    }

    return (packed >> shift) & 0xff;
}
// Sums eight lookups, one for each of bits 0..7 of p. Each call isolates a
// single bit with a mask (b() only tests components against zero), and the
// component order cycles xyz -> yzx -> zxy from one bit to the next.
int shuffle(ivec3 p)
{
    ivec3 r0 = p;
    ivec3 r1 = p.yzx;
    ivec3 r2 = p.zxy;
    int lo = b(r0 & 0x01) + b(r1 & 0x02) + b(r2 & 0x04) + b(r0 & 0x08);
    int hi = b(r1 & 0x10) + b(r2 & 0x20) + b(r0 & 0x40) + b(r1 & 0x80);
    return lo + hi;
}
// ---- note that since this all just depends on bits of p, you can texture-bake it too:
// shuffle_tex(p) = b(p & (1<<0)) + b(p.yzx & (1<<1)) + b(p.zxy & (1<<2)) + b(p & (1<<3))
// (16x16x16, R8UI, so 4k data, good chance of it staying in cache, and good access coherence!)
// Baked partial sums: one texel per 4-bit (x, y, z) triple (16x16x16, R8UI).
// Samplers are opaque types and must be declared as uniforms.
uniform usampler3D shuffle_tex;

// Texture-baked shuffle: one fetch covers bits 0-3 of p, a second fetch with
// the components rotated to yzx covers bits 4-7. Returns the sum of the two.
int shuffle(ivec3 p)
{
    // Keep both coordinates inside the 16x16x16 texture; texelFetch results
    // are undefined for out-of-range texel coordinates.
    ivec3 lo = p & 15;
    ivec3 hi = (p.yzx >> 4) & 15;

    // texelFetch on a usampler3D returns a uvec4; the baked value is in .r.
    uint sum = texelFetch(shuffle_tex, lo, 0).r + texelFetch(shuffle_tex, hi, 0).r;
    return int(sum);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment