Skip to content

Instantly share code, notes, and snippets.

@lolney
Created October 18, 2019 00:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lolney/283a046c0959adc75147e7439c03890a to your computer and use it in GitHub Desktop.
Save lolney/283a046c0959adc75147e7439c03890a to your computer and use it in GitHub Desktop.
#version 300 es
precision mediump float;
precision mediump int;
precision mediump sampler2D;
precision mediump sampler2DArray;
const int LOOP_MAX = 1000;
uniform ivec3 uOutputDim;
uniform ivec2 uTexSize;
in vec2 vTexCoord;
const int BIT_COUNT = 32;
int modi(int x, int y) {
return x - y * (x / y);
}
int bitwiseOr(int a, int b) {
int result = 0;
int n = 1;
for (int i = 0; i < BIT_COUNT; i++) {
if ((modi(a, 2) == 1) || (modi(b, 2) == 1)) {
result += n;
}
a = a / 2;
b = b / 2;
n = n * 2;
if(!(a > 0 || b > 0)) {
break;
}
}
return result;
}
int bitwiseXOR(int a, int b) {
int result = 0;
int n = 1;
for (int i = 0; i < BIT_COUNT; i++) {
if ((modi(a, 2) == 1) != (modi(b, 2) == 1)) {
result += n;
}
a = a / 2;
b = b / 2;
n = n * 2;
if(!(a > 0 || b > 0)) {
break;
}
}
return result;
}
int bitwiseAnd(int a, int b) {
int result = 0;
int n = 1;
for (int i = 0; i < BIT_COUNT; i++) {
if ((modi(a, 2) == 1) && (modi(b, 2) == 1)) {
result += n;
}
a = a / 2;
b = b / 2;
n = n * 2;
if(!(a > 0 && b > 0)) {
break;
}
}
return result;
}
int bitwiseNot(int a) {
int result = 0;
int n = 1;
for (int i = 0; i < BIT_COUNT; i++) {
if (modi(a, 2) == 0) {
result += n;
}
a = a / 2;
n = n * 2;
}
return result;
}
int bitwiseZeroFillLeftShift(int n, int shift) {
int maxBytes = BIT_COUNT;
for (int i = 0; i < BIT_COUNT; i++) {
if (maxBytes >= n) {
break;
}
maxBytes *= 2;
}
for (int i = 0; i < BIT_COUNT; i++) {
if (i >= shift) {
break;
}
n *= 2;
}
int result = 0;
int byteVal = 1;
for (int i = 0; i < BIT_COUNT; i++) {
if (i >= maxBytes) break;
if (modi(n, 2) > 0) { result += byteVal; }
n = int(n / 2);
byteVal *= 2;
}
return result;
}
int bitwiseSignedRightShift(int num, int shifts) {
return int(floor(float(num) / pow(2.0, float(shifts))));
}
int bitwiseZeroFillRightShift(int n, int shift) {
int maxBytes = BIT_COUNT;
for (int i = 0; i < BIT_COUNT; i++) {
if (maxBytes >= n) {
break;
}
maxBytes *= 2;
}
for (int i = 0; i < BIT_COUNT; i++) {
if (i >= shift) {
break;
}
n /= 2;
}
int result = 0;
int byteVal = 1;
for (int i = 0; i < BIT_COUNT; i++) {
if (i >= maxBytes) break;
if (modi(n, 2) > 0) { result += byteVal; }
n = int(n / 2);
byteVal *= 2;
}
return result;
}
vec2 integerMod(vec2 x, float y) {
vec2 res = floor(mod(x, y));
return res * step(1.0 - floor(y), -res);
}
vec3 integerMod(vec3 x, float y) {
vec3 res = floor(mod(x, y));
return res * step(1.0 - floor(y), -res);
}
vec4 integerMod(vec4 x, vec4 y) {
vec4 res = floor(mod(x, y));
return res * step(1.0 - floor(y), -res);
}
float integerMod(float x, float y) {
float res = floor(mod(x, y));
return res * (res > floor(y) - 1.0 ? 0.0 : 1.0);
}
int integerMod(int x, int y) {
return x - (y * int(x/y));
}
float div_with_int_check(float x, float y) {
if (floor(x) == x && floor(y) == y && integerMod(x, y) == 0.0) {
return float(int(x)/int(y));
}
return x / y;
}
// Here be dragons!
// DO NOT OPTIMIZE THIS CODE
// YOU WILL BREAK SOMETHING ON SOMEBODY'S MACHINE
// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME
const vec2 MAGIC_VEC = vec2(1.0, -256.0);
const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0);
const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536
float decode32(vec4 texel) {
texel *= 255.0;
vec2 gte128;
gte128.x = texel.b >= 128.0 ? 1.0 : 0.0;
gte128.y = texel.a >= 128.0 ? 1.0 : 0.0;
float exponent = 2.0 * texel.a - 127.0 + dot(gte128, MAGIC_VEC);
float res = exp2(round(exponent));
texel.b = texel.b - 128.0 * gte128.x;
res = dot(texel, SCALE_FACTOR) * exp2(round(exponent-23.0)) + res;
res *= gte128.y * -2.0 + 1.0;
return res;
}
float decode16(vec4 texel, int index) {
int channel = integerMod(index, 2);
return texel[channel*2] * 255.0 + texel[channel*2 + 1] * 65280.0;
}
float decode8(vec4 texel, int index) {
int channel = integerMod(index, 4);
return texel[channel] * 255.0;
}
vec4 legacyEncode32(float f) {
float F = abs(f);
float sign = f < 0.0 ? 1.0 : 0.0;
float exponent = floor(log2(F));
float mantissa = (exp2(-exponent) * F);
// exponent += floor(log2(mantissa));
vec4 texel = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV;
texel.rg = integerMod(texel.rg, 256.0);
texel.b = integerMod(texel.b, 128.0);
texel.a = exponent*0.5 + 63.5;
texel.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0;
texel = floor(texel);
texel *= 0.003921569; // 1/255
return texel;
}
// https://github.com/gpujs/gpu.js/wiki/Encoder-details
vec4 encode32(float value) {
if (value == 0.0) return vec4(0, 0, 0, 0);
float exponent;
float mantissa;
vec4 result;
float sgn;
sgn = step(0.0, -value);
value = abs(value);
exponent = floor(log2(value));
mantissa = value*pow(2.0, -exponent)-1.0;
exponent = exponent+127.0;
result = vec4(0,0,0,0);
result.a = floor(exponent/2.0);
exponent = exponent - result.a*2.0;
result.a = result.a + 128.0*sgn;
result.b = floor(mantissa * 128.0);
mantissa = mantissa - result.b / 128.0;
result.b = result.b + exponent*128.0;
result.g = floor(mantissa*32768.0);
mantissa = mantissa - result.g/32768.0;
result.r = floor(mantissa*8388608.0);
return result/255.0;
}
// Dragons end here
int index;
ivec3 threadId;
ivec3 indexTo3D(int idx, ivec3 texDim) {
int z = int(idx / (texDim.x * texDim.y));
idx -= z * int(texDim.x * texDim.y);
int y = int(idx / texDim.x);
int x = int(integerMod(idx, texDim.x));
return ivec3(x, y, z);
}
float get32(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + texDim.x * (y + texDim.y * z);
int w = texSize.x;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
vec4 texel = texture(tex, st / vec2(texSize));
return decode32(texel);
}
float get16(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + (texDim.x * (y + (texDim.y * z)));
int w = texSize.x * 2;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
vec4 texel = texture(tex, st / vec2(texSize.x * 2, texSize.y));
return decode16(texel, index);
}
float get8(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + (texDim.x * (y + (texDim.y * z)));
int w = texSize.x * 4;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
vec4 texel = texture(tex, st / vec2(texSize.x * 4, texSize.y));
return decode8(texel, index);
}
float getMemoryOptimized32(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + (texDim.x * (y + (texDim.y * z)));
int channel = integerMod(index, 4);
index = index / 4;
int w = texSize.x;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
index = index / 4;
vec4 texel = texture(tex, st / vec2(texSize));
return texel[channel];
}
vec4 getImage2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + texDim.x * (y + texDim.y * z);
int w = texSize.x;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
return texture(tex, st / vec2(texSize));
}
vec4 getImage3D(sampler2DArray tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + texDim.x * (y + texDim.y * z);
int w = texSize.x;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
return texture(tex, vec3(st / vec2(texSize), z));
}
float getFloatFromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
vec4 result = getImage2D(tex, texSize, texDim, z, y, x);
return result[0];
}
vec2 getVec2FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
vec4 result = getImage2D(tex, texSize, texDim, z, y, x);
return vec2(result[0], result[1]);
}
vec2 getMemoryOptimizedVec2(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + texDim.x * (y + texDim.y * z);
int channel = integerMod(index, 2);
index = index / 2;
int w = texSize.x;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
vec4 texel = texture(tex, st / vec2(texSize));
if (channel == 0) return vec2(texel.r, texel.g);
if (channel == 1) return vec2(texel.b, texel.a);
return vec2(0.0, 0.0);
}
vec3 getVec3FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
vec4 result = getImage2D(tex, texSize, texDim, z, y, x);
return vec3(result[0], result[1], result[2]);
}
vec3 getMemoryOptimizedVec3(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int fieldIndex = 3 * (x + texDim.x * (y + texDim.y * z));
int vectorIndex = fieldIndex / 4;
int vectorOffset = fieldIndex - vectorIndex * 4;
int readY = vectorIndex / texSize.x;
int readX = vectorIndex - readY * texSize.x;
vec4 tex1 = texture(tex, (vec2(readX, readY) + 0.5) / vec2(texSize));
if (vectorOffset == 0) {
return tex1.xyz;
} else if (vectorOffset == 1) {
return tex1.yzw;
} else {
readX++;
if (readX >= texSize.x) {
readX = 0;
readY++;
}
vec4 tex2 = texture(tex, vec2(readX, readY) / vec2(texSize));
if (vectorOffset == 2) {
return vec3(tex1.z, tex1.w, tex2.x);
} else {
return vec3(tex1.w, tex2.x, tex2.y);
}
}
}
vec4 getVec4FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
return getImage2D(tex, texSize, texDim, z, y, x);
}
vec4 getMemoryOptimizedVec4(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
int index = x + texDim.x * (y + texDim.y * z);
int channel = integerMod(index, 2);
int w = texSize.x;
vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
vec4 texel = texture(tex, st / vec2(texSize));
return vec4(texel.r, texel.g, texel.b, texel.a);
}
vec4 actualColor;
void color(float r, float g, float b, float a) {
actualColor = vec4(r,g,b,a);
}
void color(float r, float g, float b) {
color(r,g,b,1.0);
}
uniform mediump sampler2D user_input;
mediump ivec2 user_inputSize = ivec2(35, 36);
mediump ivec3 user_inputDim = ivec3(5000, 1, 1);
out vec4 data0;
float kernelResult;
void kernel() {
float user_accumulator=0.0;
for (int user_i=1;(user_i<=5000);user_i++){
user_accumulator+=div_with_int_check((getMemoryOptimized32(user_input, user_inputSize, user_inputDim, 0, 0, threadId.x)+1.0), float(user_i));}
kernelResult = user_accumulator;return;
}
void main(void) {
index = int(vTexCoord.s * float(uTexSize.x)) + int(vTexCoord.t * float(uTexSize.y)) * uTexSize.x;
threadId = indexTo3D(index, uOutputDim);
kernel();
data0[0] = kernelResult;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment