Created
October 18, 2019 00:48
-
-
Save lolney/283a046c0959adc75147e7439c03890a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 300 es | |
// Default precision: mediump for floats, ints and both sampler types declared below.
precision mediump float; | |
precision mediump int; | |
precision mediump sampler2D; | |
precision mediump sampler2DArray; | |
// NOTE(review): LOOP_MAX is not referenced anywhere in the visible shader.
const int LOOP_MAX = 1000; | |
// Dimensions handed to indexTo3D() in main() to derive threadId.
uniform ivec3 uOutputDim; | |
// Size that main() multiplies vTexCoord by to obtain an integer column/row.
uniform ivec2 uTexSize; | |
// Texture coordinate of the current fragment, read in main().
in vec2 vTexCoord; | |
// Number of bits assumed for integer bit manipulation.
const int BIT_COUNT = 32; | |
// Integer remainder: x minus y times the integer quotient x / y.
int modi(int x, int y) {
  int quotient = x / y;
  return x - quotient * y;
}
// Bitwise OR of a and b.
//
// GLSL ES 3.00 provides integer bitwise operators, so the native `|` is used.
// Unlike a divide-by-two emulation it is also correct when either operand is
// negative (e.g. bitwiseOr(-1, 0) == -1).
int bitwiseOr(int a, int b) {
  return a | b;
}
// Bitwise exclusive OR of a and b.
//
// Uses the native `^` operator of GLSL ES 3.00, which is also correct for
// negative operands (e.g. bitwiseXOR(-1, 1) == -2).
int bitwiseXOR(int a, int b) {
  return a ^ b;
}
// Bitwise AND of a and b.
//
// Uses the native `&` operator of GLSL ES 3.00, which is also correct for
// negative operands (e.g. bitwiseAnd(-1, 5) == 5).
int bitwiseAnd(int a, int b) {
  return a & b;
}
// Bitwise complement of a.
//
// Uses the native `~` operator of GLSL ES 3.00, so negative inputs are
// complemented in two's complement as well (e.g. bitwiseNot(-1) == 0).
int bitwiseNot(int a) {
  return ~a;
}
// Shifts n left by `shift` bits, filling the vacated low bits with zeros.
//
// Uses the native `<<` operator of GLSL ES 3.00. The shift count is masked to
// its low five bits (the JavaScript convention) so the result stays defined
// for any count. Values above 2^30 are no longer collapsed to 0 by an
// overflowing width computation.
int bitwiseZeroFillLeftShift(int n, int shift) {
  return n << (shift & 31);
}
// Arithmetic (sign-extending) right shift of num by `shifts` bits.
//
// Uses the native `>>` operator of GLSL ES 3.00, which sign-extends signed
// operands. Staying in integer arithmetic avoids the precision loss of a
// float division for values that a float cannot represent exactly. The shift
// count is masked to its low five bits so the result stays defined.
int bitwiseSignedRightShift(int num, int shifts) {
  return num >> (shifts & 31);
}
// Logical right shift of n by `shift` bits: zeros are shifted in from the top.
//
// The value is reinterpreted as unsigned so the native `>>` fills with zeros
// even when n is negative, then converted back to int. The shift count is
// masked to its low five bits so the result stays defined for any count.
int bitwiseZeroFillRightShift(int n, int shift) {
  return int(uint(n) >> uint(shift & 31));
}
// Component-wise floor(mod(x, y)); any component greater than floor(y) - 1.0
// is replaced by 0.0.
vec2 integerMod(vec2 x, float y) {
  vec2 remainder = floor(mod(x, y));
  // step() gives 1.0 where -remainder >= 1.0 - floor(y), otherwise 0.0.
  vec2 keep = step(1.0 - floor(y), -remainder);
  return remainder * keep;
}
// Component-wise floor(mod(x, y)); any component greater than floor(y) - 1.0
// is replaced by 0.0.
vec3 integerMod(vec3 x, float y) {
  vec3 remainder = floor(mod(x, y));
  // step() gives 1.0 where -remainder >= 1.0 - floor(y), otherwise 0.0.
  vec3 keep = step(1.0 - floor(y), -remainder);
  return remainder * keep;
}
// Component-wise floor(mod(x, y)) with a per-component divisor; any component
// greater than floor(y) - 1.0 is replaced by 0.0.
vec4 integerMod(vec4 x, vec4 y) {
  vec4 remainder = floor(mod(x, y));
  // step() gives 1.0 where -remainder >= 1.0 - floor(y), otherwise 0.0.
  vec4 keep = step(1.0 - floor(y), -remainder);
  return remainder * keep;
}
// floor(mod(x, y)), multiplied by 0.0 when it is greater than floor(y) - 1.0.
float integerMod(float x, float y) {
  float remainder = floor(mod(x, y));
  float keep = 1.0;
  if (remainder > floor(y) - 1.0) {
    keep = 0.0;
  }
  return remainder * keep;
}
// Integer remainder: x minus y times the integer quotient x / y.
int integerMod(int x, int y) {
  int quotient = x / y;
  return x - y * quotient;
}
// Divides x by y. When both are whole numbers and integerMod(x, y) is 0.0 the
// division is carried out on ints and converted back to float; otherwise the
// plain float quotient is returned.
float div_with_int_check(float x, float y) {
  if (floor(x) != x || floor(y) != y || integerMod(x, y) != 0.0) {
    return x / y;
  }
  int wholeQuotient = int(x) / int(y);
  return float(wholeQuotient);
}
// Here be dragons! | |
// DO NOT OPTIMIZE THIS CODE | |
// YOU WILL BREAK SOMETHING ON SOMEBODY'S MACHINE | |
// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME | |
// Dotted with the (blue >= 128, alpha >= 128) flags in decode32: adds 1 to the
// exponent for the first flag and subtracts 256 for the second.
const vec2 MAGIC_VEC = vec2(1.0, -256.0); | |
// Per-channel weights (r, g, b, a) used by decode32 to combine the byte values;
// the alpha weight is 0.
const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0); | |
// Reciprocal weights applied per channel in legacyEncode32.
const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536 | |
// Reconstructs a float from the four channels of a texel.
// The channels are scaled by 255, the top bit of alpha is used as the sign,
// the remaining alpha bits plus the top bit of blue form the exponent, and
// r, g and the rest of blue form the 23-bit mantissa.
float decode32(vec4 texel) { | |
// Scale the normalized channels to the 0..255 range.
texel *= 255.0; | |
vec2 gte128; | |
// Flags: x is 1.0 when blue has its top bit set, y is 1.0 when alpha has.
gte128.x = texel.b >= 128.0 ? 1.0 : 0.0; | |
gte128.y = texel.a >= 128.0 ? 1.0 : 0.0; | |
// 2 * alpha - 127, plus 1 for the blue flag and minus 256 for the alpha flag.
float exponent = 2.0 * texel.a - 127.0 + dot(gte128, MAGIC_VEC); | |
// Contribution of the implicit leading one: 2^exponent.
float res = exp2(round(exponent)); | |
// Remove the exponent bit from blue so only mantissa bits remain.
texel.b = texel.b - 128.0 * gte128.x; | |
// Add mantissa (r + 256 g + 65536 b) scaled by 2^(exponent - 23).
res = dot(texel, SCALE_FACTOR) * exp2(round(exponent-23.0)) + res; | |
// Multiply by -1.0 when the alpha flag is set, by 1.0 otherwise.
res *= gte128.y * -2.0 + 1.0; | |
return res; | |
} | |
// Reads one of two values stored in a texel: index parity selects the channel
// pair (0,1) or (2,3). The first channel of the pair is weighted by 255 and
// the second by 65280 (255 * 256).
float decode16(vec4 texel, int index) { | |
int channel = integerMod(index, 2); | |
return texel[channel*2] * 255.0 + texel[channel*2 + 1] * 65280.0; | |
} | |
// Reads one of four values stored in a texel: index modulo 4 selects the
// channel, whose value is scaled by 255.
float decode8(vec4 texel, int index) { | |
int channel = integerMod(index, 4); | |
return texel[channel] * 255.0; | |
} | |
// Encodes a float into a texel whose channels are byte values divided by 255.
// NOTE(review): no caller is visible in this shader.
vec4 legacyEncode32(float f) { | |
float F = abs(f); | |
// 1.0 for negative input, 0.0 otherwise.
float sign = f < 0.0 ? 1.0 : 0.0; | |
float exponent = floor(log2(F)); | |
// NOTE(review): mantissa is computed but not read afterwards.
float mantissa = (exp2(-exponent) * F); | |
// exponent += floor(log2(mantissa)); | |
// F * 2^(23 - exponent), scaled per channel by 1, 1/256 and 1/65536 (alpha 0).
vec4 texel = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV; | |
// Reduce r and g modulo 256 and b modulo 128.
texel.rg = integerMod(texel.rg, 256.0); | |
texel.b = integerMod(texel.b, 128.0); | |
// Alpha holds half of the biased exponent: (exponent + 127) / 2.
texel.a = exponent*0.5 + 63.5; | |
// Add 128 to blue when (exponent + 127) is odd and 128 to alpha when negative.
texel.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0; | |
texel = floor(texel); | |
texel *= 0.003921569; // 1/255 | |
return texel; | |
} | |
// https://github.com/gpujs/gpu.js/wiki/Encoder-details | |
// Encodes a float into a texel whose channels are byte values divided by 255.
// Zero maps to an all-zero texel. Otherwise alpha receives the sign and the
// upper part of the biased exponent, blue the lowest exponent bit plus the top
// seven mantissa bits, and green and red the remaining mantissa bits.
vec4 encode32(float value) { | |
if (value == 0.0) return vec4(0, 0, 0, 0); | |
float exponent; | |
float mantissa; | |
vec4 result; | |
float sgn; | |
// step(0.0, -value) is 1.0 when value <= 0.0; zero was handled above.
sgn = step(0.0, -value); | |
value = abs(value); | |
exponent = floor(log2(value)); | |
// Fractional part of the significand: value / 2^exponent - 1.
mantissa = value*pow(2.0, -exponent)-1.0; | |
// Apply the exponent bias of 127.
exponent = exponent+127.0; | |
result = vec4(0,0,0,0); | |
// Alpha: biased exponent divided by two; `exponent` keeps the remainder (0 or 1).
result.a = floor(exponent/2.0); | |
exponent = exponent - result.a*2.0; | |
// Sign goes into the top bit of alpha.
result.a = result.a + 128.0*sgn; | |
// Blue: top seven mantissa bits, plus the leftover exponent bit as bit 7.
result.b = floor(mantissa * 128.0); | |
mantissa = mantissa - result.b / 128.0; | |
result.b = result.b + exponent*128.0; | |
// Green: next eight mantissa bits.
result.g = floor(mantissa*32768.0); | |
mantissa = mantissa - result.g/32768.0; | |
// Red: remaining mantissa bits.
result.r = floor(mantissa*8388608.0); | |
return result/255.0; | |
} | |
// Dragons end here | |
// Linear index of the current fragment; assigned in main().
int index; | |
// 3D coordinates computed from index by indexTo3D(); assigned in main() and
// read by kernel().
ivec3 threadId; | |
// Splits a linear index into (x, y, z) for a volume that is texDim.x wide and
// texDim.y high.
ivec3 indexTo3D(int idx, ivec3 texDim) {
  int planeSize = texDim.x * texDim.y;
  int z = idx / planeSize;
  // Position inside the selected z plane.
  int inPlane = idx - z * planeSize;
  int y = inPlane / texDim.x;
  int x = integerMod(inPlane, texDim.x);
  return ivec3(x, y, z);
}
// Fetches the texel holding element (x, y, z) and decodes it with decode32().
// The element's linear index is mapped to a column/row of a texture that is
// texSize.x texels wide, and the texel centre is sampled.
float get32(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int linearIndex = x + texDim.x * (y + texDim.y * z);
  int column = integerMod(linearIndex, texSize.x);
  int row = linearIndex / texSize.x;
  vec2 coord = (vec2(float(column), float(row)) + 0.5) / vec2(texSize);
  return decode32(texture(tex, coord));
}
// Fetches element (x, y, z) from a texture addressed as if it were twice as
// wide as texSize.x, then decodes it with decode16() using the linear index.
float get16(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int linearIndex = x + (texDim.x * (y + (texDim.y * z)));
  int virtualWidth = texSize.x * 2;
  int column = integerMod(linearIndex, virtualWidth);
  int row = linearIndex / virtualWidth;
  vec2 coord = (vec2(float(column), float(row)) + 0.5) / vec2(virtualWidth, texSize.y);
  return decode16(texture(tex, coord), linearIndex);
}
// Fetches element (x, y, z) from a texture addressed as if it were four times
// as wide as texSize.x, then decodes it with decode8() using the linear index.
float get8(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int linearIndex = x + (texDim.x * (y + (texDim.y * z)));
  int virtualWidth = texSize.x * 4;
  int column = integerMod(linearIndex, virtualWidth);
  int row = linearIndex / virtualWidth;
  vec2 coord = (vec2(float(column), float(row)) + 0.5) / vec2(virtualWidth, texSize.y);
  return decode8(texture(tex, coord), linearIndex);
}
// Reads element (x, y, z) from a texture that stores four floats per texel.
// The element's linear index modulo 4 selects the channel; the index divided
// by 4 selects the texel, which is sampled at its centre.
float getMemoryOptimized32(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int index = x + (texDim.x * (y + (texDim.y * z)));
  int channel = integerMod(index, 4);
  // Four elements share one texel.
  int texelIndex = index / 4;
  int w = texSize.x;
  vec2 st = vec2(float(integerMod(texelIndex, w)), float(texelIndex / w)) + 0.5;
  vec4 texel = texture(tex, st / vec2(texSize));
  return texel[channel];
}
// Samples the centre of the texel that holds element (x, y, z) and returns all
// four channels unchanged.
vec4 getImage2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int linearIndex = x + texDim.x * (y + texDim.y * z);
  int column = integerMod(linearIndex, texSize.x);
  int row = linearIndex / texSize.x;
  vec2 coord = (vec2(float(column), float(row)) + 0.5) / vec2(texSize);
  return texture(tex, coord);
}
// Samples a 2D array texture: the in-layer coordinate is derived from the
// element's linear index and z is passed as the layer.
vec4 getImage3D(sampler2DArray tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int linearIndex = x + texDim.x * (y + texDim.y * z);
  int column = integerMod(linearIndex, texSize.x);
  int row = linearIndex / texSize.x;
  vec2 coord = (vec2(float(column), float(row)) + 0.5) / vec2(texSize);
  // NOTE(review): z contributes to both linearIndex and the layer; confirm intended.
  return texture(tex, vec3(coord, z));
}
// Returns the first channel of the texel holding element (x, y, z).
float getFloatFromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  return getImage2D(tex, texSize, texDim, z, y, x).r;
}
// Returns the first two channels of the texel holding element (x, y, z).
vec2 getVec2FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  return getImage2D(tex, texSize, texDim, z, y, x).rg;
}
// Reads a vec2 from a texture that stores two vec2 values per texel: index
// parity selects the (r, g) or (b, a) pair and index / 2 selects the texel.
// Any other parity value yields vec2(0.0, 0.0).
vec2 getMemoryOptimizedVec2(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int linearIndex = x + texDim.x * (y + texDim.y * z);
  int pairSlot = integerMod(linearIndex, 2);
  int texelIndex = linearIndex / 2;
  int column = integerMod(texelIndex, texSize.x);
  int row = texelIndex / texSize.x;
  vec4 fetched = texture(tex, (vec2(float(column), float(row)) + 0.5) / vec2(texSize));
  if (pairSlot == 0) {
    return fetched.rg;
  }
  if (pairSlot == 1) {
    return fetched.ba;
  }
  return vec2(0.0, 0.0);
}
// Returns the first three channels of the texel holding element (x, y, z).
vec3 getVec3FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  return getImage2D(tex, texSize, texDim, z, y, x).rgb;
}
// Reads a vec3 from a texture in which vec3 values are stored back to back
// across the four channels of consecutive texels.
// fieldIndex is the position of the first component counted in floats;
// dividing by 4 gives the texel and the remainder gives the starting channel.
// When the three components straddle two texels, the following texel (wrapping
// to the next row at the end of a row) is fetched as well.
vec3 getMemoryOptimizedVec3(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int fieldIndex = 3 * (x + texDim.x * (y + texDim.y * z));
  int vectorIndex = fieldIndex / 4;
  int vectorOffset = fieldIndex - vectorIndex * 4;
  int readY = vectorIndex / texSize.x;
  int readX = vectorIndex - readY * texSize.x;
  vec4 tex1 = texture(tex, (vec2(readX, readY) + 0.5) / vec2(texSize));
  if (vectorOffset == 0) {
    return tex1.xyz;
  }
  if (vectorOffset == 1) {
    return tex1.yzw;
  }
  // The value continues in the next texel.
  readX++;
  if (readX >= texSize.x) {
    readX = 0;
    readY++;
  }
  // Sample the texel centre (+0.5), like the first fetch; a coordinate on the
  // texel corner leaves the selected texel up to the implementation.
  vec4 tex2 = texture(tex, (vec2(readX, readY) + 0.5) / vec2(texSize));
  if (vectorOffset == 2) {
    return vec3(tex1.z, tex1.w, tex2.x);
  }
  return vec3(tex1.w, tex2.x, tex2.y);
}
// Returns all four channels of the texel holding element (x, y, z).
vec4 getVec4FromSampler2D(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  vec4 fetched = getImage2D(tex, texSize, texDim, z, y, x);
  return fetched;
}
// Reads a vec4 stored one per texel: the element's linear index is mapped to a
// column/row of a texture texSize.x texels wide and the texel centre is
// sampled. All four channels are returned as fetched.
vec4 getMemoryOptimizedVec4(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  int index = x + texDim.x * (y + texDim.y * z);
  int w = texSize.x;
  vec2 st = vec2(float(integerMod(index, w)), float(index / w)) + 0.5;
  return texture(tex, st / vec2(texSize));
}
// Colour most recently set through color().
vec4 actualColor;

// Stores an RGBA colour in actualColor.
void color(float r, float g, float b, float a) {
  actualColor = vec4(vec3(r, g, b), a);
}

// Stores an RGB colour in actualColor with alpha 1.0.
void color(float r, float g, float b) {
  actualColor = vec4(vec3(r, g, b), 1.0);
}
// Texture sampled by kernel() through getMemoryOptimized32().
uniform mediump sampler2D user_input; | |
// Texture size and logical dimensions passed together with user_input.
mediump ivec2 user_inputSize = ivec2(35, 36); | |
mediump ivec3 user_inputDim = ivec3(5000, 1, 1); | |
// Fragment output; main() writes only component 0.
out vec4 data0; | |
// Value produced by kernel() and copied into data0 by main().
float kernelResult; | |
// Computes the sum over i = 1..5000 of (input[threadId.x] + 1.0) / i and
// stores it in kernelResult.
void kernel() {
  // The sampled value does not depend on the loop counter, so it is fetched
  // once instead of on every iteration.
  float numerator = getMemoryOptimized32(user_input, user_inputSize, user_inputDim, 0, 0, threadId.x) + 1.0;
  float user_accumulator = 0.0;
  for (int user_i = 1; user_i <= 5000; user_i++) {
    user_accumulator += div_with_int_check(numerator, float(user_i));
  }
  kernelResult = user_accumulator;
}
// Entry point: derives the fragment's linear index and 3D thread id from its
// texture coordinate, runs the kernel and writes the result to the first
// component of data0.
void main(void) {
  int column = int(vTexCoord.s * float(uTexSize.x));
  int row = int(vTexCoord.t * float(uTexSize.y));
  index = column + row * uTexSize.x;
  threadId = indexTo3D(index, uOutputDim);
  kernel();
  data0.r = kernelResult;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment