Skip to content

Instantly share code, notes, and snippets.

Created December 6, 2014 08:39
Show Gist options
  • Save anonymous/cfcbdcd01d217f04ef2f to your computer and use it in GitHub Desktop.
Save anonymous/cfcbdcd01d217f04ef2f to your computer and use it in GitHub Desktop.
clfflame.nt
module clfflame;
import c.CL.cl;
import sys, std.(file, string, util, random, math, time, thread, channel, hashmap, png, macros.(switchover, where));
pragma(lib, "OpenCL");
// Number of variation functions; sizes the per-function weight tuple and the
// generated `weightN` fields of the kernel's funobj struct.
alias NUMFUNS = 10; // functions implemented
// Number of function objects per set; interpolated into the kernel source as
// the modulus for picking a function.
alias numfuns = 3; // function set size, must be constant because compiled into shader
// Multiplier applied to the randomly drawn cross-fade rate in FunFade.
alias SPEED = 1.0;
// Logs `s` through writeln, prefixed by the current sec() value and a tab.
void twriteln(string s) {
  writeln "$(sec())\t$s";
}
// Draws one float from the default generator `deflt` via randf.
// presumably uniform in [0, 1) — confirm against std.random
float frand() {
  return randf(deflt);
}
// Random color: three successive frand() draws become the x, y and z channels.
vec3f randcol() {
  auto red = frand();
  auto green = frand();
  auto blue = frand();
  return vec3f(red, green, blue);
}
// Sum of three independent frand() draws, each scaled down by a few hundred.
float smallrand() {
  auto first = frand() / 300;
  auto second = frand() / 400;
  auto third = frand() / 350;
  return first + second + third;
}
// Returns `f` unchanged when a frand() draw exceeds 0.5, otherwise its negation.
float resign(float f) {
  if (frand() > 0.5) return f;
  return -f;
}
// Builds one random vector of a 2D affine transform.
//   component 0: (1, 0) plus a perturbation per coordinate
//   component 1: (0, 1) plus a perturbation per coordinate
//   component 2: (0, 0) plus a perturbation per coordinate
// Any other component is a caller error and aborts via `fail`; previously the
// function fell off its end without returning a value.
vec2f randvec(int component) {
  // Expression alias: presumably re-evaluated at every use, so each coordinate
  // gets its own draw — confirm against the alias semantics.
  alias c2 = resign pow(frand() * 1.1, 7); // usually small, potentially large
  if (component == 0) return vec2f(1 + c2, c2);
  if (component == 1) return vec2f(c2, 1 + c2);
  if (component == 2) return vec2f(c2, c2);
  // return vec2f(frand() * 2 - 1, frand() * 2 - 1);
  fail "randvec: invalid component $component";
}
// Adapter that lets a delegate taking T be handed to a C API that expects a
// (function pointer, void* user data) callback pair.
template dgwrapper(T) {
// C-callable trampoline: `ptr` is the pair allocated by dgwrapper() below.
extern(C) void callHolder(T t, void* ptr) {
auto trip = *(void*, void delegate(T) dg)*:ptr;
// presumably re-establishes the creating thread's thread-local context for the delegate call — TODO confirm
auto _threadlocal = trip[0];
trip[1](t);
}
// Heap-allocates a (_threadlocal, dg) pair and returns (&callHolder, pointer to that pair).
// Nothing in this template frees the pair.
auto dgwrapper(void delegate(T) dg) {
auto ptr = new (void*, void delegate(T));
(*ptr) = (_threadlocal, dg);
return (&callHolder, void*:ptr);
}
}
// Status check for OpenCL calls: zero passes silently, anything else is
// logged and then aborts via `fail`.
void clCheckRes (int i) {
  if (i == 0) return;
  writeln "CL failed with $i! ";
  fail;
}
// clCheckCall!A (args...) calls A with `args` plus a trailing pointer to an int
// error slot, runs clCheckRes on that slot when the wrapper's scope exits, and
// returns whatever A returned.
template clCheckCall(alias A) {
template clCheckCall(T) {
// The return type is taken from the type of calling A with a T value and a null error pointer.
type-of A(value-of!T, null) clCheckCall(T t) {
int error;
// evaluated on scope exit, i.e. after A has had the chance to write into `error`
onExit clCheckRes (error);
return A(t, &error);
}
}
}
// Creates an OpenCL context for devices of `type`.
//   props  - context properties WITHOUT the terminating 0; it is appended here.
//   notify - delegate receiving (errinfo, private_info, cb) from the OpenCL
//            error callback.
// The call goes through clCheckCall, so a non-zero OpenCL status aborts.
// NOTE(review): the trampoline is registered even when `notify` is null;
// presumably a driver-reported error would then call a null delegate — confirm.
cl_context createContextFromType(cl_context_properties[] props, cl_device_type type, void delegate(char* errinfo, void* private_info, size_t cb) notify) {
  // Wrap the delegate as (C callback, user-data pointer).
  auto tup = dgwrapper!(char*, void*, size_t)(void delegate((char*,void*,size_t)):notify);
  // Terminate the property list.
  props ~= cl_context_properties:0;
  return clCheckCall!clCreateContextFromType (props.ptr, type, (ParamTypes type-of &clCreateContextFromType)[2]: tup[0], tup[1]);
}
// Creates an OpenCL context for an explicit device list.
//   props  - context properties WITHOUT the terminating 0; it is appended here.
//   devs   - number of entries in `devp`.
//   devp   - pointer to the device ids.
//   notify - delegate receiving (errinfo, private_info, cb) from the OpenCL
//            error callback.
// The call goes through clCheckCall, so a non-zero OpenCL status aborts.
// NOTE(review): the trampoline is registered even when `notify` is null (as
// CLContext.init passes it); presumably a driver-reported error would then
// call a null delegate — confirm.
cl_context createContext(cl_context_properties[] props, int devs, cl_device_id* devp, void delegate(char* errinfo, void* private_info, size_t cb) notify) {
  // Wrap the delegate as (C callback, user-data pointer).
  auto tup = dgwrapper!(char*, void*, size_t)(void delegate((char*,void*,size_t)):notify);
  // Terminate the property list.
  props ~= cl_context_properties:0;
  return clCheckCall!clCreateContext (props.ptr, devs, devp, (ParamTypes type-of &clCreateContext)[3]: tup[0], tup[1]);
}
import std.lib.glfw3, std.lib.opengl.(, window);
/*
shared ThreadPool tp;
void init() { tp = new ThreadPool(2); }
*/
// Enqueues a non-blocking read of `mem` into `target` that starts once `ev`
// has completed, and returns a delegate that blocks until the read is done.
//   queue  - command queue to enqueue on.
//   mem    - device buffer; target.length * size-of vec4f bytes are read from offset 0.
//   target - host destination; must stay valid until the returned delegate has run.
//   ev     - event the read waits for.
// The read-back event is not released in this function.
void delegate() myAsyncRead(cl_command_queue queue, cl_mem mem, vec4f[] target, cl_event ev) {
  // The wait list is only read during the enqueue call, so the parameter's
  // address is enough; no heap copy of a one-element array is needed.
  clCheckRes clEnqueueReadBuffer (queue, mem, CL_FALSE, 0, target.length * size-of vec4f, target.ptr, 1, &ev, &cl_event readback);
  return new λ{ clCheckRes clWaitForEvents (1, &readback); }
  // no benefit from this as clEnqueueMapBuffer copies into main ram
  /*assert(!!ev);
  auto start = sec();
  auto ptr = vec4f*: clCheckCall!clEnqueueMapBuffer(queue, mem, true, CL_MAP_READ, 0, target.length * size-of vec4f, 1, &ev, null);
  writeln "map took $((sec() - start) * 1000)ms";
  auto sem = new Semaphore;
  tp.addTask new λ{
  target[] = ptr[0..target.length];
  sem.release;
  }
  return new λ{
  sem.acquire;
  clEnqueueUnmapMemObject(queue, mem, ptr, 0, null, &cl_event unmap);
  clCheckRes clWaitForEvents (1, &unmap);
  }*/
}
// Kind of request sent to the calculation thread in main: render a regular
// frame, or render and save a screenshot.
enum DrawMode { Frame, Screenshot }
// Message passed over the draw2calc channel in main: either a frame buffer to
// fill, or a mode-only request.
struct DrawMessage {
DrawMode mode;
vec4f[] array; // only set by the buffer constructor below
// Mode-only message (no buffer attached).
void init(DrawMode mode) { this.mode = mode; }
// Frame message carrying the buffer to render into.
void init(vec4f[] a) { mode = DrawMode.Frame; array = a; }
// Buffer accessor, guarded by a `where` clause on mode == Frame.
vec4f[] getArray() where mode == DrawMode.Frame return array;
// A Frame message is valid only with a non-null array; other modes are always valid.
bool isValid() { if (mode == DrawMode.Frame) return !!array; else return true; }
// Implicit conversions: the first goes through isValid(), the second through getArray().
alias implicit-cast = isValid();
alias implicit-cast-2 = getArray();
}
// Toggled by the Space key in main's update(); while set, FunFade.step
// advances the cross-fade by zero.
shared bool pause;
// Repeat!T!N yields a tuple type made of N elements of type T (the empty
// tuple for N == 0).
template Repeat(T) {
template Repeat(alias A) {
static if (A == 0) { alias Repeat = (); }
else {
// recursive case: one T followed by the tuple for A - 1, flattened into a single tuple
Repeat!(A - 1) below; T t;
alias Repeat = type-of __flatten_tuple (t, below);
}
}
}
// One float weight per implemented variation function.
alias Weights = Repeat!float!NUMFUNS;
// Host-side record for one function object. FunFade.upload writes an array of
// these into the buffer the kernel reads as `funobj`, so the field order here
// follows that kernel struct (color, mat1 a/b/c, mat2 a/b/c, weights, caleid).
// assumes vec3f occupies the same 16 bytes as the kernel's float4 — TODO confirm
alias Tup = (vec3f color,
(vec2f a, vec2f b, vec2f c) mat1,
(vec2f a, vec2f b, vec2f c) mat2,
Weights weights, (float weight, int fac) caleid);
// Growable list of function objects with helpers to append, copy and
// re-randomize individual entries.
struct FunSet {
  Tup[auto~] functions;
  // Appends a default-initialized entry and randomizes it right away.
  void addfun() {
    Tup blank;
    functions ~= blank;
    regenat(functions.length - 1);
  }
  // Overwrites entry `id` with entry `id` of `other`.
  void copyfrom(FunSet* other, int id) {
    auto source = other.functions[id];
    functions[id] = source;
  }
  // Replaces entry `id` with a freshly randomized function object.
  void regenat(int id) {
    Weights mix;
    float total;
    // draw the raw weights and accumulate their sum
    static for int k <- 0..NUMFUNS {
      auto draw = frand();
      mix[k] = draw;
      total += draw;
    }
    // rescale so that the weights add up to 1
    auto norm = 1 / total;
    static for int k <- 0..NUMFUNS { mix[k] *= norm; }
    // caleid weight is 1 for one in three draws and 0 otherwise; fac is the 1-based slot index
    functions[id] = (randcol(),
      (randvec(0), randvec(1), randvec(2)), // pretransform
      (randvec(0), randvec(1), randvec(2)), // posttransform
      mix, (weight => [0, 1][std.random.rand() % 3 == 0], fac => id + 1));
  }
}
// Cross-fades each function object from set `a` towards set `b` and uploads
// the interpolated set to an OpenCL buffer.
// When a fade completes, `b`'s entry becomes the new `a` entry and `b` gets a
// fresh random entry, so the animation never repeats a target.
class FunFade {
  FunSet a, b;
  // Per function object: f = fade position (0 = a, 1 = b), d = increment per step().
  (float f, float d)[auto~] transfers;
  // Device buffer holding the interpolated function objects.
  cl_mem funvec;
  // Creates `numfuns` function objects in both sets, one transfer per object
  // and a device buffer big enough for `numfuns` Tup records.
  void init(int numfuns, cl_context ctx) {
    for 0..numfuns {
      a.addfun;
      b.addfun;
      transfers ~= (0, smallrand() * SPEED);
    }
    funvec = clCheckCall!clCreateBuffer (ctx, CL_MEM_READ_ONLY,
      (numfuns * size-of Tup), null);
  }
  // Releases the device buffer.
  void fini() {
    clReleaseMemObject funvec;
  }
  // Advances every fade by its increment (by nothing while `pause` is set)
  // and rolls over the fades that passed 1.
  void step() {
    // writeln "a: $(a.functions)";
    // writeln "b: $(b.functions)";
    // writeln "tf: $transfers";
    float step = 1;
    if (pause) step = 0;
    // Bounded by the number of transfers created in init(), not by the global alias.
    for int i <- 0..transfers.length {
      ref tf = transfers[i];
      tf.f += tf.d * step;
      if (tf.f > 1) {
        a.copyfrom(&b, i);
        b.regenat(i);
        tf.f -= 1;
        // Same rate formula as in init(), so SPEED also scales every later fade.
        tf.d = float:(smallrand() * SPEED);
      }
    }
  }
  // Interpolates every function object at its current fade position and
  // writes the result into `funvec` with a blocking write.
  void upload(cl_command_queue queue) {
    // Staging array; freed on scope exit, which is safe because the write below blocks.
    scope Tup[auto~] funs;
    for int i <- 0..transfers.length {
      ref tf = transfers[i];
      ref af = a.functions[i], bf = b.functions[i];
      float f = tf.f;
      // Cosine-eased blend: f = 0 yields a, f = 1 yields b.
      float interp(float a, b, f) {
        // return a * (1 - f) + b * f;
        auto f2 = (1 - cos(f * PI)) / 2;
        return a * (1 - f2) + b * f2;
      }
      vec2f interp(vec2f a, b, float f) {
        return vec2f(interp(a.x, b.x, f), interp(a.y, b.y, f));
      }
      vec3f interp(vec3f a, b, float f) {
        return vec3f(interp(a.x, b.x, f), interp(a.y, b.y, f), interp(a.z, b.z, f));
      }
      alias binterp = interp;
      /*vec2f binterp(vec2f a, b, float f) {
      auto res = interp(a, b, f);
      auto lres = |res|;
      auto newlen = pow(lres, 0.1);
      res = res * newlen / lres;
      return res;
      }*/
      auto m1 = (
        binterp(af.mat1.a, bf.mat1.a, f),
        binterp(af.mat1.b, bf.mat1.b, f),
        binterp(af.mat1.c, bf.mat1.c, f));
      auto m2 = (
        binterp(af.mat2.a, bf.mat2.a, f),
        binterp(af.mat2.b, bf.mat2.b, f),
        binterp(af.mat2.c, bf.mat2.c, f));
      // Only the weight is blended; the integer factor is taken from set a.
      auto caleid_interp = (
        weight => interp(af.caleid.weight, bf.caleid.weight, f),
        fac => af.caleid.fac);
      Weights interps;
      static for int i <- 0..NUMFUNS {
        interps[i] = interp(af.weights[i], bf.weights[i], f);
      }
      funs ~= (interp(af.color, bf.color, f),
        m1, m2, interps, caleid_interp);
    }
    // writeln "funs: $funs";
    auto funvec_data = ubyte[]:funs[];
    clCheckRes clEnqueueWriteBuffer (queue, funvec, CL_TRUE, 0, funvec_data.(length, ptr), 0, null, null);
  }
}
/**
* start with random point, color black
* have a set of functions (vec3f color, int index, matrix2x2, vec2)
* have an output (vec3 sum, int count)
* every step:
select random function
transform point by matrix and vec
mix color with function object color
write to output
**/
// Owns the OpenCL context, command queue, the two compiled kernels (fflame
// accumulation and fixup tone mapping) and caches of device buffers.
class CLContext {
  cl_context ctx;
  cl_command_queue queue;
  // Accumulation buffers, keyed by (byte size, host output pointer).
  Hashmap!((int, vec4f*), cl_mem) bufcache;
  // Zero-filled source buffers, keyed by byte size.
  Hashmap!(int, cl_mem) zbufcache;
  cl_kernel fflameKernel, fixupKernel;
  cl_program fflame, fixup;
  // Releases kernels, programs, all cached buffers, the queue and the context.
  void fini() {
    clReleaseKernel fflameKernel;
    clReleaseKernel fixupKernel;
    clReleaseProgram fflame;
    clReleaseProgram fixup;
    bufcache .iterate λ((int, vec4f*), cl_mem mem) { clReleaseMemObject(mem); };
    zbufcache.iterate λ(int i, cl_mem mem) { clReleaseMemObject(mem); };
    clReleaseCommandQueue queue;
    clReleaseContext ctx;
  }
  // Returns the cached device buffer for (size, ptr), creating it on first use.
  // `ptr` only serves as part of the cache key; the buffer is not bound to it.
  cl_mem cacheGetBufferSized(int size, vec4f* ptr) {
    // Distinct name for the cache hit so the `ptr` parameter is not shadowed.
    if (auto cached = bufcache.get(size, ptr)) return *cached;
    writeln "alloc buffer of $(size)";
    auto res = clCheckCall!clCreateBuffer (ctx, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      size, null);
    bufcache.insert((size, ptr), res);
    return res;
  }
  // Returns a cached device buffer of `size` zero bytes, creating and filling it on first use.
  cl_mem getZeroBuffer(int size) {
    if (auto p = zbufcache.get(size)) return *p;
    auto res = clCheckCall!clCreateBuffer (ctx, CL_MEM_READ_ONLY, size, null);
    scope zeroes = [for 0..size: byte:0].eval;
    // Blocking write, so the host-side zero array may be freed on scope exit.
    clCheckRes clEnqueueWriteBuffer (queue, res, CL_TRUE, 0, size, zeroes.ptr, 0, null, null);
    zbufcache.insert(size, res);
    return res;
  }
  // Picks the first platform and its first GPU device, prints device info,
  // creates the context and an out-of-order command queue, then builds both
  // programs and creates their kernels. Build failures print the log and exit(1).
  void init() {
    // Generates the "float weight0; float weight1; ..." fields of the kernel struct.
    string weights() { return join [for i <- 0..NUMFUNS: "float weight$i; "]; }
    auto fflamekernel = "
typedef struct _funobj {
float4 color;
float2 mat1a, mat1b, mat1c;
float2 mat2a, mat2b, mat2c;
$(weights())
float caleid_weight; int caleid_fac;
} funobj;
uint MWC64X(uint2 *state)
{
enum { A=4294883355U};
uint x=(*state).x, c=(*state).y; // Unpack the state
uint res=x^c; // Calculate the result
uint hi=mul_hi(x,A); // Step the RNG
x=x*A+c;
c=hi+(x<c);
*state=(uint2)(x,c); // Pack the state back up
return res; // Return the next result
}
__constant float PI = 3.14159265358979323846264f;
// __constant float coeff_1 = PI / 4.0f; // jesus christ nvidia
__constant float coeff_1 = 0.785398163397f;
// __constant float coeff_2 = 3.0f * coeff_1;
__constant float coeff_2 = 2.35619449019f;
float atan2f(float y, float x) {
float abs_y = y * sign(y);
float angle, r;
/*if (x >= 0) {
r = (x - abs_y) / (abs_y + x);
angle = coeff_1 - coeff_1 * r;
} else {
r = (x + abs_y) / (abs_y - x);
angle = coeff_2 - coeff_1 * r;
}*/
int s = sign(x);
r = native_divide(x - s * abs_y, abs_y + s * x);
angle = ((s == 1)?coeff_1:coeff_2) - coeff_1 * r;
// return y < 0 ? -angle : angle;
return angle * sign(y);
}
float sinf(float f) { return native_sin(f); }
float cosf(float f) { return native_cos(f); }
float2 apply(float2 pos, __constant funobj* fo) {
float r2 = dot(pos, pos), s = sinf(r2), c = cosf(r2), a = atan2f(pos.y, pos.x), r = fast_length(pos);
float abypi = native_divide(a, PI);
float ar = a * r;
float sar = sinf(ar), car = cosf(ar);
// this is not actually any slower than a switch would have been.
pos =
// 0 linear
fo->weight0 * pos +
// 1 sinusoidal
fo->weight1 * (float2)(sinf(pos.x), sinf(pos.y)) +
// 2 spherical
fo->weight2 * native_divide(pos, r2) +
// 3 swirl
fo->weight3 * (float2)(pos.x * s - pos.y * c, pos.x * c + pos.y * s) +
// 4 horseshoe
fo->weight4 * (float2)((pos.x - pos.y) * (pos.x + pos.y), 2 * pos.x * pos.y) +
// 5 polar
fo->weight5 * (float2)(abypi, r - 1.0f) +
// 6 handkerchief
fo->weight6 * r * (float2)(sinf(a + r), cosf(a - r)) +
// 7 heart
fo->weight7 * r * (float2)(sar,-car) +
// 8 disc
fo->weight8 * (abypi) * (float2)(sar, car) +
// 9 spiral
fo->weight9 * native_recip(r) * (float2)(cosf(a) + sinf(r), sinf(a) - cosf(r));
return pos;
}
__kernel void fflame(__global float4* res, __constant funobj* funset, const int2 size, const int iters) {
uint2 rngstate = (uint2)(get_global_id(0), 0);
float2 pos = (float2)(0, 0);
float3 col = (float3)(0, 0, 0);
for (int i = 0; i < iters; i++) {
int selected = MWC64X(&rngstate) % $numfuns;
__constant funobj *fo = &funset[selected];
int caleid_rand = MWC64X(&rngstate);
int randflags = MWC64X(&rngstate);
float2 prevpos = pos;
pos = pos.x * fo->mat1a + pos.y * fo->mat1b + 1 * fo->mat1c;
pos = apply(pos, fo);
{
float2 cpos = pos - 0.5f;
// transform pos into radial around origin
float r = native_sqrt(dot(cpos, cpos)), angle = atan2f(cpos.y, cpos.x); /* -pi..pi */
float b = PI / fo->caleid_fac;
int fac = caleid_rand % fo->caleid_fac;
float newangle = (angle + b * fac) * ((randflags & 1)?1:-1);
cpos = r * (float2)(cosf(newangle), sinf(newangle)) + 0.5f;
pos = pos * (1 - fo->caleid_weight) + cpos * fo->caleid_weight;
}
pos = pos.x * fo->mat2a + pos.y * fo->mat2b + 1 * fo->mat2c;
col = (col + fo->color.xyz) * 0.5f;
float2 scaledpos = native_divide(pos + 1.0f, 2.0f) * (float2)(size.x, size.y);
int2 ipos = (int2)((int) scaledpos.x, (int) scaledpos.y);
if ((ipos.x >= 0) & (ipos.x < size.x) & (ipos.y >= 0) & (ipos.y < size.y)) {
int index = ipos.y * size.x + ipos.x;
res[index] += (float4)(col.x, col.y, col.z, 1);
}
}
}";
    auto fixupkernel = "
__kernel void fixup(__global float4* data, const int2 size, const float basefactor) {
int index = get_global_id(0);
float4 col = data[index];
float count = col.w / basefactor;
// pixels that were never hit have col.w == 0; skip the scaling to avoid 0/0
if (col.w > 0.0f) col = col * native_log(count + 1) / col.w;
col = clamp(col, 0.0f, 1.0f);
col.w = 1;
data[index] = col;
}
";
    clCheckRes clGetPlatformIDs(0, null, &int ids);
    auto platforms = new cl_platform_id[] ids;
    clCheckRes clGetPlatformIDs(ids, platforms.ptr, null);
    writeln "$ids platform(s). ";
    // Lists the GPU devices of a platform (count query, then fetch).
    cl_device_id[] getDevices(cl_platform_id platf) {
      int devs;
      clCheckRes clGetDeviceIDs (platf, CL_DEVICE_TYPE_GPU, 0, null, &devs);
      auto devlist = new cl_device_id[] devs;
      clCheckRes clGetDeviceIDs (platf, CL_DEVICE_TYPE_GPU, devs, devlist.ptr, null);
      return devlist;
    }
    auto platf = platforms[0];
    cl_device_id dev = getDevices(platf)[0];
    // Print a handful of device info strings (size query, then fetch).
    for (string devinfo, int enum2) <- [
      ("Extensions"[], CL_DEVICE_EXTENSIONS),
      ("Name"[], CL_DEVICE_NAME),
      ("Profile"[], CL_DEVICE_PROFILE),
      ("Vendor"[], CL_DEVICE_VENDOR),
      ("Version"[], CL_DEVICE_VERSION),
      ("DriverVersion"[], CL_DRIVER_VERSION)]
    {
      int size;
      clCheckRes clGetDeviceInfo (dev, enum2, 0, null, &size);
      scope devstore = new char[] size;
      clCheckRes clGetDeviceInfo (dev, enum2, size, devstore.ptr, int*:null);
      writeln "$devinfo = $devstore ($size)";
    }
    cl_context_properties[] props;
    props ~= CL_CONTEXT_PLATFORM;
    props ~= cl_context_properties: platf;
    ctx = createContext(props, 1, &dev, null);
    writeln "Context created. ";
    queue = clCheckCall!clCreateCommandQueue (ctx, dev, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
    writeln "Command queue created. ";
    writeln "Buffers created. ";
    writeln "Building. ";
    // Compiles one program from source; prints the build log and exits on failure.
    cl_program build(string source) {
      // Each source line is passed as its own zero-terminated string.
      scope sourcelines = [for line <- splitAt(once source, "\n"): line ~ "\n\x00"].eval[];
      // writeln "$(sourcelines.length) lines of source. ";
      scope ptrs = [for line <- sourcelines: line.ptr].eval[];
      auto prog = clCreateProgramWithSource(ctx, sourcelines.length,
        ptrs.ptr, null, null);
      auto err = clBuildProgram (prog, 0, null, "-cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math -Werror -cl-nv-verbose", null x 2);
      int len;
      clGetProgramBuildInfo (prog, dev, CL_PROGRAM_BUILD_LOG, 0, null, &len);
      auto str = new char[] len;
      clGetProgramBuildInfo (prog, dev, CL_PROGRAM_BUILD_LOG, len, str.ptr, null);
      if (err) {
        writeln "Failed to build: $str";
        exit(1);
      } else {
        if (len > 2) writeln "Build log: $str";
      }
      return prog;
    }
    fflame = build fflamekernel;
    fixup = build fixupkernel;
    writeln "Program built. ";
    fflameKernel = clCheckCall!clCreateKernel (fflame, "fflame".ptr);
    fixupKernel = clCheckCall!clCreateKernel (fixup, "fixup".ptr);
    writeln "Kernel created. ";
  }
  // Enqueues one full render into `output`:
  //   zero the accumulation buffer -> fflame kernel -> fixup kernel -> async read-back.
  //   size    - image dimensions in pixels.
  //   threads - global work size of the fflame kernel.
  //   output  - host buffer receiving the result; must hold size.x * size.y entries.
  //   funvec  - device buffer with the function set (see FunFade.upload).
  // Returns (threads * iterations per thread, delegate that blocks until `output` is filled).
  (int, void delegate() wait) calc(vec2i size, int threads, vec4f[] output, cl_mem funvec) {
    auto vec = cacheGetBufferSized(size.(x*y) * size-of vec4f, output.ptr);
    // not supported in my api version o.o
    // clCheckRes clEnqueueFillBuffer (queue, vec, &int zero, size-of int, 0, (ubyte[]:output).length, 0, null, null);
    cl_event zeroOut;
    {
      int len = (ubyte[]:output).length;
      auto zbuf = getZeroBuffer(len);
      clCheckRes clEnqueueCopyBuffer(queue, zbuf, vec, 0, 0, len, 0, null, &zeroOut);
    }
    auto iters = cl_int:512;
    clCheckRes clSetKernelArg (fflameKernel, 0, size-of type-of vec, void*:&vec);
    clCheckRes clSetKernelArg (fflameKernel, 1, size-of type-of funvec, void*:&funvec);
    clCheckRes clSetKernelArg (fflameKernel, 2, size-of type-of size, void*:&size);
    clCheckRes clSetKernelArg (fflameKernel, 3, size-of int, void*:&iters);
    // average number of samples per pixel; the fixup kernel divides the hit count by it
    float basefactor = (threads * iters) * 1f / size.(x * y);
    clCheckRes clSetKernelArg (fixupKernel, 0, size-of type-of vec, void*:&vec);
    clCheckRes clSetKernelArg (fixupKernel, 1, size-of type-of size, void*:&size);
    clCheckRes clSetKernelArg (fixupKernel, 2, size-of float, void*:&basefactor);
    // The queue is out-of-order, so each stage waits explicitly on the previous stage's event.
    clCheckRes clEnqueueNDRangeKernel (queue, fflameKernel, 1, null, [threads].dup.ptr, null, (1, [zeroOut].dup.ptr), &cl_event calcStep);
    clCheckRes clEnqueueNDRangeKernel (queue, fixupKernel , 1, null, [size.(x * y)].dup.ptr, null, (1, [calcStep].dup.ptr), &cl_event fixupStep);
    int workDone = threads * iters;
    // read-back
    auto waitForRead = myAsyncRead(queue, vec, output, fixupStep);
    // Every command that waits on these events has been enqueued by now, so our
    // references can be dropped; OpenCL keeps an event alive while queued
    // commands still depend on it. Without this, three events leak per call.
    clCheckRes clReleaseEvent (zeroOut);
    clCheckRes clReleaseEvent (calcStep);
    clCheckRes clReleaseEvent (fixupStep);
    return (workDone, waitForRead);
  }
}
// Downsamples a supersampled image and writes it as a PNG.
//   buf      - (size * aa) pixels, row-major; only .xyz is read.
//   filename - final path of the PNG.
//   size     - output dimensions in pixels.
//   aa       - supersampling factor per axis; each output pixel averages aa * aa inputs.
// Averaging is done in linear light: inputs are converted sRGB -> linear,
// averaged, and converted back. The PNG is written to a dot-prefixed temporary
// name in the same directory and then renamed over `filename`.
void saveAsPng(vec4f[] buf, string filename, vec2i size, int aa) {
  auto largesize = size * aa;
  scope ubyte[auto~] pngdata;
  using new PNGWriter λ(string s) { pngdata ~= ubyte[]:s; } {
    configure size;
    scope vec4f[auto~] line;
    // see http://excamera.com/sphinx/article-srgb.html
    // The piecewise sRGB curve with offset 0.055 and knees 0.0031308 / 0.04045
    // is only continuous with exponent 2.4; with 2.2 lin2srgb drops at the knee.
    alias a = 0.055, γ = 2.4;
    float lin2srgb(float f) {
      if (f <= 0.0031308) return f * 12.92;
      return (1 + a) * pow(f, 1 / γ) - a;
    }
    float srgb2lin(float f) {
      if (f <= 0.04045) return f * (1 / 12.92f);
      return pow((f + a) / (1 + a), γ);
    }
    vec3f lin2srgb(vec3f v) { return v.(vec3f(lin2srgb x, lin2srgb y, lin2srgb z)); }
    vec3f srgb2lin(vec3f v) { return v.(vec3f(srgb2lin x, srgb2lin y, srgb2lin z)); }
    for int y <- 0..size.y {
      for int x <- 0..size.x {
        vec3f sum;
        // accumulate the aa x aa block belonging to output pixel (x, y)
        for int y2 <- 0..aa for int x2 <- 0..aa {
          int lx = x * aa + x2, ly = y * aa + y2;
          sum += srgb2lin buf[ly * largesize.x + lx].xyz;
        }
        sum /= aa * aa;
        sum = lin2srgb sum;
        line ~= sum.(vec4f(x, y, z, 1));
      }
      writeLine ubyte[]: line[];
      line.clear;
    }
    end;
  }
  // work THAT out
  // (directory of `filename`) / ("." ~ name of `filename` relative to that directory)
  scope tmp_filename = filename.basedir().sub("." ~ filename.relativePathAt filename.basedir());
  writeAll(tmp_filename, pngdata[]);
  rename(tmp_filename, filename);
}
// Entry point. Contains two modes:
//  1. an offline animation renderer (the `if (true)` block) that writes
//     numbered PNG frames forever, and
//  2. an interactive GLFW/OpenGL preview below it.
// The loop inside the `if (true)` block has no exit, so as written the
// interactive part is never reached.
int main() {
auto size = vec2i(1920, 1080) / 1;
auto screensize = vec2i(1600, 900);
int threads = 8192;
if (true) {
// --- offline animation mode ---
auto size = vec2i(1920, 1080), aa = 3;
auto largesize = size * aa;
// Buffers circulate between the two threads: save2calc carries free buffers,
// calc2save carries (frame index, rendered buffer).
auto save2calc = new Channel!vec4f[];
auto calc2save = new Channel!(int, vec4f[]);
for 0..3 save2calc.put(new vec4f[] largesize.(x*y));
// Left-pads `s` with '0' up to `i` characters.
string zeroprefix(string s, int i) { while (s.length < i) s = "0$s"; return s; }
// File name of frame `i`.
string fn(int i) { return "clfflame_anim/frame_"~zeroprefix("$i", 6)~".png"; }
int firstMissing;
// Calculation thread: renders frames, always keeping the next frame enqueued
// while waiting for the current one.
startThread λ{
// fixed seed for the default generator
deflt = getPRNG s => 5;
auto ctx = new CLContext;
onSuccess ctx.fini;
auto fade = new FunFade(numfuns, ctx.ctx);
onSuccess fade.fini;
// Skip frames that already exist on disk, stepping the fade once per skipped frame.
while (fn(firstMissing).exists()) { fade.step; firstMissing ++; }
auto buf = save2calc.take();
int i = firstMissing; // the index that the current buf/wait belongs to
twriteln "2: begin calculation $i";
// Advances the fade, uploads the function set and enqueues one render into `buf`;
// returns the delegate that blocks until `buf` is filled.
void delegate() stepcalc(vec4f[] buf) {
fade.step; fade.upload(ctx.queue);
return ctx.calc(largesize, 2^20, buf, fade.funvec).wait;
}
auto wait = stepcalc(buf);
while (true) {
twriteln "2: request buffer";
auto nbuf = save2calc.take();
auto ni = i + 1;
twriteln "2: begin calculation $ni";
auto nwait = stepcalc(nbuf);
twriteln "2: block for $i";
wait();
twriteln "2: release buffer for $i";
calc2save.put(i, buf);
(i, wait, buf) = (ni, nwait, nbuf);
}
};
auto start = sec();
// Saver loop (main thread): encodes each finished frame and hands the buffer back.
while true {
twriteln "1: request buffer";
(int i, vec4f[] buf) = calc2save.take();
string filename = fn(i);
twriteln "1: generate png data";
saveAsPng(buf,
filename,
size => size, aa => aa);
twriteln "1: release buffer";
save2calc.put(buf);
// frames saved in this run divided by elapsed seconds
float fps = (i - firstMissing + 1) / float:(sec() - start);
twriteln "1: saved $filename, $fps fps, $(fps * 3600) fph";
}
}
// --- interactive preview mode ---
// draw2calc carries buffers/requests to the calculation thread,
// calc2draw carries (rendered buffer, iteration count) back.
auto
draw2calc = new Channel!DrawMessage,
calc2draw = new Channel!(vec4f[], double);
for 0..3 draw2calc.put(new vec4f[] (size[0]*size[1])); // double^Wtriple buffer
// frames rendered since the last report; incremented by the calculation thread, reset by the draw loop
int fps;
auto threadQuit = new Semaphore;
startThread λ{
onExit threadQuit.release;
deflt = getPRNG s => 5;
auto ctx = new CLContext;
onSuccess ctx.fini;
auto fade = new FunFade(numfuns, ctx.ctx);
onSuccess fade.fini;
fade.step; fade.upload(ctx.queue);
auto msg = draw2calc.take();
auto wait = ctx.calc(size, threads, msg, fade.funvec).wait;
// Keep taking messages until one converts to false (see DrawMessage.isValid).
do auto nmsg = draw2calc.take();
while (nmsg) {
// TODO
/*case nmsg of {
Frame x: */
switch DrawMode mode over mode == nmsg.mode {
case DrawMode.Frame:
fade.step;
fade.upload(ctx.queue);
fps ++;
(int totalIters, void delegate() nwait) = ctx.calc(size, threads, nmsg, fade.funvec);
wait(); // wait for previous to complete
calc2draw.put(msg, totalIters);
(msg, wait) = (nmsg, nwait); // rotate over
case DrawMode.Screenshot:
// one-off high-resolution render of the current function set, saved to out.png
auto size = vec2i(1920, 1080), aa = 4;
auto largesize = size * aa;
scope lbuf = new vec4f[] largesize.(x*y);
twriteln "begin calculation";
ctx.calc(largesize, 2^20, lbuf, fade.funvec).wait();
saveAsPng(lbuf, "out.png", size, aa => 4);
writeln "written to out.png";
default: fail "$(nmsg.mode)";
}
}
}
glwindow = new GLFWWindow;
// glwindow.fullscreen = true;
glwindow.setup(screensize);
// Polls the window and handles keys; returns true when Q was pressed.
// W/E halve/double `threads`, T requests a screenshot, Space toggles `pause`.
bool update() {
glwindow.update();
if (key-pressed(Key.Q)) return true;
if (key-pressed(Key.W)) threads = int:(threads / 2);
if (key-pressed(Key.E)) threads = int:(threads * 2);
if (key-pressed(Key.T)) { draw2calc.put DrawMessage:DrawMode.Screenshot; }
if (key-pressed(Key.Space)) pause = !pause;
return false;
}
// Uploads `output` as a float RGBA texture and draws it on a quad covering the
// unit square. The texture is created and deleted on every call; `iters` is not used here.
void draw(vec4f[] output, double iters) using mode GL {
ClearColor (0, 0, 0, 0);
ClearDepth 1;
Enable TEXTURE_2D;
Clear (COLOR_BUFFER_BIT | DEPTH_BUFFER_BIT);
MatrixMode PROJECTION; LoadIdentity;
glOrtho(0, 1, 1, 0, -1, 1);
MatrixMode MODELVIEW; LoadIdentity;
Color3f White;
GenTextures(1, &GLuint datatex);
onSuccess DeleteTextures(1, &datatex);
using TEXTURE_2D {
BindTexture(datatex);
TexParameteri (TEXTURE_MAX_LEVEL, 0);
TexParameteri (TEXTURE_MIN_FILTER, NEAREST);
TexImage2D (0, RGBA, size, 0, RGBA, FLOAT, output.ptr);
}
using Quads {
TexCoord2f(0, 0); Vertex2f(0, 0);
TexCoord2f(0, 1); Vertex2f(0, 1);
TexCoord2f(1, 1); Vertex2f(1, 1);
TexCoord2f(1, 0); Vertex2f(1, 0);
}
return;
}
auto lastsec = sec();
auto start = sec();
int targetfps = 30;
// Draw loop: show each finished buffer, hand it back, report throughput once per second.
while !update() {
(vec4f[] buf, double iters) = calc2draw.take();
draw(buf, iters);
draw2calc.put(buf);
if (sec() - lastsec > 1) {
writeln "$fps fps - $(iters*fps) steps/s";
// we took that much fps to do threads tasks
// so threads*fps is the load that takes 1s
/*auto oldthreads = threads;
threads = (threads * fps) / targetfps;
writeln "adjust to $threads from $oldthreads due to $fps <> $targetfps";*/
lastsec = sec;
fps = 0;
}
// if (sec() - start > 5) exit(0);
}
writeln "Cleaning up calc thread.";
// a null buffer makes the calculation thread's message loop terminate
draw2calc.put(null);
threadQuit.acquire();
writeln "Exiting.";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment