avesus/common.glsl

## common.glsl
// Default precisions for the shader: highp everywhere when the implementation
// defines GL_FRAGMENT_PRECISION_HIGH, lower precisions otherwise.
# ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
precision highp int;
precision highp sampler2D;
# else
// NOTE(review): COMPUTER_WIDTH is 1280. and pixel coordinates are cast to int
// elsewhere in this code; lowp int / mediump float may be too narrow for that.
// Confirm whether this fallback is ever meant to be exercised.
precision mediump float;
precision lowp int;
precision lowp sampler2D;
# endif

/*

  Maximum _parallel_ IO size is 1024 pixels.
  Think about these as 32,768 output pins,
  and another 32K input pins, being a rough
  equivalent to a 65,536 pins FPGA package.

  A single IO "tile" is fixed size 32x32 pixels
  (on ShaderToy it _should_ be 16x16 pixels,
  because of the use of float texels instead of RGBA8)

  The 32x32 tile embraces the perimeter of the computer core
  and provides IO, beginning from the left side to the bottom
  clockwise.

  Each pixel of the IO tile feeds 32 rows at the left (and at the right),
  and 64 columns at the top and at the bottom. The discrepancy is due
  to triangular shape of the computing elements:

 0   1
________
\  /\  /\   0
 \/__\/__\
 /\  /\  /  1
/__\/__\/
\  /\  /\   2
 \/__\/__\
 /\  /\  /  3
/__\/__\/

  Each triangular computing element uses a single pixel (32 bits) for its state storage.
  See "struct Element" for specifics.

*/

// Depth of the IO FIFO: how long data sits in the IO buffer before it enters
// the computer. The same depth applies to data leaving the computer.
#define IO_FIFO_DEPTH_BITS 32.
 //470.

// Computer width must be a multiple of 64 to match IO surfaces
#define MIN_COMPUTER_WIDTH 64.

// Computer height must be a multiple of 32 to match IO surfaces
#define MIN_COMPUTER_HEIGHT 32.

#define MAX_COMPUTER_WIDTH 8192.
#define MAX_COMPUTER_HEIGHT 8192.

// Size of the computer core, in texels (one triangular element per texel).
// Note that the IO density on the top and bottom edges
// is half that of the left and right edges.
#define COMPUTER_WIDTH 1280.
#define COMPUTER_HEIGHT 640.

// IO size, in bits. The number of "package pins" is twice that.
// The number of IO pixels is 32 times smaller.
# define IO_SIZE ((COMPUTER_WIDTH) + 2.0 * (COMPUTER_HEIGHT))

// 0.03125 == 1/32: each IO pixel carries 32 bits.
# define IO_PIXELS (0.03125 * IO_SIZE)

// Bit-by-bit view of the four bytes of an RGBA texel (4 x 8 = 32 bits).
// Component c of bitN holds bit N of byte c as 0.0 or 1.0; bit0 is the
// least significant bit (weight 1), bit7 the most significant (weight 128).
struct Bitset8Bits {
    vec4 bit0, bit1, bit2, bit3;
    vec4 bit4, bit5, bit6, bit7;
};

// Bit-by-bit view of four 3-bit numbers (3 * 4 bits).
// Component c of bitN holds bit N of number c as 0.0 or 1.0; bit0 has weight 1.
struct Bitset3Bits {
    vec4 bit0, bit1, bit2;
};

// Branch-free comparison: 1.0 when l is greater than r, 0.0 otherwise
// (equal values give 0.0).
float whenGt (float l, float r) {
  float direction = sign(l - r);       // -1.0, 0.0 or +1.0
  return clamp(direction, 0.0, 1.0);   // keep only the "greater" case
}

// Component-wise branch-free comparison: each component is 1.0 where
// l is greater than r and 0.0 elsewhere (equal values give 0.0).
vec4 when_gt (vec4 l, vec4 r) {
  vec4 direction = sign(l - r);        // -1.0, 0.0 or +1.0 per component
  return clamp(direction, 0.0, 1.0);   // keep only the "greater" case
}

//------------------------------------
//
//      texels -> structured data

// Splits four numbers (each expected in 0..7) into their three binary digits.
// Works from the most significant bit down: test against the half-way
// threshold, then remove that bit's weight from the running remainder.
Bitset3Bits unpack_4_numbers (vec4 numbers) {
    Bitset3Bits planes;
    vec4 remainder = numbers;

    planes.bit2 = when_gt(remainder, vec4(3.5));
    remainder -= 4.0 * planes.bit2;

    planes.bit1 = when_gt(remainder, vec4(1.5));
    remainder -= 2.0 * planes.bit1;

    planes.bit0 = when_gt(remainder, vec4(0.5));

    return planes;
}


// Splits four byte values (each expected in 0..255) into eight bit planes.
// Works from bit 7 down to bit 0: compare the running remainder with the
// half-way threshold for that bit, then subtract the bit's weight if set.
Bitset8Bits unpack_4_bytes (vec4 byte) {
    Bitset8Bits planes;
    vec4 remainder = byte;

    planes.bit7 = when_gt(remainder, vec4(127.5));
    remainder -= 128.0 * planes.bit7;

    planes.bit6 = when_gt(remainder, vec4(63.5));
    remainder -= 64.0 * planes.bit6;

    planes.bit5 = when_gt(remainder, vec4(31.5));
    remainder -= 32.0 * planes.bit5;

    planes.bit4 = when_gt(remainder, vec4(15.5));
    remainder -= 16.0 * planes.bit4;

    planes.bit3 = when_gt(remainder, vec4(7.5));
    remainder -= 8.0 * planes.bit3;

    planes.bit2 = when_gt(remainder, vec4(3.5));
    remainder -= 4.0 * planes.bit2;

    planes.bit1 = when_gt(remainder, vec4(1.5));
    remainder -= 2.0 * planes.bit1;

    planes.bit0 = when_gt(remainder, vec4(0.5));

    return planes;
}

// The 32 bits of one texel as an indexable array of 0.0 / 1.0 values.
// Layout as filled by unpack_32_bits: indices 0..7 come from the red byte,
// 8..15 from green, 16..23 from blue, 24..31 from alpha, least significant
// bit first within each byte.
struct Array32Bits {
    float bit[32];
};

// Unpacks four byte values (0..255 per component) into a flat array of 32 bits.
// Index = 8 * channel + bitNumber, with channels ordered r, g, b, a and
// bitNumber 0 being the least significant bit of that byte.
Array32Bits unpack_32_bits (vec4 byte) {
    Bitset8Bits planes = unpack_4_bytes(byte);

    // Bit planes in ascending bit order so they can be walked with an index.
    vec4 ordered[8] = vec4[8](
        planes.bit0, planes.bit1, planes.bit2, planes.bit3,
        planes.bit4, planes.bit5, planes.bit6, planes.bit7);

    Array32Bits a;
    for (int i = 0; i < 8; ++i) {
        a.bit[i]      = ordered[i].r;
        a.bit[i + 8]  = ordered[i].g;
        a.bit[i + 16] = ordered[i].b;
        a.bit[i + 24] = ordered[i].a;
    }

    return a;
}

/* -----------------------------------------------------------------------


Figure 1. Wiring permutations. Signals enter from "in" and go to "out".


Each "out" has a D flip-flop in it.
Circuits, composed of many triangles, are globally clocked.

(In hardware implementation, clock domains can be in each Cartilage subtree).

         /\
       in--out
side A /    \   side B
    out      in
     / \    / \
     ^^^in^^out^

       side C


Permutation | Input | Output
----------------------------
            |   A   |   A        /\
      1     |   B   |   B       *  *
            |   C   |   C      /    \
----------------------------   ^^**^^
            |   A   |   A        /\
      2     |   B   |   C       * /\
            |   C   |   B      / / /\
----------------------------   ^^^^^^
            |   A   |   B        /\
      3     |   B   |   A       /^^\
            |   C   |   C      /^^^^\
----------------------------   ^^**^^
            |   A   |   B        /\
      4     |   B   |   C       /^^\
            |   C   |   A      /\  /\
----------------------------   ^^^^^^
            |   A   |   C        /\
      5     |   B   |   A       /\/\
            |   C   |   B      /^/\^\
----------------------------   ^^^^^^
            |   A   |   C        /\
      6     |   B   |   B       /\ *
            |   C   |   A      /\ \ \
----------------------------   ^^^^^^


Figure 2. Switching cause source D flip-flop permutation


     /\    /\
    /  \  /  \
   /__2_\/__3_\
  /\    /\    /\
 / 1\  /  \  /4 \
/____\/____\/____\
     /\    /\
    / 6\  /5 \
   /____\/____\

Only one D flip-flop can drive element's crossbar switch.

The crossbar switch switches between two wirings (from fig.1).

An ordinary crossbar switch switches two channels,
and it switches just between two wiring schemes:


       [A]->----->[C]                     [A]--   -->[C]
              .                                \ /
[CTRL = 0]->...           <=>     [CTRL = 1]->..X
              .                                / \
       [B]->----->[D]                     [B]--   -->[D]


Our triangular element switches three channels,
and it _also_ switches just between two wiring schemes:

   [A in]->  --------  ->[A out]           [A in]->  --------  ->[A out]
            / wiring \                              / wiring \
   [B in]-> | scheme | ->[B out]    <=>    [B in]-> | scheme | ->[B out]
            \ ~ 0 ~  /                              \ ~ 1 ~  /
   [C in]->  --------  ->[C out]           [C in]->  --------  ->[C out]


The difference from ordinary crossbar switch is that
there are 6 wiring schemes to choose from to use as "wiring scheme 0",
and the same 6 wiring schemes to use as "wiring scheme 1".


The selections are stored in "wiring0" and "wiring1".

----------------------------------------------------------------------------*/


// State of one triangular computing element, stored in a single 32-bit texel.
// The first 16 bits are runtime state (fully reconfigurable); the other
// 16 bits are the reconfiguration FIFO.
struct Element {

    // --- 3 bits: D flip-flops holding the three outputs A, B and C (0..1) ---
    lowp float outA, outB, outC;

    // --- 9 bits of wiring and control configuration ---

    // Wiring scheme (see fig. 1), a number 1..6:
    // wiring0 applies while the switching cause bit is 0, wiring1 while it is 1.
    float wiring0, wiring1;

    // Which neighbouring D flip-flop (see fig. 2) is the switching cause, 1..6.
    float switchCtrl;

    // --- 4 bits of Cartilage tree engine ---

    // Pointer to the owner element: A, B or C (1..3).
    float parent;

    // End-of-subtree markers, relative to parent (0..1).
    // When a neighbour's parent points to this element, that neighbour
    // becomes a subtree this element can reconfigure.
    lowp float leftIsLeaf, rightIsLeaf;

    // --- 16 reconfiguration FIFO bits (double buffer / FIFO, 16 bits deep) ---
    //
    // When reconfiguration starts, the old state is shifted out to the parent
    // bit by bit while the new state is pushed in. As soon as the final
    // configuration bit is captured from the parent's D flip-flop it is used
    // to reconfigure the element; this is due to the 32-bit total limit.
    // A separate mode flag ("isReconfiguring") was considered but is not stored.
    lowp float fifo0,  fifo1,  fifo2,  fifo3;
    lowp float fifo4,  fifo5,  fifo6,  fifo7;
    lowp float fifo8,  fifo9,  fifo10, fifo11;
    lowp float fifo12, fifo13, fifo14, fifo15;

};

// Decodes a normalized RGBA texel (components in 0..1, i.e. byte / 255)
// into an Element.
//
// Bit layout, least significant bit first within each byte:
//   red   : outA, outB, outC, wiring0 (3 bits), wiring1 bits 0..1
//   green : wiring1 bit 2, switchCtrl (3 bits), parent (2 bits),
//           leftIsLeaf, rightIsLeaf
//   blue  : fifo0..fifo7
//   alpha : fifo8..fifo15
Element elementFromTexel (in vec4 texel) {

    Bitset8Bits b = unpack_4_bytes(texel * 255.0);
    Element e;

    // Red byte
    e.outA = b.bit0.r;
    e.outB = b.bit1.r;
    e.outC = b.bit2.r;
    e.wiring0 = b.bit3.r + 2.0 * b.bit4.r + 4.0 * b.bit5.r;

    // wiring1 straddles the red/green byte boundary
    e.wiring1 = b.bit6.r + 2.0 * b.bit7.r + 4.0 * b.bit0.g;

    // Green byte
    e.switchCtrl = b.bit1.g + 2.0 * b.bit2.g + 4.0 * b.bit3.g;
    e.parent = b.bit4.g + 2.0 * b.bit5.g;
    e.leftIsLeaf = b.bit6.g;
    e.rightIsLeaf = b.bit7.g;

    // Blue byte: first half of the FIFO
    e.fifo0 = b.bit0.b;
    e.fifo1 = b.bit1.b;
    e.fifo2 = b.bit2.b;
    e.fifo3 = b.bit3.b;
    e.fifo4 = b.bit4.b;
    e.fifo5 = b.bit5.b;
    e.fifo6 = b.bit6.b;
    e.fifo7 = b.bit7.b;

    // Alpha byte: second half of the FIFO
    e.fifo8 = b.bit0.a;
    e.fifo9 = b.bit1.a;
    e.fifo10 = b.bit2.a;
    e.fifo11 = b.bit3.a;
    e.fifo12 = b.bit4.a;
    e.fifo13 = b.bit5.a;
    e.fifo14 = b.bit6.a;
    e.fifo15 = b.bit7.a;

    return e;
}


//------------------------------------
//
//       structured data -> texels
//

// Recombines eight bit planes into four byte values (0..255 per component).
// Each plane is weighted by its power of two; the terms are accumulated
// from bit 0 upwards.
vec4 pack_4_bytes (Bitset8Bits state) {

  vec4 bytes = state.bit0;

  bytes += 2.0 * state.bit1;
  bytes += 4.0 * state.bit2;
  bytes += 8.0 * state.bit3;
  bytes += 16.0 * state.bit4;
  bytes += 32.0 * state.bit5;
  bytes += 64.0 * state.bit6;
  bytes += 128.0 * state.bit7;

  return bytes;
}

// Encodes an Element into a normalized RGBA texel (byte / 255 per component).
//
// Each bit plane is built in one go as vec4(red, green, blue, alpha):
//   red   : outA, outB, outC, wiring0 (3 bits), wiring1 bits 0..1
//   green : wiring1 bit 2, switchCtrl (3 bits), parent (2 bits),
//           leftIsLeaf, rightIsLeaf
//   blue  : fifo0..fifo7
//   alpha : fifo8..fifo15
vec4 texelFromElement (in Element elt) {

    // Binary digits of the four multi-bit fields:
    // .r = wiring0, .g = wiring1, .b = switchCtrl, .a = parent
    Bitset3Bits parameters = unpack_4_numbers(vec4(elt.wiring0, elt.wiring1, elt.switchCtrl, elt.parent));

    Bitset8Bits bits;

    bits.bit0 = vec4(elt.outA,          parameters.bit2.g, elt.fifo0, elt.fifo8);
    bits.bit1 = vec4(elt.outB,          parameters.bit0.b, elt.fifo1, elt.fifo9);
    bits.bit2 = vec4(elt.outC,          parameters.bit1.b, elt.fifo2, elt.fifo10);
    bits.bit3 = vec4(parameters.bit0.r, parameters.bit2.b, elt.fifo3, elt.fifo11);
    bits.bit4 = vec4(parameters.bit1.r, parameters.bit0.a, elt.fifo4, elt.fifo12);
    bits.bit5 = vec4(parameters.bit2.r, parameters.bit1.a, elt.fifo5, elt.fifo13);
    bits.bit6 = vec4(parameters.bit0.g, elt.leftIsLeaf,    elt.fifo6, elt.fifo14);
    bits.bit7 = vec4(parameters.bit1.g, elt.rightIsLeaf,   elt.fifo7, elt.fifo15);

    return pack_4_bytes(bits) / 255.0;
}

## display.glsl
// Standard 2D rotation matrix for angle "a" (radians).
mat2 rot2(in float a){
    float s = sin(a);
    float c = cos(a);
    return mat2(c, -s,
                s,  c);
}

// Unsigned distance to the segment joining "a" and "b".
// Both arguments are vectors measured from the sample point, so the result
// is the length of the closest point on the segment.
float distLine (vec2 a, vec2 b) {

    vec2 ab = a - b;

    // Parameter of the closest point along the segment, limited to its ends.
    float h = clamp(dot(a, ab) / dot(ab, ab), 0.0, 1.0);

    return length(a - ab * h);
}

// Produces the display colour for one screen pixel.
//
// Currently the result is a grey level equal to the average of the three
// output flip-flops (outA, outB, outC) of the element sampled from iChannel0.
// The triangular-grid overlay is still computed below (uv, p, tri, d5, sf)
// but is not used by the returned colour; the lines that consumed it are
// commented out.
vec4 render (in vec2 fragCoord) {

    // Unused at the moment; only referenced by commented-out code below.
    vec4 fragColor = vec4(0.0);

    // Use 1-byte color values for compatibility with mobile
    // vec4 gpgpu_data = texture(iChannel0, 0.125 * fragCoord/iResolution.xy);
    // The 0.5 factor means only the [0, 0.5] x [0, 0.5] part of the buffer's
    // UV range is shown, stretched over the whole screen.
    vec4 gpgpu_data = texture(iChannel0, 0.5 * fragCoord/iResolution.xy);

    Element elt = elementFromTexel(gpgpu_data);

    // iFrame
    // iChannel0
    // iMouse zw click


    // Time varying pixel color
    //vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));

    // fragColor = vec4(fragCoord.x - iMouse.x, fragCoord.y - iMouse.y, gpgpu_data.x, 1.0);


    // Screen coordinates. I've put a cap on the fullscreen resolution to stop
    // the pattern looking too blurred out.
	vec2 uv = (fragCoord - iResolution.xy*.5)/min(650., iResolution.y);

    // Normalized pixel coordinates (from 0 to 1)
    //vec2 uv = fragCoord/iResolution.xy;

    // Position with some scrolling, and screen rotation to level the pattern.
    vec2 p = rot2(3.14159/12.)*uv + vec2(.8660254, .5)*1./16.;

    // Scaling constant.
    //const float gSc = 64.0;//0.25;//8.0;
    const float gSc = 8.0;
    p *= gSc;

    vec2 s = floor(p + (p.x + p.y) * 0.36602540378); // Skew the current point.

    p -= s - (s.x + s.y) * .211324865; // Use it to attain the vector to the base vertex (from p).

    // Determine which triangle we're in. Much easier to visualize than the 3D version.
    float i = p.x < p.y? 1.0 : 0.0; // Apparently, faster than: i = step(p.y, p.x);
    vec2 ioffs = vec2(1.0 - i, i);

    // Vectors to the other two triangle vertices.
    vec2 ip0 = vec2(0);
    vec2 ip1 = ioffs - 0.2113248654;
    vec2 ip2 = vec2(0.577350269);

    // Displaying the 2D simplex grid. Basically, we're rendering lines between
    // each of the three triangular cell vertices to show the outline of the
    // cell edges.
    float tri = min(min(distLine(p - ip0, p - ip1), distLine(p - ip1, p - ip2)),
                  distLine(p - ip2, p - ip0));
    // Adding the triangle grid to the d5 distance field value.
    float d5 = min(1e5, tri);

    // Initial color. Overwritten below before it is used.
    vec3 col = vec3(0.5, 1.0, 1.0);

    // Smoothing factor. Only referenced by the commented-out overlay code.
    float sf = .004;


    // Triangle grid overlay.
    d5 /= gSc;
    //col = mix(vec3(0.33333 * (elt.outA + elt.outB + elt.outC)), vec3(1.0, 1.0, 1.0), (1. - smoothstep(0., sf, d5)) * 0.35);

    // Active output: grey level = average of the three output bits.
    col = vec3(0.33333 * (elt.outA + elt.outB + elt.outC));
    //col = vec3(0.33333 * (elt.outA + elt.outB + elt.outC), 0.33333 * (elt.outA + elt.outB + elt.outC), elt.switchCtrl / 6.0);

    //col = vec3(elt.outB, elt.outA, elt.outC);

    return vec4(col, 1.0);


    //fragColor = vec4(gpgpu_data.rgba);//, 1.0);

    //return vec4(mix(col, vec3(0.), (1. - smoothstep(0., sf, fragColor.arg))), 0.95);

}

// Display pass entry point: writes the colour computed by render()
// for this pixel into fragColor.
void mainImage (out vec4 fragColor, in vec2 fragCoord) {

    fragColor = render(fragCoord);
}

## gpgpu.glsl
// Random number generator. Borrowed from https://www.shadertoy.com/view/wltcRS
//
// Usage:

//   rng_initialize(fragCoord, iFrame);

//   fragColor = rand4();


// Internal RNG state, set by rng_initialize():
//   s0    - state advanced by rand(), rand2(), rand3() and rand4()
//   s1    - state advanced by shift2()
//   pixel - integer pixel coordinate, used by shift2()
uvec4 s0, s1;
ivec2 pixel;

// Seeds the RNG for one pixel and one frame.
//   p     - pixel coordinate (truncated to integers)
//   frame - frame counter
// Must be called before any of rand*() / shift2().
void rng_initialize(vec2 p, int frame)
{
    //blue noise seed: depends on the frame only
    s1 = uvec4(frame, frame*15843, frame*31 + 4566, frame*2345 + 58585);

    //white noise seed: depends on pixel position and frame
    uint px = uint(p.x);
    uint py = uint(p.y);
    s0 = uvec4(px, py, uint(frame), px + py);

    pixel = ivec2(p);
}

// https://www.pcg-random.org/
// Advances a 4-component hash state in place: one linear step, a round of
// cross-component mixing, an xor-shift, then a second mixing round.
// The order of the component updates matters; each one uses the result
// of the previous.
void pcg4d(inout uvec4 v)
{
    v = v * 1664525u + 1013904223u;

    v.x += v.y * v.w;
    v.y += v.z * v.x;
    v.z += v.x * v.y;
    v.w += v.y * v.z;

    v ^= v >> 16u;

    v.x += v.y * v.w;
    v.y += v.z * v.x;
    v.z += v.x * v.y;
    v.w += v.y * v.z;
}

// Advances s0 and returns one random float in 0..1.
float rand()
{
    pcg4d(s0);

    float range = float(0xffffffffu);
    return float(s0.x) / range;
}

// Advances s0 and returns two random floats in 0..1.
vec2 rand2()
{
    pcg4d(s0);

    float range = float(0xffffffffu);
    return vec2(s0.xy) / range;
}

// Advances s0 and returns three random floats in 0..1.
vec3 rand3()
{
    pcg4d(s0);

    float range = float(0xffffffffu);
    return vec3(s0.xyz) / range;
}

// Advances s0 and returns four random floats in 0..1.
vec4 rand4()
{
    pcg4d(s0);

    float range = float(0xffffffffu);
    return vec4(s0) / range;
}

//random blue noise sampling pos
// Advances s1 and returns the current pixel shifted by a random offset,
// wrapped with "% 1024".
ivec2 shift2()
{
    pcg4d(s1);

    uvec2 jitter = s1.xy % 0x0fffffffu;   // keep the offset positive as an int
    ivec2 shifted = pixel + ivec2(jitter);

    return shifted % 1024;
}


/*

 IO is organized in blocks of 32x32 texels,
 only one block is read at a time,
 and only one block is written at a time.

 Memory organization (FIFO depth 3 bits):


 /^^^^^^^^^^^^^^T^^^^^^^^^^^^^^^^^^^^^^^T^^^^^^^^^^^^^^^^^^^^^^^T^^^^^\
 | computer's   | input (32x32 texels)  | input (32x32 texels)  | ... |
 |              |-----------------------|-----------------------| ... |
 |   core       | input (32x32 texels)  | output (32x32 texels) | ... |
 |              |-----------------------|-----------------------| ... |
 |--------------| output (32x32 texels) | output (32x32 texels) | ... |
 |              \-----------------------|-----------------------/ ... |
 |                                                                    |
 \____________________________________________________________________/

Note that computer's core size is a multiple of 32, due to simultaneous use
of the entire 32-bits slice within a texel of IO.
IO happens only on the edges of the core.

*/

// Maps a fragment coordinate to the 32x32 IO block it belongs to.
//
//   fragCoord - pixel coordinate of the fragment
//   ioBlockX  - out: horizontal position inside the block as a fraction in [0, 1)
//   ioBlockY  - out: vertical position inside the block as a fraction in [0, 1)
//
// Returns -1.0 for fragments inside the computer core, otherwise the block
// number. Blocks are numbered row by row: first the blocks to the right of
// the core (if at least one fits), then the blocks in the strip starting at
// y = COMPUTER_HEIGHT. Fragments beyond the last whole block, or in a block
// numbered past the 2 * IO_FIFO_DEPTH_BITS blocks in use, are discarded.
float ioBlockNumber (
    in vec2 fragCoord,
    out float ioBlockX,
    out float ioBlockY) {

    // Give the out parameters a defined value on every path, including the
    // early return for core fragments.
    ioBlockX = 0.0;
    ioBlockY = 0.0;

    // exclude the core
    if (fragCoord.x < COMPUTER_WIDTH && fragCoord.y < COMPUTER_HEIGHT) {
        return -1.0;
    }

    // Whole blocks that fit to the right of the core, and across the full width
    float blocksOnTheRight = floor((iResolution.x - COMPUTER_WIDTH) / 32.0);
    float blocksWidth = floor(iResolution.x / 32.0);
    float totalBlocksOnTheRight = blocksOnTheRight * COMPUTER_HEIGHT / 32.0;

    // Decimal part here is texel coordinate of the IO block
    float row;
    float column;

    // IO block number
    float block;

    if (blocksOnTheRight > 0.0 && fragCoord.y < COMPUTER_HEIGHT) {

        // Area to the right of the core
        column = (fragCoord.x - COMPUTER_WIDTH) / 32.0;
        if (column > blocksOnTheRight) {
            // completely ignore that area
            discard;
        }

        row = fragCoord.y / 32.0;
        block = floor(row) * blocksOnTheRight + floor(column);

    } else {

        // Full-width strip. The rest is always attempted to fill by blocks.
        // Otherwise, nothing will work.
        column = fragCoord.x / 32.0;
        if (column > blocksWidth) {
            discard;
        }
        row = (fragCoord.y - COMPUTER_HEIGHT) / 32.0;

        // Numbering continues after the right-hand blocks when there are any.
        float firstBlock = blocksOnTheRight > 0.0 ? totalBlocksOnTheRight : 0.0;
        block = firstBlock + floor(row) * blocksWidth + floor(column);
    }

    // Input blocks plus output blocks
    const float totalBlocks = IO_FIFO_DEPTH_BITS * 2.0;
    if (block > totalBlocks - 1.0) {
        discard;
    }

    ioBlockY = row - floor(row);
    ioBlockX = column - floor(column);

    return block;
}


// Value to store into an output IO block texel.
// Currently just samples iChannel0 at (ioBlockX, ioBlockY); the ioBlock
// argument is not used yet.
vec4 writeIoBlock(
    float ioBlock,
    float ioBlockX,
    float ioBlockY) {

    vec2 samplePos = vec2(ioBlockX, ioBlockY);

    return texture(iChannel0, samplePos);
}

// One-hot flags (0.0 or 1.0) for the six direction codes 1..6,
// listed in code order: left = 1 ... leftBottom = 6.
struct Direction {
  lowp float left, leftTop, rightTop;
  lowp float right, rightBottom, leftBottom;
};

// Converts a direction code (1..6) into one-hot flags.
// Each flag is 1.0 when dir lies strictly between code - 0.5 and code + 0.5;
// any other value of dir leaves all flags at 0.0.
Direction isDirection (float dir) {

  return Direction(
    whenGt(dir, 0.5) * whenGt(1.5, dir),   // left
    whenGt(dir, 1.5) * whenGt(2.5, dir),   // leftTop
    whenGt(dir, 2.5) * whenGt(3.5, dir),   // rightTop
    whenGt(dir, 3.5) * whenGt(4.5, dir),   // right
    whenGt(dir, 4.5) * whenGt(5.5, dir),   // rightBottom
    whenGt(dir, 5.5) * whenGt(6.5, dir)    // leftBottom
  );
}

// Texel offset from an element to the element selected by direction code
// "dir" (1..6). Exactly one term is non-zero for a valid code; any other
// value yields vec2(0.0).
//
// The return type deliberately carries no lowp qualifier: the offsets
// reach +/-2.0, which is not inside the range guaranteed for lowp floats.
vec2 ctrlOffset (float dir) {

  return vec2(-2.0,  0.0) * whenGt(dir, 0.5) * whenGt(1.5, dir) +
         vec2(-1.0, -1.0) * whenGt(dir, 1.5) * whenGt(2.5, dir) +
         vec2( 1.0, -1.0) * whenGt(dir, 2.5) * whenGt(3.5, dir) +
         vec2( 2.0,  0.0) * whenGt(dir, 3.5) * whenGt(4.5, dir) +
         vec2( 1.0,  1.0) * whenGt(dir, 4.5) * whenGt(5.5, dir) +
         vec2(-1.0,  1.0) * whenGt(dir, 5.5) * whenGt(6.5, dir);
}

// Switching cause bit for a triangle pointing down.
// Picks one output of switchingElement according to the direction code:
// outB for codes 1..2, outC for codes 3..4, outA for codes 5..6.
// Returns true when the selected output is set.
bool triDownSwitchingCauseBit (float dir, in Element switchingElement) {

    float viaB = whenGt(switchingElement.outB,  0.5) * whenGt(dir, 0.5) * whenGt(2.5, dir);
    float viaC = whenGt(switchingElement.outC,  0.5) * whenGt(dir, 2.5) * whenGt(4.5, dir);
    float viaA = whenGt(switchingElement.outA,  0.5) * whenGt(dir, 4.5) * whenGt(6.5, dir);

    return (viaB + viaC + viaA) > 0.5;
}

// Switching cause bit for a triangle pointing up.
// Picks one output of switchingElement according to the direction code:
// outA for codes 2..3, outB for codes 4..5, outC for codes 6 and 1.
// Returns true when the selected output is set.
bool triUpSwitchingCauseBit (float dir, in Element switchingElement) {

    float viaA = whenGt(switchingElement.outA,  0.5) *  whenGt(dir, 1.5) * whenGt(3.5, dir);
    float viaB = whenGt(switchingElement.outB,  0.5) *  whenGt(dir, 3.5) * whenGt(5.5, dir);

    // outC serves the two codes at either end of the range (6 and 1).
    float codeIs6 = whenGt(dir, 5.5) * whenGt(6.5, dir);
    float codeIs1 = whenGt(dir, 0.5) * whenGt(1.5, dir);
    float viaC = whenGt(switchingElement.outC,  0.5) * (codeIs6 + codeIs1);

    return (viaA + viaB + viaC) > 0.5;
}

// Routes three input signals to the three outputs according to a wiring
// permutation number (1..6). The result is (outA, outB, outC); a perm
// outside 1..6 gives all zeros.
//
//   perm   - wiring permutation number
//   lr, rl - the two side inputs. If the triangle points down, lr is the
//            left input and rl the right one; if it points up they swap.
//   middle - input from the vertical neighbour
lowp vec3 outFromPerm (float perm, float lr, float rl, float middle) {

    // One-hot selectors for each permutation number
    float is1 = whenGt(perm, 0.5) * whenGt(1.5, perm);
    float is2 = whenGt(perm, 1.5) * whenGt(2.5, perm);
    float is3 = whenGt(perm, 2.5) * whenGt(3.5, perm);
    float is4 = whenGt(perm, 3.5) * whenGt(4.5, perm);
    float is5 = whenGt(perm, 4.5) * whenGt(5.5, perm);
    float is6 = whenGt(perm, 5.5) * whenGt(6.5, perm);

    // The first output only depends on which pair the permutation falls in
    float is1or2 = whenGt(perm, 0.5) * whenGt(2.5, perm);
    float is3or4 = whenGt(perm, 2.5) * whenGt(4.5, perm);
    float is5or6 = whenGt(perm, 4.5) * whenGt(6.5, perm);

    float first = middle * is1or2 + rl * is3or4 + lr * is5or6;

    float second = rl * is1 + lr * is2 + middle * is3
                 + lr * is4 + middle * is5 + rl * is6;

    float third = lr * is1 + rl * is2 + lr * is3
                + middle * is4 + rl * is5 + middle * is6;

    return vec3(first, second, third);
}


// Outputs of an up-pointing triangle for wiring permutation "perm".
// Uses right.outC, left.outB and middle.outA as inputs. In the right-most
// column of the core the right input is replaced by a constant 1.0 standing
// in for an IO bit. The left edge has no special handling yet.
lowp vec3 triUpOutFromPerm (float perm, in Element left, in Element right, in Element middle, in vec2 fragCoord) {

    // The left-edge test comes first, exactly as a guard for the right-edge test.
    bool onLeftEdge = fragCoord.x < 0.5;
    bool onRightEdge = !onLeftEdge && fragCoord.x > (COMPUTER_WIDTH - 1.0);

    lowp float rightInput = right.outC;
    if (onRightEdge) {
        rightInput = 1.0;
    }

    return outFromPerm(perm, rightInput, left.outB, middle.outA);
}


// Computes the next state of one element of the computer core.
//
//   old       - the element's current state
//   fragCoord - the element's pixel coordinate
//   fifoRwPtr - current IO FIFO position, used to pick the IO column to read
//
// Steps: read the switching-cause element and the horizontal neighbours,
// rotate the parent pointer, choose wiring0 or wiring1 depending on the
// switching cause bit, then route the inputs to outA/outB/outC with
// outFromPerm(). In the first and last column of the core the missing
// neighbour is replaced by a bit read from the IO area.
// Only parent and the three outputs change; every other field is copied
// from "old".
Element computeElement (in Element old, in vec2 fragCoord, float fifoRwPtr) {

    Element elt = old;

    // Switching cause bit:
    // the element at the offset selected by switchCtrl drives the wiring choice
    Element switchingCause = elementFromTexel(texture(iChannel0, (fragCoord + ctrlOffset(elt.switchCtrl))/iResolution.xy));

    // Fetch neighbor elements or IO surface bits
    // (both neighbours are fetched even in the edge columns, where one of
    // them is not used)
    vec4 gpgpu_data_left = texture(iChannel0, (fragCoord - vec2(1.0, 0.0))/iResolution.xy);
    vec4 gpgpu_data_right = texture(iChannel0, (fragCoord + vec2(1.0, 0.0))/iResolution.xy);

    Element left = elementFromTexel(gpgpu_data_left);
    Element right = elementFromTexel(gpgpu_data_right);

    // Rotate parent pointer: incremented each step, wrapping to 0.0 after 3.0
    elt.parent = old.parent + 1.0 > 3.5 ? 0.0 : old.parent + 1.0;

    // Parity of the pixel column and row; equal parities mean the triangle points down
    lowp vec2 odd = vec2(int(fragCoord.x) % 2, int(fragCoord.y) % 2);


    // lowp vec2 switchingCauseOffset;

    if (abs(odd.x - odd.y) < 0.5) {

        /* Both this triangle and switchingCause point down:
           ______
           \  A /
           C\  /B
             \/
        */

        // Vertical neighbour is the texel at y - 1
        vec4 gpgpu_data_middle = texture(iChannel0, (fragCoord + vec2(0.0, -1.0))/iResolution.xy);
        Element middle = elementFromTexel(gpgpu_data_middle);

        lowp vec3 res;

        // wiring1 when the switching cause bit is set, wiring0 otherwise
        float perm = triDownSwitchingCauseBit(elt.switchCtrl, switchingCause)
            ? elt.wiring1
            : elt.wiring0;

        if (fragCoord.x < 1.0) {
            // Left edge: the left input comes from IO instead of a neighbour

            vec4 gpgpu_data_io = texture(iChannel0,
                // TODO: read actual IO coordinate!!! (this is a fake rn)
                vec2(
                    fifoRwPtr * 32.0 + COMPUTER_WIDTH + (fragCoord.y / 32.0),
                    0.0

                ) / iResolution.xy);

            Array32Bits io_bits = unpack_32_bits(gpgpu_data_io * 255.0);

            // One IO texel carries 32 bits; pick the one for this row
            lowp float ioBit = io_bits.bit[int(fragCoord.y) % 32];

            res = outFromPerm(perm, ioBit, right.outB, middle.outA);

        } else if (fragCoord.x > (COMPUTER_WIDTH - 1.0)) {
            // Right edge: the right input comes from IO instead of a neighbour
            vec4 gpgpu_data_io = texture(iChannel0,
                // TODO: read actual IO coordinate!!! (this is a fake rn)
                vec2(
                    fifoRwPtr * 32.0 + COMPUTER_WIDTH + (fragCoord.y / 32.0),
                    1.0

                ) / iResolution.xy);

            Array32Bits io_bits = unpack_32_bits(gpgpu_data_io * 255.0);

            lowp float ioBit = io_bits.bit[int(fragCoord.y) % 32];
            res = outFromPerm(perm, left.outC, ioBit, middle.outA);
        } else {
            // Interior: both side inputs come from neighbours
            res = outFromPerm(perm, left.outC, right.outB, middle.outA);
        }

        elt.outA = res.x;
        elt.outB = res.y;
        elt.outC = res.z;

    } else {

        /* The triangles point up:

             /\
           B/  \C
           /____\
             A        */

        // Vertical neighbour is the texel at y + 1
        vec4 gpgpu_data_middle = texture(iChannel0, (fragCoord + vec2(0.0, 1.0))/iResolution.xy);
        Element middle = elementFromTexel(gpgpu_data_middle);

        lowp vec3 res;

        // wiring1 when the switching cause bit is set, wiring0 otherwise
        float perm = triUpSwitchingCauseBit(elt.switchCtrl, switchingCause)
            ? elt.wiring1
            : elt.wiring0;

        if (fragCoord.x < 1.0) {
            // Left edge: the left input comes from IO instead of a neighbour
            vec4 gpgpu_data_io = texture(iChannel0,
                // TODO: read actual IO coordinate!!! (this is a fake rn)
                vec2(
                    fifoRwPtr * 32.0 + COMPUTER_WIDTH + (fragCoord.y / 32.0),
                    2.0

                ) / iResolution.xy);

            Array32Bits io_bits = unpack_32_bits(gpgpu_data_io * 255.0);

            lowp float ioBit = io_bits.bit[int(fragCoord.y) % 32];
            res = outFromPerm(perm, right.outC, ioBit, middle.outA);

        } else if (fragCoord.x > (COMPUTER_WIDTH - 1.0)) {
            // Right edge: the right input comes from IO instead of a neighbour
            vec4 gpgpu_data_io = texture(iChannel0,
                // TODO: read actual IO coordinate!!! (this is a fake rn)
                vec2(
                    fifoRwPtr * 32.0 + COMPUTER_WIDTH + (fragCoord.y / 32.0),
                    3.0

                ) / iResolution.xy);

            Array32Bits io_bits = unpack_32_bits(gpgpu_data_io * 255.0);

            lowp float ioBit = io_bits.bit[int(fragCoord.y) % 32];
            res = outFromPerm(perm, ioBit, left.outB, middle.outA);

        } else {

            // Interior: both side inputs come from neighbours
            res = outFromPerm(perm, right.outC, left.outB, middle.outA);
        }

        elt.outA = res.x;
        elt.outB = res.y;
        elt.outC = res.z;
    }

    return elt;
}


// Computes the new buffer value for one pixel on every frame after
// initialization.
//
//   - Core pixels: the element is decoded, stepped with computeElement()
//     and encoded again.
//   - Input IO blocks (block number below IO_FIFO_DEPTH_BITS): the block at
//     the external write pointer is refilled with random data (emulating
//     external input); all others keep their contents.
//   - Output IO blocks: the block at the FIFO read/write pointer is written
//     through writeIoBlock(); all others keep their contents.
//   - Anything else outside the core is passed through unchanged.
vec4 update (in vec2 fragCoord) {

    // We use a combined FIFO pointer for both reading and writing
    // when interfacing with the core. Our computer's core never waits.
    // If you want to support asynchronous FIFO with blocking IO,
    // insert special signaling bits.
    float fifoRwPtr = float((iFrame - 2) % int(IO_FIFO_DEPTH_BITS));

    float ioBlockX;
    float ioBlockY;
    float ioBlock = ioBlockNumber(fragCoord, ioBlockX, ioBlockY);

    // Where this pixel itself lives in the buffer
    vec2 selfUv = fragCoord/iResolution.xy;

    if (!(ioBlock > -0.5)) {
        // Not an IO block.
        // Use 1-byte color values for compatibility with mobile
        vec4 gpgpu_data = texture(iChannel0, selfUv);

        if (fragCoord.x > COMPUTER_WIDTH || fragCoord.y > COMPUTER_HEIGHT) {
            // Neither computer core nor IO: leave untouched.
            // Note that we can write in this buffer realtime external inputs
            // (mouse, keyboard, etc.)
            return gpgpu_data;
        }

        return texelFromElement(computeElement(elementFromTexel(gpgpu_data), fragCoord, fifoRwPtr));
    }

    // IO surface is adjusted to the total computer's core size.
    // NOTE(review): ioBlockX / ioBlockY are fractions in [0, 1), so this value
    // stays far below IO_PIXELS - 1 and the discard looks unreachable;
    // presumably a row-major texel index was intended - confirm.
    float ioBlockPixel = 32.0 * ioBlockY + ioBlockX;
    if (ioBlockPixel > (IO_PIXELS - 1.0)) {
        discard;
    }

    bool isInputBlock = ioBlock < IO_FIFO_DEPTH_BITS;

    if (isInputBlock) {
        // Emulate external IO input:
        // (lags by one, so it's on the opposite side of the FIFO)
        float fifoExternalWritePtr = float((int(IO_FIFO_DEPTH_BITS) + iFrame - 3 ) % int(IO_FIFO_DEPTH_BITS));

        if (abs(ioBlock - fifoExternalWritePtr) < 0.5) {
            rng_initialize(fragCoord, iFrame);
            return rand4();
        }

        // do nothing; preserve contents
        return texture(iChannel0, selfUv);
    }

    // Output block
    if (abs(ioBlock - IO_FIFO_DEPTH_BITS - fifoRwPtr) < 0.5) {
        // TODO: read Element's D flip-flop state
        return writeIoBlock(ioBlock - IO_FIFO_DEPTH_BITS, 0.0, 0.0);// ioBlockX, ioBlockY);
    }

    // do nothing; preserve contents
    return texture(iChannel0, selfUv);
}


// Produces the initial buffer value for one pixel (used for the first frames).
//
// Pixels outside the computer core are discarded. Core pixels get a random
// element: the whole texel is randomized, then the configuration fields are
// forced into their valid ranges (wiring0, wiring1, switchCtrl in 1..6,
// parent in 1..3) and the three outputs are re-drawn as independent random
// bits. The FIFO bits and leaf markers keep their random values.
vec4 init (in vec2 fragCoord) {

    if (fragCoord.x > COMPUTER_WIDTH || fragCoord.y > COMPUTER_HEIGHT) {
        // IO blocks are not initialized here.
        discard;
    }

    rng_initialize(fragCoord, iFrame);

    Element elt = elementFromTexel(rand4());

    // The decoded fields are 3-bit (0..7) and 2-bit (0..3) numbers.
    // Clamping before flooring keeps the result inside the valid range
    // for every input value.
    elt.wiring0 = floor(clamp(elt.wiring0, 1.0, 6.0));
    elt.wiring1 = floor(clamp(elt.wiring1, 1.0, 6.0));
    elt.switchCtrl = floor(clamp(elt.switchCtrl, 1.0, 6.0));

    elt.parent = floor(clamp(elt.parent, 1.0, 3.0));

    // Round a second batch of random numbers to 0.0 or 1.0 for the outputs.
    vec4 moreRandomValues = rand4();
    elt.outA = floor(0.5 + moreRandomValues.r);
    elt.outB = floor(0.5 + moreRandomValues.g);
    elt.outC = floor(0.5 + moreRandomValues.b);

    return texelFromElement(elt);
}

// Buffer pass entry point: frames 0 and 1 seed the buffer,
// every later frame advances the simulation by one step.
void mainImage (out vec4 fragColor, in vec2 fragCoord) {

    if (iFrame <= 1) {
        fragColor = init(fragCoord);
        return;
    }

    fragColor = update(fragCoord);
}
	// Default precisions: highp for floats, ints and samplers when the fragment
	// stage advertises GL_FRAGMENT_PRECISION_HIGH; otherwise mediump floats
	// with lowp ints and samplers.
	# ifdef GL_FRAGMENT_PRECISION_HIGH
	precision highp float;
	precision highp int;
	precision highp sampler2D;
	# else
	precision mediump float;
	precision lowp int;
	precision lowp sampler2D;
	# endif

	/*

	Maximum _parallel_ IO size is 1024 pixels.
	Think about these as 32,768 output pins,
	and another 32K input pins, being a rough
	equivalent to a 65,536 pins FPGA package.

	A single IO "tile" is fixed size 32x32 pixels
	(on ShaderToy it _should_ be 16x16 pixels,
	because of the use of float texels instead of RGBA8)

	The 32x32 tile embraces the perimeter of the computer core
	and provides IO, beginning from the left side to the bottom
	clockwise.

	Each pixel of the IO tile feeds 32 rows at the left (and at the right),
	and 64 columns at the top and at the bottom. The discrepancy is due
	to triangular shape of the computing elements:

	 0   1
	________
	\  /\  /\   0
	 \/__\/__\
	 /\  /\  /  1
	/__\/__\/
	\  /\  /\   2
	 \/__\/__\
	 /\  /\  /  3
	/__\/__\/

	Each triangular computing element uses a single pixel (32 bits) for its state storage.
	See "struct Element" for specifics.

	*/

	// Depth, in bits, of the IO FIFO that data passes through before it
	// enters the computer. Data leaving the computer goes through a FIFO
	// of the same depth.
	#define IO_FIFO_DEPTH_BITS 32.
	//470.

	// Computer width must be a multiple of 64 to match IO surfaces
	#define MIN_COMPUTER_WIDTH 64.

	// Computer height must be a multiple of 32 to match IO surfaces
	#define MIN_COMPUTER_HEIGHT 32.

	// Upper bounds for the core size.
	#define MAX_COMPUTER_WIDTH 8192.
	#define MAX_COMPUTER_HEIGHT 8192.

	// Size of the computer core. Note that the top and bottom edges carry
	// half as many IO bits per unit of length as the left and right edges
	// (see IO_SIZE below: the width counts once, the height twice).
	#define COMPUTER_WIDTH 1280.
	#define COMPUTER_HEIGHT 640.

	// IO size, in bits. The number of "package pins" is twice that.
	// The number of IO pixels is 32 times smaller (see IO_PIXELS).
	# define IO_SIZE ((COMPUTER_WIDTH) + 2.0 * (COMPUTER_HEIGHT))

	// IO_SIZE / 32: one IO pixel per 32 IO bits.
	# define IO_PIXELS (0.03125 * IO_SIZE)

	// 32 bits with bit-by-bit access: bitN holds bit N of four bytes at once,
	// one byte per vec4 component (as filled in by unpack_4_bytes).
	struct Bitset8Bits {
	    vec4 bit0, bit1, bit2, bit3;    // low nibble
	    vec4 bit4, bit5, bit6, bit7;    // high nibble
	};

	// 3 * 4 bits: bitN holds bit N of four small numbers at once,
	// one number per vec4 component (as filled in by unpack_4_numbers).
	struct Bitset3Bits {
	    vec4 bit0, bit1, bit2;
	};

	// Branch-free comparison: 1.0 when l > r, otherwise 0.0.
	float whenGt (float l, float r) {
	    // sign() yields -1, 0 or +1; only +1 survives the clamp as 1.0.
	    return clamp(sign(l - r), 0.0, 1.0);
	}

	// Component-wise branch-free comparison: 1.0 where l > r, otherwise 0.0.
	vec4 when_gt (vec4 l, vec4 r) {
	    // sign() yields -1, 0 or +1 per component; only +1 survives the clamp.
	    return clamp(sign(l - r), 0.0, 1.0);
	}

	//------------------------------------
	//
	// texels -> structured data

	// Splits four numbers (one per component) into their three low bits.
	// Bits are peeled off from the most significant one down: each bit is
	// tested against "its weight minus one half" and then subtracted.
	Bitset3Bits unpack_4_numbers (vec4 numbers) {
	    Bitset3Bits planes;
	    vec4 remainder = numbers;

	    planes.bit2 = when_gt(remainder, vec4(3.5));
	    remainder = remainder - 4.0 * planes.bit2;

	    planes.bit1 = when_gt(remainder, vec4(1.5));
	    remainder = remainder - 2.0 * planes.bit1;

	    planes.bit0 = when_gt(remainder, vec4(0.5));

	    return planes;
	}


	// Splits four byte values (0..255, one per component) into eight bit planes.
	// Bits are peeled off from bit 7 down to bit 0: each bit is tested against
	// "its weight minus one half" and then subtracted from the running remainder.
	Bitset8Bits unpack_4_bytes (vec4 byte) {
	    Bitset8Bits planes;
	    vec4 remainder = byte;

	    planes.bit7 = when_gt(remainder, vec4(127.5));
	    remainder = remainder - 128.0 * planes.bit7;

	    planes.bit6 = when_gt(remainder, vec4(63.5));
	    remainder = remainder - 64.0 * planes.bit6;

	    planes.bit5 = when_gt(remainder, vec4(31.5));
	    remainder = remainder - 32.0 * planes.bit5;

	    planes.bit4 = when_gt(remainder, vec4(15.5));
	    remainder = remainder - 16.0 * planes.bit4;

	    planes.bit3 = when_gt(remainder, vec4(7.5));
	    remainder = remainder - 8.0 * planes.bit3;

	    planes.bit2 = when_gt(remainder, vec4(3.5));
	    remainder = remainder - 4.0 * planes.bit2;

	    planes.bit1 = when_gt(remainder, vec4(1.5));
	    remainder = remainder - 2.0 * planes.bit1;

	    planes.bit0 = when_gt(remainder, vec4(0.5));

	    return planes;
	}

	// The 32 bits of one texel as a flat, indexable array of 0.0 / 1.0 values.
	struct Array32Bits {
	    float bit[32];
	};

	// Unpacks four byte values (r, g, b, a) into 32 individually indexable bits.
	// Index layout: 0..7 = red bits 0..7, 8..15 = green, 16..23 = blue,
	// 24..31 = alpha, least significant bit first within each byte.
	Array32Bits unpack_32_bits (vec4 byte) {
	    Bitset8Bits planes = unpack_4_bytes(byte);

	    Array32Bits flat;
	    flat.bit = float[32](
	        // red byte
	        planes.bit0.r, planes.bit1.r, planes.bit2.r, planes.bit3.r,
	        planes.bit4.r, planes.bit5.r, planes.bit6.r, planes.bit7.r,
	        // green byte
	        planes.bit0.g, planes.bit1.g, planes.bit2.g, planes.bit3.g,
	        planes.bit4.g, planes.bit5.g, planes.bit6.g, planes.bit7.g,
	        // blue byte
	        planes.bit0.b, planes.bit1.b, planes.bit2.b, planes.bit3.b,
	        planes.bit4.b, planes.bit5.b, planes.bit6.b, planes.bit7.b,
	        // alpha byte
	        planes.bit0.a, planes.bit1.a, planes.bit2.a, planes.bit3.a,
	        planes.bit4.a, planes.bit5.a, planes.bit6.a, planes.bit7.a
	    );

	    return flat;
	}

	/* -----------------------------------------------------------------------


	Figure 1. Wiring permutations. Signals enter from "in" and go to "out".


	Each "out" has a D flip-flop in it.
	Circuits, composed of many triangles, are globally clocked.

	(In hardware implementation, clock domains can be in each Cartilage subtree).

	/\
	in--out
	side A / \ side B
	out in
	/ \ / \
	^^^in^^out^

	side C


	Permutation | Input | Output
	----------------------------
	            |   A   |   A          /\
	     1      |   B   |   B         *  *
	            |   C   |   C        /    \
	----------------------------     ^^**^^
	            |   A   |   A          /\
	     2      |   B   |   C         * /\
	            |   C   |   B        / / /\
	----------------------------     ^^^^^^
	            |   A   |   B          /\
	     3      |   B   |   A         /^^\
	            |   C   |   C        /^^^^\
	----------------------------     ^^**^^
	            |   A   |   B          /\
	     4      |   B   |   C         /^^\
	            |   C   |   A        /\  /\
	----------------------------     ^^^^^^
	            |   A   |   C          /\
	     5      |   B   |   A         /\/\
	            |   C   |   B        /^/\^\
	----------------------------     ^^^^^^
	            |   A   |   C          /\
	     6      |   B   |   B         /\ *
	            |   C   |   A        /\ \ \
	----------------------------     ^^^^^^




	Figure 2. Switching cause source D flip-flop permutation


	/\ /\
	/ \ / \
	/__2_\/__3_\
	/\ /\ /\
	/ 1\ / \ /4 \
	/____\/____\/____\
	/\ /\
	/ 6\ /5 \
	/____\/____\

	Only one D flip-flop can drive element's crossbar switch.

	The crossbar switch switches between two wirings (from fig.1).

	An ordinary crossbar switch switches two channels,
	and it switches just between two wiring schemes:


	[A]->----->[C] [A]-- -->[C]
	. \ /
	[CTRL = 0]->... <=> [CTRL = 1]->..X
	. / \
	[B]->----->[C] [B]-- -->[D]


	Our triangular element switches three channels,
	and it _also_ switches just between two wiring schemes:

	[A in]->  --------  ->[A out]          [A in]->  --------  ->[A out]
	         / wiring \                             / wiring \
	[B in]-> | scheme | ->[B out]   <=>    [B in]-> | scheme | ->[B out]
	         \ ~ 0 ~  /                             \ ~ 1 ~  /
	[C in]->  --------  ->[C out]          [C in]->  --------  ->[C out]


	The difference from ordinary crossbar switch is that
	there are 6 wiring schemes to choose from to use as "wiring scheme 0",
	and the same 6 wiring schemes to use as "wiring scheme 1".


	The selections are stored in "wiring0" and "wiring1".

	----------------------------------------------------------------------------*/



	// Decoded state of one triangular computing element, stored in one texel.
	struct Element {

	    // ---- 16 bits of runtime state, fully reconfigurable ----

	    // Three D flip-flops holding the states of the outputs A, B and C (0..1 each).
	    lowp float outA;
	    lowp float outB;
	    lowp float outC;

	    // ---- 9 bits of wiring and control configuration ----

	    // Wiring scheme (see fig. 1) used while the switching cause bit is 0. A number 1..6.
	    float wiring0;

	    // Wiring scheme used while the switching cause bit is 1. A number 1..6.
	    float wiring1;

	    // Which D flip-flop is the switching cause (see fig. 2). A number 1..6.
	    float switchCtrl;

	    // ---- 4 bits of Cartilage tree engine ----

	    // Pointer to the owner element - A, B, or C (1..3).
	    float parent;

	    // End-of-subtree markers (relative to parent), 0..1 each.
	    // When a neighbor element's parent points to our element,
	    // that neighbor becomes a subtree we can reconfigure.
	    lowp float leftIsLeaf;
	    lowp float rightIsLeaf;

	    // ---- 16 reconfiguration FIFO bits ----

	    // When reconfiguration starts, the old state is removed and sent to the
	    // parent, bit by bit, while the new state is pushed in. When the final
	    // configuration bit is captured from the parent's D flip-flop, it is
	    // immediately used to reconfigure the element (due to the 32 bits total limit).
	    // A separate mode switcher (lowp float isReconfiguring) is not stored.

	    // 16 bits deep double buffer / FIFO
	    lowp float fifo0, fifo1, fifo2, fifo3;
	    lowp float fifo4, fifo5, fifo6, fifo7;
	    lowp float fifo8, fifo9, fifo10, fifo11;
	    lowp float fifo12, fifo13, fifo14, fifo15;
	};

	// Decodes a normalised RGBA texel (0..1 per channel) into an Element.
	// Layout, least significant bit first:
	//   red   : outA, outB, outC, wiring0 (3 bits), wiring1 bits 0..1
	//   green : wiring1 bit 2, switchCtrl (3 bits), parent (2 bits), leftIsLeaf, rightIsLeaf
	//   blue  : fifo0..fifo7
	//   alpha : fifo8..fifo15
	Element elementFromTexel (in vec4 texel) {

	    // Rescale to byte values before splitting into bit planes.
	    Bitset8Bits plane = unpack_4_bytes(texel * 255.0);

	    Element decoded;

	    // Output flip-flops: red bits 0..2.
	    decoded.outA = plane.bit0.r;
	    decoded.outB = plane.bit1.r;
	    decoded.outC = plane.bit2.r;

	    // Multi-bit fields are reassembled as bit0 + 2 * bit1 + 4 * bit2.
	    decoded.wiring0 = plane.bit3.r + 2.0 * plane.bit4.r + 4.0 * plane.bit5.r;
	    // wiring1 straddles the red/green byte boundary.
	    decoded.wiring1 = plane.bit6.r + 2.0 * plane.bit7.r + 4.0 * plane.bit0.g;
	    decoded.switchCtrl = plane.bit1.g + 2.0 * plane.bit2.g + 4.0 * plane.bit3.g;
	    decoded.parent = plane.bit4.g + 2.0 * plane.bit5.g;

	    // Subtree markers: the top two green bits.
	    decoded.leftIsLeaf = plane.bit6.g;
	    decoded.rightIsLeaf = plane.bit7.g;

	    // FIFO low byte: blue channel.
	    decoded.fifo0 = plane.bit0.b;
	    decoded.fifo1 = plane.bit1.b;
	    decoded.fifo2 = plane.bit2.b;
	    decoded.fifo3 = plane.bit3.b;
	    decoded.fifo4 = plane.bit4.b;
	    decoded.fifo5 = plane.bit5.b;
	    decoded.fifo6 = plane.bit6.b;
	    decoded.fifo7 = plane.bit7.b;

	    // FIFO high byte: alpha channel.
	    decoded.fifo8 = plane.bit0.a;
	    decoded.fifo9 = plane.bit1.a;
	    decoded.fifo10 = plane.bit2.a;
	    decoded.fifo11 = plane.bit3.a;
	    decoded.fifo12 = plane.bit4.a;
	    decoded.fifo13 = plane.bit5.a;
	    decoded.fifo14 = plane.bit6.a;
	    decoded.fifo15 = plane.bit7.a;

	    return decoded;
	}


	//------------------------------------
	//
	// structured data -> texels
	//

	// Recombines eight bit planes into four byte values (one per component):
	// each plane is weighted by its power of two and the results are summed.
	vec4 pack_4_bytes (Bitset8Bits state) {

	    return state.bit0
	         + 2.0 * state.bit1
	         + 4.0 * state.bit2
	         + 8.0 * state.bit3
	         + 16.0 * state.bit4
	         + 32.0 * state.bit5
	         + 64.0 * state.bit6
	         + 128.0 * state.bit7;
	}

	// Encodes an Element into a normalised RGBA texel (0..1 per channel).
	// Each bit plane is assembled as vec4(red bit, green bit, blue bit, alpha bit),
	// using the same layout that elementFromTexel decodes.
	vec4 texelFromElement (in Element elt) {

	    // Components: r = wiring0, g = wiring1, b = switchCtrl, a = parent.
	    Bitset3Bits field = unpack_4_numbers(vec4(elt.wiring0, elt.wiring1, elt.switchCtrl, elt.parent));

	    Bitset8Bits plane;

	    //                red            green             blue       alpha
	    plane.bit0 = vec4(elt.outA,      field.bit2.g,     elt.fifo0, elt.fifo8);
	    plane.bit1 = vec4(elt.outB,      field.bit0.b,     elt.fifo1, elt.fifo9);
	    plane.bit2 = vec4(elt.outC,      field.bit1.b,     elt.fifo2, elt.fifo10);
	    plane.bit3 = vec4(field.bit0.r,  field.bit2.b,     elt.fifo3, elt.fifo11);
	    // Only the two low bits of parent are stored.
	    plane.bit4 = vec4(field.bit1.r,  field.bit0.a,     elt.fifo4, elt.fifo12);
	    plane.bit5 = vec4(field.bit2.r,  field.bit1.a,     elt.fifo5, elt.fifo13);
	    plane.bit6 = vec4(field.bit0.g,  elt.leftIsLeaf,   elt.fifo6, elt.fifo14);
	    plane.bit7 = vec4(field.bit1.g,  elt.rightIsLeaf,  elt.fifo7, elt.fifo15);

	    // Back from byte values to the normalised texel range.
	    return pack_4_bytes(plane) / 255.0;
	}
	// Standard 2D rotation matrix for an angle "a" given in radians.
	mat2 rot2(in float a){
	    float cosine = cos(a);
	    float sine = sin(a);
	    return mat2(cosine, -sine, sine, cosine);
	}

	// Unsigned distance to the segment joining "a" and "b".
	float distLine (vec2 a, vec2 b) {

	    // Vector along the segment, from b towards a.
	    vec2 along = a - b;

	    // Projection parameter of "a" onto the segment, clamped to its extent.
	    float t = clamp(dot(a, along) / dot(along, along), 0.0, 1.0);

	    return length(a - along * t);
	}

	// Image pass: shows the simulation buffer as a grey level equal to the
	// average of each element's three output flip-flops.
	// The triangle-grid overlay is still computed, but its result (d5) is only
	// used by the commented-out colour alternatives below.
	vec4 render (in vec2 fragCoord) {

	    vec4 fragColor = vec4(0.0);

	    // Use 1-byte color values for compatibility with mobile
	    // vec4 gpgpu_data = texture(iChannel0, 0.125 * fragCoord/iResolution.xy);
	    vec4 gpgpu_data = texture(iChannel0, 0.5 * fragCoord/iResolution.xy);

	    Element elt = elementFromTexel(gpgpu_data);

	    // Other inputs available here: iFrame, iChannel0, iMouse (zw = click).

	    // Time varying pixel color
	    //vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));

	    // fragColor = vec4(fragCoord.x - iMouse.x, fragCoord.y - iMouse.y, gpgpu_data.x, 1.0);

	    // Screen coordinates. I've put a cap on the fullscreen resolution to stop
	    // the pattern looking too blurred out.
	    vec2 uv = (fragCoord - iResolution.xy*.5)/min(650., iResolution.y);

	    // Normalized pixel coordinates (from 0 to 1)
	    //vec2 uv = fragCoord/iResolution.xy;

	    // Position with a fixed offset, and screen rotation to level the pattern.
	    // (The two multiplications had been lost, leaving an expression that
	    // does not compile.)
	    vec2 p = rot2(3.14159/12.)*uv + vec2(.8660254, .5)*1./16.;

	    // Scaling constant.
	    //const float gSc = 64.0;//0.25;//8.0;
	    const float gSc = 8.0;
	    p *= gSc;

	    vec2 s = floor(p + (p.x + p.y) * 0.36602540378); // Skew the current point.

	    p -= s - (s.x + s.y) * .211324865; // Use it to attain the vector to the base vertex (from p).

	    // Determine which triangle we're in. Much easier to visualize than the 3D version.
	    float i = p.x < p.y? 1.0 : 0.0; // Apparently, faster than: i = step(p.y, p.x);
	    vec2 ioffs = vec2(1.0 - i, i);

	    // Vectors to the other two triangle vertices.
	    vec2 ip0 = vec2(0);
	    vec2 ip1 = ioffs - 0.2113248654;
	    vec2 ip2 = vec2(0.577350269);

	    // Displaying the 2D simplex grid. Basically, we're rendering lines between
	    // each of the three triangular cell vertices to show the outline of the
	    // cell edges.
	    float tri = min(min(distLine(p - ip0, p - ip1), distLine(p - ip1, p - ip2)),
	                    distLine(p - ip2, p - ip0));
	    // Adding the triangle grid to the d5 distance field value.
	    float d5 = min(1e5, tri);

	    // Initial color.
	    vec3 col = vec3(0.5, 1.0, 1.0);

	    // Smoothing factor.
	    float sf = .004;

	    // Triangle grid overlay.
	    d5 /= gSc;
	    //col = mix(vec3(0.33333 * (elt.outA + elt.outB + elt.outC)), vec3(1.0, 1.0, 1.0), (1. - smoothstep(0., sf, d5)) * 0.35);

	    // Grey level: average of the three output flip-flops.
	    col = vec3(0.33333 * (elt.outA + elt.outB + elt.outC));
	    //col = vec3(0.33333 * (elt.outA + elt.outB + elt.outC), 0.33333 * (elt.outA + elt.outB + elt.outC), elt.switchCtrl / 6.0);

	    //col = vec3(elt.outB, elt.outA, elt.outC);

	    return vec4(col, 1.0);

	    //fragColor = vec4(gpgpu_data.rgba);//, 1.0);

	    //return vec4(mix(col, vec3(0.), (1. - smoothstep(0., sf, fragColor.arg))), 0.95);
	}

	// Image pass entry point: every pixel is produced by render().
	void mainImage (out vec4 fragColor, in vec2 fragCoord) {
	    vec4 shaded = render(fragCoord);
	    fragColor = shaded;
	}
	// Random number generator. Borrowed from https://www.shadertoy.com/view/wltcRS
	//
	// Usage:

	// rng_initialize(fragCoord, iFrame);

	// fragColor = rand4();


	// Internal RNG state, (re)seeded by rng_initialize().
	uvec4 s0;       // advanced by rand(), rand2(), rand3() and rand4()
	uvec4 s1;       // advanced by shift2()
	ivec2 pixel;    // integer pixel coordinate, read by shift2()

	// Seeds both RNG streams from the pixel position and the frame number.
	//   p     - pixel coordinate (fragCoord)
	//   frame - frame counter (iFrame)
	void rng_initialize(vec2 p, int frame)
	{
	    pixel = ivec2(p);

	    //white noise seed
	    s0 = uvec4(p, uint(frame), uint(p.x) + uint(p.y));

	    //blue noise seed
	    // (the multiplications by 15843 and 31 had been lost, leaving the
	    // undeclared identifiers "frame15843" and "frame31")
	    s1 = uvec4(frame, frame*15843, frame*31 + 4566, frame*2345 + 58585);
	}

	// https://www.pcg-random.org/
	// Four-dimensional PCG hash; scrambles "v" in place.
	void pcg4d(inout uvec4 v)
	{
	    // Linear congruential step on every component.
	    v = v * 1664525u + 1013904223u;

	    // Mix the components into each other. The "*" operators had been lost,
	    // leaving invalid swizzles such as "v.yv.w".
	    v.x += v.y*v.w; v.y += v.z*v.x; v.z += v.x*v.y; v.w += v.y*v.z;

	    // Fold the high half of each word into the low half.
	    v = v ^ (v>>16u);

	    // Second mixing round.
	    v.x += v.y*v.w; v.y += v.z*v.x; v.z += v.x*v.y; v.w += v.y*v.z;
	}

	// Advances the white-noise state and returns one value in 0..1.
	float rand()
	{
	    pcg4d(s0);

	    float fullScale = float(0xffffffffu);
	    return float(s0.x) / fullScale;
	}

	// Advances the white-noise state and returns two values in 0..1.
	vec2 rand2()
	{
	    pcg4d(s0);

	    float fullScale = float(0xffffffffu);
	    return vec2(s0.xy) / fullScale;
	}

	// Advances the white-noise state and returns three values in 0..1.
	vec3 rand3()
	{
	    pcg4d(s0);

	    float fullScale = float(0xffffffffu);
	    return vec3(s0.xyz) / fullScale;
	}

	// Advances the white-noise state and returns four values in 0..1.
	vec4 rand4()
	{
	    pcg4d(s0);

	    float fullScale = float(0xffffffffu);
	    return vec4(s0) / fullScale;
	}

	// Random blue noise sampling position: the current pixel shifted by a
	// pseudo-random amount, taken modulo 1024 on both axes.
	ivec2 shift2()
	{
	    pcg4d(s1);

	    // Drop the top bits before converting to signed integers.
	    ivec2 jitter = ivec2(s1.xy % 0x0fffffffu);

	    return (pixel + jitter) % 1024;
	}



	/*

	IO is organized in blocks of 32x32 texels,
	only one block is read at a time,
	and only one block is written at a time.

	Memory organization (FIFO depth 3 bits):


	/^^^^^^^^^^^^^^T^^^^^^^^^^^^^^^^^^^^^^^T^^^^^^^^^^^^^^^^^^^^^^^T^^^^^\
	|  computer's  | input (32x32 texels)  | input (32x32 texels)  | ... |
	|              |-----------------------|-----------------------| ... |
	|     core     | input (32x32 texels)  | output (32x32 texels) | ... |
	|              |-----------------------|-----------------------| ... |
	|--------------| output (32x32 texels) | output (32x32 texels) | ... |
	|              \-----------------------|-----------------------/ ... |
	|                                                                    |
	\____________________________________________________________________/

	Note that computer's core size is a multiple of 32, due to simultaneous use
	of the entire 32-bits slice within a texel of IO.
	IO happens only on the edges of the core.

	*/

	// Maps a pixel to the 32x32 IO block it belongs to.
	//
	//   fragCoord - pixel coordinate
	//   ioBlockX  - out: fractional horizontal position inside the block, 0..1
	//   ioBlockY  - out: fractional vertical position inside the block, 0..1
	//
	// Returns -1.0 for pixels of the computer core, otherwise the block number.
	// Blocks to the right of the core are numbered first (row by row), then the
	// blocks below the core. Pixels beyond the last column of whole blocks, or
	// in a block past the 2 * IO_FIFO_DEPTH_BITS blocks in use, are discarded.
	float ioBlockNumber (
	    in vec2 fragCoord,
	    out float ioBlockX,
	    out float ioBlockY) {

	    // An out parameter that is never written is undefined after the call,
	    // so give both a value before the early return below.
	    ioBlockX = 0.0;
	    ioBlockY = 0.0;

	    // exclude the core
	    if (fragCoord.x < COMPUTER_WIDTH && fragCoord.y < COMPUTER_HEIGHT) {
	        return -1.0;
	    }

	    // Whole blocks that fit to the right of the core, and across the full width.
	    float blocksOnTheRight = floor((iResolution.x - COMPUTER_WIDTH) / 32.0);
	    float blocksWidth = floor(iResolution.x / 32.0);

	    // The fractional part of these is the position inside the IO block.
	    float row;
	    float column;

	    // IO block number
	    float block;

	    if (blocksOnTheRight > 0.0 && fragCoord.y < COMPUTER_HEIGHT) {

	        // Beside the core.
	        column = (fragCoord.x - COMPUTER_WIDTH) / 32.0;
	        if (column > blocksOnTheRight) {
	            // completely ignore that area
	            discard;
	        }

	        row = fragCoord.y / 32.0;
	        block = floor(row) * blocksOnTheRight + floor(column);

	    } else {

	        // The rest is always attempted to fill by blocks. Otherwise, nothing will work.
	        // Numbering continues after the blocks beside the core, if there are any.
	        float blocksBesideCore = blocksOnTheRight > 0.0
	            ? blocksOnTheRight * COMPUTER_HEIGHT / 32.0
	            : 0.0;

	        column = fragCoord.x / 32.0;
	        if (column > blocksWidth) {
	            discard;
	        }

	        row = (fragCoord.y - COMPUTER_HEIGHT) / 32.0;
	        block = blocksBesideCore + floor(row) * blocksWidth + floor(column);
	    }

	    // One set of blocks for input and one for output.
	    const float totalBlocks = IO_FIFO_DEPTH_BITS * 2.0;
	    if (block > totalBlocks - 1.0) {
	        discard;
	    }

	    ioBlockY = row - floor(row);
	    ioBlockX = column - floor(column);

	    return block;
	}


	// Produces the texel for a pixel of an output IO block.
	// Currently this only samples iChannel0 at (ioBlockX, ioBlockY);
	// the ioBlock argument is not used.
	vec4 writeIoBlock(
	    float ioBlock,
	    float ioBlockX,
	    float ioBlockY) {

	    vec2 samplePos = vec2(ioBlockX, ioBlockY);
	    return texture(iChannel0, samplePos);
	}

	// One-hot flags (0.0 or 1.0), one per direction, as filled in by isDirection().
	struct Direction {
	    lowp float left, leftTop, rightTop;
	    lowp float right, rightBottom, leftBottom;
	};

	// Turns a direction number into one-hot flags:
	// 1 = left, 2 = leftTop, 3 = rightTop, 4 = right, 5 = rightBottom, 6 = leftBottom.
	// A flag is 1.0 when dir lies strictly between (n - 0.5) and (n + 0.5).
	Direction isDirection (float dir) {

	    // Lower bounds of the six bands ...
	    float above0 = whenGt(dir, 0.5);
	    float above1 = whenGt(dir, 1.5);
	    float above2 = whenGt(dir, 2.5);
	    float above3 = whenGt(dir, 3.5);
	    float above4 = whenGt(dir, 4.5);
	    float above5 = whenGt(dir, 5.5);

	    // ... and their upper bounds.
	    float below1 = whenGt(1.5, dir);
	    float below2 = whenGt(2.5, dir);
	    float below3 = whenGt(3.5, dir);
	    float below4 = whenGt(4.5, dir);
	    float below5 = whenGt(5.5, dir);
	    float below6 = whenGt(6.5, dir);

	    Direction flags;
	    flags.left = above0 * below1;
	    flags.leftTop = above1 * below2;
	    flags.rightTop = above2 * below3;
	    flags.right = above3 * below4;
	    flags.rightBottom = above4 * below5;
	    flags.leftBottom = above5 * below6;

	    return flags;
	}

	// Pixel offset from an element to the element in direction "dir" (1..6):
	// 1 -> (-2, 0), 2 -> (-1, -1), 3 -> (1, -1), 4 -> (2, 0), 5 -> (1, 1), 6 -> (-1, 1).
	// Any other value gives a zero offset.
	lowp vec2 ctrlOffset (float dir) {

	    // Exactly one of these selectors is 1.0 for dir = 1..6.
	    float is1 = whenGt(dir, 0.5) * whenGt(1.5, dir);
	    float is2 = whenGt(dir, 1.5) * whenGt(2.5, dir);
	    float is3 = whenGt(dir, 2.5) * whenGt(3.5, dir);
	    float is4 = whenGt(dir, 3.5) * whenGt(4.5, dir);
	    float is5 = whenGt(dir, 4.5) * whenGt(5.5, dir);
	    float is6 = whenGt(dir, 5.5) * whenGt(6.5, dir);

	    vec2 offset = vec2(-2.0, 0.0) * is1;
	    offset += vec2(-1.0, -1.0) * is2;
	    offset += vec2( 1.0, -1.0) * is3;
	    offset += vec2( 2.0, 0.0) * is4;
	    offset += vec2( 1.0, 1.0) * is5;
	    offset += vec2(-1.0, 1.0) * is6;

	    return offset;
	}

	// Switching cause bit for a down-pointing triangle: picks the output of
	// switchingElement that belongs to direction "dir" and tests whether it is set.
	// Directions 1-2 read outB, 3-4 read outC, 5-6 read outA.
	bool triDownSwitchingCauseBit (float dir, in Element switchingElement) {

	    float viaB = whenGt(switchingElement.outB, 0.5) * whenGt(dir, 0.5) * whenGt(2.5, dir);
	    float viaC = whenGt(switchingElement.outC, 0.5) * whenGt(dir, 2.5) * whenGt(4.5, dir);
	    float viaA = whenGt(switchingElement.outA, 0.5) * whenGt(dir, 4.5) * whenGt(6.5, dir);

	    return viaB + viaC + viaA > 0.5;
	}

	// Switching cause bit for an up-pointing triangle: picks the output of
	// switchingElement that belongs to direction "dir" and tests whether it is set.
	// Directions 2-3 read outA, 4-5 read outB, 6 and 1 read outC.
	bool triUpSwitchingCauseBit (float dir, in Element switchingElement) {

	    float viaA = whenGt(switchingElement.outA, 0.5) * whenGt(dir, 1.5) * whenGt(3.5, dir);
	    float viaB = whenGt(switchingElement.outB, 0.5) * whenGt(dir, 3.5) * whenGt(5.5, dir);

	    // Direction 6 and direction 1 wrap around to the same output.
	    float isSixOrOne = whenGt(dir, 5.5) * whenGt(6.5, dir)
	                     + whenGt(dir, 0.5) * whenGt(1.5, dir);
	    float viaC = whenGt(switchingElement.outC, 0.5) * isSixOrOne;

	    return viaA + viaB + viaC > 0.5;
	}

	// Routes the three incoming signals to the three outputs according to the
	// wiring permutation "perm" (1..6); returns vec3(outA, outB, outC).
	// If the triangle points down, rl is the right input and lr the left one;
	// if it points up, rl is the left input and lr the right one.
	// Any perm outside 1..6 gives all zeros.
	lowp vec3 outFromPerm (float perm, float lr, float rl, float middle) {

	    // Selectors for a single permutation number.
	    float is1 = whenGt(perm, 0.5) * whenGt(1.5, perm);
	    float is2 = whenGt(perm, 1.5) * whenGt(2.5, perm);
	    float is3 = whenGt(perm, 2.5) * whenGt(3.5, perm);
	    float is4 = whenGt(perm, 3.5) * whenGt(4.5, perm);
	    float is5 = whenGt(perm, 4.5) * whenGt(5.5, perm);
	    float is6 = whenGt(perm, 5.5) * whenGt(6.5, perm);

	    // Selectors for pairs of permutations that drive output A identically.
	    float is1or2 = whenGt(perm, 0.5) * whenGt(2.5, perm);
	    float is3or4 = whenGt(perm, 2.5) * whenGt(4.5, perm);
	    float is5or6 = whenGt(perm, 4.5) * whenGt(6.5, perm);

	    float nextA = middle * is1or2 + rl * is3or4 + lr * is5or6;

	    float nextB = rl * is1 + lr * is2 + middle * is3
	                + lr * is4 + middle * is5 + rl * is6;

	    float nextC = lr * is1 + rl * is2 + lr * is3
	                + middle * is4 + rl * is5 + middle * is6;

	    return vec3(nextA, nextB, nextC);
	}


	// outFromPerm() for an up-pointing triangle, fed from its three neighbours:
	// lr = right.outC, rl = left.outB, middle = middle.outA.
	// On the right edge of the core the right neighbour is replaced by a
	// constant 1.0 IO bit; the left edge has no special handling yet.
	lowp vec3 triUpOutFromPerm (float perm, in Element left, in Element right, in Element middle, in vec2 fragCoord) {

	    bool onLeftEdge = fragCoord.x < 0.5;
	    bool onRightEdge = !onLeftEdge && fragCoord.x > (COMPUTER_WIDTH - 1.0);

	    lowp float lr = right.outC;
	    if (onRightEdge) {
	        // Placeholder IO bit.
	        lr = 1.0;
	    }

	    return outFromPerm(perm, lr, left.outB, middle.outA);
	}


	// Reads the IO bit that feeds row fragCoord.y of a core edge.
	//   ioRow - y coordinate (in pixels) of the IO texel to sample
	// The texel is unpacked into 32 bits and the bit (row mod 32) is returned.
	lowp float edgeIoBit (in vec2 fragCoord, float fifoRwPtr, float ioRow) {

	    // TODO: read actual IO coordinate!!! (this is a fake rn)
	    vec2 ioTexel = vec2(
	        fifoRwPtr * 32.0 + COMPUTER_WIDTH + (fragCoord.y / 32.0),
	        ioRow);

	    vec4 gpgpu_data_io = texture(iChannel0, ioTexel / iResolution.xy);

	    Array32Bits io_bits = unpack_32_bits(gpgpu_data_io * 255.0);

	    return io_bits.bit[int(fragCoord.y) % 32];
	}

	// Advances one element by one clock.
	//   old       - the element's previous state
	//   fragCoord - the element's pixel coordinate
	//   fifoRwPtr - current IO FIFO read/write position
	// Returns the new state: the three outputs are recomputed from the
	// neighbours (or from IO bits on the left/right edge of the core) through
	// the wiring selected by the switching cause bit, and the parent pointer
	// is rotated. Everything else is copied from "old".
	Element computeElement (in Element old, in vec2 fragCoord, float fifoRwPtr) {

	    Element elt = old;

	    // Switching cause bit:
	    Element switchingCause = elementFromTexel(texture(iChannel0, (fragCoord + ctrlOffset(elt.switchCtrl))/iResolution.xy));

	    // Fetch neighbor elements
	    Element left = elementFromTexel(texture(iChannel0, (fragCoord - vec2(1.0, 0.0))/iResolution.xy));
	    Element right = elementFromTexel(texture(iChannel0, (fragCoord + vec2(1.0, 0.0))/iResolution.xy));

	    // Rotate parent pointer
	    // NOTE(review): this wraps to 0.0 although Element documents parent as 1..3 - confirm intended.
	    elt.parent = old.parent + 1.0 > 3.5 ? 0.0 : old.parent + 1.0;

	    lowp vec2 odd = vec2(int(fragCoord.x) % 2, int(fragCoord.y) % 2);

	    /* Equal parity of x and y: this triangle and switchingCause point down:
	         ______
	         \ A  /
	         C\  /B
	           \/

	       Otherwise the triangles point up:

	           /\
	         B/  \C
	         /____\
	           A          */
	    bool pointsDown = abs(odd.x - odd.y) < 0.5;

	    bool onLeftEdge = fragCoord.x < 1.0;
	    bool onRightEdge = !onLeftEdge && fragCoord.x > (COMPUTER_WIDTH - 1.0);

	    // The third neighbour shares the horizontal side: above for a
	    // down-pointing triangle, below for an up-pointing one.
	    vec2 middleOffset = vec2(0.0, pointsDown ? -1.0 : 1.0);
	    Element middle = elementFromTexel(texture(iChannel0, (fragCoord + middleOffset)/iResolution.xy));

	    bool switched = pointsDown
	        ? triDownSwitchingCauseBit(elt.switchCtrl, switchingCause)
	        : triUpSwitchingCauseBit(elt.switchCtrl, switchingCause);

	    float perm = switched ? elt.wiring1 : elt.wiring0;

	    // Inputs for outFromPerm(); on an edge the missing neighbour is
	    // replaced by an IO bit. Each of the four edge cases samples its own
	    // (placeholder) IO row 0..3.
	    lowp float lr;
	    lowp float rl;

	    if (pointsDown) {
	        lr = left.outC;
	        rl = right.outB;

	        if (onLeftEdge) {
	            lr = edgeIoBit(fragCoord, fifoRwPtr, 0.0);
	        } else if (onRightEdge) {
	            rl = edgeIoBit(fragCoord, fifoRwPtr, 1.0);
	        }
	    } else {
	        lr = right.outC;
	        rl = left.outB;

	        if (onLeftEdge) {
	            rl = edgeIoBit(fragCoord, fifoRwPtr, 2.0);
	        } else if (onRightEdge) {
	            lr = edgeIoBit(fragCoord, fifoRwPtr, 3.0);
	        }
	    }

	    lowp vec3 res = outFromPerm(perm, lr, rl, middle.outA);

	    elt.outA = res.x;
	    elt.outB = res.y;
	    elt.outC = res.z;

	    return elt;
	}


	// Computes the next content of one texel of the simulation buffer.
	//   - input IO blocks: the block at the external write position gets fresh
	//     random data (emulated external input), the others keep their content;
	//   - output IO blocks: the block at the FIFO position is rewritten via
	//     writeIoBlock(), the others keep their content;
	//   - core pixels: the element is advanced by one clock;
	//   - anything else keeps its content.
	vec4 update (in vec2 fragCoord) {

	    // We use a combined FIFO pointer for both reading and writing
	    // when interfacing with the core. Our computer's core never waits.
	    // If you want to support asynchronous FIFO with blocking IO,
	    // insert special signaling bits.
	    float fifoRwPtr = float((iFrame - 2) % int(IO_FIFO_DEPTH_BITS));

	    float ioBlockX;
	    float ioBlockY;

	    float ioBlock = ioBlockNumber(fragCoord, ioBlockX, ioBlockY);

	    if (ioBlock > -0.5) {

	        // NOTE(review): ioBlockX / ioBlockY are fractions in 0..1, so this
	        // value stays below 33 and the test against IO_PIXELS - 1 (79 for
	        // the current core size) never fires - confirm the intended units.
	        float ioBlockPixel = 32.0 * ioBlockY + ioBlockX;
	        if (ioBlockPixel > (IO_PIXELS - 1.0)) {
	            // IO surface is adjusted to the total computer's core size
	            discard;
	        }

	        if (ioBlock < IO_FIFO_DEPTH_BITS) {
	            // Read IO block

	            // Emulate external IO input:
	            // (lags by one, so it's on the opposite side of the FIFO)
	            float fifoExternalWritePtr = float((int(IO_FIFO_DEPTH_BITS) + iFrame - 3 ) % int(IO_FIFO_DEPTH_BITS));

	            if (abs(ioBlock - fifoExternalWritePtr) < 0.5) {
	                rng_initialize(fragCoord, iFrame);
	                return rand4();
	            }

	        } else if (abs(ioBlock - IO_FIFO_DEPTH_BITS - fifoRwPtr) < 0.5) {
	            // Write IO block

	            // TODO: read Element's D flip-flop state
	            return writeIoBlock(ioBlock - IO_FIFO_DEPTH_BITS, 0.0, 0.0);// ioBlockX, ioBlockY);
	        }

	        // Every other IO block: do nothing; preserve contents
	        return texture(iChannel0, fragCoord/iResolution.xy);
	    }

	    // Use 1-byte color values for compatibility with mobile
	    vec4 gpgpu_data = texture(iChannel0, fragCoord/iResolution.xy);

	    // (the "||" operator had been turned into "\|\|", which does not compile)
	    if (fragCoord.x > COMPUTER_WIDTH || fragCoord.y > COMPUTER_HEIGHT) {
	        // Skip processing of data not in computer core
	        // or not being in IO
	        // Note that we can write in this buffer realtime external inputs
	        // (mouse, keyboard, etc.)
	        return gpgpu_data;
	    }

	    return texelFromElement(computeElement(elementFromTexel(gpgpu_data), fragCoord, fifoRwPtr));
	}





	// Seeds one texel of the simulation buffer with a random element.
	// Pixels outside the computer core are discarded and stay untouched
	// (random seeding of the IO blocks is disabled).
	vec4 init (in vec2 fragCoord) {

	    // (the "||" operator had been turned into "\|\|", which does not compile)
	    if (fragCoord.x > COMPUTER_WIDTH || fragCoord.y > COMPUTER_HEIGHT) {
	        discard;
	    }

	    rng_initialize(fragCoord, iFrame);
	    vec4 randomValues = rand4();

	    Element elt = elementFromTexel(randomValues);

	    // A fixed wiring keyed on triangle orientation (4/5 for down-pointing,
	    // 5/4 for up-pointing triangles) was used here during bring-up; the
	    // random configuration below replaced it.

	    // A 3-bit field decodes to 0..7, but only 1..6 are valid selections:
	    // 0 becomes 1 and 7 becomes 6.
	    elt.wiring0 = elt.wiring0 < 0.5 ? 1.0 : elt.wiring0 > 6.5 ? 6.0 : floor(elt.wiring0);
	    elt.wiring1 = elt.wiring1 < 0.5 ? 1.0 : elt.wiring1 > 6.5 ? 6.0 : floor(elt.wiring1);
	    elt.switchCtrl = elt.switchCtrl < 0.5 ? 1.0 : elt.switchCtrl > 6.5 ? 6.0 : floor(elt.switchCtrl);

	    // The parent pointer has three valid targets: 0 becomes 1.
	    elt.parent = elt.parent < 0.5 ? 1.0 : elt.parent > 3.5 ? 3.0 : floor(elt.parent);

	    // Second random draw, rounded to 0 or 1, for the three output flip-flops.
	    vec4 moreRandomValues = rand4();
	    elt.outA = floor(0.5 + moreRandomValues.r);
	    elt.outB = floor(0.5 + moreRandomValues.g);
	    elt.outC = floor(0.5 + moreRandomValues.b);

	    return texelFromElement(elt);
	}

	// Buffer pass entry point: frames 0 and 1 seed the buffer,
	// every later frame advances the simulation by one step.
	void mainImage (out vec4 fragColor, in vec2 fragCoord) {

	    bool simulationRunning = iFrame > 1;

	    if (!simulationRunning) {
	        fragColor = init(fragCoord);
	    } else {
	        fragColor = update(fragCoord);
	    }
	}