Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save texone/66a46554a6fbda50ccc9831561e4037f to your computer and use it in GitHub Desktop.
Save texone/66a46554a6fbda50ccc9831561e4037f to your computer and use it in GitHub Desktop.
Unity lockless (no GPU readback) marching cubes via Graphics.DrawProceduralIndirect. There is some slight faffing involved: the compute shader must append a full triangle (3 verts) at a time to render correctly, but this means the append buffer's count is in triangles and is therefore 3 times smaller than the vertex count the draw call needs, so we have to invoke a very short compute shader (FixupIndirectArgs) just…
MarchingCubesGPU.cs:
...
// DrawProceduralIndirect
ComputeBuffer argsBuffer;
/// <summary>
/// CPU-side mirror of the four 32-bit values stored in the indirect draw
/// arguments buffer, in the order the fields are declared.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
struct DrawCallArgBuffer
{
    /// <summary>Size of the struct in bytes: four ints.</summary>
    public const int size = 4 * sizeof(int);

    public int vertexCountPerInstance;
    public int instanceCount;
    public int startVertexLocation;
    public int startInstanceLocation;
}
...
/// <summary>
/// Allocates the GPU buffers used by the marching cubes pipeline (voxels,
/// normals, generated triangles, lookup tables and indirect draw arguments)
/// and loads the perlin noise resources.
/// </summary>
/// <exception cref="System.ArgumentException">
/// Thrown when N is not a positive multiple of 8.
/// </exception>
void Fixup()
{
    // The kernels are dispatched with N / 8 groups per axis, so N must be a
    // positive multiple of 8. Validate before touching any state or
    // allocating any GPU memory.
    if (N <= 0 || N % 8 != 0)
        throw new System.ArgumentException("N must be a positive multiple of 8");

    int voxelCount = N * N * N;

    // The marching cubes kernel appends at most five triangles per cell.
    int maxTriangles = voxelCount * 5;

    // SIZE keeps its original meaning: the maximum number of vertices.
    SIZE = maxTriangles * 3;

    //Holds the voxel values, generated from perlin noise.
    m_noiseBuffer = new ComputeBuffer(voxelCount, sizeof(float));

    //Holds the normals of the voxels.
    // TODO: convert this to render textures, with bilinear sample in compute shader
    m_normalsBuffer = new ComputeBuffer(voxelCount, sizeof(float) * 3);

    //Holds the triangles generated by the marching cubes. Each element is a
    //whole triangle (3 verts of 8 floats each), so the element count is the
    //maximum triangle count rather than the maximum vertex count; sizing it
    //by SIZE would allocate three times more memory than can ever be filled.
    m_meshBuffer = new ComputeBuffer(maxTriangles, sizeof(float) * 8 * 3, ComputeBufferType.Append);
    m_meshBuffer.SetCounterValue(0);

    //These two buffers are just some settings needed by the marching cubes.
    m_cubeEdgeFlags = new ComputeBuffer(256, sizeof(int));
    m_cubeEdgeFlags.SetData(cubeEdgeFlags);
    m_triangleConnectionTable = new ComputeBuffer(256 * 16, sizeof(int));
    m_triangleConnectionTable.SetData(triangleConnectionTable);

    //Make the perlin noise, make sure to load resources to match shader used.
    perlin = new ImprovedPerlinNoise(m_seed);
    perlin.LoadResourcesFor4DNoise();

    // Indirect args: { vertex count per instance, instance count, start vertex,
    // start instance }. The vertex count starts at 0 and is filled in on the GPU
    // each frame; a single instance is drawn.
    argsBuffer = new ComputeBuffer(1, DrawCallArgBuffer.size, ComputeBufferType.IndirectArguments);
    int[] args = new int[] { 0, 1, 0, 0 };
    argsBuffer.SetData(args);
}
...
/// <summary>
/// Runs the whole GPU pipeline for one frame: generate the noise voxels,
/// derive their normals, run marching cubes into the append buffer, turn the
/// appended triangle count into a vertex count, and issue the indirect draw.
/// </summary>
void Update()
{
    // Every compute shader used here runs kernel 0 over N / 8 groups per axis.
    const int kernel = 0;
    int groupsPerAxis = N / 8;

    // --- Voxels ---------------------------------------------------------
    m_perlinNoise.SetInt("_Width", N);
    m_perlinNoise.SetInt("_Height", N);
    m_perlinNoise.SetFloat("_Frequency", m_freq);
    m_perlinNoise.SetFloat("_Lacunarity", m_lacunarity);
    m_perlinNoise.SetFloat("_Gain", m_gain);
    m_perlinNoise.SetFloat("_Time", Time.time * m_speed);
    m_perlinNoise.SetTexture(kernel, "_PermTable1D", perlin.GetPermutationTable1D());
    m_perlinNoise.SetTexture(kernel, "_PermTable2D", perlin.GetPermutationTable2D());
    m_perlinNoise.SetTexture(kernel, "_Gradient4D", perlin.GetGradient4D());
    m_perlinNoise.SetBuffer(kernel, "_Result", m_noiseBuffer);
    m_perlinNoise.Dispatch(kernel, groupsPerAxis, groupsPerAxis, groupsPerAxis);

    // --- Voxel normals --------------------------------------------------
    m_normals.SetInt("_Width", N);
    m_normals.SetInt("_Height", N);
    m_normals.SetBuffer(kernel, "_Noise", m_noiseBuffer);
    m_normals.SetBuffer(kernel, "_Result", m_normalsBuffer);
    m_normals.Dispatch(kernel, groupsPerAxis, groupsPerAxis, groupsPerAxis);

    // --- Mesh triangles -------------------------------------------------
    m_marchingCubes.SetInt("_Width", N);
    m_marchingCubes.SetInt("_Height", N);
    m_marchingCubes.SetInt("_Depth", N);
    m_marchingCubes.SetInt("_Border", 1);
    m_marchingCubes.SetFloat("_Target", 0.0f);
    m_marchingCubes.SetBuffer(kernel, "_Voxels", m_noiseBuffer);
    m_marchingCubes.SetBuffer(kernel, "_Normals", m_normalsBuffer);

    // Start this frame's append buffer from empty before it is bound.
    m_meshBuffer.SetCounterValue(0);
    m_marchingCubes.SetBuffer(kernel, "_Buffer", m_meshBuffer);
    m_marchingCubes.SetBuffer(kernel, "_CubeEdgeFlags", m_cubeEdgeFlags);
    m_marchingCubes.SetBuffer(kernel, "_TriangleConnectionTable", m_triangleConnectionTable);
    m_marchingCubes.SetBuffer(kernel, "DrawCallArgs", argsBuffer);
    m_marchingCubes.Dispatch(kernel, groupsPerAxis, groupsPerAxis, groupsPerAxis);

    // --- Indirect arguments ---------------------------------------------
    // Copy the append counter into the first slot of the args buffer
    // (byte offset 0), without reading it back to the CPU.
    ComputeBuffer.CopyCount(m_meshBuffer, argsBuffer, 0);

    // The copied count is in triangles; the fixup kernel converts it to verts.
    m_fixupArgsCount.SetBuffer(kernel, "DrawCallArgs", argsBuffer);
    m_fixupArgsCount.Dispatch(kernel, 1, 1, 1);

    // --- Draw -----------------------------------------------------------
    Matrix4x4 objectToWorld = transform.localToWorldMatrix;
    m_drawBuffer.SetPass(0);
    m_drawBuffer.SetBuffer("_Buffer", m_meshBuffer);
    m_drawBuffer.SetMatrix("objMat", objectToWorld);

    Bounds drawBounds = new Bounds(transform.position, transform.lossyScale);
    Graphics.DrawProceduralIndirect(
        m_drawBuffer,
        drawBounds,
        MeshTopology.Triangles,
        argsBuffer,
        0,
        null,
        null,
        UnityEngine.Rendering.ShadowCastingMode.On,
        true);
}
//===========================================================================
MarchingCubes.compute:
// One generated mesh vertex: eight floats in total (float4 + float3 + float).
struct Vert
{
float4 position;
float3 normal;
// Pads the struct from seven to eight floats.
float dummy; // TODO: faster with or without this padding?
};
// Three vertices grouped into a single append-buffer element, so a whole
// triangle is appended in one operation.
struct Triangle
{
Vert verts[3];
};
// Output of the marching cubes kernel. The C# side allocates it with an
// element stride of sizeof(float) * 8 * 3, i.e. one Triangle per element.
AppendStructuredBuffer<Triangle> _Buffer;
// Layout of the indirect draw arguments: four 32-bit values, field for field
// the same as DrawCallArgBuffer in MarchingCubesGPU.cs.
struct DrawCallArgsBuffer
{
uint vertexCountPerInstance;
uint instanceCount;
uint startVertexLocation;
uint startInstanceLocation;
};
// Bound from C# to the indirect arguments buffer.
// NOTE(review): not referenced in the visible part of this shader - confirm
// whether the binding is actually needed here.
RWStructuredBuffer <DrawCallArgsBuffer> DrawCallArgs;
...
// Marching cubes kernel. Each thread handles the cube whose lowest corner is
// the voxel at 'id', and appends up to five whole triangles to _Buffer.
[numthreads(8, 8, 8)]
void CSMain(int3 id : SV_DispatchThreadID)
{
    //Dont generate verts at the edge as they dont have
    //neighbours to make a cube from and the normal will
    //not be correct around border.
    if (id.x >= _Width - 1 - _Border) return;
    if (id.y >= _Height - 1 - _Border) return;
    if (id.z >= _Depth - 1 - _Border) return;

    float3 pos = float3(id);
    float3 centre = float3(_Width, _Height, _Depth) / 2.0;

    float cube[8];
    FillCube(id.x, id.y, id.z, cube);

    int i = 0, j = 0;
    int flagIndex = 0;
    float3 edgeVertex[12];

    //Find which vertices are inside of the surface and which are outside
    for (i = 0; i < 8; i++)
        if (cube[i] <= _Target) flagIndex |= 1 << i;

    //Find which edges are intersected by the surface
    int edgeFlags = _CubeEdgeFlags[flagIndex];

    // no connections, return
    if (edgeFlags == 0) return;

    //Find the point of intersection of the surface with each edge.
    //Only intersected edges are written; entries for the other edges stay unset.
    for (i = 0; i < 12; i++)
    {
        //if there is an intersection on this edge
        if ((edgeFlags & (1 << i)) != 0)
        {
            float offset = GetOffset(cube[edgeConnection[i].x], cube[edgeConnection[i].y]);
            edgeVertex[i] = pos + (vertexOffset[edgeConnection[i].x] + offset * edgeDirection[i]);
        }
    }

    //Save the triangles that were found. There can be up to five per cube
    for (i = 0; i < 5; i++)
    {
        //If the connection table is not -1 then this a triangle.
        if (_TriangleConnectionTable[flagIndex * 16 + 3 * i] >= 0)
        {
            // Build the triangle in place on a zero-initialised value so that the
            // 'dummy' padding field of every vertex is written as 0 rather than
            // being copied from an uninitialised temporary.
            Triangle tri = (Triangle)0;

            for (j = 0; j < 3; j++)
            {
                int v = _TriangleConnectionTable[flagIndex * 16 + (3 * i + j)];
                float3 position = edgeVertex[v];

                // Centre the mesh on the origin and scale it by 1 / _Width.
                // NOTE(review): the division is applied to the whole float4, so
                // w ends up as 1 / _Width rather than 1 - confirm the draw
                // shader only relies on position.xyz.
                tri.verts[j].position = float4(position - centre, 1.0) / (float)_Width;
                tri.verts[j].normal = SampleBilinear(_Normals, position);
            }

            _Buffer.Append(tri);
        }
    }
}
//===========================================================================
FixupIndirectArgs.compute:
// Single kernel in this file: CSMain.
#pragma kernel CSMain

// Layout of the indirect draw arguments: four consecutive 32-bit values.
struct DrawCallArgsBuffer
{
    uint vertexCountPerInstance;
    uint instanceCount;
    uint startVertexLocation;
    uint startInstanceLocation;
};

RWStructuredBuffer<DrawCallArgsBuffer> DrawCallArgs;

// One-thread kernel: the first field of the args buffer holds a triangle
// count when this runs; scale it by three so it becomes a vertex count.
[numthreads(1, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    uint triangleCount = DrawCallArgs[0].vertexCountPerInstance;
    DrawCallArgs[0].vertexCountPerInstance = triangleCount * 3;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment