Skip to content

Instantly share code, notes, and snippets.

@Refsa
Last active August 27, 2020 05:44
Show Gist options
  • Save Refsa/289545a1d637b39bf4bab78c7ced754a to your computer and use it in GitHub Desktop.
Save Refsa/289545a1d637b39bf4bab78c7ced754a to your computer and use it in GitHub Desktop.
Lockless drawing of procedural quads on the GPU in Unity - 16 million quads on a 2070 @ 60FPS
#pragma kernel Setup
#pragma kernel Runtime
// One element of the _RenderData buffer: a position plus an RGBA color
// (float3 + float4, i.e. 7 floats per element). The field order is part of the
// buffer layout, so it must stay in sync with every other declaration of this struct.
struct RenderData
{
float3 Position; // Setup writes (id.x, 0, id.y); Runtime overwrites only .y
float4 Color; // Setup writes opaque black; Runtime overwrites with a grey noise value
};
// Hashes a 2D coordinate into a pseudo-random value in [0, 1).
// The same input always produces the same output.
// From: https://thebookofshaders.com/11/
float random (in float2 st) {
    // Project the coordinate onto a fixed direction, then keep only the
    // fractional part of a heavily scaled sine of that projection.
    const float2 hashDirection = float2(12.9898, 78.233);
    float projected = dot(st.xy, hashDirection);
    return frac(sin(projected) * 43758.5453123);
}
// 2D value noise: hashes the four integer lattice corners surrounding `st`
// and blends them with a smooth weight.
// From: https://thebookofshaders.com/11/
float noise (in float2 st) {
    float2 cell = floor(st);
    float2 local = frac(st);

    // Hash values at the four corners of the tile containing `st`.
    float bottomLeft = random(cell);
    float bottomRight = random(cell + float2(1.0, 0.0));
    float topLeft = random(cell + float2(0.0, 1.0));
    float topRight = random(cell + float2(1.0, 1.0));

    // Cubic Hermite curve; same shape as smoothstep(0., 1., local).
    float2 w = local * local * (3.0 - 2.0 * local);

    // Mix the four corner values by the smoothed weights.
    float alongBottom = lerp(bottomLeft, bottomRight, w.x);
    float leftRise = (topLeft - bottomLeft) * w.y * (1.0 - w.x);
    float rightRise = (topRight - bottomRight) * w.x * w.y;
    return alongBottom + leftRise + rightRise;
}
// Per-quad data, read and written by the Setup and Runtime kernels.
RWStructuredBuffer<RenderData> _RenderData;
// Time value; Runtime uses it to scroll the noise sample position.
float _Time;
// Stride used to flatten a 2D thread id into a buffer index:
// index = id.x * _Size + id.y.
int _Size;
// Multiplier applied to the final noise sample position.
float _NoiseScale;
// Multiplier applied to the time-based scroll.
float _TimeScale;
// Constant offset added to the noise sample position (before _NoiseScale).
float2 _NoiseOffset;
// Per-axis factor applied to the time-based scroll.
float2 _NoiseDirection;
// Initialises one buffer element per thread: the quad is placed on the XZ plane
// at (id.x, 0, id.y) and coloured opaque black.
[numthreads(1,1,1)]
void Setup (uint3 id : SV_DispatchThreadID)
{
    // Flatten the 2D thread id into the element's slot in the buffer.
    uint slot = id.x * _Size + id.y;

    RenderData initial;
    initial.Color = float4(0, 0, 0, 1);
    initial.Position = float3(id.x, 0, id.y);

    _RenderData[slot] = initial;
}
// Per-frame update: samples scrolling noise at the thread's grid coordinate and
// stores it as both the grey level and the height (Position.y) of that element.
[numthreads(32,32,1)]
void Runtime (uint3 id : SV_DispatchThreadID)
{
    // Time-driven scroll of the noise field.
    float2 scroll = float2(_Time, _Time) * _NoiseDirection * _TimeScale;
    float2 samplePos = (id.xy + _NoiseOffset + scroll) * _NoiseScale;
    float n = noise(samplePos);

    // Same flattening as Setup, so both kernels address the same element.
    uint slot = id.x * _Size + id.y;
    _RenderData[slot].Color = float4(n, n, n, 1);
    _RenderData[slot].Position.y = n * 5;
}
using UnityEngine;
/// <summary>
/// Fills a GPU buffer with one entry per grid cell using a compute shader, animates it
/// every frame, and draws it with an indirect procedural draw call so the data never
/// travels back to the CPU.
/// </summary>
public class ProceduralQuads : MonoBehaviour
{
    /// <summary>
    /// CPU-side mirror of the shaders' RenderData struct. It is never instantiated;
    /// it only exists so the buffer stride is derived from the layout instead of a magic number.
    /// </summary>
    struct RenderData
    {
        public Vector3 Position;
        public Vector4 Color;
    }

    const string SetupKernelName = "Setup";
    const string RuntimeKernelName = "Runtime";

    // The Runtime kernel writes Position.y as the noise value scaled by 5.
    const float MaxQuadHeight = 5f;

    static readonly int RenderDataId = Shader.PropertyToID("_RenderData");
    static readonly int SizeId = Shader.PropertyToID("_Size");
    static readonly int NoiseScaleId = Shader.PropertyToID("_NoiseScale");
    static readonly int TimeScaleId = Shader.PropertyToID("_TimeScale");
    static readonly int NoiseOffsetId = Shader.PropertyToID("_NoiseOffset");
    static readonly int NoiseDirectionId = Shader.PropertyToID("_NoiseDirection");
    static readonly int TimeId = Shader.PropertyToID("_Time");

    [SerializeField] Material material;
    [SerializeField] ComputeShader computeShader;
    [SerializeField] int width = 256;
    [SerializeField] int height = 256;

    [Header("Can edit at Runtime")]
    [SerializeField] float noiseScale = 0.1f;
    [SerializeField] float timeScale = 20f;
    [SerializeField] Vector2 noiseOffset = Vector2.zero;
    // Renamed from the misspelled "noiseDirecton"; the attribute keeps values already saved in scenes/prefabs.
    [UnityEngine.Serialization.FormerlySerializedAs("noiseDirecton")]
    [SerializeField] Vector2 noiseDirection = Vector2.one;

    ComputeBuffer renderData;
    ComputeBuffer args;
    Bounds drawBounds;
    int runtimeKernel;
    int runtimeGroupsX;
    int runtimeGroupsY;

    /// <summary>
    /// Allocates the GPU buffers, binds them to the material and both kernels,
    /// and runs the Setup kernel once to initialise every grid cell.
    /// </summary>
    void Start()
    {
        if (material == null || computeShader == null)
        {
            Debug.LogError("ProceduralQuads: 'material' and 'computeShader' must be assigned.", this);
            enabled = false;
            return;
        }

        if (width <= 0 || height <= 0)
        {
            Debug.LogError("ProceduralQuads: 'width' and 'height' must be positive.", this);
            enabled = false;
            return;
        }

        // Look the kernels up by name so reordering the #pragma kernel lines cannot break this script.
        int setupKernel = computeShader.FindKernel(SetupKernelName);
        runtimeKernel = computeShader.FindKernel(RuntimeKernelName);

        int quadCount = width * height;
        int stride = System.Runtime.InteropServices.Marshal.SizeOf(typeof(RenderData));
        renderData = new ComputeBuffer(quadCount, stride);

        // Indirect draw arguments: vertex count per instance, instance count,
        // start vertex location, start instance location.
        args = new ComputeBuffer(1, 4 * sizeof(uint), ComputeBufferType.IndirectArguments);
        args.SetData(new uint[] { (uint)quadCount, 1, 0, 0 });

        // Set GPU memory
        material.SetBuffer(RenderDataId, renderData);

        // The kernels flatten a cell as id.x * _Size + id.y, so the stride has to be the
        // extent of the y axis (height). Passing width only works for square grids.
        computeShader.SetInt(SizeId, height);
        computeShader.SetBuffer(setupKernel, RenderDataId, renderData);
        computeShader.SetBuffer(runtimeKernel, RenderDataId, renderData);

        // Initialize GPU memory
        int setupGroupsX;
        int setupGroupsY;
        GetGroupCounts(setupKernel, SetupKernelName, out setupGroupsX, out setupGroupsY);
        if (setupGroupsX > 0 && setupGroupsY > 0)
        {
            computeShader.Dispatch(setupKernel, setupGroupsX, setupGroupsY, 1);
        }

        GetGroupCounts(runtimeKernel, RuntimeKernelName, out runtimeGroupsX, out runtimeGroupsY);

        // Keep the original culling volume as a minimum, but grow it so grids larger than
        // that volume are not culled while they are still on screen.
        drawBounds = new Bounds(Vector3.zero, Vector3.one * 1000f);
        drawBounds.Encapsulate(new Vector3(width, MaxQuadHeight, height));

        Application.targetFrameRate = 240;
        QualitySettings.vSyncCount = 0;
    }

    /// <summary>Releases the GPU buffers, if they were ever created.</summary>
    void OnDestroy()
    {
        // Start may have bailed out early (or never run), so either buffer can still be null.
        if (args != null)
        {
            args.Dispose();
            args = null;
        }

        if (renderData != null)
        {
            renderData.Dispose();
            renderData = null;
        }
    }

    /// <summary>
    /// Pushes the inspector-editable noise parameters to the compute shader, runs the
    /// Runtime kernel, and issues the indirect draw for this frame.
    /// </summary>
    void Update()
    {
        if (renderData == null || args == null)
        {
            return;
        }

        // Alter GPU memory with compute shader
        computeShader.SetFloat(NoiseScaleId, noiseScale);
        computeShader.SetFloat(TimeScaleId, timeScale);
        computeShader.SetVector(NoiseOffsetId, noiseOffset);
        computeShader.SetVector(NoiseDirectionId, noiseDirection.normalized);
        computeShader.SetFloat(TimeId, Time.time);

        if (runtimeGroupsX > 0 && runtimeGroupsY > 0)
        {
            computeShader.Dispatch(runtimeKernel, runtimeGroupsX, runtimeGroupsY, 1);
        }

        // Draw directly with data on GPU aka lockless rendering
        Graphics.DrawProceduralIndirect(
            material, drawBounds, MeshTopology.Points, args, 0, ResolveTargetCamera());
    }

    /// <summary>
    /// Computes how many whole thread groups of <paramref name="kernel"/> fit into the grid.
    /// The counts are rounded down on purpose: the kernels have no bounds check, so
    /// dispatching a partial group would write outside the intended cells.
    /// </summary>
    /// <param name="kernel">Kernel index returned by FindKernel.</param>
    /// <param name="kernelName">Kernel name, used only for the warning message.</param>
    /// <param name="groupsX">Receives the group count along the width.</param>
    /// <param name="groupsY">Receives the group count along the height.</param>
    void GetGroupCounts(int kernel, string kernelName, out int groupsX, out int groupsY)
    {
        uint sizeX;
        uint sizeY;
        uint sizeZ;
        computeShader.GetKernelThreadGroupSizes(kernel, out sizeX, out sizeY, out sizeZ);

        groupsX = width / (int)sizeX;
        groupsY = height / (int)sizeY;

        if (width % (int)sizeX != 0 || height % (int)sizeY != 0)
        {
            Debug.LogWarning(
                "ProceduralQuads: width/height are not multiples of the '" + kernelName +
                "' kernel thread group size (" + sizeX + "x" + sizeY +
                "); cells outside the covered area will not be processed by it.", this);
        }
    }

    /// <summary>
    /// Picks the camera to draw into: the last active Scene view camera when running in the
    /// editor with a Scene view available, otherwise null, which draws into all cameras.
    /// </summary>
    Camera ResolveTargetCamera()
    {
#if UNITY_EDITOR
        // UnityEditor is not available in player builds, and lastActiveSceneView is null
        // when no Scene view has been opened.
        UnityEditor.SceneView sceneView = UnityEditor.SceneView.lastActiveSceneView;
        if (sceneView != null)
        {
            return sceneView.camera;
        }
#endif
        return null;
    }
}
// Renders every element of _RenderData as a flat, unlit quad on the XZ plane.
// The draw call supplies one point per element; the vertex stage fetches that
// element by SV_VertexID and the geometry stage expands it into a quad.
Shader "ProceduralQuads" {
    Properties {
    }
    SubShader {
        Tags {
            "RenderType" = "Transparent"
            "IgnoreProjector" = "True"
            "Queue" = "Transparent"
        }
        Cull Off
        Lighting Off
        ZWrite Off
        Blend SrcAlpha OneMinusSrcAlpha
        Pass {
            CGPROGRAM
            // Structured buffers need the compute-capable shader target.
            #pragma target 4.5
            #pragma vertex vert
            #pragma geometry geom
            #pragma fragment frag
            #include "UnityCG.cginc"

            // Must match the layout of the buffer bound as _RenderData.
            struct RenderData
            {
                float3 Position;
                float4 Color;
            };

            StructuredBuffer<RenderData> _RenderData;

            // Vertex -> geometry: the quad centre (not yet projected) and its colour.
            struct v2g {
                float4 pos : SV_POSITION;
                float4 color : TEXCOORD0;
            };

            // Geometry -> fragment: clip-space corner plus the quad's flat colour.
            struct g2f {
                float4 pos : SV_POSITION;
                // Constant across the quad, so no interpolation is needed.
                nointerpolation float4 color : TEXCOORD0;
            };

            // A procedural draw has no vertex streams; the only input is the vertex index,
            // which selects the buffer element. The buffer is read once here so later
            // stages do not have to fetch it again.
            v2g vert(uint vertexID : SV_VertexID)
            {
                RenderData prop = _RenderData[vertexID];

                v2g o;
                o.pos = float4(prop.Position, 1.0);
                o.color = prop.Color;
                return o;
            }

            // Builds one projected quad corner at `center` shifted by `offsetXZ` on the XZ plane.
            g2f MakeCorner(float4 center, float2 offsetXZ, float4 color)
            {
                g2f o = (g2f)0;
                o.pos = UnityObjectToClipPos(center + float4(offsetXZ.x, 0, offsetXZ.y, 0));
                o.color = color;
                return o;
            }

            // Expands each point into a unit quad centred on it. The output stream is a
            // triangle strip, so four vertices in zig-zag order give exactly the two
            // triangles of the quad, with no duplicate or degenerate triangles.
            [maxvertexcount(4)]
            void geom(point v2g input[1], inout TriangleStream<g2f> triangleStream)
            {
                float4 center = input[0].pos;
                float4 color = input[0].color;

                triangleStream.Append(MakeCorner(center, float2(-0.5, -0.5), color));
                triangleStream.Append(MakeCorner(center, float2(-0.5, 0.5), color));
                triangleStream.Append(MakeCorner(center, float2(0.5, -0.5), color));
                triangleStream.Append(MakeCorner(center, float2(0.5, 0.5), color));
                triangleStream.RestartStrip();
            }

            // Outputs the quad's colour unchanged.
            float4 frag(g2f f) : SV_Target {
                return f.color;
            }
            ENDCG
        }
    }
}
@Refsa
Copy link
Author

Refsa commented Aug 27, 2020

gist formatter butchers the .shader file, I give up

width and height need to be set before entering play mode.
It currently renders into the Scene view; you will need a large far clip plane to see everything.
There is no culling or depth sorting; quads are drawn in the order in which they appear in the buffer.

Probably not very useful for most situations, more of a benchmark to see how hard it is possible to push the GPU.

@Refsa
Copy link
Author

Refsa commented Aug 27, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment