Skip to content

Instantly share code, notes, and snippets.

Created September 8, 2018 02:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JSandusky/6abbe2d365a938142860225b1a6979db to your computer and use it in GitHub Desktop.
Save JSandusky/6abbe2d365a938142860225b1a6979db to your computer and use it in GitHub Desktop.
Monogame automatic sorting and instancing of common draws
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using DelveLib;
using Microsoft.Xna.Framework;
using Microsoft.Xna.Framework.Graphics;
namespace Delve.Graphics
/// <summary>
/// Contract an Xna.Framework.Graphics.Effect implementation fulfils so that it can be drawn through the meshbatch.
/// </summary>
public interface IMeshBatchEffect
{
    /// <summary>
    /// Applies a pass from one of the effect's techniques in preparation for an instanced draw.
    /// </summary>
    /// <param name="passID">Identifies the pass being rendered; use it to decide how to set up the effect.</param>
    /// <returns>True if rendering can go ahead.</returns>
    bool PrepareInstanced(int passID);

    /// <summary>
    /// Applies a pass from one of the effect's techniques in preparation for drawing a single object.
    /// </summary>
    /// <param name="transform">Transform of the object about to be drawn.</param>
    /// <param name="passID">Identifies the pass being rendered; use it to decide how to set up the effect.</param>
    /// <returns>True if rendering can go ahead.</returns>
    bool PrepareOneOff(Matrix transform, int passID);

    /// <summary>
    /// Performs any per-frame draw cluster initialization (current time, deltas, technique/pass setup, etc).
    /// </summary>
    /// <param name="passID">Identifies the pass being rendered.</param>
    /// <returns>True if the effect can render.</returns>
    bool EffectSelected(int passID);

    /// <summary>
    /// Performs any post-draw cluster tasks. There should normally be none, barring some odd need to mess with GPU state.
    /// </summary>
    void EffectDeselected();
}
/// <summary>
/// Data for the additional stream of per-instance data: a single 4x4 transform, declared to the
/// GPU as four Vector4 elements on TextureCoordinate usage indices 1 through 4.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1)]
public struct VertexInstanceTransform : IVertexType
{
    //?? garbage, not currently using this actual type
    public Matrix transform;

    /// <summary>Shared declaration describing the layout of one instance record (64 bytes).</summary>
    public static readonly VertexDeclaration vertexDeclaration;

    static VertexInstanceTransform()
    {
        //TODO: replace with a 3x4, using a 4x4 is stupid
        // One Vector4 per 16-byte slice of the matrix, at byte offsets 0, 16, 32 and 48.
        VertexElement[] elements = new VertexElement[]
        {
            new VertexElement(0, VertexElementFormat.Vector4, VertexElementUsage.TextureCoordinate, 1),
            new VertexElement(16, VertexElementFormat.Vector4, VertexElementUsage.TextureCoordinate, 2),
            new VertexElement(32, VertexElementFormat.Vector4, VertexElementUsage.TextureCoordinate, 3),
            new VertexElement(48, VertexElementFormat.Vector4, VertexElementUsage.TextureCoordinate, 4),
        };
        vertexDeclaration = new VertexDeclaration(elements);
    }

    /// <summary>
    /// IVertexType implementation; returns the shared static declaration.
    /// </summary>
    public VertexDeclaration VertexDeclaration
    {
        // Must return the static field: returning the property itself recurses until the stack overflows.
        get { return vertexDeclaration; }
    }
}
/// <summary>
/// A sortable container of the necessary information to draw: geometry, effect, transform(s)
/// and the 64-bit code the batch orders and groups draws by.
/// </summary>
class MeshDraw : IComparable<MeshDraw>
{
    public VertexBuffer verts;
    public IndexBuffer indices;
    public Matrix transform;
    public IMeshBatchEffect effect;
    public ulong sortCode; // must be set via SortCode()
    public List<Matrix> Transforms = new List<Matrix>();
    public bool canInstance = true;

    /// <summary>
    /// Builds the sort code for this draw. Layout, high to low:
    /// bits 56-63 render order, 40-55 effect hash, 24-39 index-buffer hash,
    /// 8-23 vertex-buffer hash, bit 7 the canInstance flag; bits 0-6 are unused.
    /// </summary>
    /// <param name="renderOrder">Layer-like ordering value; only its low 8 bits are used.</param>
    /// <returns>The packed code. The caller is expected to store it in <see cref="sortCode"/>.</returns>
    /// <remarks>
    /// Only the low 16 bits of each GetHashCode() are kept and hash codes are not guaranteed to be
    /// unique, so two different effects or buffers can yield the same code.
    /// </remarks>
    public ulong SortCode(int renderOrder)
    {
        // Conversions are explicitly unchecked so that negative values (hash codes frequently are)
        // wrap instead of throwing OverflowException when the project is built with checked arithmetic.
        ulong code = (unchecked((ulong)renderOrder) & 0xFF) << 56;

        // Effect goes in the high-bits, an Effect change likely means changes to texture-units, and other render state
        code |= Low16(effect.GetHashCode()) << 40;

        // TODO: combine vertex and index buffers into a geometry object and use that as a hashcode source
        //      will free up 16 bits, caveats? should it be 24-bits in that case?
        ulong indexBits = indices != null ? Low16(indices.GetHashCode()) : 0;
        code |= indexBits << 24;

        code |= Low16(verts.GetHashCode()) << 8;

        if (canInstance)
            code |= 1ul << 7;

        return code;
    }

    // Keeps the low 16 bits of a hash code as an unsigned value.
    static ulong Low16(int hash)
    {
        return unchecked((ulong)hash) & 0xFFFF;
    }

    /// <summary>Orders draws by <see cref="sortCode"/>; any instance sorts after null.</summary>
    public int CompareTo(MeshDraw other)
    {
        if (other == null)
            return 1;
        return sortCode.CompareTo(other.sortCode);
    }
}
/// <summary>
/// Collects draw requests, keeps them ordered by sort code, and merges requests that share a
/// sort code so they can be submitted as a single instanced draw call.
/// </summary>
/// <example>
/// MeshBatch batch = new MeshBatch(graphicsDevice);
/// batch.Begin(myCamera);
/// batch.Add(vtxBuffer, idxBuffer, transformMat, effect);
/// batch.Add(vtxBuffer, idxBuffer, transformMat, effect); (automatic instancing)
/// batch.Render(myCamera, passID);
/// </example>
/// <remarks>
/// SortedList is unintuitive because of its insertion overhead, but
/// recording commands into a list and then sorting them is much slower:
///     - C# List.Sort is SLOWWW
///     - Have to scan through ranges to find what can be instanced into one draw-call
///     - Don't care about removal, always done via Clear()
///     - Want to get the ordered `Values` as fast as possible
///         - Slow to do on SortedDictionary, though SortedDictionary is faster to insert
/// Pooling isn't used:
///     pooling with a Deque was tried but it was actually slower, the test case timed at:
///         - 3.6ms to not pool (avg)
///         - 5.1ms to pool (avg)
///     test case was for drawing 400 objects (with MAX_INSTANCES at 50), w/ PBR (single light) on an Intel HD4000
///     pooling wasn't worth the CPU cost
///     garbage produced here is all/mostly gen-0
/// </remarks>
public class MeshBatch
{
    // Upper bound on the number of instances submitted in one instanced draw call.
    const int MAX_INSTANCES = 1024;
    // A draw is instanced only when it holds more than this many merged transforms.
    const int MIN_INSTANCES = 0;

    Camera camera_;
    BoundingFrustum frustum_;
    DynamicVertexBuffer instancedTransforms_;
    GraphicsDevice device_;

    // Unintuitive, but this is faster than appending a list, sorting the list, then selecting ranges for automatic instancing
    SortedList<ulong, MeshDraw> draws_ = new SortedList<ulong, MeshDraw>();

    // State applied by the most recent draw, used to skip redundant effect changes.
    IMeshBatchEffect lastEffect_ = null;
    VertexBuffer lastMesh_ = null;
    IndexBuffer lastIndices_ = null;

    /// <summary>Creates a batch that renders through the given device.</summary>
    /// <param name="device">Graphics device used for every draw call.</param>
    /// <exception cref="ArgumentNullException"><paramref name="device"/> is null.</exception>
    public MeshBatch(GraphicsDevice device)
    {
        if (device == null)
            throw new ArgumentNullException(nameof(device));
        device_ = device;
    }

    /// <summary>
    /// Append a rendering task
    /// </summary>
    /// <param name="verts">vertex buffer to draw</param>
    /// <param name="ind">index buffer to draw</param>
    /// <param name="transform">transform of the geometry being drawn</param>
    /// <param name="effect">shader effect / texture-combination</param>
    /// <param name="drawOrder">Sequence, ordered low -> high, use like a layer</param>
    /// <param name="canInstance">Whether to allow automatic instancing or not</param>
    public void Add(VertexBuffer verts, IndexBuffer ind, Matrix transform, IMeshBatchEffect effect, int drawOrder = 0, bool canInstance = true)
    {
        Add(null, verts, ind, transform, effect, drawOrder, canInstance);
    }

    /// <summary>
    /// Specialization of the above to perform culling: when <paramref name="bounds"/> is supplied and
    /// lies entirely outside the frustum set up by <see cref="Begin"/>, the task is dropped.
    /// </summary>
    /// <param name="bounds">Optional bounding sphere in the geometry's local space; null disables culling.</param>
    /// <param name="verts">vertex buffer to draw</param>
    /// <param name="ind">index buffer to draw</param>
    /// <param name="transform">transform of the geometry being drawn</param>
    /// <param name="effect">shader effect / texture-combination</param>
    /// <param name="drawOrder">Sequence, ordered low -> high, use like a layer</param>
    /// <param name="canInstance">Whether to allow automatic instancing or not</param>
    /// <exception cref="ArgumentNullException"><paramref name="verts"/> or <paramref name="effect"/> is null.</exception>
    public void Add(BoundingSphere? bounds, VertexBuffer verts, IndexBuffer ind, Matrix transform, IMeshBatchEffect effect, int drawOrder = 0, bool canInstance = true)
    {
        // Both are dereferenced while building the sort code; fail here with a clear message instead.
        if (verts == null)
            throw new ArgumentNullException(nameof(verts));
        if (effect == null)
            throw new ArgumentNullException(nameof(effect));

        // Without a frustum (Begin not called yet) there is nothing to cull against.
        if (bounds.HasValue && frustum_ != null)
        {
            BoundingSphere worldBounds = bounds.Value;
            // NOTE(review): only the centre is transformed; the radius is left unscaled, so a
            // transform that scales the geometry up can be culled too early - confirm with callers.
            worldBounds.Center = Vector3.Transform(worldBounds.Center, transform);
            if (frustum_.Contains(worldBounds) == ContainmentType.Disjoint)
                return;
        }

        var draw = new MeshDraw
        {
            verts = verts,
            indices = ind,
            transform = transform,
            effect = effect,
            canInstance = canInstance
        };
        draw.sortCode = draw.SortCode(drawOrder);

        MeshDraw existing;
        if (!draws_.TryGetValue(draw.sortCode, out existing))
        {
            draws_.Add(draw.sortCode, draw);
            return;
        }

        // Same sort code: fold this request into the existing draw. The first merge also moves the
        // existing draw's own transform into the list, because Render draws from Transforms
        // whenever that list is non-empty and from the single transform otherwise.
        if (existing.Transforms.Count == 0)
            existing.Transforms.Add(existing.transform);
        existing.Transforms.Add(transform);
    }

    /// <summary>
    /// Prepares state for enqueuing draws: discards previously queued draws and rebuilds the
    /// culling frustum from the camera's view and projection matrices.
    /// </summary>
    /// <param name="camera">Camera whose frustum is used for culling in Add.</param>
    public void Begin(Camera camera)
    {
        camera_ = camera;
        frustum_ = new BoundingFrustum(camera.ViewMatrix * camera.ProjectionMatrix);
        // Nothing else in this class empties the list; without this, draws and their merged
        // transforms would pile up from one Begin to the next.
        draws_.Clear();
    }

    /// <summary>
    /// Render everything in the list of draws, in sort-code order.
    /// </summary>
    /// <param name="camera">Camera being rendered; forwarded to the draw helpers.</param>
    /// <param name="passID">Pass identifier handed to the effect callbacks.</param>
    public void Render(Camera camera, int passID)
    {
        lastEffect_ = null;
        lastMesh_ = null;
        lastIndices_ = null;

        // Snapshot so the iteration is unaffected if the list changes while drawing.
        MeshDraw[] ordered = draws_.Values.ToArray();
        for (int i = 0; i < ordered.Length; ++i)
        {
            MeshDraw draw = ordered[i];
            if (!SelectState(draw, passID))
                continue;

            int merged = draw.Transforms != null ? draw.Transforms.Count : 0;

            // Instancing needs an index buffer (DrawInstancedPrimitives is indexed), so
            // non-indexed meshes fall through to the one-off path.
            if (draw.canInstance && draw.indices != null && merged > MIN_INSTANCES)
            {
                DrawInstancedChunks(draw, camera, passID);
            }
            else if (merged > 0)
            {
                for (int m = 0; m < merged; ++m)
                    OneOffDraw(draw.verts, draw.indices, draw.Transforms[m], camera, passID);
            }
            else
            {
                OneOffDraw(draw.verts, draw.indices, draw.transform, camera, passID);
            }
        }

        if (lastEffect_ != null)
            lastEffect_.EffectDeselected();
        lastMesh_ = null;
        lastEffect_ = null;
    }

    // Switches the tracked effect/mesh to those of the given draw, notifying effects of the change.
    // Returns false when the draw's effect reports that it cannot render.
    bool SelectState(MeshDraw draw, int passID)
    {
        if (draw.verts == lastMesh_ && draw.effect == lastEffect_)
            return true;

        if (draw.effect != lastEffect_)
        {
            if (lastEffect_ != null)
                lastEffect_.EffectDeselected();
            if (!draw.effect.EffectSelected(passID))
            {
                lastEffect_ = null;
                return false;
            }
        }

        lastMesh_ = draw.verts;
        lastIndices_ = draw.indices;
        lastEffect_ = draw.effect;
        return true;
    }

    // Submits a draw's merged transforms as instanced calls of at most MAX_INSTANCES each.
    void DrawInstancedChunks(MeshDraw draw, Camera camera, int passID)
    {
        List<Matrix> transforms = draw.Transforms;
        int total = transforms.Count;

        if (total <= MAX_INSTANCES)
        {
            // Everything fits in one call; no need to slice the list.
            InstancedDraw(draw.verts, draw.indices, transforms.ToArray(), camera, passID);
            return;
        }

        for (int start = 0; start < total; start += MAX_INSTANCES)
        {
            int count = Math.Min(MAX_INSTANCES, total - start);
            Matrix[] chunk = transforms.GetRange(start, count).ToArray();
            InstancedDraw(draw.verts, draw.indices, chunk, camera, passID);
        }
    }

    // Returns the shared per-instance vertex buffer filled with the given transforms,
    // (re)allocating it when it does not exist yet or is too small for this batch.
    VertexBuffer GetInstanceBuffer(int ct, Matrix[] transforms)
    {
        int required = Math.Max(ct, transforms.Length);
        if (instancedTransforms_ == null || instancedTransforms_.VertexCount < required)
        {
            if (instancedTransforms_ != null)
                instancedTransforms_.Dispose();

            // Allocate for a full chunk up front so that batches of varying size reuse one buffer.
            int capacity = Math.Max(required, MAX_INSTANCES);
            instancedTransforms_ = new DynamicVertexBuffer(device_, VertexInstanceTransform.vertexDeclaration, capacity, BufferUsage.WriteOnly);
        }

        instancedTransforms_.SetData(transforms, 0, transforms.Length, SetDataOptions.Discard);
        return instancedTransforms_;
    }

    // Draws one object with the currently selected effect, provided the effect accepts the draw.
    void OneOffDraw(VertexBuffer verts, IndexBuffer indices, Matrix trans, Camera camera, int passID)
    {
        if (!lastEffect_.PrepareOneOff(trans, passID))
            return;

        // Bind the geometry explicitly; a preceding instanced draw leaves two streams bound.
        device_.SetVertexBuffer(verts);

        if (indices != null)
        {
            device_.Indices = indices;
            device_.DrawIndexedPrimitives(PrimitiveType.TriangleList, 0, 0, indices.IndexCount / 3);
        }
        else
        {
            // Non-indexed geometry: every three vertices form one triangle.
            device_.DrawPrimitives(PrimitiveType.TriangleList, 0, verts.VertexCount / 3);
        }
    }

    // Draws matrices.Length instances of the geometry in one call: stream 0 carries the mesh,
    // stream 1 the per-instance transforms (advanced once per instance).
    void InstancedDraw(VertexBuffer verts, IndexBuffer indices, Matrix[] matrices, Camera camera, int passID)
    {
        if (!lastEffect_.PrepareInstanced(passID))
            return;

        device_.Indices = indices;
        VertexBuffer instanceBuff = GetInstanceBuffer(matrices.Length, matrices);
        device_.SetVertexBuffers(new VertexBufferBinding[]
        {
            new VertexBufferBinding(verts),
            new VertexBufferBinding(instanceBuff, 0, 1)
        });
        device_.DrawInstancedPrimitives(PrimitiveType.TriangleList, 0, 0, indices.IndexCount / 3, matrices.Length);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment