Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
.NET Core 3.1 AVX2 16x16 life implementation
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace CoreConsoleApp1
{
/// <summary>
/// Console driver: seeds a 16x16 grid, then repeatedly prints the current
/// generation and advances it by one step each time the user presses enter.
/// </summary>
static class Program
{
    /// <summary>Number of rows, and of cells per row, in the grid.</summary>
    const int Size = 16;

    static void Main()
    {
        // Life16x16 is written directly against AVX/AVX2 intrinsics, which throw
        // PlatformNotSupportedException on hardware that lacks them. Report that
        // up front instead of crashing inside the simulation.
        if (!Avx2.IsSupported)
        {
            Console.Error.WriteLine("This program requires a CPU with AVX2 support.");
            return;
        }

        // One ushort per row; Print shows bit 15 as the leftmost cell of a row.
        ushort[] rows =
        {
            0b0100_0000_0000_0000,
            0b0010_0000_0000_0111,
            0b1110_0000_0000_1110,
            0b0000_0000_0000_0000,
            0b0000_0000_0000_0000,
            0b0000_0000_0000_0000,
            0b0000_0000_0000_0011,
            0b0000_0000_0000_0011,
            0b0000_0010_0000_0000,
            0b0000_0010_0000_0000,
            0b0000_0010_0000_0000,
            0b0000_0000_0000_0000,
            0b0000_0000_0011_0000,
            0b0000_0000_0011_0000,
            0b0000_0000_0000_1100,
            0b0000_0000_0000_1100,
        };

        var v = Vector256<ushort>.Zero;
        for (var i = 0; i < rows.Length; i++)
        {
            v = v.WithElement(i, rows[i]);
        }

        var life = new Life16x16(v);
        for (; ; )
        {
            Print(life.State);

            // ReadLine returns null once standard input is exhausted (closed or
            // redirected stream); stop then rather than looping without pause.
            if (Console.ReadLine() == null)
            {
                break;
            }

            life.Step();
        }
    }

    /// <summary>
    /// Draws the grid from the top-left corner of the console, one text line per
    /// row, followed by the prompt for the next generation.
    /// </summary>
    /// <param name="state">Grid rows; element i is row i, bit 15 is the first cell drawn.</param>
    static void Print(Vector256<ushort> state)
    {
        // Overwrite the previous frame in place instead of scrolling.
        Console.SetCursorPosition(0, 0);
        for (var i = 0; i < Size; i++)
        {
            var row = state.GetElement(i);
            for (var j = 0; j < Size; j++)
            {
                // Walk the row from the most significant bit down to bit 0.
                var mask = 1 << (Size - 1 - j);
                var alive = (row & mask) != 0;
                Console.Write(alive ? "[]" : "__");
            }
            Console.WriteLine();
        }
        Console.WriteLine("Press enter to continue...");
    }
}
/// <summary>
/// A 16x16 Game of Life grid stored as one <see cref="ushort"/> per row and
/// advanced a whole generation at a time with 256-bit vector operations.
/// Cells outside the grid are treated as dead.
/// </summary>
unsafe struct Life16x16
{
    /// <summary>Number of grid rows held in the buffer.</summary>
    const int RowCount = 16;

    // Rows live at indices 1..RowCount. Index 0 and index RowCount + 1 are
    // padding rows that Step reads as the (always empty) rows above the top
    // edge and below the bottom edge.
    fixed ushort state[RowCount + 2];

    /// <summary>Creates a grid whose row i is element i of <paramref name="initialState"/>.</summary>
    /// <param name="initialState">The sixteen initial rows.</param>
    internal Life16x16(Vector256<ushort> initialState)
    {
        fixed (ushort* p = state)
        {
            // The language does not guarantee that a fixed-size buffer inside a
            // struct local starts out zeroed, and Step depends on both padding
            // rows being empty, so clear them explicitly.
            p[0] = 0;
            p[RowCount + 1] = 0;
            Avx.Store(p + 1, initialState);
        }
    }

    /// <summary>Gets the sixteen grid rows of the current generation.</summary>
    internal Vector256<ushort> State
    {
        get
        {
            fixed (ushort* p = state)
            {
                // Skip the leading padding row.
                return Avx.LoadDquVector256(p + 1);
            }
        }
    }

    /// <summary>Replaces the grid with its next generation.</summary>
    internal void Step()
    {
        fixed (ushort* p = state)
        {
            // v0..v8 are the nine cells of each 3x3 neighbourhood, numbered
            //   v0 v1 v2
            //   v3 v4 v5
            //   v6 v7 v8
            // with v4 being the cell itself. Loading one element earlier/later
            // yields the row above/below; shifting each 16-bit lane by one bit
            // yields the horizontally adjacent cell, with zeros shifted in at
            // the left and right edges.
            var v4 = Avx.LoadDquVector256(p + 1);
            var v1 = Avx.LoadDquVector256(p);
            var v7 = Avx.LoadDquVector256(p + 2);
            var v3 = Avx2.ShiftRightLogical(v4, 1);
            var v0 = Avx2.ShiftRightLogical(v1, 1);
            var v2 = Avx2.ShiftLeftLogical(v1, 1);
            var v5 = Avx2.ShiftLeftLogical(v4, 1);
            var v6 = Avx2.ShiftRightLogical(v7, 1);
            var v8 = Avx2.ShiftLeftLogical(v7, 1);

            // Per-cell sum over the whole 3x3 block, centre cell included.
            var acc = new FourBitAccumulator(v0, v1, v2, v3);
            acc.Add(v4);
            acc.Add(v5);
            acc.Add(v6);
            acc.Add(v7);
            acc.Add(v8);

            // Because the sum includes the cell itself:
            //   sum is three          -> alive next (birth, or survival with two neighbours)
            //   sum is four and alive -> alive next (survival with three neighbours)
            // Only rows 1..RowCount are written back, so the padding rows stay empty.
            Avx.Store(p + 1, Avx2.Or(acc.IsThree, Avx2.And(acc.IsFour, v4)));
        }
    }
}
/// <summary>
/// Bit-sliced counter: for every one of the 256 bit positions it keeps an
/// independent 4-bit count of how many of the added vectors had that bit set.
/// <c>b0</c> holds the least significant bit of each count and <c>b3</c> the
/// most significant.
/// </summary>
/// <remarks>
/// Four bits per position means counts above 15 are not representable; a carry
/// out of <c>b3</c> is discarded.
/// </remarks>
struct FourBitAccumulator
{
    Vector256<ushort> b0;
    Vector256<ushort> b1;
    Vector256<ushort> b2;
    Vector256<ushort> b3;

    /// <summary>Creates an accumulator holding the per-bit sum of the four given vectors.</summary>
    internal FourBitAccumulator(
        Vector256<ushort> v0,
        Vector256<ushort> v1,
        Vector256<ushort> v2,
        Vector256<ushort> v3)
    {
        // v0 + v1: half adder (sum in b0, carry in b1).
        b0 = Avx2.Xor(v0, v1);
        b1 = Avx2.And(v0, v1);

        // + v2: ripple the carry from bit 0 into bit 1, then into bit 2.
        var c0 = Avx2.And(b0, v2);
        b0 = Avx2.Xor(b0, v2);
        b2 = Avx2.And(b1, c0);
        b1 = Avx2.Xor(b1, c0);

        // + v3: ripple through bits 0, 1 and 2, with the last carry going to bit 3.
        c0 = Avx2.And(b0, v3);
        b0 = Avx2.Xor(b0, v3);
        var c1 = Avx2.And(b1, c0);
        b1 = Avx2.Xor(b1, c0);
        b3 = Avx2.And(b2, c1);
        b2 = Avx2.Xor(b2, c1);
    }

    /// <summary>Adds one to the count of every bit position that is set in <paramref name="v"/>.</summary>
    /// <param name="v">Vector whose set bits are counted.</param>
    internal void Add(Vector256<ushort> v)
    {
        // Ripple-carry increment: each carry is the AND of the bit with the
        // incoming carry, each new bit is their XOR.
        var c0 = Avx2.And(b0, v);
        b0 = Avx2.Xor(b0, v);
        var c1 = Avx2.And(b1, c0);
        b1 = Avx2.Xor(b1, c0);
        b3 = Avx2.Xor(b3, Avx2.And(b2, c1));
        b2 = Avx2.Xor(b2, c1);
    }

    /// <summary>
    /// Gets a mask with a bit set at every position whose count is exactly three (0011).
    /// </summary>
    // Avx2.AndNot(x, y) computes (~x) & y. Both high bits must be clear:
    // testing b3 alone would also accept a count of seven (0111).
    internal Vector256<ushort> IsThree => Avx2.AndNot(Avx2.Or(b2, b3), Avx2.And(b0, b1));

    /// <summary>
    /// Gets a mask with a bit set at every position whose count is exactly four (0100).
    /// </summary>
    internal Vector256<ushort> IsFour => Avx2.AndNot(Avx2.Or(b0, b1), Avx2.AndNot(b3, b2));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.