Last active
May 3, 2020 09:02
-
-
Save jfrijters/19dffa13c7e00fa1d4d7d77888613eee to your computer and use it in GitHub Desktop.
.NET Core 3.1 AVX2 16x16 life implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
namespace CoreConsoleApp1 | |
{ | |
/// <summary>
/// Console driver: seeds a 16x16 Game of Life board, draws it, and advances
/// it by one generation each time the user presses enter.
/// </summary>
static class Program
{
    /// <summary>Number of rows and of columns on the board.</summary>
    const int Size = 16;

    /// <summary>
    /// Builds the initial board and then alternates between drawing the
    /// current generation and computing the next one. Returns when standard
    /// input is exhausted or when the CPU lacks AVX2.
    /// </summary>
    static void Main()
    {
        // Life16x16 calls AVX2 intrinsics unconditionally, so bail out with a
        // readable message on hardware that does not provide them.
        if (!Avx2.IsSupported)
        {
            Console.Error.WriteLine("This program requires a CPU with AVX2 support.");
            return;
        }

        // One ushort per row, top row first. Print treats bit 15 as the
        // leftmost cell, so the binary literals read like the picture.
        var v = Vector256.Create(
            (ushort)0b0100_0000_0000_0000,
            (ushort)0b0010_0000_0000_0111,
            (ushort)0b1110_0000_0000_1110,
            (ushort)0b0000_0000_0000_0000,
            (ushort)0b0000_0000_0000_0000,
            (ushort)0b0000_0000_0000_0000,
            (ushort)0b0000_0000_0000_0011,
            (ushort)0b0000_0000_0000_0011,
            (ushort)0b0000_0010_0000_0000,
            (ushort)0b0000_0010_0000_0000,
            (ushort)0b0000_0010_0000_0000,
            (ushort)0b0000_0000_0000_0000,
            (ushort)0b0000_0000_0011_0000,
            (ushort)0b0000_0000_0011_0000,
            (ushort)0b0000_0000_0000_1100,
            (ushort)0b0000_0000_0000_1100);

        var life = new Life16x16(v);
        for (; ; )
        {
            Print(life.State);
            if (!WaitForEnter())
            {
                // End of input: ReadLine would keep returning null at once,
                // turning this loop into a busy spin, so stop instead.
                return;
            }
            life.Step();
        }
    }

    /// <summary>
    /// Draws the board at the top-left corner of the console, two characters
    /// per cell: "[]" for a set bit and "__" for a clear bit.
    /// </summary>
    /// <param name="state">Board rows; element 0 is drawn first.</param>
    static void Print(Vector256<ushort> state)
    {
        // Repositioning the cursor is only meaningful on a real console;
        // skip it when the output has been redirected.
        if (!Console.IsOutputRedirected)
        {
            Console.SetCursorPosition(0, 0);
        }

        for (var i = 0; i < Size; i++)
        {
            var row = state.GetElement(i);
            for (var j = 0; j < Size; j++)
            {
                // Column j corresponds to bit (15 - j) of the row.
                var value = (row & (1 << (Size - 1 - j))) != 0;
                Console.Write(value ? "[]" : "__");
            }
            Console.WriteLine();
        }
    }

    /// <summary>
    /// Prompts the user and blocks until a line is read from standard input.
    /// </summary>
    /// <returns>
    /// <c>true</c> if a line was read; <c>false</c> if standard input has
    /// reached its end.
    /// </returns>
    static bool WaitForEnter()
    {
        Console.WriteLine("Press enter to continue...");
        return Console.ReadLine() != null;
    }
}
/// <summary>
/// A 16x16 Game of Life board. Each row is one 16-bit word (one bit per
/// cell) and the whole board is processed as a single 256-bit vector.
/// Cells outside the 16x16 area are always treated as dead.
/// </summary>
unsafe struct Life16x16
{
    // Layout: state[0] and state[17] are padding rows that stay zero;
    // state[1..16] hold the board. The padding lets Step load the "row above"
    // and "row below" vectors with plain unaligned loads at p and p + 2.
    fixed ushort state[18];

    /// <summary>Creates a board from 16 row words.</summary>
    /// <param name="initialState">Element i becomes board row i.</param>
    internal Life16x16(Vector256<ushort> initialState)
    {
        fixed (ushort* p = state)
        {
            // Step reads both padding rows as neighbours, so they must be
            // zero regardless of what the underlying storage held before.
            p[0] = 0;
            Avx.Store(p + 1, initialState);
            p[17] = 0;
        }
    }

    /// <summary>Gets the 16 board rows (padding rows excluded).</summary>
    internal Vector256<ushort> State
    {
        get
        {
            fixed (ushort* p = state)
            {
                return Avx.LoadDquVector256(p + 1);
            }
        }
    }

    /// <summary>Advances the board by one generation in place.</summary>
    internal void Step()
    {
        fixed (ushort* p = state)
        {
            // The nine vectors v0..v8 form the 3x3 neighbourhood of every
            // cell: v1/v4/v7 are the rows above/at/below, and the 1-bit
            // logical shifts line up the horizontal neighbours. Logical
            // shifts insert zeros, so nothing wraps around the row ends.
            var v4 = Avx.LoadDquVector256(p + 1);
            var v1 = Avx.LoadDquVector256(p);
            var v7 = Avx.LoadDquVector256(p + 2);
            var v3 = Avx2.ShiftRightLogical(v4, 1);
            var v0 = Avx2.ShiftRightLogical(v1, 1);
            var v2 = Avx2.ShiftLeftLogical(v1, 1);
            var v5 = Avx2.ShiftLeftLogical(v4, 1);
            var v6 = Avx2.ShiftRightLogical(v7, 1);
            var v8 = Avx2.ShiftLeftLogical(v7, 1);

            // Per-cell count of set bits over all nine vectors. The count
            // includes the cell itself (v4).
            var acc = new FourBitAccumulator(v0, v1, v2, v3);
            acc.Add(v4);
            acc.Add(v5);
            acc.Add(v6);
            acc.Add(v7);
            acc.Add(v8);

            // With the cell itself counted: a total of 3 means the cell is
            // set next generation either way; a total of 4 keeps a cell that
            // is currently set. Every other total clears it.
            Avx.Store(p + 1, Avx2.Or(acc.IsThree, Avx2.And(acc.IsFour, v4)));
        }
    }
}
/// <summary>
/// Bit-sliced counter: for each of the 256 bit positions independently it
/// holds a 4-bit count, stored as four bit planes (b0 = least significant,
/// b3 = most significant). Each input vector contributes 0 or 1 to the count
/// at every bit position.
/// </summary>
struct FourBitAccumulator
{
    Vector256<ushort> b0;
    Vector256<ushort> b1;
    Vector256<ushort> b2;
    Vector256<ushort> b3;

    /// <summary>
    /// Initializes the counter to the per-bit sum of the four inputs.
    /// </summary>
    internal FourBitAccumulator(
        Vector256<ushort> v0,
        Vector256<ushort> v1,
        Vector256<ushort> v2,
        Vector256<ushort> v3)
    {
        // v0 + v1: half adder.
        b0 = Avx2.Xor(v0, v1);
        b1 = Avx2.And(v0, v1);

        // + v2: ripple the carry out of plane 0 into plane 1 and onward.
        var c0 = Avx2.And(b0, v2);
        b0 = Avx2.Xor(b0, v2);
        b2 = Avx2.And(b1, c0);
        b1 = Avx2.Xor(b1, c0);

        // + v3: same ripple, now through planes 0, 1 and 2.
        c0 = Avx2.And(b0, v3);
        b0 = Avx2.Xor(b0, v3);
        var c1 = Avx2.And(b1, c0);
        b1 = Avx2.Xor(b1, c0);
        b3 = Avx2.And(b2, c1);
        b2 = Avx2.Xor(b2, c1);
    }

    /// <summary>Adds one more input vector to the per-bit counts.</summary>
    /// <param name="v">Each set bit increments the count at that position.</param>
    internal void Add(Vector256<ushort> v)
    {
        // Ripple-carry increment: cN is the carry out of plane N.
        var c0 = Avx2.And(b0, v);
        b0 = Avx2.Xor(b0, v);
        var c1 = Avx2.And(b1, c0);
        b1 = Avx2.Xor(b1, c0);
        b3 = Avx2.Xor(b3, Avx2.And(b2, c1));
        b2 = Avx2.Xor(b2, c1);
    }

    /// <summary>
    /// Mask of the bit positions whose count is exactly 3 (binary 0011).
    /// Both high planes must be clear; checking only b3 would also match a
    /// count of 7 (binary 0111).
    /// </summary>
    // Avx2.AndNot(x, y) computes (~x) & y.
    internal Vector256<ushort> IsThree => Avx2.AndNot(Avx2.Or(b2, b3), Avx2.And(b0, b1));

    /// <summary>
    /// Mask of the bit positions whose count is exactly 4 (binary 0100).
    /// </summary>
    internal Vector256<ushort> IsFour => Avx2.AndNot(Avx2.Or(b0, b1), Avx2.AndNot(b3, b2));
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment