// Delta-encodes the input entries in full blocks of 256: every entry is replaced by its
// difference from the entry before it, the difference is narrowed to 32 bits and written to
// the output buffer, and each finished block of 256 deltas is handed to ProcessBlock.
// Entries beyond the last full block are not touched by this loop.

// Running size of the output; starts at the size of one PForHeader
// (presumably the header that precedes the encoded blocks - confirm against the caller).
var totalSize = sizeof(PForHeader);
int i = 0;
// The output buffer is written as 32-bit unsigned deltas.
var entriesAsInt = (uint*)_entriesOutput;
var entriesIn = _entries;
// Seed "previous" with the first entry broadcast to every lane, so the very first delta
// computed below is entries[0] - entries[0] = 0.
var prev = Vector256.Create(*entriesIn);
// Every lane holds uint.MaxValue: the largest delta that still fits in 32 bits.
var max = Vector256.Create<long>(uint.MaxValue);
for (; i + 256 <= _count; i += 256)
{
    // Remember where this block's deltas start so the whole block can be processed afterwards.
    var blockStart = entriesAsInt;
    int j = 0;
    // Each iteration consumes Vector256<long>.Count (four) 64-bit entries.
    for (; j < 256; j += Vector256<long>.Count)
    {
        var cur = Vector256.Load(entriesIn + i + j);
        // Build the vector of predecessors: [prev[3], cur[0], cur[1], cur[2]].
        //  - first shuffle shifts cur up by one lane; the mask clears lane 0
        //  - second shuffle broadcasts the last lane of the previous vector; the mask keeps only lane 0
        // ('&' binds tighter than '|' in C#, so the two masked halves are OR-ed together.)
        var mixed = Vector256.Shuffle(cur, Vector256.Create(0, 0, 1, 2)) & Vector256.Create(0, -1, -1, -1) |
                    Vector256.Shuffle(prev, Vector256.Create(3, 3, 3, 3)) & Vector256.Create(-1, 0, 0, 0);
        prev = cur;
        // Per-lane difference between each entry and the entry that precedes it.
        var delta = cur - mixed;

        // At least one delta does not fit in 32 bits. The helper receives j (the offset inside
        // the current block, not the absolute index i + j) together with the full 64-bit deltas;
        // what it does with them is not visible in this section.
        // NOTE(review): the comparison is signed, so a negative delta (unsorted input) is not
        // caught here - presumably the entries are sorted ascending; confirm.
        if (Vector256.GreaterThanAny(delta, max))
        {
            HandleDeltaGreaterThanMax(j, delta);
        }

        // Reinterpret the four 64-bit deltas as eight 32-bit values and gather elements 0, 2, 4, 6
        // (the low halves on little-endian hardware) into the first four lanes. The upper four
        // lanes are filled with element 0 and carry no meaning.
        // NOTE(review): this runs even after the branch above, so an oversized delta is still
        // stored truncated - presumably the helper records what is needed to restore it; confirm.
        var deltaInts = Vector256.Shuffle(delta.AsUInt32(), Vector256.Create(0u,2,4,6,0,0,0,0));
        deltaInts.Store(entriesAsInt);
        // we write 8 values, but increment by 4, so we'll overwrite it next op
        // NOTE(review): the very last store therefore writes 4 uints past the final meaningful
        // delta; the buffer behind _entriesOutput must have room for that - confirm.
        entriesAsInt += Vector256<long>.Count;
    }
    // Hand the 256 deltas of this block to ProcessBlock and add its return value to the total
    // (presumably the number of bytes the block occupies once encoded - confirm).
    totalSize += ProcessBlock(blockStart);
}