Skip to content

Instantly share code, notes, and snippets.

@hyrmn
Last active Sep 10, 2021
Embed
What would you like to do?
Surprisingly slower
/// <summary>
/// Counts the bytes in <paramref name="file"/> that are equal to <c>Rune</c>
/// (a constant declared elsewhere in this file; presumably the newline byte - confirm).
/// </summary>
/// <param name="file">Stream to read from its current position until <c>Read</c> returns 0.</param>
/// <returns>The number of matching bytes found.</returns>
/// <remarks>
/// Full 32-byte chunks are compared with AVX2 when the CPU supports it; whatever is left
/// over (or the whole buffer when AVX2 is unavailable) is counted one byte at a time, so
/// bytes after the last full vector are no longer dropped.
/// </remarks>
static unsafe int CountLines(FileStream file)
{
    const int vectorSize = 256 / 8; //256 bits, 8 bits in a byte.

    var count = 0;
    // Every lane holds the byte we are looking for.
    var runeMask = Vector256.Create((byte)Rune);
    var buffer = new byte[BufferSize];
    int read;

    // Read until end of stream; a zero-byte read ends the loop instead of spinning
    // forever when the file is shorter than its length reported earlier.
    while ((read = file.Read(buffer, 0, buffer.Length)) > 0)
    {
        var i = 0;

        if (Avx2.IsSupported)
        {
            fixed (byte* ptr = buffer)
            {
                for (; i <= read - vectorSize; i += vectorSize)
                {
                    var v = Avx2.LoadVector256(ptr + i);
                    var matches = Avx2.CompareEqual(v, runeMask);

                    // MoveMask packs the top bit of each of the 32 lanes into an int;
                    // matching lanes are 0xFF, so the set-bit count is the match count.
                    // unchecked: bit 31 may be set, which makes the int negative.
                    var bits = unchecked((uint)Avx2.MoveMask(matches));
                    count += System.Numerics.BitOperations.PopCount(bits);
                }
            }
        }

        // Scalar pass over the bytes the vector loop did not cover.
        for (; i < read; i++)
        {
            if (buffer[i] == Rune)
            {
                count++;
            }
        }
    }

    return count;
}
using System.Numerics;
namespace nlc;
/// <summary>
/// Counts newline (<c>'\n'</c>) bytes in a stream using <see cref="Vector{T}"/> comparisons.
/// </summary>
public class LineCounter
{
    /// <summary>Size in bytes of the read buffer allocated by each <see cref="CountLines"/> call.</summary>
    public const int BufferSize = 128 * 1024;

    private const byte NewLine = (byte)'\n';

    // Vector with every lane set to the newline byte.
    private static readonly Vector<byte> NewLineVector = new(NewLine);

    /// <summary>
    /// Reads <paramref name="stream"/> until a read returns no bytes and counts the
    /// <c>'\n'</c> bytes seen.
    /// </summary>
    /// <param name="stream">The stream to read from.</param>
    /// <returns>The number of <c>'\n'</c> bytes read.</returns>
    public uint CountLines(Stream stream)
    {
        byte[] storage = new byte[BufferSize];
        Span<byte> buffer = storage;
        int width = Vector<byte>.Count;
        uint total = 0;

        for (int filled = stream.Read(buffer); filled > 0; filled = stream.Read(buffer))
        {
            int offset = 0;
            int lastVectorStart = filled - width;

            // Whole vectors: matching lanes compare to all-ones, negate to 1, and the
            // dot product with a vector of ones adds them up.
            while (offset <= lastVectorStart)
            {
                Vector<byte> chunk = new Vector<byte>(buffer.Slice(offset, width));
                Vector<byte> hits = Vector.Equals(chunk, NewLineVector);
                total += Vector.Dot(-hits, Vector<byte>.One);
                offset += width;
            }

            // Remaining bytes that do not fill a vector (empty when offset == filled).
            Span<byte> tail = buffer.Slice(offset, filled - offset);
            for (int at = tail.IndexOf(NewLine); at > -1; at = tail.IndexOf(NewLine))
            {
                tail = tail.Slice(at + 1);
                total++;
            }
        }

        return total;
    }
}
@Buildstarted

This comment has been minimized.

Copy link

@Buildstarted Buildstarted commented Sep 10, 2021

Try this

	/// <summary>
	/// Counts the newline bytes (value 10, <c>'\n'</c>) in <paramref name="file"/>.
	/// </summary>
	/// <param name="file">
	/// Stream to read from its current position until <c>Read</c> returns 0. The stream's
	/// <c>Length</c> is not consulted, so non-seekable streams are accepted.
	/// </param>
	/// <returns>The number of newline bytes found.</returns>
	/// <remarks>
	/// Full 32-byte chunks are compared with AVX2 when the CPU supports it; whatever is left
	/// over (or the whole buffer when AVX2 is unavailable) is counted one byte at a time, so
	/// bytes after the last full vector are no longer dropped.
	/// </remarks>
	public unsafe uint CountLines(Stream file)
	{
		const int vectorSize = 256 / 8; //256 bits, 8 bits in a byte.
		const byte newline = 10; // '\n'

		uint count = 0;
		// Every lane holds the newline byte.
		var runeMask = Vector256.Create(newline);
		var buffer = new byte[BufferSize];
		int read;

		// A zero-byte read marks end of stream and ends the loop, so a stream that ends
		// early can no longer cause an endless loop.
		while ((read = file.Read(buffer, 0, buffer.Length)) > 0)
		{
			var i = 0;

			if (Avx2.IsSupported)
			{
				fixed (byte* ptr = buffer)
				{
					for (; i <= read - vectorSize; i += vectorSize)
					{
						var v = Avx2.LoadVector256(ptr + i);
						var matches = Avx2.CompareEqual(v, runeMask);

						// MoveMask packs the top bit of each of the 32 lanes into an int;
						// matching lanes are 0xFF, so the set-bit count is the match count.
						// unchecked: bit 31 may be set, which makes the int negative.
						var bits = unchecked((uint)Avx2.MoveMask(matches));
						count += (uint)System.Numerics.BitOperations.PopCount(bits);
					}
				}
			}

			// Scalar pass over the bytes the vector loop did not cover.
			for (; i < read; i++)
			{
				if (buffer[i] == newline)
				{
					count++;
				}
			}
		}

		return count;
	}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment