Skip to content

Instantly share code, notes, and snippets.

@GrabYourPitchforks
Created June 25, 2024 20:38
Show Gist options
  • Save GrabYourPitchforks/a3b1ec234fee3ffb4cf68ffa8e51904f to your computer and use it in GitHub Desktop.
Save GrabYourPitchforks/a3b1ec234fee3ffb4cf68ffa8e51904f to your computer and use it in GitHub Desktop.
UTF8 char count testing
using System;
using System.Buffers;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Text;
using System.Text.Unicode;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
// Program entry point: hand the benchmark class to BenchmarkDotNet.
// The returned summary object is not needed here, so it is explicitly discarded.
_ = BenchmarkRunner.Run<Utf8Experimental>();
/// <summary>
/// Benchmarks an experimental AVX2-based UTF-8 → UTF-16 char counter against
/// <see cref="Encoding.UTF8"/>. The experimental counter assumes its input is
/// well-formed UTF-8; <see cref="GetString_Mod_B"/> validates the result and
/// falls back to the framework implementation when that assumption fails.
/// </summary>
[SimpleJob(RuntimeMoniker.Net80)]
public unsafe class Utf8Experimental
{
    // UTF-8 payload shared by all benchmarks. Replaced by Setup(); initialised to an
    // empty array so the benchmark methods never observe null.
    private byte[] _data = Array.Empty<byte>();

    /// <summary>Loads the benchmark input from "twitter.json" (resolved against the current directory).</summary>
    [GlobalSetup]
    public void Setup()
    {
        _data = File.ReadAllBytes("twitter.json");
    }

    /// <summary>Baseline: char count as computed by <see cref="Encoding.UTF8"/>.</summary>
    [Benchmark(Baseline = true)]
    public nuint CountChars_Net8()
    {
        return (uint)Encoding.UTF8.GetCharCount(_data);
    }

    /// <summary>Reference transcoding through <see cref="Encoding.UTF8"/> (benchmark attribute currently disabled).</summary>
    // [Benchmark(Baseline = true)]
    public string GetString_Net8()
    {
        return Encoding.UTF8.GetString(_data);
    }

    /// <summary>Char count as computed by the experimental vectorized counter.</summary>
    [Benchmark]
    public nuint CountChars_Mod_B()
    {
        return (uint)CountChars_AssumeValid_Entry_B(_data);
    }

    /// <summary>
    /// Transcodes the payload by first counting chars with the experimental counter,
    /// allocating a string of exactly that length, and transcoding straight into it.
    /// If the count turns out to be wrong (e.g. the input was not valid UTF-8), the
    /// result is discarded and <see cref="Encoding.UTF8"/> is used instead.
    /// (Benchmark attribute currently disabled.)
    /// </summary>
    // [Benchmark]
    public string GetString_Mod_B()
    {
        nuint stringLength = CountChars_AssumeValid_Entry_B(_data);

        // A UTF-8 byte sequence never produces more UTF-16 chars than it has bytes,
        // so a larger count can only come from malformed input.
        if (stringLength <= (uint)_data.Length)
        {
            // The callback is a static lambda and cannot capture locals, so the
            // success flag is passed by address through the state tuple. 'success'
            // is a stack local, so its address is stable for the duration of the call.
            bool success = false;
            string str = string.Create((int)stringLength, (data: _data, successPtr: (IntPtr)(&success)), static (chars, state) =>
            {
                var opStatus = Utf8.ToUtf16(state.data, chars, out int bytesRead, out int charsWritten, replaceInvalidSequences: false);

                // Only accept the string if the whole input was consumed and the
                // destination was filled exactly.
                *((bool*)state.successPtr) = (opStatus == OperationStatus.Done) && (bytesRead == state.data.Length) && (charsWritten == chars.Length);
            });

            if (success)
            {
                return str;
            }
        }

        // something went wrong - fall back to old logic instead
        return Encoding.UTF8.GetString(_data);
    }

    /// <summary>
    /// Counts the UTF-16 chars needed to represent <paramref name="buffer"/>,
    /// assuming it contains well-formed UTF-8.
    /// </summary>
    /// <param name="buffer">UTF-8 bytes to measure; may be empty.</param>
    /// <returns>The UTF-16 char count; 0 for an empty buffer.</returns>
    private static nuint CountChars_AssumeValid_Entry_B(ReadOnlySpan<byte> buffer)
    {
        if (buffer.IsEmpty)
        {
            return 0;
        }

        // The vectorized path needs AVX2 and POPCNT. Without them, defer to the
        // framework instead of throwing PlatformNotSupportedException.
        if (!Avx2.IsSupported || !Popcnt.IsSupported)
        {
            return (uint)Encoding.UTF8.GetCharCount(buffer);
        }

        fixed (byte* pData = &MemoryMarshal.GetReference(buffer))
        {
            return CountChars_AssumeValid_Avx2_B(pData, (uint)buffer.Length);
        }
    }

    /// <summary>
    /// AVX2 implementation of the char counter. Processes the buffer in 32-byte
    /// aligned stripes, masking off any bytes of the first and last stripe that
    /// lie outside <c>[pbData, pbData + cbData)</c>.
    /// </summary>
    /// <param name="pbData">Pointer to the first byte; must not be null and must stay pinned for the call.</param>
    /// <param name="cbData">Number of bytes to process; must be greater than zero.</param>
    /// <returns>The number of UTF-16 chars the bytes decode to, assuming valid UTF-8.</returns>
    private static nuint CountChars_AssumeValid_Avx2_B(byte* pbData, nuint cbData)
    {
        Debug.Assert(pbData != null);
        Debug.Assert(cbData > 0);
        Debug.Assert(Avx2.IsSupported);
        Debug.Assert(Popcnt.IsSupported);

        // General logic: all bytes in the ranges [00..7F] and [C0..EF] should
        // result in 1 char being generated, since they all correspond to ASCII
        // chars or lead bytes corresponding to BMP chars. All bytes in the
        // range [F0..FF] should result in 2 chars being generated since they
        // are lead bytes corresponding to the astral planes. Continuation
        // bytes [80..BF] generate nothing.

        // As signed bytes, [80..BF] is [-128..-65]; "signed > 0xBF" is therefore
        // true exactly for the non-continuation bytes.
        Vector256<byte> vecBF = Vector256.Create((byte)0xBF);

        // Unsigned saturating subtraction of 0x70 leaves the high bit set only
        // for bytes >= 0xF0, which MoveMask then extracts.
        Vector256<byte> vec70 = Vector256.Create((byte)0x70);

        nuint alignmentMask = (nuint)(Vector256<byte>.Count - 1);
        nuint cumulativeUtf16Chars = 0;

        byte* pJustPastEndOfData = pbData + cbData; // can't integer overflow due to C memory addressing rules

        // Address of the stripe holding the first byte, and of the stripe holding
        // the LAST VALID byte. The end must be derived from the last valid byte,
        // not from the one-past-the-end address: when the data ends exactly on a
        // stripe boundary the latter would name a stripe that lies entirely
        // outside the buffer, which would then be read and counted.
        byte* pAlignedReadStart = (byte*)((nuint)pbData & ~alignmentMask);
        byte* pAlignedReadEnd = (byte*)((nuint)(pJustPastEndOfData - 1) & ~alignmentMask);

        // Read the first byte or more of data. This also has potential to read
        // data before the start of the buffer, which we'll discard. The load is
        // 32-byte aligned and overlaps the buffer, so it stays within a page
        // that the buffer itself occupies.
        Vector256<byte> thisStripe = Avx2.LoadAlignedVector256(pAlignedReadStart);
        uint nonContinuationBytesBitmap = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(thisStripe.AsSByte(), vecBF.AsSByte()));
        uint astralLeadBytesBitmap = (uint)Avx2.MoveMask(Avx2.SubtractSaturate(thisStripe, vec70));

        // Mask in nuint before narrowing so the casts cannot overflow, even in a
        // checked build.
        int numPrefixBytesToDiscard = (int)((nuint)pbData & alignmentMask);
        uint startMask = uint.MaxValue << numPrefixBytesToDiscard;

        // Do any suffix bytes need to be discarded?
        int numValidBytesInFinalStripe = (int)((nuint)pJustPastEndOfData & alignmentMask); // 0 means the final stripe is full
        int numSuffixBytesToDiscard = (Vector256<byte>.Count - numValidBytesInFinalStripe) & (Vector256<byte>.Count - 1); // could be 0
        uint finalMask = (uint.MaxValue << numSuffixBytesToDiscard) >> numSuffixBytesToDiscard;

        // The whole buffer may fit inside the first stripe, in which case both
        // masks apply to it.
        if (pAlignedReadStart == pAlignedReadEnd)
        {
            nonContinuationBytesBitmap &= finalMask;
            astralLeadBytesBitmap &= finalMask;
        }

        nonContinuationBytesBitmap &= startMask;
        cumulativeUtf16Chars += (uint)Popcnt.PopCount(nonContinuationBytesBitmap);
        astralLeadBytesBitmap &= startMask;
        cumulativeUtf16Chars += (uint)Popcnt.PopCount(astralLeadBytesBitmap);

        // Now read the rest of the data in a loop.
        while (pAlignedReadStart < pAlignedReadEnd)
        {
            pAlignedReadStart += Vector256<byte>.Count;
            thisStripe = Avx2.LoadAlignedVector256(pAlignedReadStart);
            nonContinuationBytesBitmap = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(thisStripe.AsSByte(), vecBF.AsSByte()));
            astralLeadBytesBitmap = (uint)Avx2.MoveMask(Avx2.SubtractSaturate(thisStripe, vec70));

            // If this is the final stripe, discard any bytes past the end of the buffer.
            if (pAlignedReadStart == pAlignedReadEnd)
            {
                nonContinuationBytesBitmap &= finalMask;
                astralLeadBytesBitmap &= finalMask;
            }

            cumulativeUtf16Chars += (uint)Popcnt.PopCount(nonContinuationBytesBitmap);
            cumulativeUtf16Chars += (uint)Popcnt.PopCount(astralLeadBytesBitmap);
        }

        return cumulativeUtf16Chars;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment