|
using BenchmarkDotNet.Attributes; |
|
using System; |
|
using System.Runtime.Intrinsics; |
|
using System.Runtime.Intrinsics.X86; |
|
using System.Text; |
|
|
|
namespace Benchmarks |
|
{ |
|
public class QuickIntBench |
|
{ |
|
// this needs to be at least 16 characters |
|
// this is currently the largest int value. |
|
const string input = "2147483647,123456"; |
|
readonly byte[] data; |
|
|
|
public QuickIntBench() |
|
{ |
|
data = Encoding.UTF8.GetBytes(input); |
|
} |
|
|
|
[Benchmark] |
|
public void QuickIntParse() |
|
{ |
|
var val = QuickInt.Parse(data, out int consumed); |
|
} |
|
|
|
[Benchmark] |
|
public void Utf8IntParse() |
|
{ |
|
var val = System.Buffers.Text.Utf8Parser.TryParse(data, out int value, out int consumed); |
|
} |
|
} |
|
|
|
// a C# implementation of Andreas Fredriksson SIMD integer parser as seen in https://vimeo.com/644068002 |
|
internal static class QuickInt |
|
{ |
|
static int[] Factors = new[] { 1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; |
|
|
|
static Vector128<sbyte> ma = Vector128.Create(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1); |
|
static Vector128<sbyte> mb = Vector128.Create(4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1); |
|
static Vector128<sbyte> mc = Vector128.Create(8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1, -1, -1); |
|
|
|
static Vector128<sbyte> offset = Vector128.Create((sbyte)'0'); |
|
static Vector128<sbyte> shift = Vector128.Create((sbyte)(sbyte.MaxValue - 10)); |
|
|
|
public unsafe static int Parse(ReadOnlySpan<byte> buffer, out int count) |
|
{ |
|
// WARNING: this code has issues and is only meant to explore a SIMD solution |
|
// to integer parsing. |
|
// what if ... |
|
// buffer length is < 16 |
|
// number is negative (leading '-') |
|
// more than 10 digit number |
|
// a 10 digit number that is larger than int.MaxValue |
|
// ... and probably other things I haven't considered |
|
|
|
count = 0; |
|
fixed (byte* pp = buffer) |
|
{ |
|
var p = (sbyte*)pp; |
|
var dat = Sse2.LoadVector128(p); |
|
var values = Sse2.Subtract(dat, offset); |
|
var mask = Sse2.CompareLessThan(Sse2.Add(values, shift), shift); |
|
var maskbits = Sse2.MoveMask(mask); |
|
count = (int)Bmi1.TrailingZeroCount((uint)maskbits); |
|
|
|
fixed (int* fp = &Factors[10 - count]) // index into factors based on number of digits |
|
{ |
|
var fa = Sse2.LoadVector128(fp); |
|
var fb = Sse2.LoadVector128(fp + 4); |
|
var fc = Sse2.LoadVector128(fp + 8); |
|
|
|
var a = Ssse3.Shuffle(values, ma).AsInt32(); |
|
var b = Ssse3.Shuffle(values, mb).AsInt32(); |
|
var c = Ssse3.Shuffle(values, mc).AsInt32(); |
|
var fma = Sse41.MultiplyLow(a, fa); |
|
var fmb = Sse41.MultiplyLow(b, fb); |
|
var fmc = Sse41.MultiplyLow(c, fc); |
|
fma = Sse2.Add(fma, fmb); |
|
fma = Sse2.Add(fma, fmc); |
|
|
|
var vecc = Ssse3.HorizontalAdd(fma, fma); |
|
vecc = Ssse3.HorizontalAdd(vecc, vecc); |
|
return vecc.GetElement(0); |
|
} |
|
} |
|
} |
|
} |
|
} |