Created
October 1, 2021 01:59
-
-
Save tannergooding/2a477a4fca73e803f90dd1bde914a0a5 to your computer and use it in GitHub Desktop.
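Comparison of a vectorized Sum method before and after the cross-platform (xplat) hardware intrinsics APIs. The first listing is the "after" version using Vector128; the second is the "before" version using the platform-specific Sse and AdvSimd classes.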
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Computes the sum of all elements of <paramref name="values"/>, accumulating four floats
/// at a time in a <see cref="Vector128{T}"/> when <see cref="Vector128.IsHardwareAccelerated"/>
/// is true and falling back to a plain scalar loop otherwise.
/// </summary>
/// <param name="values">The array whose elements are summed. An empty array yields 0.</param>
/// <returns>The sum of every element of <paramref name="values"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is <c>null</c>.</exception>
/// <remarks>
/// The vectorized path adds elements lane-by-lane rather than strictly left to right, so its
/// result may differ from the scalar fallback by floating-point rounding.
/// </remarks>
public float Sum(float[] values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    if (Vector128.IsHardwareAccelerated)
    {
        ref float value = ref MemoryMarshal.GetArrayDataReference(values);
        Vector128<float> vsum = Vector128<float>.Zero;

        // Elements that do not fill a whole vector are handled by the scalar tail loop below.
        int remainder = values.Length % Vector128<float>.Count;
        int vectorizedLength = values.Length - remainder;

        // 'i' is an element index that advances one full vector per iteration, so the bound
        // must be an element count. Bounding it by the number of vectors would stop after
        // roughly a quarter of the data and silently drop the rest.
        for (int i = 0; i < vectorizedLength; i += Vector128<float>.Count)
        {
            // Reinterpret the next Vector128<float>.Count floats as one vector (unaligned read).
            vsum += Unsafe.As<float, Vector128<float>>(ref Unsafe.Add(ref value, i));
        }

        // Horizontal add: collapse the per-lane partial sums into a single scalar.
        float sum = Vector128.Sum(vsum);

        for (int i = vectorizedLength; i < values.Length; i++)
        {
            sum += values[i];
        }

        return sum;
    }
    else
    {
        var sum = 0.0f;

        for (int i = 0; i < values.Length; i++)
        {
            sum += values[i];
        }

        return sum;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Computes the sum of all elements of <paramref name="values"/> using platform-specific
/// 128-bit SIMD instructions: <see cref="Sse"/> on x86/x64, <see cref="AdvSimd"/> on Arm64,
/// and a plain scalar loop when neither is supported.
/// </summary>
/// <param name="values">The array whose elements are summed. An empty array yields 0.</param>
/// <returns>The sum of every element of <paramref name="values"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is <c>null</c>.</exception>
/// <remarks>
/// The vectorized paths add elements lane-by-lane rather than strictly left to right, so their
/// results may differ from the scalar fallback by floating-point rounding.
/// </remarks>
public float Sum(float[] values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    // Only Sse instructions are used below, so Sse support is the precise requirement.
    if (Sse.IsSupported)
    {
        ref float value = ref MemoryMarshal.GetArrayDataReference(values);
        Vector128<float> vsum = Vector128<float>.Zero;

        // Elements that do not fill a whole vector are handled by the scalar tail loop below.
        int remainder = values.Length % Vector128<float>.Count;
        int vectorizedLength = values.Length - remainder;

        // 'i' is an element index that advances one full vector per iteration, so the bound
        // must be an element count, not the number of vectors.
        for (int i = 0; i < vectorizedLength; i += Vector128<float>.Count)
        {
            vsum = Sse.Add(vsum, Unsafe.As<float, Vector128<float>>(ref Unsafe.Add(ref value, i)));
        }

        // Fold the upper two lanes onto the lower two: lanes 0 and 1 now hold v0+v2 and v1+v3.
        vsum = Sse.Add(vsum, Sse.MoveHighToLow(vsum, vsum));

        // The control byte 0xB1 swaps adjacent lanes of vsum (ABCD -> BADC), so after the add
        // lane 0 holds the total of all four original lanes.
        float sum = Sse.Add(vsum, Sse.Shuffle(vsum, vsum, 0xB1)).ToScalar();

        for (int i = vectorizedLength; i < values.Length; i++)
        {
            sum += values[i];
        }

        return sum;
    }
    else if (AdvSimd.Arm64.IsSupported)
    {
        ref float value = ref MemoryMarshal.GetArrayDataReference(values);
        Vector128<float> vsum = Vector128<float>.Zero;

        // Elements that do not fill a whole vector are handled by the scalar tail loop below.
        int remainder = values.Length % Vector128<float>.Count;
        int vectorizedLength = values.Length - remainder;

        // 'i' is an element index that advances one full vector per iteration, so the bound
        // must be an element count, not the number of vectors.
        for (int i = 0; i < vectorizedLength; i += Vector128<float>.Count)
        {
            vsum = AdvSimd.Add(vsum, Unsafe.As<float, Vector128<float>>(ref Unsafe.Add(ref value, i)));
        }

        // Two pairwise adds reduce the four lanes to a single total in lane 0.
        vsum = AdvSimd.Arm64.AddPairwise(vsum, vsum);
        float sum = AdvSimd.Arm64.AddPairwise(vsum, vsum).ToScalar();

        for (int i = vectorizedLength; i < values.Length; i++)
        {
            sum += values[i];
        }

        return sum;
    }
    else
    {
        var sum = 0.0f;

        for (int i = 0; i < values.Length; i++)
        {
            sum += values[i];
        }

        return sum;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment