Skip to content

Instantly share code, notes, and snippets.

View FrankNiemeyer's full-sized avatar

Frank Niemeyer FrankNiemeyer

View GitHub Profile
open System.Numerics
/// Program entry point: prints how many float32 elements a
/// System.Numerics.Vector<float32> holds, then exits with status 0.
/// `argv` (the command-line arguments) is accepted but not used.
[<EntryPoint>]
let main argv =
    // The body must be indented relative to `let main`; F# uses the offside
    // rule, so un-indented lines would not be parsed as part of the function.
    let laneWidth = System.Numerics.Vector<float32>.Count
    printfn "%i" laneWidth
    0 // process exit code
------ Build started: Project: ConsoleApplication1, Configuration: Release Any CPU ------
C:\Program Files (x86)\Microsoft SDKs\F#\4.0\Framework\v4.0\fsc.exe -o:obj\Release\ConsoleApplication1.exe --debug:pdbonly --noframework --define:TRACE --doc:bin\Release\ConsoleApplication1.XML --optimize+ --platform:x64 -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\FSharp\.NETFramework\v4.0\4.4.0.0\FSharp.Core.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\mscorlib.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.Core.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.Numerics.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.Numerics.Vectors.dll" --target:exe --warn:3 --warnaserror:76 --vserrors --validate-type-provid
/// <summary>
/// Scalar kernel over an array-of-structures input: for every index of
/// <paramref name="dp"/>, stores X*X + Y*Y + Z*Z of the matching element of
/// <paramref name="vs"/>.
/// </summary>
/// <param name="vs">Input vectors; indexed with every index of <paramref name="dp"/>.</param>
/// <param name="dp">Output array; each slot is overwritten on every pass.</param>
/// <remarks>
/// The whole pass is repeated <c>reps</c> times (a value defined outside this
/// method); each pass writes the same results.
/// </remarks>
static void Dot3AosScalar(Vector3[] vs, float[] dp) {
    for (var pass = 0; pass < reps; pass++) {
        for (var idx = 0; idx < dp.Length; idx++) {
            // Read the element once, then square and sum its components
            // in the same X, Y, Z order as before.
            var v = vs[idx];
            dp[idx] = v.X * v.X + v.Y * v.Y + v.Z * v.Z;
        }
    }
}
cmp r9d,r10d
jae 00007FF98D096F68
movsxd r11,r9d
imul r11,r11,3
vmovss xmm0,dword ptr [rcx+r11*4+10h]
vmulss xmm0,xmm0,xmm0
lea r11,[rcx+r11*4+10h]
vmovss xmm1,dword ptr [r11+4]
vmulss xmm1,xmm1,xmm1
vaddss xmm0,xmm0,xmm1
/// <summary>
/// Array-of-structures kernel that uses <c>Vector3.Dot</c>: for every index of
/// <paramref name="dp"/>, stores the dot product of the matching element of
/// <paramref name="vs"/> with itself.
/// </summary>
/// <param name="vs">Input vectors; indexed with every index of <paramref name="dp"/>.</param>
/// <param name="dp">Output array; each slot is overwritten on every pass.</param>
/// <remarks>
/// The whole pass is repeated <c>reps</c> times (a value defined outside this
/// method); each pass writes the same results.
/// </remarks>
static void Dot3AosVectorDp(Vector3[] vs, float[] dp) {
    for (var pass = 0; pass < reps; pass++) {
        for (var idx = 0; idx < dp.Length; idx++) {
            // Both Dot operands are the same element.
            var v = vs[idx];
            dp[idx] = Vector3.Dot(v, v);
        }
    }
}
cmp r9d,r10d
jae 00007FF98D077A54
movsxd r11,r9d
imul r11,r11,3
lea r11,[rcx+r11*4+10h]
vmovss xmm1,dword ptr [r11+8]
vmovsd xmm0,qword ptr [r11]
vshufps xmm0,xmm0,xmm1,44h
vmovss xmm2,dword ptr [r11+8]
vmovsd xmm1,qword ptr [r11]
static void Dot3AosGather(Vector3[] vs, float[] dp) {
var xtmp = new float[laneWidth];
var ytmp = new float[laneWidth];
var ztmp = new float[laneWidth];
for (var j = 0; j < reps; ++j) {
for (var i = 0; i < dp.Length; i += laneWidth) {
for (var k = 0; k < laneWidth; ++k) {
xtmp[k] = vs[i + k].X;
ytmp[k] = vs[i + k].Y;
ztmp[k] = vs[i + k].Z;
xor r9d,r9d
mov r10d,dword ptr [rbx+8]
movsxd r10,r10d
cmp r10,8
setge r10b
movzx r10d,r10b
mov r11d,dword ptr [rbp+8]
movsxd r11,r11d
cmp r11,8
setge r11b
/// <summary>
/// Scalar kernel over a structure-of-arrays input: for every index of
/// <paramref name="dp"/>, stores xs[i]*xs[i] + ys[i]*ys[i] + zs[i]*zs[i].
/// </summary>
/// <param name="xs">X components; indexed with every index of <paramref name="dp"/>.</param>
/// <param name="ys">Y components; indexed the same way.</param>
/// <param name="zs">Z components; indexed the same way.</param>
/// <param name="dp">Output array; each slot is overwritten on every pass.</param>
/// <remarks>
/// The whole pass is repeated <c>reps</c> times (a value defined outside this
/// method); each pass writes the same results.
/// </remarks>
static void Dot3SoaScalar(float[] xs, float[] ys, float[] zs, float[] dp) {
    for (var pass = 0; pass < reps; pass++) {
        for (var idx = 0; idx < dp.Length; idx++) {
            // Load the three components in x, y, z order, then sum the squares.
            float x = xs[idx];
            float y = ys[idx];
            float z = zs[idx];
            dp[idx] = x * x + y * y + z * z;
        }
    }
}
movsxd rsi,r11d
vmovss xmm0,dword ptr [rcx+rsi*4+10h]
vmulss xmm0,xmm0,xmm0
movsxd rsi,r11d
vmovss xmm1,dword ptr [rdx+rsi*4+10h]
vmulss xmm1,xmm1,xmm1
vaddss xmm0,xmm0,xmm1
movsxd rsi,r11d
vmovss xmm1,dword ptr [r8+rsi*4+10h]
vmulss xmm1,xmm1,xmm1