This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.Numerics | |
// Program entry point.
// Prints System.Numerics.Vector<float32>.Count (the number of float32
// elements one Vector<float32> holds on this runtime) as an integer,
// then returns exit code 0. The command-line arguments are not used.
[<EntryPoint>]
let main argv =
    System.Numerics.Vector<float32>.Count |> printfn "%i"
    0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------ Build started: Project: ConsoleApplication1, Configuration: Release Any CPU ------ | |
C:\Program Files (x86)\Microsoft SDKs\F#\4.0\Framework\v4.0\fsc.exe -o:obj\Release\ConsoleApplication1.exe --debug:pdbonly --noframework --define:TRACE --doc:bin\Release\ConsoleApplication1.XML --optimize+ --platform:x64 -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\FSharp\.NETFramework\v4.0\4.4.0.0\FSharp.Core.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\mscorlib.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.Core.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.Numerics.dll" -r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.6\System.Numerics.Vectors.dll" --target:exe --warn:3 --warnaserror:76 --vserrors --validate-type-provid |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// For every index of <paramref name="dp"/>, stores X*X + Y*Y + Z*Z of the
/// matching element of <paramref name="vs"/>, using plain per-component
/// float arithmetic. The full pass is repeated <c>reps</c> times (a value
/// declared outside this method), so each pass overwrites the previous one
/// with the same results.
/// </summary>
/// <param name="vs">Input vectors; indexed with the same indices as <paramref name="dp"/>.</param>
/// <param name="dp">Output array; its length determines how many elements are processed.</param>
static void Dot3AosScalar(Vector3[] vs, float[] dp)
{
    for (var pass = 0; pass < reps; ++pass)
    {
        for (var idx = 0; idx < dp.Length; ++idx)
        {
            // Expression form and evaluation order are kept exactly as written:
            // (x*x + y*y) + z*z, re-indexing vs[idx] for each component.
            dp[idx] = vs[idx].X * vs[idx].X + vs[idx].Y * vs[idx].Y + vs[idx].Z * vs[idx].Z;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cmp r9d,r10d | |
jae 00007FF98D096F68 | |
movsxd r11,r9d | |
imul r11,r11,3 | |
vmovss xmm0,dword ptr [rcx+r11*4+10h] | |
vmulss xmm0,xmm0,xmm0 | |
lea r11,[rcx+r11*4+10h] | |
vmovss xmm1,dword ptr [r11+4] | |
vmulss xmm1,xmm1,xmm1 | |
vaddss xmm0,xmm0,xmm1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// For every index of <paramref name="dp"/>, stores the result of
/// <c>Vector3.Dot</c> applied to the matching element of
/// <paramref name="vs"/> with itself. The full pass is repeated
/// <c>reps</c> times (a value declared outside this method).
/// </summary>
/// <param name="vs">Input vectors; indexed with the same indices as <paramref name="dp"/>.</param>
/// <param name="dp">Output array; its length determines how many elements are processed.</param>
static void Dot3AosVectorDp(Vector3[] vs, float[] dp)
{
    for (var pass = 0; pass < reps; ++pass)
    {
        for (var idx = 0; idx < dp.Length; ++idx)
        {
            // Both operands are read from vs[idx] separately, as in the original.
            dp[idx] = Vector3.Dot(vs[idx], vs[idx]);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cmp r9d,r10d | |
jae 00007FF98D077A54 | |
movsxd r11,r9d | |
imul r11,r11,3 | |
lea r11,[rcx+r11*4+10h] | |
vmovss xmm1,dword ptr [r11+8] | |
vmovsd xmm0,qword ptr [r11] | |
vshufps xmm0,xmm0,xmm1,44h | |
vmovss xmm2,dword ptr [r11+8] | |
vmovsd xmm1,qword ptr [r11] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static void Dot3AosGather(Vector3[] vs, float[] dp) { | |
var xtmp = new float[laneWidth]; | |
var ytmp = new float[laneWidth]; | |
var ztmp = new float[laneWidth]; | |
for (var j = 0; j < reps; ++j) { | |
for (var i = 0; i < dp.Length; i += laneWidth) { | |
for (var k = 0; k < laneWidth; ++k) { | |
xtmp[k] = vs[i + k].X; | |
ytmp[k] = vs[i + k].Y; | |
ztmp[k] = vs[i + k].Z; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xor r9d,r9d | |
mov r10d,dword ptr [rbx+8] | |
movsxd r10,r10d | |
cmp r10,8 | |
setge r10b | |
movzx r10d,r10b | |
mov r11d,dword ptr [rbp+8] | |
movsxd r11,r11d | |
cmp r11,8 | |
setge r11b |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// For every index of <paramref name="dp"/>, stores
/// xs[i]*xs[i] + ys[i]*ys[i] + zs[i]*zs[i], reading each component from its
/// own array with plain float arithmetic. The full pass is repeated
/// <c>reps</c> times (a value declared outside this method).
/// </summary>
/// <param name="xs">First component array; indexed with the same indices as <paramref name="dp"/>.</param>
/// <param name="ys">Second component array; indexed with the same indices as <paramref name="dp"/>.</param>
/// <param name="zs">Third component array; indexed with the same indices as <paramref name="dp"/>.</param>
/// <param name="dp">Output array; its length determines how many elements are processed.</param>
static void Dot3SoaScalar(float[] xs, float[] ys, float[] zs, float[] dp)
{
    for (var pass = 0; pass < reps; ++pass)
    {
        for (var idx = 0; idx < dp.Length; ++idx)
        {
            // Evaluation order is kept as written: (x*x + y*y) + z*z.
            dp[idx] = xs[idx] * xs[idx] + ys[idx] * ys[idx] + zs[idx] * zs[idx];
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
movsxd rsi,r11d | |
vmovss xmm0,dword ptr [rcx+rsi*4+10h] | |
vmulss xmm0,xmm0,xmm0 | |
movsxd rsi,r11d | |
vmovss xmm1,dword ptr [rdx+rsi*4+10h] | |
vmulss xmm1,xmm1,xmm1 | |
vaddss xmm0,xmm0,xmm1 | |
movsxd rsi,r11d | |
vmovss xmm1,dword ptr [r8+rsi*4+10h] | |
vmulss xmm1,xmm1,xmm1 |
Older | Newer