Skip to content

Instantly share code, notes, and snippets.

@keijiro
Last active March 4, 2020 01:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keijiro/7acc51a96f21988d0337f01edb924dce to your computer and use it in GitHub Desktop.
Save keijiro/7acc51a96f21988d0337f01edb924dce to your computer and use it in GitHub Desktop.
/// <summary>
/// Parallel job that writes one magnitude value per index <c>i</c>.
/// For each index it accumulates dot products of every <c>input</c> element
/// with the matching row of <c>coeffsR</c> / <c>coeffsI</c> (row length is
/// <c>input.Length</c>), then stores the scaled length of the resulting
/// (real, imaginary) pair. Presumably a DFT bin magnitude, per the type name.
/// </summary>
[Unity.Burst.BurstCompile(CompileSynchronously = true)]
struct DftJob : IJobParallelFor
{
    [ReadOnly] public NativeArray<float4> input;
    [ReadOnly] public NativeArray<float4> coeffsR;
    [ReadOnly] public NativeArray<float4> coeffsI;
    [WriteOnly] public NativeArray<float> output;

    /// <summary>Computes <c>output[i]</c> from the i-th coefficient row.</summary>
    /// <param name="i">Output index; also selects the coefficient row.</param>
    public void Execute(int i)
    {
        // First coefficient of row i inside the flattened tables.
        var rowStart = i * input.Length;

        float real = 0.0f, imag = 0.0f;

        var k = 0;
        while (k < input.Length)
        {
            var sample = input[k];
            var coeffIndex = rowStart + k;

            real += math.dot(sample, coeffsR[coeffIndex]);
            // The imaginary part is accumulated with a negative sign.
            imag -= math.dot(sample, coeffsI[coeffIndex]);

            k++;
        }

        var magnitude = math.sqrt(real * real + imag * imag);

        // Same evaluation order as before: (magnitude * 0.5f) / Length.
        output[i] = magnitude * 0.5f / input.Length;
    }
}
# Assembler setup and the 4-byte constant pool used by the job body.
# Each .long below is the raw bit pattern of an IEEE-754 single that the
# code loads with movss.
.text
.intel_syntax noprefix
.section .rodata.cst4,"aM",@progbits,4
.p2align 2
# Bit pattern 0x3F800000 = 1.0f (numerator of the 1/Length reciprocal).
.LCPI0_0:
.long 1065353216
# Bit pattern 0xBF000000 = -0.5f (factor in the square-root refinement).
.LCPI0_1:
.long 3204448256
# Bit pattern 0xC0400000 = -3.0f (addend in the square-root refinement).
.LCPI0_2:
.long 3225419776
# Bit pattern 0x3F000000 = 0.5f (the "* 0.5f" scale of the result).
.LCPI0_3:
.long 1056964608
.text
# ---------------------------------------------------------------------------
# ParallelForJobStruct<DftJob>.Execute  (Burst output, GAS Intel syntax)
#
# Repeatedly calls JobsUtility::GetWorkStealingRange through its pointer slot
# and, for every index in each returned [begin, end) range, evaluates the
# DftJob body: two float4 dot-product accumulations followed by
# sqrt(rl*rl + im*im) * 0.5 * (1 / Length).
#
# Arguments as read by the code (order matches the symbol name; register use
# is consistent with the Microsoft x64 convention):
#   rcx         -> jobData (kept in r14)
#   r9          -> ranges  (kept in r15)
#   [rsp + 240] -> jobIndex, read after the prologue has adjusted rsp
#
# jobData fields as used below:
#   [r14 + 0]   input data pointer       [r14 + 8]   input.Length (32-bit)
#   [r14 + 56]  coeffsR data pointer     [r14 + 112] coeffsI data pointer
#   [r14 + 168] output data pointer
#
# Frame after "sub rsp, 136":
#   [rsp + 40]  range begin (written by GetWorkStealingRange)
#   [rsp + 44]  range end   (written by GetWorkStealingRange)
#   [rsp + 48 .. rsp + 127] saved xmm6-xmm10
#   (the lowest 32 bytes are never touched here; presumably callee shadow space)
# ---------------------------------------------------------------------------
.globl "Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6"
.p2align 4, 0x90
.type "Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6",@function
"Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6":
.Lfunc_begin0:
.cfi_startproc
# Prologue: save the eight integer registers this function overwrites.
# Eight pushes (64) + return address (8) + 136 of locals = 208, a multiple
# of 16, so rsp is 16-byte aligned at every call below.
push r15
.cfi_def_cfa_offset 16
push r14
.cfi_def_cfa_offset 24
push r13
.cfi_def_cfa_offset 32
push r12
.cfi_def_cfa_offset 40
push rsi
.cfi_def_cfa_offset 48
push rdi
.cfi_def_cfa_offset 56
push rbp
.cfi_def_cfa_offset 64
push rbx
.cfi_def_cfa_offset 72
sub rsp, 136
# Save xmm6-xmm10; they are restored in the epilogue at .LBB0_10.
movaps xmmword ptr [rsp + 112], xmm10
movaps xmmword ptr [rsp + 96], xmm9
movaps xmmword ptr [rsp + 80], xmm8
movaps xmmword ptr [rsp + 64], xmm7
movaps xmmword ptr [rsp + 48], xmm6
.cfi_def_cfa_offset 208
.cfi_offset rbx, -72
.cfi_offset rbp, -64
.cfi_offset rdi, -56
.cfi_offset rsi, -48
.cfi_offset r12, -40
.cfi_offset r13, -32
.cfi_offset r14, -24
.cfi_offset r15, -16
.cfi_offset xmm6, -160
.cfi_offset xmm7, -144
.cfi_offset xmm8, -128
.cfi_offset xmm9, -112
.cfi_offset xmm10, -96
# Keep ranges (r15) and jobData (r14) in registers that survive the calls.
mov r15, r9
mov r14, rcx
# First request: GetWorkStealingRange(ranges, jobIndex, &begin, &end).
mov edx, dword ptr [rsp + 240]
movabs rax, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr"
lea r8, [rsp + 40]
lea r9, [rsp + 44]
mov rcx, r15
call qword ptr [rax]
# A zero byte in al means no range was handed out: go straight to the epilogue.
test al, al
je .LBB0_10
# Load loop-invariant constants once:
#   xmm8 = 1.0f, xmm10 = -0.5f, xmm6 = -3.0f, xmm9 = 0.0f, xmm7 = 0.5f
movabs rax, offset .LCPI0_0
movss xmm8, dword ptr [rax]
movabs rax, offset .LCPI0_1
movss xmm10, dword ptr [rax]
movabs rax, offset .LCPI0_2
movss xmm6, dword ptr [rax]
xorps xmm9, xmm9
movabs rax, offset .LCPI0_3
movss xmm7, dword ptr [rax]
# r12 = &end, reused as the fourth argument of every later range request.
lea r12, [rsp + 44]
.p2align 4, 0x90
# --- Per-range loop: r10 = end, rdx = begin (both sign-extended) -----------
.LBB0_2:
movsxd r10, dword ptr [rsp + 44]
movsxd rdx, dword ptr [rsp + 40]
# Empty range (begin >= end, signed): nothing to do, ask for the next one.
cmp edx, r10d
jge .LBB0_9
# r11d = input.Length, r8 = output pointer.
# The mov between test and jle does not modify flags, so jle still tests Length.
mov r11d, dword ptr [r14 + 8]
test r11d, r11d
mov r8, qword ptr [r14 + 168]
# Length <= 0: the inner loop would not run; take the zero-fill path.
jle .LBB0_8
# xmm0 = 1.0f / (float)Length, computed once per range and applied by multiply.
xorps xmm1, xmm1
cvtsi2ss xmm1, r11d
movaps xmm0, xmm8
divss xmm0, xmm1
# r9 = input, rsi = coeffsR, rdi = coeffsI data pointers.
mov r9, qword ptr [r14]
mov rsi, qword ptr [r14 + 56]
mov rdi, qword ptr [r14 + 112]
# ebx = begin * Length, i.e. "offs" for the first index of the range.
mov ebx, edx
imul ebx, r11d
.p2align 4, 0x90
# --- Per-index loop: rdx = i, ebx = i * Length ------------------------------
.LBB0_5:
# xmm2 = rl = 0, xmm1 = im = 0; rbp walks input, eax = coefficient index,
# rcx = remaining inner iterations (Length).
xorps xmm2, xmm2
mov rbp, r9
mov eax, ebx
mov rcx, r11
xorps xmm1, xmm1
.p2align 4, 0x90
# --- Inner loop over n: one float4 of input against both coefficient rows ---
.LBB0_6:
# xmm3 = input[n]
movups xmm3, xmmword ptr [rbp]
# r13 = (sign-extended offs + n) * 16 = byte offset of the float4 coefficient.
cdqe
mov r13, rax
shl r13, 4
# dot(x_n, coeffsR[offs + n]): multiply lanes, then horizontal add so that
# lane 0 of xmm5 holds the sum of all four products.
movups xmm4, xmmword ptr [rsi + r13]
mulps xmm4, xmm3
movshdup xmm5, xmm4
addps xmm4, xmm5
movhlps xmm5, xmm4
addps xmm5, xmm4
# rl += dot
addss xmm2, xmm5
# dot(x_n, coeffsI[offs + n]) with the same reduction; result in lane 0 of xmm3.
movups xmm4, xmmword ptr [rdi + r13]
mulps xmm4, xmm3
movshdup xmm3, xmm4
addps xmm4, xmm3
movhlps xmm3, xmm4
addps xmm3, xmm4
# im -= dot
subss xmm1, xmm3
# Advance: next coefficient index, next input float4, one fewer iteration.
inc eax
add rbp, 16
dec rcx
jne .LBB0_6
# s = rl*rl + im*im (in xmm1).
mulss xmm2, xmm2
mulss xmm1, xmm1
addss xmm1, xmm2
# sqrt(s) from the reciprocal-square-root estimate x = rsqrtss(s) plus one
# refinement step:  (s*x*x - 3) * (-0.5 * s*x)  ==  s * x * (1.5 - 0.5*s*x*x).
xorps xmm2, xmm2
rsqrtss xmm2, xmm1
movaps xmm3, xmm1
mulss xmm3, xmm2
movaps xmm4, xmm3
mulss xmm4, xmm10
mulss xmm3, xmm2
addss xmm3, xmm6
mulss xmm3, xmm4
# When s == 0 the compare yields an all-ones mask and andnps clears the
# result, so the output is 0 instead of the product of 0 and an infinite estimate.
cmpeqss xmm1, xmm9
andnps xmm1, xmm3
# output[i] = sqrt(s) * 0.5f * (1 / Length)
mulss xmm1, xmm7
mulss xmm1, xmm0
movss dword ptr [r8 + 4*rdx], xmm1
# Next index: i++, offs += Length; stop when i reaches end.
inc rdx
add ebx, r11d
cmp rdx, r10
jne .LBB0_5
jmp .LBB0_9
# --- Length <= 0 path: memset(&output[begin], 0, 4 * (end - begin)) ---------
# r10d = end - 1 - begin, so 4*r10 + 4 is the byte count for the whole range.
# NOTE(review): this stores 0.0f, whereas the C# expression evaluated
# literally with Length == 0 would divide zero by zero; presumably allowed by
# the Burst float mode in effect - confirm if that case matters.
.LBB0_8:
lea rcx, [r8 + 4*rdx]
dec r10d
sub r10d, edx
lea r8, [4*r10 + 4]
xor edx, edx
movabs rax, offset memset
call rax
.p2align 4, 0x90
# --- Request the next range; loop again while the call returns nonzero ------
.LBB0_9:
mov rcx, r15
mov edx, dword ptr [rsp + 240]
lea r8, [rsp + 40]
mov r9, r12
movabs rax, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr"
call qword ptr [rax]
test al, al
jne .LBB0_2
# --- Epilogue: restore xmm6-xmm10 and the pushed registers in reverse order -
.LBB0_10:
movaps xmm6, xmmword ptr [rsp + 48]
movaps xmm7, xmmword ptr [rsp + 64]
movaps xmm8, xmmword ptr [rsp + 80]
movaps xmm9, xmmword ptr [rsp + 96]
movaps xmm10, xmmword ptr [rsp + 112]
add rsp, 136
pop rbx
pop rbp
pop rdi
pop rsi
pop r12
pop r13
pop r14
pop r15
ret
.Lfunc_end0:
.size "Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6", .Lfunc_end0-"Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6"
.cfi_endproc
# ---------------------------------------------------------------------------
# burst.initialize
# In:    rcx = function pointer used to look up an external function by name
# Does:  calls that pointer with rcx = address of the NUL-terminated string
#        "Unity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange" and
#        stores the returned rax into the GetWorkStealingRange_Ptr slot, which
#        the job's Execute wrapper calls through.
# Clobb: rax, rcx, plus whatever the called function clobbers
# ---------------------------------------------------------------------------
.globl burst.initialize
.p2align 4, 0x90
.type burst.initialize,@function
burst.initialize:
.Lfunc_begin1:
.cfi_startproc
# Reserve 40 bytes: with the 8-byte return address this keeps rsp 16-byte
# aligned at the call (presumably 32 bytes of shadow space plus 8 of padding).
sub rsp, 40
.cfi_def_cfa_offset 48
# Move the callback out of rcx first, because rcx is reused for its argument.
mov rax, rcx
movabs rcx, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string"
call rax
# Publish the returned value into the 8-byte pointer slot.
movabs rcx, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr"
mov qword ptr [rcx], rax
add rsp, 40
ret
.Lfunc_end1:
.size burst.initialize, .Lfunc_end1-burst.initialize
.cfi_endproc
# Pointer slot for JobsUtility::GetWorkStealingRange: a file-local, 8-byte,
# 8-byte-aligned common symbol. burst.initialize writes it; the job's Execute
# wrapper calls through it.
.type ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr",@object
.local ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr"
.comm ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr",8,8
# Read-only name string that burst.initialize passes to the lookup callback.
# Size 61 = 60 characters plus the terminating NUL emitted by .asciz.
.type ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string",@object
.section .rodata,"a",@progbits
".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string":
.asciz "Unity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange"
.size ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string", 61
# Producer tag, followed by an empty .note.GNU-stack section (the conventional
# marker that this object does not need an executable stack).
.ident "Burst"
.section ".note.GNU-stack","",@progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment