Last active
March 4, 2020 01:54
-
-
Save keijiro/7acc51a96f21988d0337f01edb924dce to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Parallel job that writes one value per index <c>i</c>: the scaled magnitude
/// of the complex sum obtained by dotting every <see cref="input"/> element
/// with row <c>i</c> of the real and imaginary coefficient tables.
/// </summary>
/// <remarks>
/// Both coefficient tables are read at <c>i * input.Length + n</c>, so for every
/// scheduled index <c>i</c> they must cover elements up to
/// <c>(i + 1) * input.Length - 1</c>.
/// </remarks>
[Unity.Burst.BurstCompile(CompileSynchronously = true)]
struct DftJob : IJobParallelFor
{
    /// <summary>Input samples, packed four per element.</summary>
    [ReadOnly] public NativeArray<float4> input;

    /// <summary>Coefficients accumulated into the real part, one row of <c>input.Length</c> elements per output index.</summary>
    [ReadOnly] public NativeArray<float4> coeffsR;

    /// <summary>Coefficients subtracted into the imaginary part, laid out like <see cref="coeffsR"/>.</summary>
    [ReadOnly] public NativeArray<float4> coeffsI;

    /// <summary>Destination for the per-index magnitude.</summary>
    [WriteOnly] public NativeArray<float> output;

    /// <summary>Computes and stores the magnitude for output index <paramref name="i"/>.</summary>
    /// <param name="i">Output index; also selects the coefficient row.</param>
    public void Execute(int i)
    {
        // Read the length once; it is invariant for the whole call.
        var count = input.Length;

        // With no samples the scaling below would evaluate 0 / 0, which is NaN
        // under IEEE rules. Store 0 explicitly so the result does not depend on
        // how the compiler treats that division.
        if (count == 0)
        {
            output[i] = 0.0f;
            return;
        }

        // First element of row i in the coefficient tables.
        var offs = i * count;
        var rl = 0.0f;
        var im = 0.0f;

        for (var n = 0; n < count; n++)
        {
            var x_n = input[n];
            rl += math.dot(x_n, coeffsR[offs + n]);
            im -= math.dot(x_n, coeffsI[offs + n]);
        }

        // Magnitude of (rl, im), halved and normalized by the element count.
        output[i] = math.sqrt(rl * rl + im * im) * 0.5f / count;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.text | |
.intel_syntax noprefix | |
# Single-precision constants (stored as raw IEEE-754 bit patterns) loaded by the
# job entry point below.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2 | |
.LCPI0_0: | |
# 0x3F800000 = 1.0f; numerator of the 1 / length reciprocal.
.long 1065353216 | |
.LCPI0_1: | |
# 0xBF000000 = -0.5f; factor in the rsqrtss refinement step.
.long 3204448256 | |
.LCPI0_2: | |
# 0xC0400000 = -3.0f; addend in the rsqrtss refinement step.
.long 3225419776 | |
.LCPI0_3: | |
# 0x3F000000 = 0.5f; the "* 0.5f" scale applied to the final magnitude.
.long 1056964608 | |
.text | |
# Job-system entry point emitted for DftJob. Per the signature embedded in the
# symbol name the arguments are (jobData, additionalPtr, bufferRangePatchData,
# ranges, jobIndex). The code reads jobData from rcx, ranges from r9 and a
# fifth argument from the stack, and preserves rsi/rdi/xmm6-xmm10, which is
# consistent with the Microsoft x64 calling convention.
.globl "Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6" | |
.p2align 4, 0x90 | |
.type "Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6",@function | |
"Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6": | |
.Lfunc_begin0: | |
.cfi_startproc | |
# Prologue: save the general-purpose registers that are restored before ret.
push r15 | |
.cfi_def_cfa_offset 16 | |
push r14 | |
.cfi_def_cfa_offset 24 | |
push r13 | |
.cfi_def_cfa_offset 32 | |
push r12 | |
.cfi_def_cfa_offset 40 | |
push rsi | |
.cfi_def_cfa_offset 48 | |
push rdi | |
.cfi_def_cfa_offset 56 | |
push rbp | |
.cfi_def_cfa_offset 64 | |
push rbx | |
.cfi_def_cfa_offset 72 | |
# Reserve the frame and spill xmm6-xmm10, which hold constants for the whole function.
sub rsp, 136 | |
movaps xmmword ptr [rsp + 112], xmm10 | |
movaps xmmword ptr [rsp + 96], xmm9 | |
movaps xmmword ptr [rsp + 80], xmm8 | |
movaps xmmword ptr [rsp + 64], xmm7 | |
movaps xmmword ptr [rsp + 48], xmm6 | |
.cfi_def_cfa_offset 208 | |
.cfi_offset rbx, -72 | |
.cfi_offset rbp, -64 | |
.cfi_offset rdi, -56 | |
.cfi_offset rsi, -48 | |
.cfi_offset r12, -40 | |
.cfi_offset r13, -32 | |
.cfi_offset r14, -24 | |
.cfi_offset r15, -16 | |
.cfi_offset xmm6, -160 | |
.cfi_offset xmm7, -144 | |
.cfi_offset xmm8, -128 | |
.cfi_offset xmm9, -112 | |
.cfi_offset xmm10, -96 | |
# Keep the arguments that must survive calls: r15 = ranges (r9), r14 = jobData (rcx).
mov r15, r9 | |
mov r14, rcx | |
# First GetWorkStealingRange call through its resolved pointer:
# rcx = ranges, edx = stack argument at [rsp + 240] (presumably jobIndex, the
# fifth parameter), r8 = &[rsp + 40] and r9 = &[rsp + 44], which are read back
# below as the begin and end indices. A zero result in al means no work.
mov edx, dword ptr [rsp + 240] | |
movabs rax, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr" | |
lea r8, [rsp + 40] | |
lea r9, [rsp + 44] | |
mov rcx, r15 | |
call qword ptr [rax] | |
test al, al | |
je .LBB0_10 | |
# Load loop-invariant constants: xmm8 = 1.0f, xmm10 = -0.5f, xmm6 = -3.0f,
# xmm9 = 0.0f, xmm7 = 0.5f.
movabs rax, offset .LCPI0_0 | |
movss xmm8, dword ptr [rax] | |
movabs rax, offset .LCPI0_1 | |
movss xmm10, dword ptr [rax] | |
movabs rax, offset .LCPI0_2 | |
movss xmm6, dword ptr [rax] | |
xorps xmm9, xmm9 | |
movabs rax, offset .LCPI0_3 | |
movss xmm7, dword ptr [rax] | |
# r12 = address of the end-index slot, reused for the later range calls.
lea r12, [rsp + 44] | |
.p2align 4, 0x90 | |
# Process one work-stealing range: r10 = end, rdx = begin; skip if begin >= end.
.LBB0_2: | |
movsxd r10, dword ptr [rsp + 44] | |
movsxd rdx, dword ptr [rsp + 40] | |
cmp edx, r10d | |
jge .LBB0_9 | |
# r11d = dword at jobData + 8, used below as the element count (input.Length in
# the C# source); r8 = pointer at jobData + 168, used as the output buffer.
# A count <= 0 takes the zero-fill path at .LBB0_8.
mov r11d, dword ptr [r14 + 8] | |
test r11d, r11d | |
mov r8, qword ptr [r14 + 168] | |
jle .LBB0_8 | |
# xmm0 = 1.0f / (float)count, computed once per range.
xorps xmm1, xmm1 | |
cvtsi2ss xmm1, r11d | |
movaps xmm0, xmm8 | |
divss xmm0, xmm1 | |
# Buffer pointers read from jobData: r9 = [+0] (input), rsi = [+56] (first
# coefficient table, accumulated with addss), rdi = [+112] (second table,
# accumulated with subss).
mov r9, qword ptr [r14] | |
mov rsi, qword ptr [r14 + 56] | |
mov rdi, qword ptr [r14 + 112] | |
# ebx = begin * count: coefficient offset of the first output index in the range.
mov ebx, edx | |
imul ebx, r11d | |
.p2align 4, 0x90 | |
# Outer loop, one iteration per output index rdx: clear both accumulators
# (xmm2 = real sum, xmm1 = imaginary sum), rewind the input pointer, and set
# eax = coefficient offset, rcx = remaining element count.
.LBB0_5: | |
xorps xmm2, xmm2 | |
mov rbp, r9 | |
mov eax, ebx | |
mov rcx, r11 | |
xorps xmm1, xmm1 | |
.p2align 4, 0x90 | |
# Inner loop, one 16-byte input element per iteration.
.LBB0_6: | |
movups xmm3, xmmword ptr [rbp] | |
# r13 = sign-extended offset * 16, the byte offset into both coefficient tables.
cdqe | |
mov r13, rax | |
shl r13, 4 | |
# Dot product with the first table: multiply lanes, reduce horizontally with
# movshdup/movhlps, then add the scalar result to the real accumulator.
movups xmm4, xmmword ptr [rsi + r13] | |
mulps xmm4, xmm3 | |
movshdup xmm5, xmm4 | |
addps xmm4, xmm5 | |
movhlps xmm5, xmm4 | |
addps xmm5, xmm4 | |
addss xmm2, xmm5 | |
# Same reduction against the second table, subtracted from the imaginary accumulator.
movups xmm4, xmmword ptr [rdi + r13] | |
mulps xmm4, xmm3 | |
movshdup xmm3, xmm4 | |
addps xmm4, xmm3 | |
movhlps xmm3, xmm4 | |
addps xmm3, xmm4 | |
subss xmm1, xmm3 | |
# Advance the coefficient offset and input pointer; loop until rcx reaches zero.
inc eax | |
add rbp, 16 | |
dec rcx | |
jne .LBB0_6 | |
# xmm1 = im * im + rl * rl.
mulss xmm2, xmm2 | |
mulss xmm1, xmm1 | |
addss xmm1, xmm2 | |
# Square root via reciprocal-sqrt estimate r = rsqrtss(x) plus one refinement:
# result = (x * r * -0.5) * (x * r * r + -3.0).
xorps xmm2, xmm2 | |
rsqrtss xmm2, xmm1 | |
movaps xmm3, xmm1 | |
mulss xmm3, xmm2 | |
movaps xmm4, xmm3 | |
mulss xmm4, xmm10 | |
mulss xmm3, xmm2 | |
addss xmm3, xmm6 | |
mulss xmm3, xmm4 | |
# Mask the result to zero when x == 0.0, where the estimate path is not usable.
cmpeqss xmm1, xmm9 | |
andnps xmm1, xmm3 | |
# Scale by 0.5f and by 1 / count, then store to output[rdx].
mulss xmm1, xmm7 | |
mulss xmm1, xmm0 | |
movss dword ptr [r8 + 4*rdx], xmm1 | |
# Next output index: bump the index and the coefficient offset by one row.
inc rdx | |
add ebx, r11d | |
cmp rdx, r10 | |
jne .LBB0_5 | |
jmp .LBB0_9 | |
# Count <= 0 path: memset(&output[begin], 0, 4 * (end - 1 - begin) + 4), i.e.
# zero every output element in the range.
.LBB0_8: | |
lea rcx, [r8 + 4*rdx] | |
dec r10d | |
sub r10d, edx | |
lea r8, [4*r10 + 4] | |
xor edx, edx | |
movabs rax, offset memset | |
call rax | |
.p2align 4, 0x90 | |
# Request the next range with the same arguments as the first call; repeat
# from .LBB0_2 while the call returns non-zero.
.LBB0_9: | |
mov rcx, r15 | |
mov edx, dword ptr [rsp + 240] | |
lea r8, [rsp + 40] | |
mov r9, r12 | |
movabs rax, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr" | |
call qword ptr [rax] | |
test al, al | |
jne .LBB0_2 | |
# Epilogue: restore xmm6-xmm10 and the pushed registers in reverse order.
.LBB0_10: | |
movaps xmm6, xmmword ptr [rsp + 48] | |
movaps xmm7, xmmword ptr [rsp + 64] | |
movaps xmm8, xmmword ptr [rsp + 80] | |
movaps xmm9, xmmword ptr [rsp + 96] | |
movaps xmm10, xmmword ptr [rsp + 112] | |
add rsp, 136 | |
pop rbx | |
pop rbp | |
pop rdi | |
pop rsi | |
pop r12 | |
pop r13 | |
pop r14 | |
pop r15 | |
ret | |
.Lfunc_end0: | |
.size "Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6", .Lfunc_end0-"Unity.Jobs.IJobParallelForExtensions.ParallelForJobStruct`1<DftBuffer.DftJob>.Execute(ref DftBuffer.DftJob jobData, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_6839D548F10413F6" | |
.cfi_endproc | |
# burst.initialize: calls the function pointer received in rcx, passing the
# address of the GetWorkStealingRange name string, and stores the returned
# value in GetWorkStealingRange_Ptr, the slot the job entry point calls through.
.globl burst.initialize | |
.p2align 4, 0x90 | |
.type burst.initialize,@function | |
burst.initialize: | |
.Lfunc_begin1: | |
.cfi_startproc | |
# Reserve stack for the call (presumably shadow space plus alignment padding).
sub rsp, 40 | |
.cfi_def_cfa_offset 48 | |
# rax = incoming function pointer; rcx = name string argument for it.
mov rax, rcx | |
movabs rcx, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string" | |
call rax | |
# Publish the returned value into the pointer slot.
movabs rcx, offset ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr" | |
mov qword ptr [rcx], rax | |
add rsp, 40 | |
ret | |
.Lfunc_end1: | |
.size burst.initialize, .Lfunc_end1-burst.initialize | |
.cfi_endproc | |
# File-local, 8-byte, 8-aligned slot that burst.initialize fills with the
# resolved GetWorkStealingRange pointer.
.type ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr",@object | |
.local ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr" | |
.comm ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange_Ptr",8,8 | |
# Read-only, NUL-terminated name string that burst.initialize passes to the
# function pointer it receives (60 characters plus the terminator = 61 bytes).
.type ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string",@object | |
.section .rodata,"a",@progbits | |
".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string": | |
.asciz "Unity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange" | |
.size ".LUnity.Jobs.LowLevel.Unsafe.JobsUtility::GetWorkStealingRange.function.string", 61 | |
.ident "Burst" | |
.section ".note.GNU-stack","",@progbits |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment