Skip to content

Instantly share code, notes, and snippets.

@roxlu roxlu/perf.log Secret
Created Oct 30, 2013

Embed
What would you like to do?
│ 80: mov %rsi,%rdx ▒
│ xor %ecx,%ecx ▒
│ nop ▒
1.70 │ 88: vaddps (%rdx),%xmm3,%xmm0 ▒
55.13 │ add $0x1,%ecx ▒
0.82 │ add $0x60,%rdx ▒
1.59 │ vmovap %xmm0,-0x60(%rdx) ▒
4.75 │ vaddps -0x50(%rdx),%xmm2,%xmm0 ▒
6.60 │ vmovap %xmm0,-0x50(%rdx) ▒
3.22 │ cmp $0x61a8,%ecx ▒
│ ↑ jne 88 ▒
│ mov %rdi,%rcx ▒
│ xor %edx,%edx ▒
│ ↓ jmp c7 ▒
│ nop ▒
4.06 │ b8: add $0x1,%edx ▒
│ add $0x60,%rcx ▒
│ cmp $0x61a8,%edx ▒
│ ↓ je de ▒
3.10 │ c7: test $0x1,%dl ▒
│ ↑ jne b8 ▒
4.45 │ add $0x1,%edx ▒
│ prefet (%rcx) ▒
14.57 │ add $0x60,%rcx ▒
│ cmp $0x61a8,%edx ▒
│ ↑ jne c7 ▒
0.01 │ de: sub $0x1,%eax ▒
│ ↑ jne 80 ▒
│ vmovap %xmm2,0x10(%rsp) ▒
│ vmovap %xmm3,(%rsp) ▒
│ → callq uv_hrtime ▒
│ vmovap (%rsp),%xmm3 ▒
│ sub %rbp,%rax ▒
│ vmovap 0x10(%rsp),%xmm2 ◆
│ mov %rax,%rbp
/*
 * State for one group of particles, stored as SSE vectors.
 *
 * Every member is a __m128 (four packed single-precision floats), so the
 * struct consists of six 16-byte vectors, 96 bytes in total.
 *
 * NOTE(review): presumably each of the four lanes belongs to one particle,
 * so a group would cover four particles -- confirm against the code that
 * fills the array.
 * NOTE(review): member names suggest position, accumulated force and
 * velocity, each split per axis; only force_x / force_y are touched by the
 * code visible here.
 */
struct particle_group {
__m128 pos_x;
__m128 pos_y;
__m128 force_x;
__m128 force_y;
__m128 vel_x;
__m128 vel_y;
};
/*
 * Add the force (x, y) to every particle group.
 *
 * x and y are each broadcast to all four lanes of an SSE vector and added
 * lane-wise to the force_x / force_y members of particles[0] through
 * particles[GROUP_COUNT - 1].
 *
 * Reads and writes `particles`, which is defined outside this function.
 * Returns nothing.
 */
void add_force(float x, float y) {
    /* _mm_set1_ps fills all four lanes with the same scalar, which is
     * exactly what _mm_set_ps(v, v, v, v) produces. */
    const __m128 force_x = _mm_set1_ps(x);
    const __m128 force_y = _mm_set1_ps(y);

    /* Plain linear sweep over the groups; no software prefetch is issued. */
    for (int i = 0; i < GROUP_COUNT; ++i) {
        particles[i].force_x = _mm_add_ps(particles[i].force_x, force_x);
        particles[i].force_y = _mm_add_ps(particles[i].force_y, force_y);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.