Skip to content

Instantly share code, notes, and snippets.

@rxin
Created February 14, 2017 12:40
Show Gist options
  • Save rxin/74d65a166b2f48773e0f61f923c0caa2 to your computer and use it in GitHub Desktop.
Save rxin/74d65a166b2f48773e0f61f923c0caa2 to your computer and use it in GitHub Desktop.
Processing trillion rows per second on a single machine: how can nested loop joins be this fast?
Decoding compiled method 0x00007f4d0510f9d0:
Code:
[Entry Point]
[Verified Entry Point]
[Constants]
# {method} {0x00007f4ce9662458} 'join' '(JI)J' in 'Test'
0x00007f4d0510fb20: call 0x00007f4d1abd5a30 ; {runtime_call}
0x00007f4d0510fb25: data16 data16 nop WORD PTR [rax+rax*1+0x0]
0x00007f4d0510fb30: mov DWORD PTR [rsp-0x14000],eax
0x00007f4d0510fb37: push rbp
0x00007f4d0510fb38: sub rsp,0x40
0x00007f4d0510fb3c: mov ebp,DWORD PTR [rsi+0x20]
0x00007f4d0510fb3f: mov rbx,QWORD PTR [rsi+0x50]
0x00007f4d0510fb43: mov r13,QWORD PTR [rsi+0x68]
0x00007f4d0510fb47: mov r14,QWORD PTR [rsi+0x40]
0x00007f4d0510fb4b: mov r10,QWORD PTR [rsi+0x58]
0x00007f4d0510fb4f: mov QWORD PTR [rsp+0x10],r10
0x00007f4d0510fb54: mov rdi,rsi
0x00007f4d0510fb57: movabs r10,0x7f4d1ac78c80
0x00007f4d0510fb61: call r10
0x00007f4d0510fb64: mov r10d,DWORD PTR [rbx+0x8]
0x00007f4d0510fb68: cmp r10d,0xf800016d
0x00007f4d0510fb6f: jne 0x00007f4d0510fc85
0x00007f4d0510fb75: mov r10d,DWORD PTR [rbx+0xc] ;r10d ← bnlj_broadcast.length
0x00007f4d0510fb79: mov r9d,r10d
0x00007f4d0510fb7c: add r9d,0xfffffff1 ; r9d ← bnlj_broadcast.length - 15
0x00007f4d0510fb80: xor r8d,r8d
0x00007f4d0510fb83: mov ecx,0x80000000
0x00007f4d0510fb88: cmp r10d,r9d
0x00007f4d0510fb8b: cmovl r9d,ecx
0x00007f4d0510fb8f: movabs rcx,0x7581db988
0x00007f4d0510fb99: jmp 0x00007f4d0510fbb2
0x00007f4d0510fb9b: test DWORD PTR [rip+0x16d8d45f],eax
0x00007f4d0510fba1: cmp r13,QWORD PTR [rsp+0x10]
0x00007f4d0510fba6: je 0x00007f4d0510fc4e
0x00007f4d0510fbac: add r13,0x1 ; outer loop increment
0x00007f4d0510fbb0: xor ebp,ebp
0x00007f4d0510fbb2: cmp ebp,r10d ; inner loop condition
0x00007f4d0510fbb5: jge 0x00007f4d0510fb9b
0x00007f4d0510fbb7: cmp ebp,r10d
0x00007f4d0510fbba: jae 0x00007f4d0510fc62
0x00007f4d0510fbc0: mov edi,0x1
0x00007f4d0510fbc5: add rdi,QWORD PTR [rcx+0x68] ; incMetric(1)
0x00007f4d0510fbc9: mov QWORD PTR [rcx+0x68],rdi
0x00007f4d0510fbcd: mov r11d,ebp
0x00007f4d0510fbd0: inc r11d ; inner loop increment by 1
0x00007f4d0510fbd3: add r14,0x1 ; agg_value1 increment by 1
0x00007f4d0510fbd7: cmp r11d,r10d ; inner loop condition
0x00007f4d0510fbda: jge 0x00007f4d0510fb9b
0x00007f4d0510fbdc: add ebp,0x2
0x00007f4d0510fbdf: cmp ebp,r8d
0x00007f4d0510fbe2: cmovl ebp,r8d
0x00007f4d0510fbe6: cmp ebp,r10d
0x00007f4d0510fbe9: cmovg ebp,r10d
0x00007f4d0510fbed: cmp r11d,r10d
0x00007f4d0510fbf0: jae 0x00007f4d0510fc5d
0x00007f4d0510fbf2: add rdi,0x1
0x00007f4d0510fbf6: mov QWORD PTR [rcx+0x68],rdi
0x00007f4d0510fbfa: add r14,0x1
0x00007f4d0510fbfe: inc r11d
0x00007f4d0510fc01: cmp r11d,ebp
0x00007f4d0510fc04: jl 0x00007f4d0510fbed
0x00007f4d0510fc06: cmp r11d,r9d
0x00007f4d0510fc09: jge 0x00007f4d0510fc25
0x00007f4d0510fc0b: nop DWORD PTR [rax+rax*1+0x0]
0x00007f4d0510fc10: add rdi,0x10 ; incMetric(1) x 16
0x00007f4d0510fc14: mov QWORD PTR [rcx+0x68],rdi
0x00007f4d0510fc18: add r14,0x10 ; agg_value1 += 16
0x00007f4d0510fc1c: add r11d,0x10 ; inner loop increment by 16
0x00007f4d0510fc20: cmp r11d,r9d ; inner loop condition
; (bnlj_index < bnlj_broadcast.length - 15
0x00007f4d0510fc23: jl 0x00007f4d0510fc10 ;*if_icmpge
; - Test::join@38 (line 15)
0x00007f4d0510fc25: cmp r11d,r10d ; inner loop condition
; (bnlj_index < bnlj_broadcast.length
0x00007f4d0510fc28: jge 0x00007f4d0510fb9b
0x00007f4d0510fc2e: xchg ax,ax
0x00007f4d0510fc30: cmp r11d,r10d
0x00007f4d0510fc33: jae 0x00007f4d0510fc5d
0x00007f4d0510fc35: add rdi,0x1
0x00007f4d0510fc39: mov QWORD PTR [rcx+0x68],rdi
0x00007f4d0510fc3d: add r14,0x1
0x00007f4d0510fc41: inc r11d
0x00007f4d0510fc44: cmp r11d,r10d
0x00007f4d0510fc47: jl 0x00007f4d0510fc30
0x00007f4d0510fc49: jmp 0x00007f4d0510fb9b
0x00007f4d0510fc4e: mov rax,r14
0x00007f4d0510fc51: add rsp,0x40
0x00007f4d0510fc55: pop rbp
0x00007f4d0510fc56: test DWORD PTR [rip+0x16d8d3a4],eax
0x00007f4d0510fc5c: ret
0x00007f4d0510fc5d: mov rbp,r14
0x00007f4d0510fc60: jmp 0x00007f4d0510fc68
0x00007f4d0510fc62: mov r11d,ebp
0x00007f4d0510fc65: mov rbp,r14
0x00007f4d0510fc68: mov esi,0xffffffe4
0x00007f4d0510fc6d: mov QWORD PTR [rsp],r13
0x00007f4d0510fc71: mov DWORD PTR [rsp+0xc],r11d
0x00007f4d0510fc76: mov QWORD PTR [rsp+0x20],rbx
0x00007f4d0510fc7b: call 0x00007f4d050051a0
0x00007f4d0510fc80: call 0x00007f4d1abd5a30
0x00007f4d0510fc85: mov esi,0xffffff9d
0x00007f4d0510fc8a: mov QWORD PTR [rsp],r13
0x00007f4d0510fc8e: mov QWORD PTR [rsp+0x8],r14
0x00007f4d0510fc93: mov QWORD PTR [rsp+0x18],rbx
0x00007f4d0510fc98: data16 xchg ax,ax
0x00007f4d0510fc9b: call 0x00007f4d050051a0
0x00007f4d0510fca0: call 0x00007f4d1abd5a30
0x00007f4d0510fca5: mov esi,0xffffff86
0x00007f4d0510fcaa: mov QWORD PTR [rsp],r13
0x00007f4d0510fcae: mov QWORD PTR [rsp+0x8],r14
0x00007f4d0510fcb3: call 0x00007f4d050051a0
0x00007f4d0510fcb8: call 0x00007f4d1abd5a30
0x00007f4d0510fcbd: hlt
0x00007f4d0510fcbe: hlt
0x00007f4d0510fcbf: hlt
[Exception Handler]
[Stub Code]
0x00007f4d0510fcc0: jmp 0x00007f4d0506c760
[Deopt Handler Code]
0x00007f4d0510fcc5: call 0x00007f4d0510fcca
0x00007f4d0510fcca: sub QWORD PTR [rsp],0x5
0x00007f4d0510fccf: jmp 0x00007f4d050473c0
0x00007f4d0510fcd4: hlt
0x00007f4d0510fcd5: hlt
0x00007f4d0510fcd6: hlt
0x00007f4d0510fcd7: hlt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment