Skip to content

Instantly share code, notes, and snippets.

@zeux

zeux/nbody.s

Last active Jan 2, 2020
Embed
What would you like to do?
luaujit: nbody.lua compiled with the experimental Luau JIT engine. All assembly snippets show only the inner loop body. Variants: scalar — uses type info, records, and a basic-block compiler to generate a much more efficient inner loop; vector — same as scalar, but with the scalar components replaced by a first-class 3-component vector type.
# ---------------------------------------------------------------------------
# Scalar variant: one iteration of the nbody inner loop, as emitted by the
# experimental Luau JIT. Intel syntax, disassembly listing — branch targets
# are out-of-line guard-failure / exit stubs.
# Register roles (as established by the loads below):
#   rdi = Lua stack frame base; values live in 16-byte slots
#         (8-byte payload, dword type tag at slot offset +12 —
#          presumably 3=number, 6=table, 10=record; TODO confirm
#          against Luau's type-tag enum)
#   rbx = runtime/VM state pointer (used only by the interrupt check)
# ---------------------------------------------------------------------------
# table type guard (memory safety): slot must actually hold a table (tag 6)
cmp dword ptr [rdi + 12], 6
jne 1072 <.text+0x5ea>
# load array index and convert to integer (+ exactness check):
# the double -> int -> double round trip must be lossless, else bail to
# the slow path (index was not an exact integer)
movsd xmm0, qword ptr [rdi + 256]
cvttsd2si eax, xmm0
cvtsi2sd xmm1, eax
ucomisd xmm1, xmm0
jne 1046 <.text+0x5ea>
# indices are 1-based; could remove this one with runtime changes
dec eax
# load table and do a bounds check on the lookup
mov rcx, qword ptr [rdi]                # rcx = table object
mov rdx, qword ptr [rcx + 32]           # rdx = table's array-part data pointer
cmp dword ptr [rcx + 20], eax           # unsigned compare: index < array size
jbe 1028 <.text+0x5ea>
# actual array lookup: elements are 16-byte tagged values, so index * 16
shl eax, 4
movups xmm0, xmmword ptr [rdx + rax]
movups xmmword ptr [rdi + 272], xmm0    # copy payload + tag into a stack slot
# record type guard (memory safety): fetched value must be a record (tag 10)
cmp dword ptr [rdi + 284], 10
jne 1030 <.text+0x607>
# check that record has enough fields (memory safety)
mov rcx, qword ptr [rdi + 272]          # rcx = record object pointer
cmp dword ptr [rcx + 12], 6             # field count must be > 6
jle 1013 <.text+0x607>
# basic block compiler, load phase (loads from records and stack).
# xmm12-14 = three accumulators from stack slots +176/+192/+208;
# xmm1-3   = record fields +96/+112/+128 (presumably the other body's
#            position components — TODO confirm field layout);
# xmm15, xmm11, xmm8, xmm9 = record fields +32/+48/+64/+80
vmovsd xmm12, qword ptr [rdi + 176]
vmovsd xmm13, qword ptr [rdi + 192]
vmovsd xmm14, qword ptr [rdi + 208]
vmovsd xmm1, qword ptr [rcx + 96]
vmovsd xmm2, qword ptr [rcx + 112]
vmovsd xmm3, qword ptr [rcx + 128]
vmovsd xmm15, qword ptr [rcx + 32]
vmovsd xmm11, qword ptr [rcx + 48]
vmovsd xmm8, qword ptr [rcx + 64]
vmovsd xmm9, qword ptr [rcx + 80]
vmovsd xmm0, qword ptr [rdi + 112]
# basic block compiler, arith phase (mostly devoid of memory access)
vsubsd xmm4, xmm0, xmm1                 # dx = component difference
vmovsd xmm0, qword ptr [rdi + 128]
vsubsd xmm5, xmm0, xmm2                 # dy
vmovsd xmm0, qword ptr [rdi + 144]
vsubsd xmm6, xmm0, xmm3                 # dz
vmulsd xmm2, xmm4, xmm4                 # dx*dx
vmulsd xmm1, xmm5, xmm5                 # dy*dy
vaddsd xmm3, xmm2, xmm1
vmulsd xmm2, xmm6, xmm6                 # dz*dz
vaddsd xmm7, xmm3, xmm2                 # d2 = dx*dx + dy*dy + dz*dz
vsqrtsd xmm10, xmm10, xmm7              # dist = sqrt(d2)
vmulsd xmm2, xmm10, xmm10
vmulsd xmm3, xmm2, xmm10                # dist^3
vmovsd xmm0, qword ptr [rdi + 32]
vdivsd xmm7, xmm0, xmm3                 # mag = k / dist^3 (k from stack +32;
                                        # presumably the timestep — confirm)
vmulsd xmm3, xmm7, qword ptr [rdi + 160]  # s1 = mag * scalar at +160
vmulsd xmm2, xmm15, xmm7                  # s2 = mag * record field (xmm15)
vmulsd xmm1, xmm4, xmm2
vsubsd xmm12, xmm12, xmm1               # acc.x -= dx * s2
vmulsd xmm1, xmm5, xmm2
vsubsd xmm13, xmm13, xmm1               # acc.y -= dy * s2
vmulsd xmm1, xmm6, xmm2
vsubsd xmm14, xmm14, xmm1               # acc.z -= dz * s2
vmulsd xmm1, xmm4, xmm3
vaddsd xmm11, xmm11, xmm1               # other.vx += dx * s1
vmulsd xmm1, xmm5, xmm3
vaddsd xmm8, xmm8, xmm1                 # other.vy += dy * s1
vmulsd xmm1, xmm6, xmm3
vaddsd xmm9, xmm9, xmm1                 # other.vz += dz * s1
# basic block compiler, store phase (note, stores type tags for memory safety:
# every 8-byte payload store is paired with re-writing the slot's dword tag)
vmovsd qword ptr [rdi + 176], xmm12
mov dword ptr [rdi + 188], 3            # tag 3 — presumably "number"; confirm
vmovsd qword ptr [rdi + 192], xmm13
mov dword ptr [rdi + 204], 3
vmovsd qword ptr [rdi + 208], xmm14
mov dword ptr [rdi + 220], 3
vmovsd qword ptr [rcx + 48], xmm11
mov dword ptr [rcx + 60], 3
vmovsd qword ptr [rcx + 64], xmm8
mov dword ptr [rcx + 76], 3
vmovsd qword ptr [rcx + 80], xmm9
mov dword ptr [rcx + 92], 3
# loop interrupt check, necessary to solve halting problem:
# re-read the runtime's interrupt callback pointer each iteration and
# leave the loop through an out-of-line stub if one has been installed
mov rax, qword ptr [rbx + 32]
mov rax, qword ptr [rax + 176]
test rax, rax
jne 860 <.text+0x6a2>
# loop back edge: i += step (numeric for-loop state in stack slots:
# +256 = i, +224 = limit, +240 = step); continue while limit >= i
movsd xmm0, qword ptr [rdi + 256]
movsd xmm1, qword ptr [rdi + 224]
addsd xmm0, qword ptr [rdi + 240]
movsd qword ptr [rdi + 256], xmm0
ucomisd xmm1, xmm0
jae -448 <.text+0x1b0>                  # jae (CF=0): taken iff limit >= i
                                        # and compare is ordered (no NaN)
# ---------------------------------------------------------------------------
# Vector variant: same inner loop, but the three scalar components are a
# first-class 3-component (single-precision) vector, so the guard/lookup
# prologue is identical in shape while the arithmetic uses packed-single
# ops with single<->double conversions around sqrt/div.
# Register roles: rdi = Lua stack frame base (16-byte tagged slots,
# dword tag at slot offset +12); rbx = runtime/VM state pointer.
# ---------------------------------------------------------------------------
# table type guard (memory safety): slot must hold a table (tag 6)
cmp dword ptr [rdi + 12], 6
jne 802 <.text+0x49c>
# load array index and convert to integer (+ exactness check):
# bail out unless the double round-trips through int32 losslessly
movsd xmm0, qword ptr [rdi + 192]
cvttsd2si eax, xmm0
cvtsi2sd xmm1, eax
ucomisd xmm1, xmm0
jne 776 <.text+0x49c>
# indices are 1-based; could remove this one with runtime changes
dec eax
# load table and do a bounds check on the lookup
mov rcx, qword ptr [rdi]                # rcx = table object
mov rdx, qword ptr [rcx + 32]           # rdx = array-part data pointer
cmp dword ptr [rcx + 20], eax           # unsigned: index < array size
jbe 758 <.text+0x49c>
# actual array lookup: 16-byte tagged values, index * 16
shl eax, 4
movups xmm0, xmmword ptr [rdx + rax]
movups xmmword ptr [rdi + 208], xmm0    # copy payload + tag into stack slot
# record type guard (memory safety): fetched value must be a record (tag 10)
cmp dword ptr [rdi + 220], 10
jne 760 <.text+0x4b9>
# check that record has enough fields (memory safety) — only 3 fields
# needed here since position/velocity collapsed into single vector fields
mov rcx, qword ptr [rdi + 208]          # rcx = record object pointer
cmp dword ptr [rcx + 12], 2             # field count must be > 2
jle 743 <.text+0x4b9>
# basic block compiler, load phase (loads from records and stack):
# xmm8 = accumulator vector (stack +128), xmm2/xmm3 = record vector
# fields (+48/+64), xmm7 = record double field (+32)
vmovups xmm8, xmmword ptr [rdi + 128]
vmovups xmm2, xmmword ptr [rcx + 48]
vmovups xmm3, xmmword ptr [rcx + 64]
vmovsd xmm7, qword ptr [rcx + 32]
# basic block compiler, arith phase (mostly devoid of memory access)
vmovups xmm0, xmmword ptr [rdi + 112]
vsubps xmm1, xmm0, xmm2                 # d = v_i - v_j (3 float lanes)
vdpps xmm0, xmm1, xmm1, 119             # imm 0x77: dot of lanes 0-2 of d
                                        # with itself -> d2, broadcast to
                                        # lanes 0-2
vcvtss2sd xmm2, xmm0, xmm0              # widen d2 to double for precision
vsqrtsd xmm4, xmm4, xmm2                # dist = sqrt(d2), in double
vmulsd xmm6, xmm4, xmm4
vmulsd xmm5, xmm6, xmm4                 # dist^3
vmovsd xmm0, qword ptr [rdi + 32]
vdivsd xmm2, xmm0, xmm5                 # mag = k / dist^3 (k from stack +32)
vmulsd xmm5, xmm2, qword ptr [rdi + 144]  # s1 = mag * scalar at +144
vmulsd xmm6, xmm7, xmm2                   # s2 = mag * record scalar (xmm7)
vcvtsd2ss xmm0, xmm0, xmm6              # narrow s2 back to float
vshufps xmm0, xmm0, xmm0, 0             # splat s2 across all 4 lanes
vmulps xmm2, xmm0, xmm1
vsubps xmm8, xmm8, xmm2                 # acc -= d * s2 (one packed op)
vcvtsd2ss xmm0, xmm0, xmm5              # narrow s1 to float
vshufps xmm0, xmm0, xmm0, 0             # splat s1
vmulps xmm2, xmm0, xmm1
vaddps xmm3, xmm3, xmm2                 # other.v += d * s1
# basic block compiler, store phase (note, stores type tags for memory
# safety; full 16-byte vector stores still re-write the dword tag)
vmovups xmmword ptr [rdi + 128], xmm8
mov dword ptr [rdi + 140], 4            # tag 4 — presumably "vector"; confirm
vmovups xmmword ptr [rcx + 64], xmm3
mov dword ptr [rcx + 76], 4
# loop interrupt check, necessary to solve halting problem:
# re-read the runtime's interrupt callback each iteration, exit if set
mov rax, qword ptr [rbx + 32]
mov rax, qword ptr [rax + 176]
test rax, rax
jne 699 <.text+0x52c>
# loop back edge: i += step (stack slots: +192 = i, +160 = limit,
# +176 = step); continue while limit >= i
movsd xmm0, qword ptr [rdi + 192]
movsd xmm1, qword ptr [rdi + 160]
addsd xmm0, qword ptr [rdi + 176]
movsd qword ptr [rdi + 192], xmm0
ucomisd xmm1, xmm0
jae -299 <.text+0x170>                  # jae (CF=0): limit >= i, ordered
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment