Skip to content

Instantly share code, notes, and snippets.

@gciruelos
Created April 3, 2015 21:50
Show Gist options
  • Save gciruelos/515c7c358cbe01ab33bc to your computer and use it in GitHub Desktop.
Save gciruelos/515c7c358cbe01ab33bc to your computer and use it in GitHub Desktop.
# ---------------------------------------------------------------------
# Syntax: x86-64, GNU assembler directives, Intel operand order
# (op dst, src) with unprefixed register names.  The assembler's default
# dialect is AT&T, so the dialect must be selected explicitly before the
# first instruction or the file does not assemble.
# ---------------------------------------------------------------------
.text
.intel_syntax noprefix
.file "fact.c"
# 16-byte constants for the SSE code in fact.  They are read with movdqa
# and as direct memory operands of paddd, both of which need 16-byte
# alignment; each table is exactly 16 bytes, so aligning the first one
# keeps all three aligned.
.section .rodata.cst16,"aM",@progbits,16
.align 16
# Multiplicative identity in every lane: initial value of the vector
# accumulators when no vector iteration has been peeled.
.LCPI0_0:
.long 1 # 0x1
.long 1 # 0x1
.long 1 # 0x1
.long 1 # 0x1
# Lane offsets [0, -1, -2, -3] (two's complement): added to a broadcast
# n to form the factors n, n-1, n-2, n-3.
.LCPI0_1:
.long 0 # 0x0
.long 4294967295 # 0xffffffff
.long 4294967294 # 0xfffffffe
.long 4294967293 # 0xfffffffd
# Lane offsets [-4, -5, -6, -7]: added to a broadcast n to form the
# factors n-4, n-5, n-6, n-7.
.LCPI0_2:
.long 4294967292 # 0xfffffffc
.long 4294967291 # 0xfffffffb
.long 4294967290 # 0xfffffffa
.long 4294967289 # 0xfffffff9
.text
.globl fact
.align 16, 0x90
.type fact,@function
# ---------------------------------------------------------------------
# fact -- 32-bit product n * (n-1) * ... * 2, i.e. n! modulo 2^32.
#
# In:    edi = n (signed 32-bit)
# Out:   eax = product; 1 when n < 2
# Clobb: rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm8, flags
# Saved: rbx (pushed on entry, popped before ret); no other stack use.
# The edi-in / eax-out register use matches the System V AMD64
# convention for a function taking and returning one int.
#
# Shape of the code:
#   1. n < 2                -> return 1.
#   2. trip = n - 1 factors to multiply; vec = trip & -8 of them are
#      handled eight at a time in two 4-lane SSE accumulators.
#   3. The vector loop is unrolled by two (16 factors per pass); one
#      8-factor iteration is peeled first when the iteration count is odd.
#   4. middle.block multiplies the eight lanes together into edx.
#   5. The remaining trip - vec factors are multiplied in scalar code:
#      a one-factor-per-pass loop for (remaining % 8) factors, then an
#      8x-unrolled loop.
#
# pmuludq multiplies only dwords 0 and 2 of its operands (into 64-bit
# results).  Every 4-lane 32-bit multiply below is therefore built from
# two pmuludq -- one on the even lanes, one on the odd lanes moved into
# even position by pshufd [1,1,3,3] -- whose low halves are gathered
# back into lane order by the shufps -120 / shufps -40 pair.
# ---------------------------------------------------------------------
fact: # @fact
.cfi_startproc
# BB#0:
# Entry: save rbx (used below to hold the vector factor count).
push rbx
.Ltmp0:
.cfi_def_cfa_offset 16
.Ltmp1:
.cfi_offset rbx, -16
# kill: EDI<def> EDI<kill> RDI<def>
# Result for the n < 2 early exit (signed compare).
mov eax, 1
cmp edi, 2
jl .LBB0_20
# BB#1: # %tailrecurse.preheader
# eax = n + max(~n, -3).  With n >= 2 here, ~n <= -3, so eax = n - 3.
# r10d keeps the constant -3 for the scalar remainder; edx = 1 is the
# running scalar product.
mov esi, edi
not esi
cmp esi, -4
mov r10d, -3
mov eax, -3
cmovg eax, esi
add eax, edi
mov edx, 1
# eax + 2 is the trip count; if it is zero go straight to scalar code.
cmp eax, -2
je .LBB0_13
# BB#2: # %overflow.checked
# eax = trip count (n - 1); ebx = r8d = trip & -8 = factors handled by
# the vector code.  Zero means there is no vector work at all.
add eax, 2
mov ebx, eax
and ebx, -8
mov r8d, eax
and r8d, -8
je .LBB0_3
# BB#4: # %vector.body.preheader
# Recompute the vector factor count and turn it into
# ecx = vec / 8 - 1 (vector iterations minus one) and
# edx = vec / 8     (vector iterations).
cmp esi, -4
mov ecx, -3
cmovg ecx, esi
lea ecx, [rdi + rcx + 2]
and ecx, -8
add ecx, -8
shr ecx, 3
lea edx, [rcx + 1]
# Even iteration count: nothing to peel.
test dl, 1
je .LBB0_5
# BB#6: # %vector.body.prol
# Peeled first vector iteration: the accumulators start as the factors
# themselves, xmm1 = xmm3 = [n, n-1, n-2, n-3] and
# xmm0 = xmm2 = [n-4, n-5, n-6, n-7].  r11d = 8 factors consumed.
movd xmm0, edi
pshufd xmm2, xmm0, 0 # xmm2 = xmm0[0,0,0,0]
movdqa xmm3, xmmword ptr [rip + .LCPI0_1] # xmm3 = [0,4294967295,4294967294,4294967293]
paddd xmm3, xmm2
paddd xmm2, xmmword ptr [rip + .LCPI0_2]
mov r11d, 8
movdqa xmm1, xmm3
movdqa xmm0, xmm2
jmp .LBB0_7
.LBB0_3:
# No vector work: feed identity vectors to the reduction so it yields 1.
# r8d is set to 0 for the trip-count comparison after the reduction.
xor r8d, r8d
movdqa xmm3, xmmword ptr [rip + .LCPI0_0] # xmm3 = [1,1,1,1]
movdqa xmm2, xmm3
jmp .LBB0_12
.LBB0_5:
# Nothing peeled: accumulators xmm1/xmm0 start at [1,1,1,1] and no
# factors have been consumed (r11d = 0).  xmm2/xmm3 are zeroed here but
# the loop below is always entered on this path (ecx is odd, hence
# non-zero) and they are rewritten before the reduction reads them.
movdqa xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [1,1,1,1]
xor r11d, r11d
pxor xmm2, xmm2
pxor xmm3, xmm3
movdqa xmm0, xmm1
.LBB0_7: # %vector.body.preheader.split
# r9d = n - vec: the value of n at which the scalar remainder resumes.
mov r9d, edi
sub r9d, ebx
# ecx != 0 means more than one vector iteration: run the main loop.
test ecx, ecx
jne .LBB0_9
# BB#8:
# Only the peeled iteration was needed; xmm3/xmm2 already hold it.
mov edi, r9d
jmp .LBB0_12
.LBB0_9: # %vector.body.preheader.split.split
# edx = first factor of the next vector iteration (n - consumed).
# esi = vector factors still to do (vec - consumed), a non-zero
#       multiple of 16, counted down by the loop.
# xmm8 / xmm3 = lane offsets [0,-1,-2,-3] / [-4,-5,-6,-7].
mov edx, edi
sub edx, r11d
cmp esi, -4
mov ecx, -3
cmovg ecx, esi
lea esi, [rdi + rcx + 2]
and esi, -8
sub esi, r11d
movdqa xmm8, xmmword ptr [rip + .LCPI0_1] # xmm8 = [0,4294967295,4294967294,4294967293]
movdqa xmm3, xmmword ptr [rip + .LCPI0_2] # xmm3 = [4294967292,4294967291,4294967290,4294967289]
.align 16, 0x90
.LBB0_10: # %vector.body
# =>This Inner Loop Header: Depth=1
# On entry xmm1 / xmm0 are the two 4-lane accumulators.
# --- first half: factors edx .. edx-7 ---
# xmm5 = edx + [0..-3], xmm4 = edx + [-4..-7].
movd xmm4, edx
pshufd xmm4, xmm4, 0 # xmm4 = xmm4[0,0,0,0]
movdqa xmm5, xmm4
paddd xmm5, xmm8
paddd xmm4, xmm3
# xmm5 = xmm1 * xmm5 lane-wise; xmm7 keeps the raw odd-lane products.
pshufd xmm6, xmm5, -11 # xmm6 = xmm5[1,1,3,3]
pmuludq xmm5, xmm1
pshufd xmm7, xmm1, -11 # xmm7 = xmm1[1,1,3,3]
pmuludq xmm7, xmm6
shufps xmm5, xmm7, -120 # xmm5 = xmm5[0,2],xmm7[0,2]
shufps xmm5, xmm5, -40 # xmm5 = xmm5[0,2,1,3]
# xmm4 = xmm0 * xmm4 lane-wise; xmm6 keeps the raw odd-lane products.
pshufd xmm1, xmm4, -11 # xmm1 = xmm4[1,1,3,3]
pmuludq xmm4, xmm0
pshufd xmm6, xmm0, -11 # xmm6 = xmm0[1,1,3,3]
pmuludq xmm6, xmm1
shufps xmm4, xmm6, -120 # xmm4 = xmm4[0,2],xmm6[0,2]
shufps xmm4, xmm4, -40 # xmm4 = xmm4[0,2,1,3]
# --- second half: factors edx-8 .. edx-15 ---
# xmm1 = (edx-8) + [0..-3], xmm0 = (edx-8) + [-4..-7].
lea ecx, [rdx - 8]
movd xmm0, ecx
pshufd xmm0, xmm0, 0 # xmm0 = xmm0[0,0,0,0]
movdqa xmm1, xmm0
paddd xmm1, xmm8
paddd xmm0, xmm3
# New accumulator xmm1 = xmm5 * xmm1.  The odd lanes multiply against
# xmm7 directly: its dwords 0 and 2 already hold the low halves of the
# first half's lane-1 and lane-3 products.
pshufd xmm2, xmm1, -11 # xmm2 = xmm1[1,1,3,3]
pmuludq xmm2, xmm7
pmuludq xmm1, xmm5
shufps xmm1, xmm2, -120 # xmm1 = xmm1[0,2],xmm2[0,2]
shufps xmm1, xmm1, -40 # xmm1 = xmm1[0,2,1,3]
# New accumulator xmm0 = xmm4 * xmm0, odd lanes via xmm6 likewise.
pshufd xmm2, xmm0, -11 # xmm2 = xmm0[1,1,3,3]
pmuludq xmm2, xmm6
pmuludq xmm0, xmm4
shufps xmm0, xmm2, -120 # xmm0 = xmm0[0,2],xmm2[0,2]
shufps xmm0, xmm0, -40 # xmm0 = xmm0[0,2,1,3]
# 16 factors consumed; loop until the remaining vector count hits zero.
add edx, -16
add esi, -16
jne .LBB0_10
# BB#11:
# Hand the accumulators to the reduction in xmm3/xmm2 and set edi to
# the value of n at which the scalar remainder resumes.
mov edi, r9d
movaps xmm3, xmm1
movaps xmm2, xmm0
.LBB0_12: # %middle.block
# Horizontal reduction of xmm3 and xmm2 into one 32-bit product.
# Step 1: xmm0 = xmm2 * xmm3 lane-wise (P0..P3).  xmm2 and xmm1 keep the
# raw 64-bit even-lane and odd-lane products.
pshufd xmm0, xmm2, -11 # xmm0 = xmm2[1,1,3,3]
pmuludq xmm2, xmm3
pshufd xmm1, xmm3, -11 # xmm1 = xmm3[1,1,3,3]
pmuludq xmm1, xmm0
movdqa xmm0, xmm2
shufps xmm0, xmm1, -120 # xmm0 = xmm0[0,2],xmm1[0,2]
shufps xmm0, xmm0, -40 # xmm0 = xmm0[0,2,1,3]
# Step 2: bring P2 (and P3) down to dwords 0/1 and multiply by P0/P1.
# From here on only dword 0 of xmm2 matters; the other lanes carry
# leftover values that are never read as the result.
punpckhdq xmm2, xmm1 # xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
pmuludq xmm2, xmm0
pshufd xmm0, xmm1, 78 # xmm0 = xmm1[2,3,0,1]
pmuludq xmm0, xmm1
shufps xmm2, xmm0, -120 # xmm2 = xmm2[0,2],xmm0[0,2]
shufps xmm2, xmm2, -40 # xmm2 = xmm2[0,2,1,3]
# Step 3: dword 0 of xmm2 (P0*P2) times dword 0 of xmm0 (P1*P3).
pmuludq xmm2, xmm0
pmuludq xmm0, xmm0
shufps xmm2, xmm0, -120 # xmm2 = xmm2[0,2],xmm0[0,2]
shufps xmm2, xmm2, -40 # xmm2 = xmm2[0,2,1,3]
# edx = product of all factors handled by the vector code.
movd edx, xmm2
# Compare total trip count with the vector count; the mov in between
# leaves the flags intact.  Equal means nothing is left: return edx.
cmp eax, r8d
mov eax, edx
je .LBB0_20
.LBB0_13: # %tailrecurse.preheader9
# Scalar remainder.  edi = next factor, edx = running product.
# r10d = max(~edi, -3) (it was preloaded with -3), so
# esi = edi + r10 + 2 = remaining factor count and r8d = remaining - 1.
mov ecx, edi
not ecx
cmp ecx, -4
cmovg r10d, ecx
lea esi, [rdi + r10 + 2]
lea r8d, [rdi + r10 + 1]
# eax is only a placeholder here; both loop preheaders below reload it
# from edx.  NOTE(review): the one path that would return this zero
# (remaining % 8 == 0 together with remaining - 1 < 7 unsigned) cannot
# occur, since no multiple of 8 lies in 1..7.
xor eax, eax
# Remaining count already a multiple of 8: skip the one-at-a-time loop.
test sil, 7
je .LBB0_14
# BB#15: # %tailrecurse.prol.preheader
# esi = -(remaining & 7): a negative counter incremented up to zero.
cmp ecx, -4
mov eax, -3
cmovg eax, ecx
lea esi, [rdi + rax + 2]
and esi, 7
neg esi
mov eax, edx
.align 16, 0x90
.LBB0_16: # %tailrecurse.prol
# =>This Inner Loop Header: Depth=1
# One factor per pass: eax *= edi, then edi -= 1.  The jne tests the
# flags set by inc; the two mov instructions after it do not alter them.
lea ecx, [rdi - 1]
imul eax, edi
inc esi
mov edi, ecx
mov edx, eax
jne .LBB0_16
jmp .LBB0_17
.LBB0_14:
# Prologue skipped: ecx = next factor, as the prologue path leaves it.
mov ecx, edi
.LBB0_17: # %tailrecurse.preheader9.split
# Fewer than 8 remaining factors in total: the prologue did them all.
cmp r8d, 7
jb .LBB0_20
# BB#18: # %tailrecurse.preheader9.split.split
# Bias ecx by -3 so the eight factors of one pass are ecx+3 .. ecx-4.
add ecx, -3
mov eax, edx
.align 16, 0x90
.LBB0_19: # %tailrecurse
# =>This Inner Loop Header: Depth=1
# Eight multiplies per pass; the running product alternates between
# edx and eax and ends the pass in eax.
lea edx, [rcx + 3]
imul edx, eax
lea eax, [rcx + 2]
imul eax, edx
lea edx, [rcx + 1]
imul edx, eax
imul edx, ecx
lea eax, [rcx - 1]
imul eax, edx
lea edx, [rcx - 2]
imul edx, eax
lea eax, [rcx - 3]
imul eax, edx
# edx = last factor of this pass (ecx - 4); kept for the loop test.
lea edx, [rcx - 4]
imul eax, edx
add ecx, -8
# Continue while the last factor multiplied was greater than 2 (signed).
cmp edx, 2
jg .LBB0_19
.LBB0_20: # %tailrecurse._crit_edge
# Common exit: result is in eax; restore rbx and return.
pop rbx
ret
.Ltmp2:
# Symbol size for fact = bytes from its label to this point.
.size fact, .Ltmp2-fact
.cfi_endproc
# Records the producing compiler's version string in the object file.
.ident "clang version 3.6.0 (tags/RELEASE_360/final)"
# Empty .note.GNU-stack section: the GNU toolchain's marker that this
# object does not need an executable stack.
.section ".note.GNU-stack","",@progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment