Last active
July 1, 2016 13:53
Revisions
-
vivekvpandya revised this gist
Jul 1, 2016. 3 changed files with 194 additions and 230 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,129 +2,122 @@ _mp_add: ## @mp_add .cfi_startproc ## BB#0: ## %entry pushq %rbp Ltmp110: .cfi_def_cfa_offset 16 Ltmp111: .cfi_offset %rbp, -16 movq %rsp, %rbp Ltmp112: .cfi_def_cfa_register %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx pushq %rax Ltmp113: .cfi_offset %rbx, -56 Ltmp114: .cfi_offset %r12, -48 Ltmp115: .cfi_offset %r13, -40 Ltmp116: .cfi_offset %r14, -32 Ltmp117: .cfi_offset %r15, -24 movq %r8, %r14 movq %rcx, %rbx movq %rdx, %r12 movl %esi, %r11d movl %edi, %r8d movl (%r12), %r15d movl 4(%r12), %ecx movl (%rbx), %edx movl 4(%rbx), %eax movl %ecx, %r10d subl %eax, %r10d movl %ecx, %r13d cmovsl %eax, %r13d movl %edx, %esi imull %r15d, %esi testl %esi, %esi js LBB20_7 ## BB#1: ## %if.then7 je LBB20_2 ## BB#3: ## %if.end16 testl %r10d, %r10d jns LBB20_4 ## BB#6: ## %if.else23 negl %r10d addq $8, %rbx addq $8, %r12 leaq 8(%r14), %r9 movl %r8d, %edi movl %r11d, %esi movl %r10d, %edx movq %rbx, %rcx movq %r12, %r8 jmp LBB20_5 LBB20_7: ## %if.else31 leaq 4(%r12), %rsi leaq 4(%rbx), %rdx movl %r8d, %edi callq _mp_unsgn_cmp movl %eax, %r15d testl %r15d, %r15d movl %r8d, -44(%rbp) ## 4-byte Spill js LBB20_9 ## BB#8: ## %if.then36 leaq 8(%r12), %rcx addq $8, %rbx leaq 8(%r14), %r9 movl %r8d, %edi movl %r11d, %esi movl %r10d, %edx movq %rbx, %r8 jmp LBB20_10 LBB20_2: ## %if.end16.thread addl %edx, %r15d addl %ecx, %eax xorl %r10d, %r10d movl %eax, %r13d LBB20_4: ## %if.then18 addq $8, %r12 addq $8, %rbx leaq 8(%r14), %r9 movl %r8d, %edi movl %r11d, %esi movl %r10d, %edx movq %r12, %rcx movq %rbx, %r8 LBB20_5: ## %if.end54 callq _mp_unexp_add addl %r13d, %eax jmp LBB20_11 LBB20_9: ## %if.else41 negl %r10d addq $8, %rbx movl %r8d, %edi 
leaq 8(%r12), %r8 leaq 8(%r14), %r9 movl %r11d, %esi movl %r10d, %edx movq %rbx, %rcx LBB20_10: ## %if.end47 callq _mp_unexp_sub subl %eax, %r13d movl (%r12), %ecx imull %r15d, %ecx xorl %r15d, %r15d cmpl -44(%rbp), %eax ## 4-byte Folded Reload cmovnel %ecx, %r15d movl %r13d, %eax LBB20_11: ## %if.end54 testl %r15d, %r15d cmovel %r15d, %eax movl %r15d, (%r14) movl %eax, 4(%r14) addq $8, %rsp popq %rbx popq %r12 popq %r13 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,88 +9,59 @@ Ltmp34: movq %rsp, %rbp Ltmp35: .cfi_def_cfa_register %rbp movl %edi, %r9d movl $1, (%rcx) movl $1, (%r8) movl 4(%rdx), %r11d addq $8, %rdx callq _mp_unexp_mp2d testb $1, %r11b je LBB8_2 ## BB#1: ## %if.then cvtsi2sdl %esi, %xmm1 mulsd %xmm1, %xmm0 decl %r11d LBB8_2: ## %if.end movl %r11d, %r10d shrl $31, %r10d addl %r11d, %r10d sarl %r10d sqrtsd %xmm0, %xmm3 movsd LCPI8_0(%rip), %xmm4 ## xmm4 = mem[0],zero ucomisd %xmm3, %xmm4 jbe LBB8_4 ## BB#3: ## %if.then7 xorps %xmm0, %xmm0 cvtsi2sdl %esi, %xmm0 mulsd %xmm0, %xmm3 decl %r10d LBB8_4: ## %if.end11 movl %r10d, 4(%rcx) addq $8, %rcx movl %r9d, %edi movapd %xmm3, %xmm0 movq %rcx, %rdx callq _mp_unexp_d2mp negl %r10d movapd %xmm4, %xmm0 divsd %xmm3, %xmm0 ucomisd %xmm0, %xmm4 jbe LBB8_7 ## BB#5: ## %while.body.lr.ph xorps %xmm1, %xmm1 cvtsi2sdl %esi, %xmm1 .p2align 4, 0x90 LBB8_6: ## %while.body ## =>This Inner Loop Header: Depth=1 mulsd %xmm1, %xmm0 decl %r10d ucomisd %xmm0, %xmm4 ja LBB8_6 LBB8_7: ## %while.end movl %r10d, 4(%r8) addq $8, %r8 movl %r9d, %edi movq %r8, %rdx popq %rbp jmp _mp_unexp_d2mp ## TAILCALL .cfi_endproc This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. 
To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,130 +2,130 @@
## ---------------------------------------------------------------------
## _mp_sub -- compiler-generated listing (x86-64, AT&T syntax).
##
## Register use as read from this listing:
##   %edi  first 32-bit argument; kept in %r15d and handed on as %edi to
##         _mp_unsgn_cmp, _mp_unexp_sub and _mp_unexp_add.
##   %esi  second 32-bit argument; handed on as %esi to _mp_unexp_sub /
##         _mp_unexp_add.
##   %rdx  pointer to the first operand record  ("a", kept in %r12).
##   %rcx  pointer to the second operand record ("b", kept in %r11).
##   %r8   pointer to the result record.
## Every record is accessed as: dword at +0, dword at +4, and an area
## starting at +8 whose address is passed to the helpers.
## (presumably sign, exponent and digit array -- TODO confirm against the
## C source of mp_sub.)
##
## The function stores the result dwords at +0 and +4 and returns
## whatever is left in %eax (the value stored at +4).
##
## NOTE(review): %r8, %r10 and %r11 are read after `callq _mp_unsgn_cmp`,
## and %r13 is only written after that call, although the System V AMD64
## ABI treats %r8-%r11 as caller-saved. Presumably this revision was
## built with interprocedural register allocation that knows the callees
## leave those registers intact -- confirm before reusing this code with
## different callees.
## ---------------------------------------------------------------------
_mp_sub:                                ## @mp_sub
	.cfi_startproc
## BB#0:                                ## %entry
	pushq	%rbp
Ltmp102:
	.cfi_def_cfa_offset 16
Ltmp103:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp104:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	## Sixth push lands at -48(%rbp), the slot later used to spill %r8;
	## it is released by `addq $8, %rsp` in the epilogue.
	pushq	%rax
Ltmp105:
	.cfi_offset %rbx, -56
Ltmp106:
	.cfi_offset %r12, -48
Ltmp107:
	.cfi_offset %r13, -40
Ltmp108:
	.cfi_offset %r14, -32
Ltmp109:
	.cfi_offset %r15, -24
	movq	%rcx, %r11              ## r11 = b
	movq	%rdx, %r12              ## r12 = a
	movl	%edi, %r15d             ## r15d = first argument
	movl	(%r12), %ebx            ## ebx = a[+0]
	movl	4(%r12), %ecx           ## ecx = a[+4]
	movl	(%r11), %edx            ## edx = b[+0]
	movl	4(%r11), %eax           ## eax = b[+4]
	movl	%ecx, %r10d
	subl	%eax, %r10d             ## r10d = a[+4] - b[+4]
	movl	%ecx, %r14d
	cmovsl	%eax, %r14d             ## r14d = a[+4], or b[+4] if the difference is negative
	movl	%edx, %edi
	imull	%ebx, %edi              ## edi = a[+0] * b[+0]
	testl	%edi, %edi
	jle	LBB19_1                 ## product <= 0
## BB#7:                                ## %if.else31
	## Product > 0: compare the operands, then subtract.
	movl	%esi, %ebx              ## keep second argument; a[+0] is re-read later
	leaq	4(%r12), %rsi           ## rsi = &a[+4]
	leaq	4(%r11), %rdx           ## rdx = &b[+4]
	movl	%r15d, %edi
	callq	_mp_unsgn_cmp
	movl	%eax, %r13d             ## r13d = comparison result
	testl	%r13d, %r13d
	movq	%r8, -48(%rbp)          ## 8-byte Spill
	                                ## (movq leaves the flags from testl intact)
	js	LBB19_9                 ## comparison result negative
## BB#8:                                ## %if.then36
	leaq	8(%r12), %rcx           ## rcx = a + 8
	addq	$8, %r11
	leaq	8(%r8), %r9             ## r9 = result + 8
	movl	%r15d, %edi
	movl	%ebx, %esi
	movl	%r10d, %edx             ## edx = a[+4] - b[+4]
	movq	%r11, %r8               ## r8 = b + 8
	jmp	LBB19_10
LBB19_1:                                ## %if.then7
	## Flags are still those of `testl %edi, %edi` above.
	js	LBB19_3                 ## product < 0
## BB#2:                                ## %if.end16.thread
	## Product == 0.
	subl	%edx, %ebx              ## ebx = a[+0] - b[+0]
	addl	%ecx, %eax              ## eax = a[+4] + b[+4]
	xorl	%r10d, %r10d            ## third helper argument becomes 0
	movl	%eax, %r14d
	jmp	LBB19_4
LBB19_9:                                ## %if.else41
	## Same call as %if.then36 with the operands swapped and the
	## difference negated.
	negl	%r10d
	addq	$8, %r11
	leaq	8(%r12), %rax
	leaq	8(%r8), %r9             ## r9 = result + 8
	movl	%r15d, %edi
	movl	%ebx, %esi
	movl	%r10d, %edx
	movq	%r11, %rcx              ## rcx = b + 8
	movq	%rax, %r8               ## r8 = a + 8
LBB19_10:                               ## %if.end47
	callq	_mp_unexp_sub
	subl	%eax, %r14d             ## r14d -= helper return value
	movl	(%r12), %ecx
	imull	%r13d, %ecx             ## ecx = a[+0] * comparison result
	xorl	%ebx, %ebx
	cmpl	%r15d, %eax
	cmovnel	%ecx, %ebx              ## ebx = 0 if return value == first argument, else ecx
	movl	%r14d, %eax
	movq	-48(%rbp), %rcx         ## 8-byte Reload
	                                ## rcx = result pointer
	jmp	LBB19_11
LBB19_3:                                ## %if.end16
	testl	%r10d, %r10d
	js	LBB19_6                 ## a[+4] - b[+4] negative: swap operands
LBB19_4:                                ## %if.then18
	## %esi still holds the incoming second argument on this path.
	addq	$8, %r12
	addq	$8, %r11
	movq	%r8, %r13               ## r13 = result pointer, kept across the call
	leaq	8(%r13), %r9            ## r9 = result + 8
	movl	%r15d, %edi
	movl	%r10d, %edx
	movq	%r12, %rcx              ## rcx = a + 8
	movq	%r11, %r8               ## r8 = b + 8
LBB19_5:                                ## %if.end54
	callq	_mp_unexp_add
	movq	%r13, %rcx              ## rcx = result pointer
	addl	%r14d, %eax             ## eax = helper return value + r14d
LBB19_11:                               ## %if.end54
	testl	%ebx, %ebx
	cmovel	%ebx, %eax              ## if ebx == 0 then eax = 0
	movl	%ebx, (%rcx)            ## result[+0] = ebx
	movl	%eax, 4(%rcx)           ## result[+4] = eax
	addq	$8, %rsp                ## drop the spill slot
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	retq
LBB19_6:                                ## %if.else23
	## Same call as %if.then18 with the operands swapped and the
	## difference negated.
	negl	%r10d
	addq	$8, %r11
	addq	$8, %r12
	movq	%r8, %r13               ## r13 = result pointer, kept across the call
	leaq	8(%r13), %r9            ## r9 = result + 8
	movl	%r15d, %edi
	movl	%r10d, %edx
	movq	%r11, %rcx              ## rcx = b + 8
	movq	%r12, %r8               ## r8 = a + 8
	jmp	LBB19_5
	.cfi_endproc
-
vivekvpandya created this gist
Jul 1, 2016. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,135 @@
## ---------------------------------------------------------------------
## _mp_add -- compiler-generated listing (x86-64, AT&T syntax).
##
## Register use as read from this listing:
##   %edi  first 32-bit argument, handed on as %edi to the helpers.
##   %esi  second 32-bit argument, handed on as %esi to _mp_unexp_add /
##         _mp_unexp_sub.
##   %rdx  pointer to the first operand record  ("a", kept in %r12).
##   %rcx  pointer to the second operand record ("b", kept in %r15).
##   %r8   pointer to the result record.
## Every record is accessed as: dword at +0, dword at +4, and an area
## starting at +8 whose address is passed to the helpers.
## (presumably sign, exponent and digit array -- TODO confirm against the
## C source of mp_add.)
##
## Stack slots: -44(%rbp) selected +4 value, -56(%rbp) result pointer,
## -60(%rbp) return value of _mp_unsgn_cmp.
## ---------------------------------------------------------------------
_mp_add:                                ## @mp_add
	.cfi_startproc
## BB#0:                                ## %entry
	pushq	%rbp
Ltmp115:
	.cfi_def_cfa_offset 16
Ltmp116:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp117:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$24, %rsp               ## room for the three spill slots
Ltmp118:
	.cfi_offset %rbx, -56
Ltmp119:
	.cfi_offset %r12, -48
Ltmp120:
	.cfi_offset %r13, -40
Ltmp121:
	.cfi_offset %r14, -32
Ltmp122:
	.cfi_offset %r15, -24
	movq	%rcx, %r15              ## r15 = b
	movq	%rdx, %r12              ## r12 = a
	movl	(%r12), %r14d           ## r14d = a[+0]
	movl	4(%r12), %ecx           ## ecx = a[+4]
	movl	(%r15), %ebx            ## ebx = b[+0]
	movl	4(%r15), %eax           ## eax = b[+4]
	movl	%ecx, %r13d
	subl	%eax, %r13d             ## r13d = a[+4] - b[+4]
	movl	%ecx, %edx
	cmovsl	%eax, %edx              ## edx = a[+4], or b[+4] if the difference is negative
	movl	%edx, -44(%rbp)         ## 4-byte Spill
	movl	%ebx, %edx
	imull	%r14d, %edx             ## edx = a[+0] * b[+0]
	testl	%edx, %edx
	js	LBB20_7                 ## product < 0: compare, then subtract
## BB#1:                                ## %if.then7
	## Flags are still those of `testl %edx, %edx`.
	je	LBB20_2                 ## product == 0
## BB#3:                                ## %if.end16
	testl	%r13d, %r13d
	js	LBB20_6                 ## a[+4] - b[+4] negative: swap operands
## BB#4:
	movl	-44(%rbp), %ebx         ## 4-byte Reload
	jmp	LBB20_5
LBB20_7:                                ## %if.else31
	movl	%esi, %r14d             ## keep second argument; a[+0] is re-read later
	movq	%r8, -56(%rbp)          ## 8-byte Spill
	leaq	4(%r12), %rsi           ## rsi = &a[+4]
	leaq	4(%r15), %rdx           ## rdx = &b[+4]
	movl	%edi, %ebx              ## keep first argument across the calls
	callq	_mp_unsgn_cmp
	movl	%eax, -60(%rbp)         ## 4-byte Spill
	testl	%eax, %eax
	js	LBB20_9                 ## comparison result negative
## BB#8:                                ## %if.then36
	leaq	8(%r12), %rcx           ## rcx = a + 8
	addq	$8, %r15
	movq	-56(%rbp), %rax         ## 8-byte Reload
	leaq	8(%rax), %r9            ## r9 = result + 8
	movl	%ebx, %edi
	movl	%r14d, %esi
	movl	%r13d, %edx             ## edx = a[+4] - b[+4]
	movq	%r15, %r8               ## r8 = b + 8
	jmp	LBB20_10
LBB20_2:                                ## %if.end16.thread
	addl	%ebx, %r14d             ## r14d = a[+0] + b[+0]
	addl	%ecx, %eax              ## eax = a[+4] + b[+4]
	xorl	%r13d, %r13d            ## third helper argument becomes 0
	movl	%eax, %ebx
LBB20_5:                                ## %if.then18
	## %edi and %esi still hold the incoming arguments on this path.
	addq	$8, %r12
	addq	$8, %r15
	leaq	8(%r8), %r9             ## r9 = result + 8
	movl	%r13d, %edx
	movq	%r12, %rcx              ## rcx = a + 8
	movq	%r8, %r12               ## r12 = result pointer, kept across the call
	movq	%r15, %r8               ## r8 = b + 8
	callq	_mp_unexp_add
	movq	%r12, %rcx              ## rcx = result pointer
	addl	%ebx, %eax              ## eax = helper return value + ebx
	jmp	LBB20_11
LBB20_6:                                ## %if.else23
	## Same call as %if.then18 with the operands swapped and the
	## difference negated.
	negl	%r13d
	addq	$8, %r15
	addq	$8, %r12
	leaq	8(%r8), %r9             ## r9 = result + 8
	movl	%r13d, %edx
	movq	%r15, %rcx              ## rcx = b + 8
	movq	%r8, %rbx               ## rbx = result pointer, kept across the call
	movq	%r12, %r8               ## r8 = a + 8
	callq	_mp_unexp_add
	movq	%rbx, %rcx              ## rcx = result pointer
	addl	-44(%rbp), %eax         ## 4-byte Folded Reload
	jmp	LBB20_11
LBB20_9:                                ## %if.else41
	## Same call as %if.then36 with the operands swapped and the
	## difference negated.
	negl	%r13d
	addq	$8, %r15
	leaq	8(%r12), %r8            ## r8 = a + 8
	movq	-56(%rbp), %rax         ## 8-byte Reload
	leaq	8(%rax), %r9            ## r9 = result + 8
	movl	%ebx, %edi
	movl	%r14d, %esi
	movl	%r13d, %edx
	movq	%r15, %rcx              ## rcx = b + 8
LBB20_10:                               ## %if.end47
	callq	_mp_unexp_sub
	movl	-44(%rbp), %edx         ## 4-byte Reload
	subl	%eax, %edx              ## edx = spilled +4 value - helper return value
	movl	(%r12), %ecx
	imull	-60(%rbp), %ecx         ## 4-byte Folded Reload
	                                ## ecx = a[+0] * comparison result
	xorl	%r14d, %r14d
	cmpl	%ebx, %eax
	cmovnel	%ecx, %r14d             ## r14d = 0 if return value == first argument, else ecx
	movl	%edx, %eax
	movq	-56(%rbp), %rcx         ## 8-byte Reload
LBB20_11:                               ## %if.end54
	testl	%r14d, %r14d
	cmovel	%r14d, %eax             ## if r14d == 0 then eax = 0
	movl	%r14d, (%rcx)           ## result[+0] = r14d
	movl	%eax, 4(%rcx)           ## result[+4] = eax
	addq	$24, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	retq
	.cfi_endproc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,96 @@
## ---------------------------------------------------------------------
## _mp_sqrt_init -- compiler-generated listing (x86-64, AT&T syntax).
##
## Register use as read from this listing:
##   %edi  first 32-bit argument, handed on as %edi to the helpers.
##   %esi  second 32-bit argument (kept in %r14d); handed on as %esi and
##         also converted to double and used as a multiplier.
##         (presumably the radix -- TODO confirm.)
##   %rdx  pointer to the input record; dword +4 is read and the address
##         of +8 is passed to _mp_unexp_mp2d.
##   %rcx  pointer to the first output record  (kept in %r13).
##   %r8   pointer to the second output record (kept in %r12).
## Both output records get 1 stored at +0, a computed dword at +4, and
## the address of +8 is passed to _mp_unexp_d2mp together with a double
## in %xmm0. The second _mp_unexp_d2mp call is a tail call.
##
## LCPI8_0 is a constant-pool double defined outside this listing.
## ---------------------------------------------------------------------
_mp_sqrt_init:                          ## @mp_sqrt_init
	.cfi_startproc
## BB#0:                                ## %entry
	pushq	%rbp
Ltmp33:
	.cfi_def_cfa_offset 16
Ltmp34:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp35:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	## Sixth push lands at -48(%rbp), the slot used for spills below.
	pushq	%rax
Ltmp36:
	.cfi_offset %rbx, -56
Ltmp37:
	.cfi_offset %r12, -48
Ltmp38:
	.cfi_offset %r13, -40
Ltmp39:
	.cfi_offset %r14, -32
Ltmp40:
	.cfi_offset %r15, -24
	movq	%r8, %r12               ## r12 = second output record
	movq	%rcx, %r13              ## r13 = first output record
	movl	%esi, %r14d             ## r14d = second argument
	movl	$1, (%r13)              ## first output [+0] = 1
	movl	$1, (%r12)              ## second output [+0] = 1
	movl	4(%rdx), %r15d          ## r15d = input[+4]
	addq	$8, %rdx                ## rdx = input + 8
	movl	%edi, -48(%rbp)         ## 4-byte Spill
	callq	_mp_unexp_mp2d          ## double result is used from xmm0
	testb	$1, %r15b
	je	LBB8_2                  ## input[+4] is even
## BB#1:                                ## %if.then
	## Odd: scale the double by the second argument and decrement so the
	## halving below works on an even value.
	cvtsi2sdl	%r14d, %xmm1
	mulsd	%xmm1, %xmm0
	decl	%r15d
LBB8_2:                                 ## %if.end
	## ebx = r15d / 2 as a signed division (sign bit added before the
	## arithmetic shift).
	movl	%r15d, %ebx
	shrl	$31, %ebx
	addl	%r15d, %ebx
	sarl	%ebx
	sqrtsd	%xmm0, %xmm0            ## xmm0 = sqrt(xmm0)
	movsd	LCPI8_0(%rip), %xmm1    ## xmm1 = mem[0],zero
	ucomisd	%xmm0, %xmm1
	jbe	LBB8_4                  ## skip unless LCPI8_0 > xmm0
## BB#3:                                ## %if.then7
	xorps	%xmm1, %xmm1
	cvtsi2sdl	%r14d, %xmm1
	mulsd	%xmm1, %xmm0            ## xmm0 *= (double)second argument
	decl	%ebx
LBB8_4:                                 ## %if.end11
	## The -48(%rbp) slot is reused: the first argument is reloaded from
	## it before the double is spilled into the same 8 bytes.
	movl	-48(%rbp), %r15d        ## 4-byte Reload
	movsd	%xmm0, -48(%rbp)        ## 8-byte Spill
	movl	%ebx, 4(%r13)           ## first output [+4] = ebx
	addq	$8, %r13
	movl	%r15d, %edi
	movl	%r14d, %esi
	movq	%r13, %rdx              ## rdx = first output + 8
	callq	_mp_unexp_d2mp
	negl	%ebx
	movsd	LCPI8_0(%rip), %xmm2    ## xmm2 = mem[0],zero
	movapd	%xmm2, %xmm0
	divsd	-48(%rbp), %xmm0        ## 8-byte Folded Reload
	                                ## xmm0 = LCPI8_0 / spilled square root
	ucomisd	%xmm0, %xmm2
	jbe	LBB8_7                  ## skip the loop unless LCPI8_0 > xmm0
## BB#5:                                ## %while.body.lr.ph
	xorps	%xmm1, %xmm1
	cvtsi2sdl	%r14d, %xmm1            ## loop-invariant multiplier
	.p2align	4, 0x90
LBB8_6:                                 ## %while.body
                                        ## =>This Inner Loop Header: Depth=1
	## Multiply by the second argument and decrement ebx for as long as
	## LCPI8_0 > xmm0.
	mulsd	%xmm1, %xmm0
	decl	%ebx
	ucomisd	%xmm0, %xmm2
	ja	LBB8_6
LBB8_7:                                 ## %while.end
	movl	%ebx, 4(%r12)           ## second output [+4] = ebx
	addq	$8, %r12
	movl	%r15d, %edi
	movl	%r14d, %esi
	movq	%r12, %rdx              ## rdx = second output + 8
	## Frame is torn down first so the helper returns to our caller.
	addq	$8, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	jmp	_mp_unexp_d2mp          ## TAILCALL
	.cfi_endproc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,131 @@
## ---------------------------------------------------------------------
## _mp_sub -- compiler-generated listing (x86-64, AT&T syntax).
##
## Register use as read from this listing:
##   %edi  first 32-bit argument, handed on as %edi to the helpers.
##   %esi  second 32-bit argument, handed on as %esi to _mp_unexp_sub /
##         _mp_unexp_add.
##   %rdx  pointer to the first operand record  ("a", kept in %r12).
##   %rcx  pointer to the second operand record ("b", kept in %r14).
##   %r8   pointer to the result record.
## Every record is accessed as: dword at +0, dword at +4, and an area
## starting at +8 whose address is passed to the helpers.
## (presumably sign, exponent and digit array -- TODO confirm against the
## C source of mp_sub.)
##
## Stack slots: -44(%rbp) first argument, -48(%rbp) second argument,
## -56(%rbp) result pointer.
## ---------------------------------------------------------------------
_mp_sub:                                ## @mp_sub
	.cfi_startproc
## BB#0:                                ## %entry
	pushq	%rbp
Ltmp107:
	.cfi_def_cfa_offset 16
Ltmp108:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp109:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$24, %rsp               ## room for the three spill slots
Ltmp110:
	.cfi_offset %rbx, -56
Ltmp111:
	.cfi_offset %r12, -48
Ltmp112:
	.cfi_offset %r13, -40
Ltmp113:
	.cfi_offset %r14, -32
Ltmp114:
	.cfi_offset %r15, -24
	movq	%rcx, %r14              ## r14 = b
	movq	%rdx, %r12              ## r12 = a
	movl	(%r12), %ebx            ## ebx = a[+0]
	movl	4(%r12), %ecx           ## ecx = a[+4]
	movl	(%r14), %r9d            ## r9d = b[+0]
	movl	4(%r14), %eax           ## eax = b[+4]
	movl	%ecx, %r13d
	subl	%eax, %r13d             ## r13d = a[+4] - b[+4]
	movl	%ecx, %r15d
	cmovsl	%eax, %r15d             ## r15d = a[+4], or b[+4] if the difference is negative
	movl	%r9d, %edx
	imull	%ebx, %edx              ## edx = a[+0] * b[+0]
	testl	%edx, %edx
	jle	LBB19_3                 ## product <= 0
## BB#1:                                ## %if.else31
	## Product > 0: compare the operands, then subtract.
	movl	%esi, -48(%rbp)         ## 4-byte Spill
	movq	%r8, -56(%rbp)          ## 8-byte Spill
	leaq	4(%r12), %rsi           ## rsi = &a[+4]
	leaq	4(%r14), %rdx           ## rdx = &b[+4]
	movl	%edi, -44(%rbp)         ## 4-byte Spill
	callq	_mp_unsgn_cmp
	movl	%eax, %ebx              ## ebx = comparison result; a[+0] is re-read later
	testl	%ebx, %ebx
	js	LBB19_5                 ## comparison result negative
## BB#2:                                ## %if.then36
	leaq	8(%r12), %rcx           ## rcx = a + 8
	addq	$8, %r14
	movq	-56(%rbp), %rax         ## 8-byte Reload
	leaq	8(%rax), %r9            ## r9 = result + 8
	movl	-44(%rbp), %edi         ## 4-byte Reload
	movl	-48(%rbp), %esi         ## 4-byte Reload
	movl	%r13d, %edx             ## edx = a[+4] - b[+4]
	movq	%r14, %r8               ## r8 = b + 8
	jmp	LBB19_6
LBB19_3:                                ## %if.then7
	## Flags are still those of `testl %edx, %edx` above.
	js	LBB19_7                 ## product < 0
## BB#4:                                ## %if.end16.thread
	## Product == 0.
	subl	%r9d, %ebx              ## ebx = a[+0] - b[+0]
	addl	%ecx, %eax              ## eax = a[+4] + b[+4]
	xorl	%r13d, %r13d            ## third helper argument becomes 0
	movl	%eax, %r15d
	jmp	LBB19_8
LBB19_5:                                ## %if.else41
	## Same call as %if.then36 with the operands swapped and the
	## difference negated.
	negl	%r13d
	addq	$8, %r14
	leaq	8(%r12), %r8            ## r8 = a + 8
	movq	-56(%rbp), %rax         ## 8-byte Reload
	leaq	8(%rax), %r9            ## r9 = result + 8
	movl	-44(%rbp), %edi         ## 4-byte Reload
	movl	-48(%rbp), %esi         ## 4-byte Reload
	movl	%r13d, %edx
	movq	%r14, %rcx              ## rcx = b + 8
LBB19_6:                                ## %if.end47
	callq	_mp_unexp_sub
	subl	%eax, %r15d             ## r15d -= helper return value
	movl	(%r12), %ecx
	imull	%ebx, %ecx              ## ecx = a[+0] * comparison result
	xorl	%ebx, %ebx
	cmpl	-44(%rbp), %eax         ## 4-byte Folded Reload
	cmovnel	%ecx, %ebx              ## ebx = 0 if return value == first argument, else ecx
	movl	%r15d, %eax
	movq	-56(%rbp), %rcx         ## 8-byte Reload
	jmp	LBB19_10
LBB19_7:                                ## %if.end16
	testl	%r13d, %r13d
	js	LBB19_11                ## a[+4] - b[+4] negative: swap operands
LBB19_8:                                ## %if.then18
	## %edi and %esi still hold the incoming arguments on this path.
	addq	$8, %r12
	addq	$8, %r14
	leaq	8(%r8), %r9             ## r9 = result + 8
	movl	%r13d, %edx
	movq	%r12, %rcx              ## rcx = a + 8
	movq	%r8, %r12               ## r12 = result pointer, kept across the call
	movq	%r14, %r8               ## r8 = b + 8
	callq	_mp_unexp_add
	movq	%r12, %rcx              ## rcx = result pointer
LBB19_9:                                ## %if.end54
	addl	%r15d, %eax             ## eax = helper return value + r15d
LBB19_10:                               ## %if.end54
	testl	%ebx, %ebx
	cmovel	%ebx, %eax              ## if ebx == 0 then eax = 0
	movl	%ebx, (%rcx)            ## result[+0] = ebx
	movl	%eax, 4(%rcx)           ## result[+4] = eax
	addq	$24, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	retq
LBB19_11:                               ## %if.else23
	## Same call as %if.then18 with the operands swapped and the
	## difference negated.
	negl	%r13d
	addq	$8, %r14
	addq	$8, %r12
	leaq	8(%r8), %r9             ## r9 = result + 8
	movl	%r13d, %edx
	movq	%r14, %rcx              ## rcx = b + 8
	movq	%r8, %r14               ## r14 = result pointer, kept across the call
	movq	%r12, %r8               ## r8 = a + 8
	callq	_mp_unexp_add
	movq	%r14, %rcx              ## rcx = result pointer
	jmp	LBB19_9
	.cfi_endproc