Skip to content

Instantly share code, notes, and snippets.

@ymyzk
Last active June 10, 2016 04:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ymyzk/cc19d3f5f1c375d011d40174161cc518 to your computer and use it in GitHub Desktop.
Save ymyzk/cc19d3f5f1c375d011d40174161cc518 to your computer and use it in GitHub Desktop.
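Example of compiler optimization (clang & gcc): a C summation loop, followed by its LLVM IR and x86-64 assembly before and after optimization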
/*
 * Returns 0 + 1 + ... + n, accumulated one term at a time.
 *
 * n:       inclusive upper bound of the summation.
 * returns: the accumulated total; 0 when n is negative, because the loop
 *          condition is false on the first check and the body never runs.
 *
 * NOTE(review): acc and i are plain int. For large n, acc += i overflows,
 * and for n == INT_MAX the i++ overflows as well; signed overflow is
 * undefined behavior in C. Confirm callers keep n small.
 */
int sum(int n) {
int acc = 0;
/* Inclusive bound: the term i == n is added too. */
for (int i = 0; i <= n; i++) {
acc += i;
}
return acc;
}
; LLVM IR for sum(), produced by clang 3.8.0 (see !llvm.ident below).
; Each C variable (n, acc, i) lives in its own stack slot and is reloaded
; from memory before every use; the for loop maps to one basic block per
; part: for.cond (test), for.body (acc += i), for.inc (i++), for.end (return).
; NOTE(review): this looks like unoptimized output; the compiler flags are
; not shown here.
; ModuleID = '2.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
; Function Attrs: nounwind ssp uwtable
define i32 @sum(i32 %n) #0 {
entry:
; Reserve stack slots for the parameter and both locals.
%n.addr = alloca i32, align 4
%acc = alloca i32, align 4
%i = alloca i32, align 4
; Spill the incoming argument, then initialise acc = 0 and i = 0.
store i32 %n, i32* %n.addr, align 4
store i32 0, i32* %acc, align 4
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop test: continue while i <= n (signed comparison).
%0 = load i32, i32* %i, align 4
%1 = load i32, i32* %n.addr, align 4
%cmp = icmp sle i32 %0, %1
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; acc = acc + i. The nsw flag marks signed wrap-around as not expected.
%2 = load i32, i32* %i, align 4
%3 = load i32, i32* %acc, align 4
%add = add nsw i32 %3, %2
store i32 %add, i32* %acc, align 4
br label %for.inc
for.inc: ; preds = %for.body
; i = i + 1, then back to the loop test.
%4 = load i32, i32* %i, align 4
%inc = add nsw i32 %4, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
; Return the final value of acc.
%5 = load i32, i32* %acc, align 4
ret i32 %5
}
attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
##----------------------------------------------------------------------
## int sum(int n)  -- loop form, every variable kept in memory
## Syntax: GAS, Intel operand order (.intel_syntax noprefix), Mach-O.
## In:     edi = n
## Out:    eax = 0 + 1 + ... + n (0 when n is negative)
## Stack:  [rbp - 4]  = n
##         [rbp - 8]  = acc
##         [rbp - 12] = i
##         The slots sit below rsp without any "sub rsp"; this leaf
##         function relies on the System V red zone for them.
## Clobb:  eax, flags
##----------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.intel_syntax noprefix
.globl _sum
.align 4, 0x90
_sum: ## @sum
.cfi_startproc
## BB#0: ## %entry
## Standard frame-pointer prologue; the .cfi_* directives describe it to
## the unwinder.
push rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset rbp, -16
mov rbp, rsp
Ltmp2:
.cfi_def_cfa_register rbp
## Spill n, then acc = 0 and i = 0.
mov dword ptr [rbp - 4], edi
mov dword ptr [rbp - 8], 0
mov dword ptr [rbp - 12], 0
LBB0_1: ## %for.cond
## =>This Inner Loop Header: Depth=1
## Loop test: leave the loop once i > n (signed compare).
mov eax, dword ptr [rbp - 12]
cmp eax, dword ptr [rbp - 4]
jg LBB0_4
## BB#2: ## %for.body
## in Loop: Header=BB0_1 Depth=1
## acc = i + acc
mov eax, dword ptr [rbp - 12]
add eax, dword ptr [rbp - 8]
mov dword ptr [rbp - 8], eax
## BB#3: ## %for.inc
## in Loop: Header=BB0_1 Depth=1
## i = i + 1, reloaded from and stored back to its stack slot.
mov eax, dword ptr [rbp - 12]
add eax, 1
mov dword ptr [rbp - 12], eax
jmp LBB0_1
LBB0_4: ## %for.end
## Return acc in eax and restore the caller's frame pointer.
mov eax, dword ptr [rbp - 8]
pop rbp
ret
.cfi_endproc
.subsections_via_symbols
; LLVM IR for sum() with the loop replaced by a closed-form expression:
;   n < 0  -> 0
;   n >= 0 -> n * (n - 1) / 2 + n      (equal to n * (n + 1) / 2)
; No allocas, loads or stores remain, and the function is marked readnone.
; ModuleID = '2.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
; Function Attrs: norecurse nounwind readnone ssp uwtable
define i32 @sum(i32 %n) #0 {
entry:
; A negative n means the original loop body would never run: result is 0.
%cmp6 = icmp slt i32 %n, 0
br i1 %cmp6, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
; Widen n and n - 1 to 33 bits before multiplying. The extra bit keeps
; bit 32 of the product, which the shift by one moves into bit 31, so the
; truncated half-product is exact modulo 2^32.
%0 = zext i32 %n to i33
%1 = add i32 %n, -1
%2 = zext i32 %1 to i33
%3 = mul i33 %0, %2
; Halve the product, narrow back to 32 bits, then add n.
%4 = lshr i33 %3, 1
%5 = trunc i33 %4 to i32
%6 = add i32 %5, %n
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body.preheader, %entry
; Select the result by incoming edge: 0 from entry, %6 from the preheader.
%acc.0.lcssa = phi i32 [ 0, %entry ], [ %6, %for.body.preheader ]
ret i32 %acc.0.lcssa
}
attributes #0 = { norecurse nounwind readnone ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
##----------------------------------------------------------------------
## int sum(int n)  -- closed form, no loop and no memory traffic
## Syntax: GAS, Intel operand order (.intel_syntax noprefix), Mach-O.
## In:     edi = n
## Out:    eax = 0 when n is negative, otherwise n * (n - 1) / 2 + n
## Clobb:  rax, rcx, flags
##----------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.intel_syntax noprefix
.globl _sum
.align 4, 0x90
_sum: ## @sum
.cfi_startproc
## BB#0: ## %entry
## Standard frame-pointer prologue; the .cfi_* directives describe it to
## the unwinder.
push rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset rbp, -16
mov rbp, rsp
Ltmp2:
.cfi_def_cfa_register rbp
## Preload the result 0, then skip the arithmetic when n is negative
## (js is taken when test sets the sign flag).
xor eax, eax
test edi, edi
js LBB0_2
## BB#1: ## %for.body.preheader
## 32-bit register writes zero the upper halves, so rcx = n and
## rax = n - 1 are both zero-extended to 64 bits.
mov ecx, edi
lea eax, [rdi - 1]
## 64-bit product n * (n - 1), halved; only the low 32 bits are used below.
imul rax, rcx
shr rax
## eax = n * (n - 1) / 2 + n
add eax, edi
LBB0_2: ## %for.cond.cleanup
pop rbp
ret
.cfi_endproc
.subsections_via_symbols
##----------------------------------------------------------------------
## int sum(int n)  -- loop form, every variable kept in memory
## Syntax: GAS, AT&T operand order (source first, destination last).
## In:     %edi = n
## Out:    %eax = 0 + 1 + ... + n (0 when n is negative)
## Stack:  -4(%rbp)  = n
##         -8(%rbp)  = acc
##         -12(%rbp) = i
##         The slots sit below %rsp without any "subq"; this leaf
##         function relies on the System V red zone for them.
## Clobb:  %eax, flags
##----------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.globl _sum
.align 4, 0x90
_sum: ## @sum
.cfi_startproc
## BB#0:
## Standard frame-pointer prologue; the .cfi_* directives describe it to
## the unwinder.
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
## Spill n, then acc = 0 and i = 0.
movl %edi, -4(%rbp)
movl $0, -8(%rbp)
movl $0, -12(%rbp)
LBB0_1: ## =>This Inner Loop Header: Depth=1
## Loop test: compares i (%eax) against n and leaves the loop once
## i > n (signed).
movl -12(%rbp), %eax
cmpl -4(%rbp), %eax
jg LBB0_4
## BB#2: ## in Loop: Header=BB0_1 Depth=1
## acc = i + acc
movl -12(%rbp), %eax
addl -8(%rbp), %eax
movl %eax, -8(%rbp)
## BB#3: ## in Loop: Header=BB0_1 Depth=1
## i = i + 1, reloaded from and stored back to its stack slot.
movl -12(%rbp), %eax
addl $1, %eax
movl %eax, -12(%rbp)
jmp LBB0_1
LBB0_4:
## Return acc in %eax and restore the caller's frame pointer.
movl -8(%rbp), %eax
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
##----------------------------------------------------------------------
## int sum(int n)  -- closed form, no loop and no memory traffic
## Syntax: GAS, AT&T operand order (source first, destination last).
## In:     %edi = n
## Out:    %eax = 0 when n is negative, otherwise n * (n - 1) / 2 + n
## Clobb:  %rax, %rcx, flags
##----------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.globl _sum
.align 4, 0x90
_sum: ## @sum
.cfi_startproc
## BB#0:
## Standard frame-pointer prologue; the .cfi_* directives describe it to
## the unwinder.
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
## Preload the result 0, then skip the arithmetic when n is negative
## (js is taken when testl sets the sign flag).
xorl %eax, %eax
testl %edi, %edi
js LBB0_2
## BB#1: ## %.lr.ph.preheader
## 32-bit register writes zero the upper halves, so %rcx = n and
## %rax = n - 1 are both zero-extended to 64 bits.
movl %edi, %ecx
leal -1(%rdi), %eax
## 64-bit product n * (n - 1), halved; only the low 32 bits are used below.
imulq %rcx, %rax
shrq %rax
## %eax = n * (n - 1) / 2 + n
addl %edi, %eax
LBB0_2: ## %._crit_edge
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment