Skip to content

Instantly share code, notes, and snippets.

@pczarn
Last active August 29, 2015 14:12
Show Gist options
  • Save pczarn/62a953ea9ad40d264487 to your computer and use it in GitHub Desktop.
Save pczarn/62a953ea9ad40d264487 to your computer and use it in GitHub Desktop.
An attempt at optimization. The code merges slices contiguous in memory.
#include "stddef.h"
#include "stdio.h"
void __attribute__ ((noinline))
print(char *state, size_t state_len) {
for(size_t i = 0; i < state_len; i++) {
printf("%d ", (int)state[i]);
}
printf("\n");
}
int main(int argc, char const *argv[])
{
char val_slice[] = {12, 23, 34, 45, 12, 23, 34, 45};
size_t val_slice_len = sizeof(val_slice);
__asm__ volatile("" : "+m"(val_slice), "+m"(val_slice_len));
char *val_ptr = &val_slice[0];
char *state = val_ptr;
size_t state_len = 0;
while(val_slice_len--) {
state_len += 1;
val_ptr++;
if(&state[state_len] != val_ptr) {
// read.
// continue
state = val_ptr;
state_len = 0;
}
}
print(state, state_len);
return 0;
}
; ModuleID = 'c-myhash.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [4 x i8] c"%d \00", align 1
; Function Attrs: noinline nounwind uwtable
define void @print(i8* nocapture readonly %state, i64 %state_len) #0 {
%1 = icmp eq i64 %state_len, 0
br i1 %1, label %._crit_edge, label %.lr.ph.preheader
.lr.ph.preheader: ; preds = %0
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
%i.01 = phi i64 [ %6, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%2 = getelementptr inbounds i8* %state, i64 %i.01
%3 = load i8* %2, align 1, !tbaa !1
%4 = sext i8 %3 to i32
%5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %4) #3
%6 = add i64 %i.01, 1
%exitcond = icmp eq i64 %6, %state_len
br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
._crit_edge.loopexit: ; preds = %.lr.ph
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %0
%putchar = tail call i32 @putchar(i32 10) #3
ret void
}
; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...) #1
; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** nocapture readnone %argv) #2 {
%val_slice = alloca i64, align 8
%tmpcast = bitcast i64* %val_slice to [8 x i8]*
%val_slice_len = alloca i64, align 8
store i64 3252187221979174668, i64* %val_slice, align 8
store i64 8, i64* %val_slice_len, align 8, !tbaa !4
call void asm sideeffect "", "=*r,=*m,0,*m,~{dirflag},~{fpsr},~{flags}"([8 x i8]* %tmpcast, i64* %val_slice_len, i64 3252187221979174668, i64* %val_slice_len) #3, !srcloc !6
%1 = bitcast i64* %val_slice to i8*
%2 = load i64* %val_slice_len, align 8, !tbaa !4
%3 = add i64 %2, -1
store i64 %3, i64* %val_slice_len, align 8, !tbaa !4
%4 = icmp eq i64 %2, 0
br i1 %4, label %24, label %.lr.ph.preheader
.lr.ph.preheader: ; preds = %0
%xtraiter = and i64 %2, 1
%lcmp.mod = icmp ne i64 %xtraiter, 0
%lcmp.overflow = icmp eq i64 %2, 0
%lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
br i1 %lcmp.or, label %.lr.ph.unr, label %.lr.ph.preheader.split
.lr.ph.unr: ; preds = %.lr.ph.preheader
%5 = add i64 0, 1
%6 = getelementptr inbounds i8* %1, i64 1
%7 = getelementptr inbounds i8* %1, i64 %5
%8 = icmp eq i8* %7, %6
%state.0..unr = select i1 %8, i8* %1, i8* %6
%..unr = select i1 %8, i64 %5, i64 0
%9 = add i64 %3, -1
%10 = icmp eq i64 %3, 0
br label %.lr.ph.preheader.split
.lr.ph.preheader.split: ; preds = %.lr.ph.unr, %.lr.ph.preheader
%..lcssa.unr = phi i64 [ 0, %.lr.ph.preheader ], [ %..unr, %.lr.ph.unr ]
%state.0..lcssa.unr = phi i8* [ null, %.lr.ph.preheader ], [ %state.0..unr, %.lr.ph.unr ]
%.unr = phi i64 [ %3, %.lr.ph.preheader ], [ %9, %.lr.ph.unr ]
%state_len.03.unr = phi i64 [ 0, %.lr.ph.preheader ], [ %..unr, %.lr.ph.unr ]
%state.02.unr = phi i8* [ %1, %.lr.ph.preheader ], [ %state.0..unr, %.lr.ph.unr ]
%val_ptr.01.unr = phi i8* [ %1, %.lr.ph.preheader ], [ %6, %.lr.ph.unr ]
%11 = icmp ult i64 %2, 2
br i1 %11, label %._crit_edge, label %.lr.ph.preheader.split.split
.lr.ph.preheader.split.split: ; preds = %.lr.ph.preheader.split
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader.split.split
%12 = phi i64 [ %.unr, %.lr.ph.preheader.split.split ], [ %22, %.lr.ph ]
%state_len.03 = phi i64 [ %state_len.03.unr, %.lr.ph.preheader.split.split ], [ %..1, %.lr.ph ]
%state.02 = phi i8* [ %state.02.unr, %.lr.ph.preheader.split.split ], [ %state.0..1, %.lr.ph ]
%val_ptr.01 = phi i8* [ %val_ptr.01.unr, %.lr.ph.preheader.split.split ], [ %19, %.lr.ph ]
%13 = add i64 %state_len.03, 1
%14 = getelementptr inbounds i8* %val_ptr.01, i64 1
%15 = getelementptr inbounds i8* %state.02, i64 %13
%16 = icmp eq i8* %15, %14
%state.0. = select i1 %16, i8* %state.02, i8* %14
%. = select i1 %16, i64 %13, i64 0
%17 = add i64 %12, -1
%18 = add i64 %., 1
%19 = getelementptr inbounds i8* %14, i64 1
%20 = getelementptr inbounds i8* %state.0., i64 %18
%21 = icmp eq i8* %20, %19
%state.0..1 = select i1 %21, i8* %state.0., i8* %19
%..1 = select i1 %21, i64 %18, i64 0
%22 = add i64 %17, -1
%23 = icmp eq i64 %17, 0
br i1 %23, label %._crit_edge.unr-lcssa, label %.lr.ph
._crit_edge.unr-lcssa: ; preds = %.lr.ph
%..lcssa.ph = phi i64 [ %..1, %.lr.ph ]
%state.0..lcssa.ph = phi i8* [ %state.0..1, %.lr.ph ]
br label %._crit_edge
._crit_edge: ; preds = %.lr.ph.preheader.split, %._crit_edge.unr-lcssa
%..lcssa = phi i64 [ %..lcssa.unr, %.lr.ph.preheader.split ], [ %..lcssa.ph, %._crit_edge.unr-lcssa ]
%state.0..lcssa = phi i8* [ %state.0..lcssa.unr, %.lr.ph.preheader.split ], [ %state.0..lcssa.ph, %._crit_edge.unr-lcssa ]
store i64 -1, i64* %val_slice_len, align 8, !tbaa !4
br label %24
; <label>:24 ; preds = %._crit_edge, %0
%state_len.0.lcssa = phi i64 [ %..lcssa, %._crit_edge ], [ 0, %0 ]
%state.0.lcssa = phi i8* [ %state.0..lcssa, %._crit_edge ], [ %1, %0 ]
call void @print(i8* %state.0.lcssa, i64 %state_len.0.lcssa)
ret i32 0
}
; Function Attrs: nounwind
declare i32 @putchar(i32) #3
attributes #0 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.5.0 (tags/RELEASE_350/final)"}
!1 = metadata !{metadata !2, metadata !2, i64 0}
!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
!3 = metadata !{metadata !"Simple C/C++ TBAA"}
!4 = metadata !{metadata !5, metadata !5, i64 0}
!5 = metadata !{metadata !"long", metadata !2, i64 0}
!6 = metadata !{i32 509}
.text
.file "c-myhash.c"
.globl print
.align 16, 0x90
.type print,@function
print: # @print
.cfi_startproc
# BB#0:
pushq %rbp
.Ltmp0:
.cfi_def_cfa_offset 16
.Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp2:
.cfi_def_cfa_register %rbp
subq $32, %rsp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq $0, -24(%rbp)
.LBB0_1: # =>This Inner Loop Header: Depth=1
movq -24(%rbp), %rax
cmpq -16(%rbp), %rax
jae .LBB0_4
# BB#2: # in Loop: Header=BB0_1 Depth=1
movabsq $.L.str, %rdi
movq -24(%rbp), %rax
movq -8(%rbp), %rcx
movsbl (%rcx,%rax), %esi
movb $0, %al
callq printf
movl %eax, -28(%rbp) # 4-byte Spill
# BB#3: # in Loop: Header=BB0_1 Depth=1
movq -24(%rbp), %rax
addq $1, %rax
movq %rax, -24(%rbp)
jmp .LBB0_1
.LBB0_4:
movabsq $.L.str1, %rdi
movb $0, %al
callq printf
movl %eax, -32(%rbp) # 4-byte Spill
addq $32, %rsp
popq %rbp
retq
.Ltmp3:
.size print, .Ltmp3-print
.cfi_endproc
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0:
pushq %rbp
.Ltmp4:
.cfi_def_cfa_offset 16
.Ltmp5:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp6:
.cfi_def_cfa_register %rbp
subq $64, %rsp
movl $0, -4(%rbp)
movl %edi, -8(%rbp)
movq %rsi, -16(%rbp)
movq .Lmain.val_slice, %rsi
movq %rsi, -24(%rbp)
movq $8, -32(%rbp)
#APP
#NO_APP
leaq -24(%rbp), %rsi
movq %rsi, -40(%rbp)
movq -40(%rbp), %rsi
movq %rsi, -48(%rbp)
movq $0, -56(%rbp)
.LBB1_1: # =>This Inner Loop Header: Depth=1
movq -32(%rbp), %rax
movq %rax, %rcx
addq $-1, %rcx
movq %rcx, -32(%rbp)
cmpq $0, %rax
je .LBB1_5
# BB#2: # in Loop: Header=BB1_1 Depth=1
movq -56(%rbp), %rax
addq $1, %rax
movq %rax, -56(%rbp)
movq -40(%rbp), %rax
addq $1, %rax
movq %rax, -40(%rbp)
movq -56(%rbp), %rax
movq -48(%rbp), %rcx
addq %rax, %rcx
cmpq -40(%rbp), %rcx
je .LBB1_4
# BB#3: # in Loop: Header=BB1_1 Depth=1
movq -40(%rbp), %rax
movq %rax, -48(%rbp)
movq $0, -56(%rbp)
.LBB1_4: # in Loop: Header=BB1_1 Depth=1
jmp .LBB1_1
.LBB1_5:
movq -48(%rbp), %rdi
movq -56(%rbp), %rsi
callq print
movl $0, %eax
addq $64, %rsp
popq %rbp
retq
.Ltmp7:
.size main, .Ltmp7-main
.cfi_endproc
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%d "
.size .L.str, 4
.type .L.str1,@object # @.str1
.L.str1:
.asciz "\n"
.size .L.str1, 2
.type .Lmain.val_slice,@object # @main.val_slice
.section .rodata.cst8,"aM",@progbits,8
.Lmain.val_slice:
.ascii "\f\027\"-\f\027\"-"
.size .Lmain.val_slice, 8
.ident "clang version 3.5.0 (tags/RELEASE_350/final)"
.section ".note.GNU-stack","",@progbits
extern crate test;
use test::black_box;
struct MySlice {
start: *const u8,
len: uint,
end: *const u8,
}
fn main() {
let mut val_slice: &[u8] = &[12, 23, 34, 45];
let mut val = &mut val_slice;
unsafe { asm!("" : "+r"(val) ::: "volatile"); }
let mut state: Option<MySlice> = None;
let mut val_ptr = val.as_ptr();
for _ in range(0, val.len()) { unsafe {
match state {
Some(ref mut current) => {
if val_ptr == current.end {
current.len += 1;
current.end = current.end.offset(1);
} else {
black_box(&*current);
*current = MySlice {
start: val_ptr,
len: 1,
end: val_ptr.offset(1)
};
}
}
None => {
state = Some(MySlice {
start: val_ptr,
len: 1,
end: val_ptr.offset(1)
});
}
}
val_ptr = val_ptr.offset(1);
} }
black_box(&state);
}
extern crate test;
use test::black_box;
fn main() {
let mut val: &[u8] = &[12, 23, 34, 45];
let mut valr = &val;
unsafe { asm!("" : "+r"(valr) ::: "volatile"); }
let mut state: Option<&[u8]> = None;
for v in val.iter() { unsafe {
let next_sli = slice::from_raw_buf(mem::transmute(&v), mem::size_of::<u8>());
match state {
Some(ref mut current_sli) => {
let current_end = current_sli.as_ptr()
.offset(current_sli.len() as int);
if next_sli.as_ptr() == current_end {
*current_sli = slice::from_raw_buf(mem::transmute(&current_sli.as_ptr()),
current_sli.len() + next_sli.len());
} else {
black_box(&*current_sli);
*current_sli = next_sli;
}
}
None => {
state = Some(next_sli);
}
}
} }
black_box(&state);
}
; Function Attrs: nounwind uwtable
define internal void @_ZN4main20hb8fef18bfa5eff4bfubE() unnamed_addr #3 {
entry-block:
%dummy.i10 = alloca %"enum.core::option::Option<[&'static [u8]]>[#3]"*, align 8
%dummy.i = alloca { i8*, i64 }*, align 8
%val = alloca { i8*, i64 }, align 8
%0 = alloca [4 x i8], align 1
%state = alloca %"enum.core::option::Option<[&'static [u8]]>[#3]", align 8
%.sub = getelementptr inbounds [4 x i8]* %0, i64 0, i64 0
%1 = bitcast { i8*, i64 }* %val to i8*
call void @llvm.lifetime.start(i64 16, i8* %1)
call void @llvm.lifetime.start(i64 1, i8* %.sub)
store i8 12, i8* %.sub, align 1
%2 = getelementptr inbounds [4 x i8]* %0, i64 0, i64 1
store i8 23, i8* %2, align 1
%3 = getelementptr inbounds [4 x i8]* %0, i64 0, i64 2
store i8 34, i8* %3, align 1
%4 = getelementptr inbounds [4 x i8]* %0, i64 0, i64 3
store i8 45, i8* %4, align 1
%5 = getelementptr inbounds { i8*, i64 }* %val, i64 0, i32 0
store i8* %.sub, i8** %5, align 8
%6 = getelementptr inbounds { i8*, i64 }* %val, i64 0, i32 1
store i64 4, i64* %6, align 8
%7 = call { i8*, i64 }* asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"({ i8*, i64 }* %val) #5, !srcloc !0
%8 = bitcast %"enum.core::option::Option<[&'static [u8]]>[#3]"* %state to i8*
call void @llvm.lifetime.start(i64 16, i8* %8)
%9 = getelementptr inbounds %"enum.core::option::Option<[&'static [u8]]>[#3]"* %state, i64 0, i32 0, i32 0
store i8* null, i8** %9, align 8
%10 = load i8** %5, align 8
%11 = load i64* %6, align 8
%12 = getelementptr inbounds i8* %10, i64 %11
%13 = icmp eq i64 %11, 0
br i1 %13, label %for_exit, label %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit.lr.ph"
"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit.lr.ph": ; preds = %entry-block
%14 = getelementptr inbounds %"enum.core::option::Option<[&'static [u8]]>[#3]"* %state, i64 0, i32 0, i32 1
%15 = getelementptr inbounds %"enum.core::option::Option<[&'static [u8]]>[#3]"* %state, i64 0, i32 0
%16 = bitcast { i8*, i64 }** %dummy.i to i8*
br label %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit"
for_exit.loopexit: ; preds = %for_loopback.backedge, %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit"
br label %for_exit
for_exit: ; preds = %for_exit.loopexit, %entry-block
%17 = bitcast %"enum.core::option::Option<[&'static [u8]]>[#3]"** %dummy.i10 to i8*
call void @llvm.lifetime.start(i64 8, i8* %17) #5
store %"enum.core::option::Option<[&'static [u8]]>[#3]"* %state, %"enum.core::option::Option<[&'static [u8]]>[#3]"** %dummy.i10, align 8
call void asm "", "r,~{dirflag},~{fpsr},~{flags}"(%"enum.core::option::Option<[&'static [u8]]>[#3]"** %dummy.i10) #5, !srcloc !1
call void @llvm.lifetime.end(i64 8, i8* %17) #5
call void @llvm.lifetime.end(i64 16, i8* %8)
call void @llvm.lifetime.end(i64 16, i8* %1)
call void @llvm.lifetime.end(i64 4, i8* %.sub)
ret void
"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit": ; preds = %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit.lr.ph", %for_loopback.backedge
%18 = phi i64 [ undef, %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit.lr.ph" ], [ %27, %for_loopback.backedge ]
%19 = phi i8* [ null, %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit.lr.ph" ], [ %28, %for_loopback.backedge ]
%20 = phi i8* [ %10, %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit.lr.ph" ], [ %21, %for_loopback.backedge ]
%21 = getelementptr inbounds i8* %20, i64 1
%22 = icmp eq i8* %20, null
br i1 %22, label %for_exit.loopexit, label %for_body
for_body: ; preds = %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit"
%23 = icmp eq i8* %19, null
br i1 %23, label %match_else, label %match_case
match_else: ; preds = %for_body
store i8* %20, i8** %9, align 8
store i64 1, i64* %14, align 8
br label %for_loopback.backedge
match_case: ; preds = %for_body
%24 = getelementptr inbounds i8* %19, i64 %18
%25 = icmp eq i8* %20, %24
br i1 %25, label %then-block-5171-, label %else-block
then-block-5171-: ; preds = %match_case
%26 = add i64 %18, 1
store i8* %19, i8** %9, align 8
store i64 %26, i64* %14, align 8
br label %for_loopback.backedge
for_loopback.backedge: ; preds = %then-block-5171-, %else-block, %match_else
%27 = phi i64 [ %26, %then-block-5171- ], [ 1, %else-block ], [ 1, %match_else ]
%28 = phi i8* [ %19, %then-block-5171- ], [ %20, %else-block ], [ %20, %match_else ]
%29 = icmp eq i8* %21, %12
br i1 %29, label %for_exit.loopexit, label %"_ZN5slice64Iter$LT$$u{27}a$C$$u{20}T$GT$.Iterator$LT$$BP$$u{27}a$u{20}T$GT$4next20h5717300731006890093E.exit"
else-block: ; preds = %match_case
call void @llvm.lifetime.start(i64 8, i8* %16) #5
store { i8*, i64 }* %15, { i8*, i64 }** %dummy.i, align 8
call void asm "", "r,~{dirflag},~{fpsr},~{flags}"({ i8*, i64 }** %dummy.i) #5, !srcloc !1
call void @llvm.lifetime.end(i64 8, i8* %16) #5
store i8* %20, i8** %9, align 8
store i64 1, i64* %14, align 8
br label %for_loopback.backedge
}
define i64 @main(i64, i8**) unnamed_addr #4 {
top:
%2 = tail call i64 @_ZN2rt10lang_start20h64d42861c3bac6715ozE(i8* bitcast (void ()* @_ZN4main20hb8fef18bfa5eff4bfubE to i8*), i64 %0, i8** %1)
ret i64 %2
}
declare i64 @_ZN2rt10lang_start20h64d42861c3bac6715ozE(i8*, i64, i8**) unnamed_addr #4
attributes #0 = { nounwind "split-stack" }
attributes #1 = { inlinehint nounwind uwtable "split-stack" }
attributes #2 = { alwaysinline nounwind readonly uwtable "split-stack" }
attributes #3 = { nounwind uwtable "split-stack" }
attributes #4 = { "split-stack" }
attributes #5 = { nounwind }
!0 = metadata !{i32 157}
!1 = metadata !{i32 432}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment