Skip to content

Instantly share code, notes, and snippets.

View minjang's full-sized avatar

Minjang Kim minjang

  • Facebook
  • Menlo Park, CA
View GitHub Profile
@minjang
minjang / dlmalloc.c
Created December 10, 2016 19:43
Version 2.8.6 Wed Aug 29 06:57:58 2012
/*
This is a version (aka dlmalloc) of malloc/free/realloc written by
Doug Lea and released to the public domain, as explained at
http://creativecommons.org/publicdomain/zero/1.0/ Send questions,
comments, complaints, performance data, etc to dl@cs.oswego.edu
* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea
Note: There may be an updated version of this malloc obtainable at
ftp://gee.cs.oswego.edu/pub/misc/malloc.c
Check before installing!
@minjang
minjang / swap-mem2reg-inline-instcombine.ll
Created November 29, 2016 05:36
Fully optimized code
; test(): obtains two values from @get, passes them to @process in the order
; they were obtained, and returns the result of @process.
define i32 @_Z4testv() #0 {
entry:
%a = call i32 @get()
%b = call i32 @get()
; The call result needs its own name: in SSA form each local value may be
; defined only once per function, so it cannot reuse %b.
%result = call i32 @process(i32 %a, i32 %b)
ret i32 %result
}
; Step-by-step algebraic simplification of %xor1 down to %b.
%a = call i32 @get()
%b = call i32 @get()
%xor = xor i32 %b, %a
%xor1 = xor i32 %a, %xor
; => %xor1 = %a ^ %xor
; => %xor1 = %a ^ (%b ^ %a) ; a ^ (b ^ a) = b ^ 0
; => %xor1 = %b ^ 0 ; b ^ 0 = b
; => %xor1 = %b ; afterwards, every use of %xor1 is replaced with %b
; => %xor1 is deleted
@minjang
minjang / swap-mem2reg-inline-mem2reg.ll
Created November 29, 2016 03:17
After mem2reg, inline, and mem2reg optimizations
; test() after the mem2reg, inline, and mem2reg passes: only the xor chain
; remains between the two get() calls and the process() call.
define i32 @_Z4testv() #0 {
entry:
%call = call i32 @_Z3getv() ; a = get();
%call1 = call i32 @_Z3getv() ; b = get();
; temp_swap(a, b) has disappeared
%xor.i = xor i32 %call1, %call ; xor_swap(a, b) has turned into xor_swap(b, a)
%xor1.i = xor i32 %call, %xor.i
%xor2.i = xor i32 %xor.i, %xor1.i
%call2 = call i32 @_Z7processii(i32 %xor2.i, i32 %xor1.i)
ret i32 %call2
@minjang
minjang / swap-mem2reg-gvn.ll
Last active November 28, 2016 23:03
Applied mem2reg and gvn pass
; temp_swap<int>: exchanges the two i32 values pointed to by %a and %b.
; Both values are loaded first (%0 from %a, %1 from %b) and then stored
; crosswise. The trailing columns sketch, per temporary, where it is
; written (W) and where it is read (R).
define linkonce_odr void @void temp_swap<int>(int&, int&)(i32* %a, i32* %b) #2 {
entry: ; %0
%0 = load i32, i32* %a, align 4 ; W %1
%1 = load i32, i32* %b, align 4 ; | W
store i32 %1, i32* %a, align 4 ; R |
store i32 %0, i32* %b, align 4 ; R
ret void
}
define linkonce_odr void @void xor_swap<int>(int&, int&)(i32* %a, i32* %b) #2 {
@minjang
minjang / swap-mem2reg-gvn-inline.ll
Last active November 29, 2016 01:00
After mem2reg, gvn, and inline optimizations
define i32 @_Z4testv() #0 {
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%call = call i32 @_Z3getv() ; int a = get();
store i32 %call, i32* %a, align 4
%call1 = call i32 @_Z3getv() ; int b = get();
store i32 %call1, i32* %b, align 4
%0 = load i32, i32* %a, align 4 ; temp_swap(a, b);
%1 = load i32, i32* %b, align 4
@minjang
minjang / gist:89ee4cd6a040dfda0d7dc23603b3c8c3
Created November 28, 2016 10:41
LLVM -O3 optimization passes
$ ./opt -O3 -debug-pass=Structure -o swap.opt.bc swap.bc
Pass Arguments: -tti -tbaa -scoped-noalias -assumption-cache-tracker -targetlibinfo -verify -simplifycfg -domtree -sroa -early-cse -basicaa -aa -memdep -memoryssa -gvn-hoist -lower-expect
Target Transform Information
Type-Based Alias Analysis
Scoped NoAlias Alias Analysis
Assumption Cache Tracker
Target Library Information
FunctionPass Manager
Module Verifier
Simplify the CFG
@minjang
minjang / swap-only-mem2reg.ll
Last active November 28, 2016 19:42
Applied only mem2reg pass
; temp_swap<int>: exchanges the two i32 values pointed to by %a and %b.
; Both values are loaded first (%0 from %a, %1 from %b) and then stored
; crosswise. The trailing columns sketch, per temporary, where it is
; written (W) and where it is read (R).
define linkonce_odr void @void temp_swap<int>(int&, int&)(i32* %a, i32* %b) #2 {
entry: ; %0
%0 = load i32, i32* %a, align 4 ; W %1
%1 = load i32, i32* %b, align 4 ; | W
store i32 %1, i32* %a, align 4 ; R |
store i32 %0, i32* %b, align 4 ; R
ret void
}
define linkonce_odr void @void xor_swap<int>(int&, int&)(i32* %a, i32* %b) #2 {
@minjang
minjang / swap-unoptimized.ll
Last active November 28, 2016 19:36
Unoptimized LLVM bitcode of the swap example
; ModuleID = 'swap.bc'
source_filename = "swap.cpp"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
; Function Attrs: ssp uwtable
define i32 @_Z4testv() #0 {
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
bool NStrideDetector::IsMergable(int64_t ea, int32_t /*size*/, const TRAINING& t) const
{
//
// Note: be careful of the sign for % operation
//
int64_t abs_stride = (t.distance > 0 ? t.distance : -t.distance);
int64_t lb = (t.low - abs_stride < t.low ? t.low - abs_stride : t.low/*0*/);
int64_t ub = (t.high + abs_stride > t.high ? t.high + abs_stride : t.high/*-1*/);
if (/*(t.size == size) &&*/ (((t.low - ea) % abs_stride) == 0) &&