Skip to content

Instantly share code, notes, and snippets.

View ArchRobison's full-sized avatar

Arch D. Robison ArchRobison

  • Nvidia
  • Champaign, IL
View GitHub Profile
@ArchRobison
ArchRobison / gist:9145036
Created February 21, 2014 22:36
Sample sessions with experimental Julia with SLPVectorizer enabled.
julia> function foo( a::NTuple{4,Float32}, b::NTuple{4,Float32} )
# Return a 4-tuple whose k-th slot is a[k]+b[k]; each slot is written out explicitly.
(a[1]+b[1],a[2]+b[2],a[3]+b[3],a[4]+b[4])
end
foo (generic function with 1 method)
julia> t = NTuple{4,Float32}
(Float32,Float32,Float32,Float32)
julia> code_llvm(foo,(t,t))
@ArchRobison
ArchRobison / gist:9231536
Created February 26, 2014 15:25
Simple benchmark for https://github.com/JuliaLang/julia/pull/5355 . Try ```flog(1000, 1000000, 125000)```. With PR5355, it should also vectorize without ```@simd```.
"""
    saxpy( a, x, y )

Update `y` in place so that `y[i]` becomes `y[i] + a*x[i]` for every index `i` of `x`.

`y` must be indexable at every index of `x`. This is verified once before the loop,
because bounds checks are disabled inside it. Elements of `y` at indices that `x`
does not have are left untouched. Returns `nothing`.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`.
"""
function saxpy( a, x, y )
    inds = eachindex(x)
    # The loop body runs under @inbounds, so validate the whole index range up front.
    checkbounds(y, inds)
    @simd for i in inds
        @inbounds y[i] = y[i]+a*x[i]
    end
    return nothing
end
function flog( n, reps, tolerance )
x = rand(Float32,n)
y = rand(Float32,n)
z = copy(y)
@ArchRobison
ArchRobison / gist:9231763
Created February 26, 2014 15:35
This benchmark requires https://github.com/JuliaLang/julia/pull/5355 and building Julia with LLVM 3.4. Try flog(1000,1000,500).
function sweep( irange, jrange, U, Vx, Vy, A, B )
for j in jrange
@simd for i in irange
@inbounds begin
u = U[i,j]
Vx[i,j] += (A[i,j+1]+A[i,j])*(U[i,j+1]-u)
Vy[i,j] += (A[i+1,j]+A[i,j])*(U[i+1,j]-u)
U [i,j] = u + B[i,j]*((Vx[i,j]-Vx[i,j-1]) + (Vy[i,j]-Vy[i-1,j]))
end
end
@ArchRobison
ArchRobison / gist:9232051
Created February 26, 2014 15:47
This benchmark requires https://github.com/JuliaLang/julia/pull/5355. Try flog(1000,1000000,30)
"""
    inner( x, y )

Return the sum of `x[i]*y[i]` over every index `i` of `x`.

The accumulator starts at `zero(eltype(x))`, which is also the result when `x` is
empty. `y` must be indexable at every index of `x`; this is verified once before
the loop, because bounds checks are disabled inside it.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`.
"""
function inner( x, y )
    inds = eachindex(x)
    # The loop body runs under @inbounds, so validate the whole index range up front.
    checkbounds(y, inds)
    s = zero(eltype(x))
    @simd for i in inds
        @inbounds s += x[i]*y[i]
    end
    return s
end
function flog( n, reps, tolerance )
x = rand(Float32,n)
@ArchRobison
ArchRobison / gist:9793916
Last active August 29, 2015 13:57
Julia tuple math example without SLPVectorizer
$ cat foo.jl
"""
    add( a, b )

Return the slot-by-slot sum of two 4-element `Float32` tuples as a new 4-tuple.
"""
function add( a::NTuple{4,Float32}, b::NTuple{4,Float32} )
    a1, a2, a3, a4 = a
    b1, b2, b3, b4 = b
    return (a1+b1, a2+b2, a3+b3, a4+b4)
end
"""
    mul( a, b )

Return the slot-by-slot product of two 4-element `Float32` tuples as a new 4-tuple.
"""
function mul( a::NTuple{4,Float32}, b::NTuple{4,Float32} )
    a1, a2, a3, a4 = a
    b1, b2, b3, b4 = b
    return (a1*b1, a2*b2, a3*b3, a4*b4)
end
function madd( a::NTuple{4,Float32}, b::NTuple{4,Float32}, c::NTuple{4,Float32} )
@ArchRobison
ArchRobison / gist:9808976
Created March 27, 2014 14:38
Example using Julia and LLVM trunk. SLPVectorizer appears to have generated superfluous extracts.
$ cat vadd.jl
"""
    vadd( a, b )

Return the slot-by-slot sum of two 4-element `Float64` tuples as a new 4-tuple.
"""
function vadd( a::NTuple{4,Float64}, b::NTuple{4,Float64} )
    a1, a2, a3, a4 = a
    b1, b2, b3, b4 = b
    return (a1+b1, a2+b2, a3+b3, a4+b4)
end
function vadd_one!(arr::Array{Float64, 1})
len = length(arr) # assuming len multiple of 4
one = (1.0, 1.0, 1.0, 1.0)
@inbounds for i = 1:4:len
inp = (arr[i], arr[i+1], arr[i+2], arr[i+3])
@ArchRobison
ArchRobison / gist:9809496
Created March 27, 2014 15:01
Example of @simd and 32-bit floating point.
$ cat v.jl
"""
    vadd_one!(arr)

Add 1 to every element of the `Float32` vector `arr`, in place. Returns `nothing`.
"""
function vadd_one!(arr::Array{Float32, 1})
    @simd for k in eachindex(arr)
        @inbounds arr[k] = arr[k] + 1
    end
    return nothing
end
# Print the LLVM IR generated for vadd_one! when called with a one-dimensional Float32 array.
code_llvm(vadd_one!,(Array{Float32, 1},))
$ julia v.jl
@ArchRobison
ArchRobison / gist:9944313
Created April 2, 2014 22:13
Excerpt of Julia LLVM code from LLVM trunk (pre-3.5 LLVM)
L.preheader: ; preds = %top
%.op = add i64 %0, 1, !dbg !9
%8 = select i1 %2, i64 %.op, i64 1, !dbg !9
%9 = sub i64 %8, %5, !dbg !9
%xtraiter = and i64 %9, 7
switch i64 %xtraiter, label %L.unr [
i64 0, label %L.preheader.split
i64 1, label %L.unr22
i64 2, label %L.unr18
i64 3, label %L.unr14
@ArchRobison
ArchRobison / gist:a7c4cf396c5332a4ddd2
Created May 1, 2014 14:59
Patch to Julia sources to quiet Intel(R) VTune(TM) Amplifier warning about "signal stack".
--- a/src/init.c
+++ b/src/init.c
@@ -64,6 +64,9 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
#include <sched.h> // for setting CPU affinity
#endif
+#undef SIGSTKSZ
+#define SIGSTKSZ (64*1024)
+
char *julia_home = NULL;
::::::::::::::
test1.jl
::::::::::::::
# Example from early discussion.
# Has constant lower bound, expression as upper bound, and no reductions.
"""
    test1( a, x, y )

Update `y` in place so that `y[i]` becomes `y[i] + a*x[i]` for `i` in `1:length(x)`.

Both `x` and `y` must be indexable over `1:length(x)`. This is verified once before
the loop, because bounds checks are disabled inside it. Returns `nothing`.

# Throws
- `BoundsError` if `1:length(x)` is not a valid index range for `x` or for `y`.
"""
function test1( a, x, y )
    # The loop body runs under @inbounds, so validate the whole index range up front.
    checkbounds(x, 1:length(x))
    checkbounds(y, 1:length(x))
    # The loop header is deliberately kept as `1:length(x)`: a constant lower bound
    # with an expression as the upper bound.
    @simd for i=1:length(x)
        @inbounds y[i] = y[i]+a*x[i]
    end
    return nothing
end