Skip to content

Instantly share code, notes, and snippets.

@quinnj
Last active December 15, 2015 23:29
Show Gist options
  • Save quinnj/5340631 to your computer and use it in GitHub Desktop.
Save quinnj/5340631 to your computer and use it in GitHub Desktop.
* Implemented the spectral-norm benchmark from: http://benchmarksgame.alioth.debian.org/u32/performance.php?test=spectralnorm * From my timings, this implementation consistently ran in 10-12s, putting us ahead of all other dynamic languages (and represents a significant speed up compared to the previous version at https://github.com/JuliaLang/ju…
#
# The Computer Language Benchmarks Game
# spectral-norm benchmark
# http://shootout.alioth.debian.org/u32/performance.php?test=spectralnorm
#
# Based on the Javascript program
#
include("timing.jl")
A(i,j) = 1.0 / ((i+j)*(i+j+1.0)/2.0+i+1.0)
function Au(u,w)
n = length(u)
for i = 1:n, j = 1:n
j == 1 && (w[i] = 0.0)
w[i] += A(i-1,j-1) * u[j]
end
end
function Atu(w,v)
n = length(w)
for i = 1:n, j = 1:n
j == 1 && (v[i] = 0.0)
v[i] += A(j-1,i-1) * w[j]
end
end
function AtAu(u,v,w)
Au(u,w)
Atu(w,v)
end
function approximate(n)
u = ones(Float64,n)
v = zeros(Float64,n)
w = zeros(Float64,n)
vv = vBv = 0.0
for i = 1:10
AtAu(u,v,w)
AtAu(v,u,w)
end
for i = 1:n
vBv += u[i]*v[i]
vv += v[i]*v[i]
end
return sqrt(vBv/vv)
end
function spectralnorm(N)
@printf("%.09f\n", approximate(N))
end
if length(ARGS) >= 1
N = int(ARGS[1])
else
N = 100
end
spectralnorm(N)
# @assert round(spectralnorm(100),9) == 1.274219991
# @timeit spectralnorm(500) "spectralnorm(n=500)"
# @timeit spectralnorm(3000) "spectralnorm(n=3000)"
# @timeit spectralnorm(5500) "spectralnorm(n=5500)"
# macro timeit(ex,name)
# @gensym t i
# quote
# $t = Inf
# for $i=1:5
# $t = min($t, @elapsed $ex)
# end
# @printf("julia, %s: best of 5 took %.1f ms\n", $name, $t*1000)
# end
# end
@johnmyleswhite
Copy link

This looks pretty slick. The only thing I could imagine (maybe) giving a speedup would be inlining A(i,j) = 1.0 / ((i+j)*(i+j+1.0)/2.0+i+1.0).

@StefanKarpinski
Copy link

It's not impossible that it is already getting inlined automatically.

@StefanKarpinski
Copy link

Here's the disassemble output, which is not easy to read:

; julia> disassemble(Au,(Vector{Float64},Vector{Float64}))

define %jl_value_t* @julia_Au(%jl_value_t*, %jl_value_t**, i32) {
top:
  %3 = load %jl_value_t** %1, !dbg !5508
  %4 = getelementptr inbounds %jl_value_t* %3, i32 0, i32 0, !dbg !5515
  %5 = getelementptr %jl_value_t** %4, i64 2, !dbg !5515
  %6 = load %jl_value_t** %5, !dbg !5515
  %7 = ptrtoint %jl_value_t* %6 to i64, !dbg !5515
  %8 = icmp slt i64 %7, 1, !dbg !5516
  br i1 %8, label %L15, label %L1.preheader.lr.ph, !dbg !5516

L1.preheader.lr.ph:                               ; preds = %top
  %9 = getelementptr %jl_value_t** %1, i64 1, !dbg !5508
  %10 = load %jl_value_t** %9, !dbg !5508
  %11 = getelementptr inbounds %jl_value_t* %10, i32 0, i32 0, !dbg !5517
  %12 = getelementptr %jl_value_t** %11, i64 2, !dbg !5517
  %13 = getelementptr %jl_value_t** %11, i64 1, !dbg !5517
  %14 = getelementptr %jl_value_t** %4, i64 1, !dbg !5518
  br label %if2.lr.ph, !dbg !5516

if2.lr.ph:                                        ; preds = %L13, %L1.preheader.lr.ph
  %"#s1.017" = phi i64 [ 1, %L1.preheader.lr.ph ], [ %54, %L13 ]
  %15 = add i64 %"#s1.017", -1, !dbg !5517
  %16 = sitofp i64 %15 to double, !dbg !5518
  br label %if2, !dbg !5516

if2:                                              ; preds = %if2.lr.ph, %idxend10
  %"#s3.016" = phi i64 [ 1, %if2.lr.ph ], [ %52, %idxend10 ]
  %17 = icmp eq i64 %"#s3.016", 1, !dbg !5517
  br i1 %17, label %if3, label %L4, !dbg !5517

if3:                                              ; preds = %if2
  %18 = load %jl_value_t** %12, !dbg !5517
  %19 = ptrtoint %jl_value_t* %18 to i64, !dbg !5517
  %20 = icmp ult i64 %15, %19, !dbg !5517
  br i1 %20, label %idxend, label %oob, !dbg !5517

oob:                                              ; preds = %if3
  %21 = load %jl_value_t** @jl_bounds_exception, !dbg !5517
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %21, i32 4), !dbg !5517
  unreachable, !dbg !5517

idxend:                                           ; preds = %if3
  %22 = load %jl_value_t** %13, !dbg !5517
  %23 = bitcast %jl_value_t* %22 to double*, !dbg !5517
  %24 = getelementptr double* %23, i64 %15, !dbg !5517
  store double 0.000000e+00, double* %24, !dbg !5517
  br label %L4, !dbg !5517

L4:                                               ; preds = %if2, %idxend
  %25 = add i64 %"#s3.016", -1, !dbg !5518
  %26 = load %jl_value_t** %12, !dbg !5518
  %27 = ptrtoint %jl_value_t* %26 to i64, !dbg !5518
  %28 = icmp ult i64 %15, %27, !dbg !5518
  br i1 %28, label %idxend6, label %oob5, !dbg !5518

oob5:                                             ; preds = %L4
  %29 = load %jl_value_t** @jl_bounds_exception, !dbg !5518
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %29, i32 5), !dbg !5518
  unreachable, !dbg !5518

idxend6:                                          ; preds = %L4
  %30 = load %jl_value_t** %5, !dbg !5518
  %31 = ptrtoint %jl_value_t* %30 to i64, !dbg !5518
  %32 = icmp ult i64 %25, %31, !dbg !5518
  br i1 %32, label %idxend10, label %oob7, !dbg !5518

oob7:                                             ; preds = %idxend6
  %33 = load %jl_value_t** @jl_bounds_exception, !dbg !5518
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %33, i32 5), !dbg !5518
  unreachable, !dbg !5518

idxend10:                                         ; preds = %idxend6
  %34 = add i64 %25, %15, !dbg !5518
  %35 = sitofp i64 %34 to double, !dbg !5518
  %36 = fadd double %35, 1.000000e+00, !dbg !5518
  %37 = fmul double %35, %36, !dbg !5518
  %38 = load %jl_value_t** %14, !dbg !5518
  %39 = fmul double %37, 5.000000e-01, !dbg !5518
  %40 = bitcast %jl_value_t* %38 to double*, !dbg !5518
  %41 = fadd double %16, %39, !dbg !5518
  %42 = load %jl_value_t** %13, !dbg !5518
  %43 = getelementptr double* %40, i64 %25, !dbg !5518
  %44 = fadd double %41, 1.000000e+00, !dbg !5518
  %45 = bitcast %jl_value_t* %42 to double*, !dbg !5518
  %46 = load double* %43, !dbg !5518
  %47 = fdiv double 1.000000e+00, %44, !dbg !5518
  %48 = getelementptr double* %45, i64 %15, !dbg !5518
  %49 = fmul double %46, %47, !dbg !5518
  %50 = load double* %48, !dbg !5518
  %51 = fadd double %50, %49, !dbg !5518
  store double %51, double* %48, !dbg !5518
  %52 = add i64 %"#s3.016", 1, !dbg !5518
  %53 = icmp sgt i64 %52, %7, !dbg !5516
  br i1 %53, label %L13, label %if2, !dbg !5516

L13:                                              ; preds = %idxend10
  %54 = add i64 %"#s1.017", 1, !dbg !5518
  %55 = icmp sgt i64 %54, %7, !dbg !5516
  br i1 %55, label %L15, label %if2.lr.ph, !dbg !5516

L15:                                              ; preds = %L13, %top
  ret %jl_value_t* inttoptr (i64 4312184832 to %jl_value_t*), !dbg !5518
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment