carstenbauer/bench.jl

## bench.jl
print("Loading modules...")
using BenchmarkTools
using CUDA
using DataFrames
using CSV
using Printf
println("done!")

const a = 3.1415f0

function saxpy_julia!(z,a,x,y)
    z .= a .* x .+ y
    return z
end

df = DataFrame(n=Int[], var"GFLOPS/s"=Float64[], var"GB/s"=Float64[], time=Float64[])

println("Running measurements...")
for i in 8:128
    n = 1024*1024*i
    x, y, z = CUDA.ones(n), CUDA.ones(n), CUDA.zeros(n)
    t_saxpy = @belapsed CUDA.@sync saxpy_julia!($z,$a,$x,$y)

    gflops = 2.0 * n * (1000)^(-3) / t_saxpy
    bandwidth = 3.0 * sizeof(Float32) * n * (1000)^(-3) / t_saxpy
    @printf("saxpy (julia): n= %12d %7.3f GFLOP/s %7.3f GB/s %7.3f s\n",
                 n, gflops, bandwidth, t_saxpy);
    push!(df, (n, gflops, bandwidth, t_saxpy))


    flush(stdout) # force immediate printing to .out logfile
    # free memory (to be safe)
    x, y, z = nothing, nothing, nothing
    GC.gc(true)
end
println("done!")
print("Writing results to disk...")
CSV.write("bench_results.csv", df)
println("done!")
	print("Loading modules...")
	using BenchmarkTools
	using CUDA
	using DataFrames
	using CSV
	using Printf
	println("done!")

	const a = 3.1415f0

	function saxpy_julia!(z,a,x,y)
	z .= a .* x .+ y
	return z
	end

	df = DataFrame(n=Int[], var"GFLOPS/s"=Float64[], var"GB/s"=Float64[], time=Float64[])

	println("Running measurements...")
	for i in 8:128
	n = 10241024i
	x, y, z = CUDA.ones(n), CUDA.ones(n), CUDA.zeros(n)
	t_saxpy = @belapsed CUDA.@sync saxpy_julia!($z,$a,$x,$y)

	gflops = 2.0 * n * (1000)^(-3) / t_saxpy
	bandwidth = 3.0 * sizeof(Float32) * n * (1000)^(-3) / t_saxpy
	@printf("saxpy (julia): n= %12d %7.3f GFLOP/s %7.3f GB/s %7.3f s\n",
	n, gflops, bandwidth, t_saxpy);
	push!(df, (n, gflops, bandwidth, t_saxpy))


	flush(stdout) # force immediate printing to .out logfile
	# free memory (to be safe)
	x, y, z = nothing, nothing, nothing
	GC.gc(true)
	end
	println("done!")
	print("Writing results to disk...")
	CSV.write("bench_results.csv", df)
	println("done!")