Skip to content

Instantly share code, notes, and snippets.

@exyi
Created August 13, 2023 11:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save exyi/0303e9472e116e325688b8d96f2847cb to your computer and use it in GitHub Desktop.
Memory latency and bogo-throughput benchmark
using BenchmarkTools
import Optim
# Build a random permutation of 1:size that forms a single cycle
# (Sattolo's algorithm: each element is swapped only with a strictly
# smaller index). A one-cycle permutation is exactly what the latency
# walk needs, so that chasing perm[i] visits every slot before closing.
function generate_permutation(size::Int)
    perm = collect(UInt32(1):UInt32(size))
    i = size
    while i >= 2
        k = rand(1:(i - 1))
        perm[i], perm[k] = perm[k], perm[i]
        i -= 1
    end
    return perm
end
# Chase the pointer chain starting at slot 1 until it closes back on a slot
# holding 1, counting hops. Returns true exactly when the permutation is a
# single cycle covering the whole array. The hop count is capped at
# length + 10 so a malformed (non-permutation) input cannot loop forever.
function walk_permutation_ib(perm::Vector{UInt32})
    cursor = UInt32(1)
    hops = 1
    limit = length(perm) + 10
    @inbounds while perm[cursor] != 1
        cursor = perm[cursor]
        hops += 1
        hops > limit && return false
    end
    return hops == length(perm)
end
# Bandwidth kernel: reduce the whole array with a plain sum so the benchmark
# streams every element sequentially (no dependent loads).
sum_array(perm::Vector{UInt32}) = sum(perm)
"""
    expected_runtime(size::Integer)

Rough cost model for walking a permutation of `size` 4-byte words: pick a
per-element latency weight by which cache level (L1/L2/L3/RAM) an array of
`size * 4` bytes fits into, and scale it by the element count.

Fix: the argument was restricted to `Int32`, which made the function
uncallable with the `Int64` sizes used elsewhere in this script; accepting
any `Integer` is backward compatible.
"""
function expected_runtime(size::Integer)
    bytes = size * 4
    if bytes <= 32 * 1024
        # fits in L1
        return 1 * size
    elseif bytes <= 256 * 1024
        # fits in L2
        return 4 * size
    elseif bytes <= 6 * 1024 * 1024
        # fits in L3
        return 30 * size
    else
        # spills to RAM
        return 100 * size
    end
end
"""
Summary statistics for one benchmarked array size.

Fix: fields were previously untyped (implicitly `Any`), which boxes every
access; concrete types match how the struct is populated in this script
(times as `Float64` nanoseconds, counts as `Int`).
"""
struct Result
    size::Int              # element count of the tested array
    word_size::Int         # bytes per element (4 for UInt32)
    latency_min::Float64   # minimum walk time, ns (whole array)
    latency_med::Float64   # median walk time, ns (whole array)
    latency_std::Float64   # std dev of walk time, ns
    latency_iters::Int     # number of latency samples taken
    throughput_min::Float64
    throughput_med::Float64
    throughput_std::Float64
    throughput_iters::Int  # number of throughput samples taken
end
# Emit the tab-separated column header for result rows; `additional` lets the
# caller append extra column names (should start with a leading tab).
function print_header(file, additional = "")
    columns = [
        "size_num", "size", "latency_median", "throughput_GBps",
        "latency_min", "latency_std", "latency_test_iterations",
        "throughput_median", "throughput_min", "throughput_std",
        "throughput_test_iterations",
    ]
    println(file, join(columns, "\t") * additional)
end
# Write one tab-separated result row matching the columns of `print_header`.
# Times are normalized per element; the throughput column converts the median
# whole-array time (ns) into GiB/s. `additional` appends extra columns.
function print_result(file, r::Result, additional = "")
    total_bytes = r.size * r.word_size
    gbps = total_bytes / 1024 / 1024 / 1024 / (r.throughput_med / 1e9)
    # per-element value rounded the same way for every time column
    per(x) = round(x / r.size, digits = 8)
    fields = Any[
        "$(r.size)",
        format_bytes(total_bytes),
        per(r.latency_med),
        round(gbps, digits = 8),
        per(r.latency_min),
        per(r.latency_std),
        "$(r.latency_iters)",
        per(r.throughput_med),
        per(r.throughput_min),
        per(r.throughput_std),
        "$(r.throughput_iters)",
    ]
    println(file, join(fields, "\t") * additional)
end
"""
    format_bytes(x)

Format a byte count with a binary-prefix unit (`B`/`Ki`/`Mi`/`Gi`), switching
units at 800 of the current unit so values stay in a readable range. Accepts
integers or floats (cache-size estimates are passed in as `Float64`).

Fix: the KiB branch used `sigdigits=3` while every other branch used
`digits=3`, producing inconsistent precision (e.g. 123456 bytes rendered as
`121.0Ki` instead of `120.562Ki`); all branches now round with `digits=3`.
"""
function format_bytes(x)
    if x < 800
        return "$x B"
    elseif x < 800 * 1024
        return "$(round(x / 1024, digits=3))Ki"
    elseif x < 800 * 1024 * 1024
        return "$(round(x / 1024 / 1024, digits=3))Mi"
    else
        return "$(round(x / 1024 / 1024 / 1024, digits=3))Gi"
    end
end
"""
    run_benchmark(sizes, print_debug=true, print_results=true)

For each byte size in `sizes` (must be multiples of 4), benchmark
pointer-chase latency (`walk_permutation_ib` over a random single-cycle
permutation) and sequential-read throughput (`sum_array`), returning a
`Vector{Result}` of summary statistics.

Debug output (full BenchmarkTools trial displays) goes to stderr; TSV result
rows go to stdout when `print_results` is true.
"""
function run_benchmark(sizes, print_debug=true, print_results=true)
if print_results
print_header(stdout)
end
results = Vector{Result}()
for s in sizes
if print_debug
println(stderr, "Testing array size $(format_bytes(s))...")
end
# s is a byte count; each permutation element is a 4-byte UInt32
@assert s % 4 == 0
perm = generate_permutation(s ÷ 4)
# Latency trial: a serial pointer chase — every load depends on the
# previous one, so this measures memory latency, not bandwidth.
b_lat = @benchmark walk_permutation_ib($perm)
if print_debug
print(stderr, "Latency ")
show(stderr, MIME("text/plain"), b_lat)
println(stderr)
end
# Throughput trial: independent sequential loads over the same array.
b_thr = @benchmark sum_array($perm)
if print_debug
print(stderr, "Throughput ")
show(stderr, MIME("text/plain"), b_thr)
println(stderr)
end
# Collapse both trials into one Result row (times in ns, per whole array).
r = Result(length(perm), 4, minimum(b_lat).time, median(b_lat).time, BenchmarkTools.std(b_lat).time, length(b_lat.times), minimum(b_thr).time, median(b_thr).time, BenchmarkTools.std(b_thr).time, length(b_thr.times))
if print_results
print_result(stdout, r)
end
push!(results, r)
end
return results
end
# Model the mean per-element access latency of an array of `size` bytes as a
# mixture of per-level latencies, weighted by the fraction of the array that
# fits in each cache level (L1, then L2, then L3; the remainder is "L4"/RAM).
function latency_function(size, L1_lat, L2_lat, L3_lat, L4_lat, L1_size, L2_size, L3_size)
    fracL1 = min(1, L1_size / size)
    fracL2 = max(0, min(1, L2_size / size) - fracL1)
    fracL3 = max(0, min(1, L3_size / size) - fracL1 - fracL2)
    # remainder of the array (kept as a chained subtraction to preserve
    # the exact floating-point evaluation order)
    fracL4 = 1 - fracL1 - fracL2 - fracL3
    weighted = zip((fracL1, fracL2, fracL3, fracL4), (L1_lat, L2_lat, L3_lat, L4_lat))
    return sum(w * lat for (w, lat) in weighted)
end
# Array sizes to benchmark, in BYTES (the trailing `.* 1024` converts KiB).
# const test_sizes = [16, 32, 64, 128, 256, 512, 1024, 2048, 4 * 1024, 8 * 1024] .* 1024
const test_sizes = [16, 32, 48, 64, 96, 128, 256, 512, 768, 1024, 2048, 4 * 1024, 8 * 1024, 12 * 1024, 16 * 1024, 24 * 1024, 32 * 1024, 64 * 1024, 96 * 1024, 128 * 1024, 192 * 1024] .* 1024
const results = run_benchmark(test_sizes)
# Median pointer-chase latency per element (ns) for each size; this is the
# curve the cache model below is fitted against.
const latencies = [ r.latency_med / r.size for r in results ]
println(stderr, "Fitting $latencies to $test_sizes")
# L1 latency is taken directly from the smallest measurement and held fixed
# during the fit (only L2/L3/L4 latencies and cache sizes are optimized).
const L1_lat = latencies[1]
# const L4_size = sum(test_sizes) * 10 # enough RAM
# Objective for the cache-model fit. `x` packs the six free parameters
# [L1_size, L2_size, L3_size, L2_lat, L3_lat, L4_lat]; the L1 latency is
# pinned to the measured `L1_lat` global. Scores the squared relative error
# of the predicted per-element latencies against the measured `latencies`,
# weighted so higher-latency (larger-array) points count slightly more.
function optim_objective(x)
    L1_size, L2_size, L3_size, L2_lat, L3_lat, L4_lat = x
    predicted = latency_function.(test_sizes, L1_lat, L2_lat, L3_lat, L4_lat,
                                  L1_size, L2_size, L3_size)
    rel_err = abs.((predicted .- latencies) ./ latencies)
    weight = 3 .+ log.(1 .+ latencies)
    return sum(rel_err .^ 2 .* weight)
end
# Starting point for the fit: [L1_size, L2_size, L3_size, L2_lat, L3_lat, L4_lat]
# (sizes in bytes, latencies in ns per element).
initial_args = [ 16.0 * 1024, 256 * 1024, 30 * 1024 * 1024, 4, 30, 100 ]
# Box-constrained optimization: each parameter ranges over [initial/100, initial*100].
opt_result = Optim.optimize(optim_objective, initial_args ./ 100, initial_args.*100, initial_args, Optim.Fminbox())
println(stderr, "Optimization result: ", opt_result)
optargs = Optim.minimizer(opt_result)
println(stderr, "Predicted cache sizes: $(format_bytes.(optargs[1:3]))")
println(stderr, "Fitted cache latencies: $([L1_lat; optargs[4:6]])")
# Second fit: pin the cache sizes to typical hardware values and re-fit only
# the three latencies, warm-started from the first fit's latency estimates.
assumed_cache_sizes = [ 32.0, 256, 6 * 1024 ] .* 1024
println(stderr, "Assuming cache sizes: $(format_bytes.(assumed_cache_sizes))")
opt_a_result = Optim.optimize(x -> optim_objective([assumed_cache_sizes; x]), initial_args[4:6]./100, initial_args[4:6].*100, optargs[4:6], Optim.Fminbox())
println(stderr, "Optimization result: ", opt_a_result)
opt_a_args = Optim.minimizer(opt_a_result)
println(stderr, "Fitted cache latencies: $([L1_lat; opt_a_args[1:3]])")
# Dump the measured rows plus both model predictions to output.tsv.
# Fix: the extra header columns were misspelled "predition1"/"predition2".
open("output.tsv", "w") do f
    print_header(f, "\tprediction1\tprediction2")
    for r in results
        # prediction1: fully fitted model (cache sizes AND latencies optimized)
        prediction1 = latency_function(r.size * r.word_size, L1_lat, optargs[4], optargs[5], optargs[6], optargs[1], optargs[2], optargs[3])
        # prediction2: latencies fitted with cache sizes pinned to assumed values
        prediction2 = latency_function(r.size * r.word_size, L1_lat, opt_a_args[1], opt_a_args[2], opt_a_args[3], assumed_cache_sizes...)
        print_result(f, r, "\t$prediction1\t$prediction2")
    end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment