@morgangallant
Created January 3, 2024 22:07
Script used to benchmark the Zig version of LevelDB
const std = @import("std");
const builtin = @import("builtin");
const db = @import("db.zig");
const options = @import("options.zig");
// Rough re-creation of LevelDB's db_bench program with our Zig port.
const usage =
\\db_bench [options]
\\
\\Options:
\\  --specific <name>  Run only the named benchmark
\\  --no-cleanup       Don't clean up the DB files on shutdown
\\
;
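// Example invocations (binary name as given in the usage text above):
//   db_bench                         run every benchmark step in order
//   db_bench --specific fillrandom   run only the named benchmark
//   db_bench --no-cleanup            keep benchmark.db around after the run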
const db_fpath = "benchmark.db";
const default_num = 1000000; // 1m
const default_key_size = 16;
const default_value_size = 100;
const default_entries_per_batch = 1;
const prng_seed = 69;
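// Defaults mirror LevelDB's db_bench: 1,000,000 entries, 16-byte keys,
// 100-byte values, one entry per write batch. The fixed PRNG seed keeps the
// random key sequences reproducible between runs.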
const BenchmarkParam = struct {
name: []const u8,
fresh_db: bool = false,
read_opts: options.Read = .{},
write_opts: options.Write = .{},
num: usize = default_num,
key_size: usize = default_key_size,
value_size: usize = default_value_size,
report_byte_throughput: bool = false,
entries_per_batch: usize = default_entries_per_batch,
};
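// Default benchmark schedule, following LevelDB's db_bench. The repeated read
// entries are deliberate: readrandom runs twice back to back (the extra run
// lets earlier compactions settle), and the read benchmarks run again after
// `compact` to measure the fully compacted database.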
const benchmarks = [_]BenchmarkParam{
.{
.name = "fillseq",
.fresh_db = true,
},
.{
.name = "fillsync",
.fresh_db = true,
.num = default_num / 1000,
.write_opts = .{
.sync = true,
},
},
.{
.name = "fillrandom",
.fresh_db = true,
},
.{
.name = "overwrite",
},
.{
.name = "readrandom",
},
.{
.name = "readrandom",
},
.{
.name = "readseq",
},
.{
.name = "readreverse",
},
.{
.name = "compact",
},
.{
.name = "readrandom",
},
.{
.name = "readseq",
},
.{
.name = "readreverse",
},
.{
.name = "fill100K",
.fresh_db = true,
.num = default_num / 1000,
.value_size = default_value_size * 1000,
},
.{
.name = "crc32c",
},
};
pub fn main() !void {
var gp_alloc = std.heap.GeneralPurposeAllocator(.{}){};
const use_gpa = builtin.mode != .ReleaseFast or !builtin.link_libc;
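// Allocator selection: use a GeneralPurposeAllocator (with leak detection on
// deinit) for non-ReleaseFast or non-libc builds; otherwise go straight to
// libc's malloc. raw_c_allocator skips the alignment-adjusting wrapper and is
// only safe when C's max_align_t covers Zig's strictest alignment (i128).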
const alloc = alloc: {
if (use_gpa) {
break :alloc gp_alloc.allocator();
}
if (@alignOf(std.c.max_align_t) < @alignOf(i128)) {
break :alloc std.heap.c_allocator;
}
break :alloc std.heap.raw_c_allocator;
};
defer if (use_gpa) {
_ = gp_alloc.deinit();
};
const args = try std.process.argsAlloc(alloc);
defer std.process.argsFree(alloc, args);
const stdout = std.io.getStdOut().writer();
var no_cleanup = false;
var specific: ?[]const u8 = null;
var arg_idx: usize = 1;
while (arg_idx < args.len) : (arg_idx += 1) {
const arg = args[arg_idx];
if (std.mem.eql(u8, arg, "--help")) {
try stdout.writeAll(usage);
return;
} else if (std.mem.eql(u8, arg, "--specific")) {
arg_idx += 1;
if (arg_idx >= args.len) {
try stdout.writeAll("Please specify a specific benchmark to run.\n");
return;
}
specific = args[arg_idx];
} else if (std.mem.eql(u8, arg, "--no-cleanup")) {
no_cleanup = true;
} else {
try stdout.print("Unknown command-line argument '{s}'.\n", .{arg});
try stdout.writeAll(usage);
return;
}
}
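// Open (or create) the on-disk database with the port's default options.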
const dbType = db.OnDisk(.{});
var dbImpl: dbType = undefined;
try dbImpl.init(alloc, db_fpath);
if (specific) |specific_| {
try stdout.print("Running specific benchmark {s}:\n", .{specific_});
} else {
try stdout.print("Performing {d} benchmark steps:\n", .{benchmarks.len});
}
for (benchmarks) |params| {
if (specific) |specific_| {
if (!std.mem.eql(u8, params.name, specific_)) continue;
if (!params.fresh_db) {
try stdout.print(
"Warning: Chosen benchmark '{s}' expected non-fresh DB, environment may not be setup correctly.\n",
.{specific_},
);
}
}
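// Benchmarks flagged fresh_db start from an empty database: tear down the
// current instance, delete its on-disk state, and re-open it.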
if (params.fresh_db) {
try dbImpl.deinit();
try std.fs.cwd().deleteTree(db_fpath);
try dbImpl.init(alloc, db_fpath);
}
var stats: RunStats = undefined;
if (std.mem.eql(u8, params.name, "fillseq")) {
stats = try fillSequential(alloc, params, dbImpl.db());
} else if (std.mem.eql(u8, params.name, "fillsync") or
std.mem.eql(u8, params.name, "fillrandom") or
std.mem.eql(u8, params.name, "overwrite") or
std.mem.eql(u8, params.name, "fill100K"))
{
stats = try fillRandom(alloc, params, dbImpl.db());
} else if (std.mem.eql(u8, params.name, "readrandom")) {
stats = try readRandom(alloc, params, dbImpl.db());
} else if (std.mem.eql(u8, params.name, "readseq")) {
stats = try readSequential(alloc, params, dbImpl.db(), .forward);
} else if (std.mem.eql(u8, params.name, "readreverse")) {
stats = try readSequential(alloc, params, dbImpl.db(), .backward);
} else if (std.mem.eql(u8, params.name, "compact")) {
stats = try compact(alloc, params, dbImpl.db());
} else if (std.mem.eql(u8, params.name, "crc32c")) {
stats = try crc32c(alloc, params);
} else {
return error.UnsupportedBenchmark;
}
try stats.writeResults(
params.name,
stdout,
);
if (specific != null) break; // Only want to run that specific benchmark once.
}
try dbImpl.deinit();
if (!no_cleanup) {
std.fs.cwd().deleteTree(db_fpath) catch {}; // Best effort cleanup.
}
}
const RunStats = struct {
timer: std.time.Timer,
ops: u64,
nanos: u64,
bytes: u64,
fn init() !RunStats {
return .{
.timer = try std.time.Timer.start(),
.ops = 0,
.nanos = 0,
.bytes = 0,
};
}
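// Timer.lap() returns the nanoseconds elapsed since the previous lap (or
// since start) and restarts the timer, so `nanos` accumulates the total time
// spent across all completed ops.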
fn finishedSingleOp(self: *RunStats) void {
self.nanos += self.timer.lap();
self.ops += 1;
}
fn addBytes(self: *RunStats, n: u64) void {
self.bytes += n;
}
const byte_units = [_][]const u8{ "B", "KiB", "MiB", "GiB", "TiB" };
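// Prints "<name>  <microseconds per op>" plus, when byte throughput was
// recorded, a bytes-per-second figure scaled to the largest fitting unit.
// Assumes at least one op completed (ops > 0).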
fn writeResults(
self: *const RunStats,
name: []const u8,
writer: anytype,
) !void {
const us_per_op = @as(f64, @floatFromInt(self.nanos / self.ops)) / std.time.ns_per_us;
try writer.print("{s}\t\t{d:.3} us per op", .{ name, us_per_op });
if (self.bytes > 0) {
var bytes_per_s = (@as(f64, @floatFromInt(self.bytes)) / @as(f64, @floatFromInt(self.nanos))) * std.time.ns_per_s;
var byte_unit_idx: usize = 0; // I.e. "B".
while (bytes_per_s >= 1024 and byte_unit_idx < byte_units.len - 1) {
bytes_per_s /= 1024;
byte_unit_idx += 1;
}
try writer.print("\t\t({d:.3} {s}/s)", .{ bytes_per_s, byte_units[byte_unit_idx] });
}
try writer.writeByte('\n');
}
};
fn fillSequential(alloc: std.mem.Allocator, params: BenchmarkParam, inst: db.DB) !RunStats {
return doWrite(alloc, params, true, inst);
}
fn fillRandom(alloc: std.mem.Allocator, params: BenchmarkParam, inst: db.DB) !RunStats {
return doWrite(alloc, params, false, inst);
}
fn doWrite(alloc: std.mem.Allocator, params: BenchmarkParam, comptime seq: bool, inst: db.DB) !RunStats {
const key_buffer = try alloc.alloc(u8, params.key_size);
defer alloc.free(key_buffer);
@memset(key_buffer, 0);
// Since compression is disabled, the actual contents of the value_buffer
// don't matter. If compression were enabled, we'd probably want to fill this
// buffer with semi-compressible (configurable) data, since all zeros is
// likely too compressible.
const value_buffer = try alloc.alloc(u8, params.value_size);
defer alloc.free(value_buffer);
@memset(value_buffer, 0);
var prng = std.rand.DefaultPrng.init(prng_seed);
const rng = prng.random();
var batch = try inst.createBatch();
defer batch.deinit();
var stats = try RunStats.init();
var i: usize = 0;
var bytes: u64 = 0;
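// Keys are the entry index encoded big-endian into the zeroed key buffer, so
// byte-wise key order matches numeric order: sequential fills insert in
// sorted order, while random fills draw keys uniformly from [0, num).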
while (i < params.num) : (i += params.entries_per_batch) {
try batch.clear();
for (0..params.entries_per_batch) |j| {
const k = if (seq) i + j else rng.uintLessThan(usize, params.num);
encodeFixed64(.big, key_buffer, k);
try batch.put(key_buffer, value_buffer);
bytes += key_buffer.len + value_buffer.len;
stats.finishedSingleOp();
}
try inst.applyBatch(&params.write_opts, batch);
}
stats.addBytes(bytes);
return stats;
}
fn readSequential(
allocator: std.mem.Allocator,
params: BenchmarkParam,
inst: db.DB,
comptime direction: enum { forward, backward },
) !RunStats {
var stats = try RunStats.init();
var i: usize = 0;
var bytes: usize = 0;
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
var iter = try inst.newIterator(&params.read_opts, &arena);
defer iter.deinit();
if (comptime direction == .forward) {
try iter.seekToFirst();
} else {
try iter.seekToLast();
}
while (i < params.num and iter.valid()) {
bytes += iter.key().len + iter.value().len;
stats.finishedSingleOp();
i += 1;
if (comptime direction == .forward) {
try iter.next();
} else {
try iter.prev();
}
}
stats.addBytes(bytes);
return stats;
}
fn readRandom(allocator: std.mem.Allocator, params: BenchmarkParam, inst: db.DB) !RunStats {
var stats = try RunStats.init();
var prng = std.rand.DefaultPrng.init(prng_seed);
const rng = prng.random();
const key_buffer = try allocator.alloc(u8, params.key_size);
defer allocator.free(key_buffer);
@memset(key_buffer, 0);
var value_buffer = std.ArrayList(u8).init(allocator);
defer value_buffer.deinit();
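// Lookup keys are drawn from the same [0, num) range as the fill benchmarks.
// Since the random fills cover only a subset of that key space, some lookups
// will miss; the result of get() is discarded either way.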
for (0..params.num) |_| {
const k = rng.uintLessThan(usize, params.num);
encodeFixed64(.big, key_buffer, k);
value_buffer.clearRetainingCapacity();
_ = try inst.get(&params.read_opts, key_buffer, &value_buffer);
stats.finishedSingleOp();
}
return stats;
}
fn compact(_: std.mem.Allocator, _: BenchmarkParam, inst: db.DB) !RunStats {
var stats = try RunStats.init();
try inst.compactRange(null, null);
stats.finishedSingleOp();
return stats;
}
fn crc32c(_: std.mem.Allocator, _: BenchmarkParam) !RunStats {
var stats = try RunStats.init();
var buffer: [4096]u8 = undefined;
@memset(&buffer, 'x');
var bytes: usize = 0;
var crc: u32 = undefined;
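// Checksum 500 MiB of 'x' bytes in 4 KiB blocks, as LevelDB's crc32c
// benchmark does. Note that std.hash.Crc32 is the IEEE polynomial, whereas
// LevelDB proper uses CRC-32C (Castagnoli).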
while (bytes < 500 * 1048576) {
var hasher = std.hash.Crc32.init();
hasher.update(&buffer);
crc = hasher.final();
stats.finishedSingleOp();
bytes += buffer.len;
}
std.mem.doNotOptimizeAway(crc);
stats.addBytes(bytes);
return stats;
}
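// Writes `val` into the first 8 bytes of `buf` in the requested byte order
// (the analogue of LevelDB's EncodeFixed64). The benchmarks use big-endian so
// that encoded keys compare in numeric order.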
const ByteOrder = enum { little, big };
pub fn encodeFixed64(comptime order: ByteOrder, buf: []u8, val: u64) void {
std.debug.assert(buf.len >= @sizeOf(u64));
if (comptime order == .little) {
inline for (0..8) |i| {
buf[i] = @as(u8, @truncate(val >> (comptime i * 8)));
}
} else {
inline for (0..8) |i| {
buf[comptime 7 - i] = @as(u8, @truncate(val >> (comptime i * 8)));
}
}
}