Skip to content

Instantly share code, notes, and snippets.

@jmeggitt
Created June 16, 2024 04:11
Show Gist options
  • Save jmeggitt/f6f8ee093ae291144d00c19bb0576739 to your computer and use it in GitHub Desktop.
Save jmeggitt/f6f8ee093ae291144d00c19bb0576739 to your computer and use it in GitHub Desktop.
use itertools::Itertools;
use rand::RngCore;
use std::collections::HashMap;
use std::fs::File;
use std::hint::black_box;
use std::io::{Seek, SeekFrom, Write};
use std::time::{Duration, Instant};
use std::{fs, io};
/// Runs the file write benchmark and records the results in `benchmark.log`.
///
/// For each of 30 rounds and each buffer size, a fresh randomly named file is created under
/// `tmp/`, one warmup pass and one recorded pass of [`run_benchmark`] are executed against it,
/// and the file is removed again. Every recorded pass is written to the log as one CSV line,
/// and after each round the medians over all rounds completed so far are appended.
///
/// # Errors
/// Returns any I/O error raised while creating the log, the `tmp` directory or a test file,
/// while writing to the log, or while removing a test file.
fn main() -> io::Result<()> {
    // Log for storing the benchmark data
    let mut log = File::create("benchmark.log")?;

    // Folder for all the files we will be creating. `create_dir_all` succeeds when the
    // directory already exists, so an error here is a genuine failure worth reporting.
    fs::create_dir_all("tmp")?;

    // Choose buffer sizes whose logarithms are evenly spaced: 2^(k / 4) truncated to an
    // integer, with consecutive duplicates (which only occur for small sizes) removed.
    let buffer_sizes = (0..i32::MAX)
        .map(|k| (k as f64 / 4.0).exp2() as usize)
        .dedup()
        .take(112)
        .collect::<Vec<_>>();

    let mut benchmark_aggregate = HashMap::<usize, ResultAggregate>::new();

    for round in 0..30 {
        println!("Running round {round} of benchmark");
        writeln!(&mut log, "\nRun {round}:")?;
        writeln!(&mut log, "buffer size,seek,write,sync,total")?;

        for &buffer_size in &buffer_sizes {
            let start_time = Instant::now();
            let file_name = random_test_file_name();
            let mut file = File::create(&file_name).map_err(|err| {
                io::Error::new(
                    err.kind(),
                    format!("Failed to open file {file_name}: {err}"),
                )
            })?;

            // Perform one warmup round before recording the times
            black_box(run_benchmark(&mut file, buffer_size));
            let mut data = run_benchmark(&mut file, buffer_size);

            // Close the handle before deleting the file it refers to.
            drop(file);
            fs::remove_file(&file_name)?;

            // Add CSV line to log
            write!(&mut log, "{},", buffer_size)?;
            data.write_csv_line(&mut log)?;

            // Update aggregate of benchmark runs with our result. This aggregate will help to
            // counteract the effects of dynamic CPU frequency changes.
            let (seek, write, sync, total) = data.medians();
            benchmark_aggregate
                .entry(buffer_size)
                .or_default()
                .add_sample(seek, write, sync, total);

            println!(
                "Completed benchmark for buffer size {} in {:?}",
                buffer_size,
                start_time.elapsed()
            );
        }

        // `round` is zero-based, so the number of rounds aggregated so far is `round + 1`.
        let completed_rounds = round + 1;
        writeln!(
            &mut log,
            "\nMedians of previous {completed_rounds} benchmarks:"
        )?;
        for &buffer_size in &buffer_sizes {
            // Every buffer size received a sample in the loop above; looking it up instead of
            // using `entry().or_default()` avoids ever inserting an empty aggregate.
            if let Some(aggregate) = benchmark_aggregate.get_mut(&buffer_size) {
                write!(&mut log, "{},", buffer_size)?;
                aggregate.write_csv_line(&mut log)?;
            }
        }
    }

    Ok(())
}
/// Repeatedly overwrites the start of `file` with `buffer_size` random bytes, timing each step.
///
/// Every iteration fills the buffer with fresh random bytes (not timed) and then measures, in
/// nanoseconds, three consecutive steps: seeking to offset 0, writing the whole buffer, and
/// `sync_all`. The time from the start of the seek to the end of the sync is recorded as the
/// total.
///
/// Sampling stops once at least 500 ms have elapsed *and* at least 100 samples were recorded,
/// or as soon as more than 5 s have elapsed, whichever happens first. At least one sample is
/// always recorded.
///
/// # Panics
/// Panics if seeking, writing or syncing the file fails.
fn run_benchmark(file: &mut File, buffer_size: usize) -> ResultAggregate {
    const MIN_RUN_TIME: Duration = Duration::from_millis(500);
    const MAX_RUN_TIME: Duration = Duration::from_secs(5);
    const MIN_SAMPLES: usize = 100;

    let mut run_times = ResultAggregate::default();

    // Allocate the buffer once; only its contents are refreshed per iteration. The largest
    // benchmarked sizes are hundreds of megabytes, so reallocating every pass is wasteful.
    let mut buffer = vec![0u8; buffer_size];

    let start_time = Instant::now();
    while start_time.elapsed() < MIN_RUN_TIME || run_times.sync.len() < MIN_SAMPLES {
        // If the benchmark is taking too long, exit even if we don't have the desired number
        // of samples.
        if start_time.elapsed() > MAX_RUN_TIME {
            break;
        }

        // New random payload for every sample; generated outside the timed section.
        rand::thread_rng().fill_bytes(&mut buffer);

        let t0 = Instant::now();
        file.seek(SeekFrom::Start(0))
            .expect("Failed to seek to the beginning of the file");
        let t1 = Instant::now();
        file.write_all(&buffer).expect("Failed to write to file");
        let t2 = Instant::now();
        file.sync_all().expect("Failed to sync to file");
        let t3 = Instant::now();

        let seek = t1.duration_since(t0).as_nanos();
        let write = t2.duration_since(t1).as_nanos();
        let sync = t3.duration_since(t2).as_nanos();
        let total = t3.duration_since(t0).as_nanos();
        run_times.add_sample(seek, write, sync, total);
    }

    run_times
}
/// Builds a path of the form `tmp/<hex>.dat`, where `<hex>` is 8 random bytes rendered as
/// 16 lowercase hexadecimal digits.
fn random_test_file_name() -> String {
    let mut random_bytes = [0u8; 8];
    rand::thread_rng().fill_bytes(&mut random_bytes);

    // Two zero-padded hex digits per byte, concatenated without separators.
    let hex_name: String = random_bytes
        .iter()
        .map(|byte| format!("{:02x}", byte))
        .collect();

    format!("tmp/{}.dat", hex_name)
}
/// Collection of timing samples, one vector per measured quantity.
///
/// `add_sample` appends one value to every vector. Within this file the values are
/// nanosecond durations (or medians of such durations). `Default` yields four empty vectors.
#[derive(Default)]
struct ResultAggregate {
/// Samples for the seek step.
seek: Vec<u128>,
/// Samples for the write step.
write: Vec<u128>,
/// Samples for the sync step.
sync: Vec<u128>,
/// Samples for the whole seek + write + sync sequence.
total: Vec<u128>,
}
impl ResultAggregate {
    /// Appends one measurement to each of the four sample columns.
    fn add_sample(&mut self, seek: u128, write: u128, sync: u128, total: u128) {
        let columns = [
            (&mut self.seek, seek),
            (&mut self.write, write),
            (&mut self.sync, sync),
            (&mut self.total, total),
        ];
        for (column, value) in columns {
            column.push(value);
        }
    }

    /// Returns the medians of the columns as `(seek, write, sync, total)`.
    ///
    /// Each column is sorted in place first, which is why this takes `&mut self`; the
    /// columns are sorted independently of one another.
    fn medians(&mut self) -> (u128, u128, u128, u128) {
        for column in [
            &mut self.seek,
            &mut self.write,
            &mut self.sync,
            &mut self.total,
        ] {
            column.sort_unstable();
        }

        (
            median(&self.seek),
            median(&self.write),
            median(&self.sync),
            median(&self.total),
        )
    }

    /// Writes the four medians as one CSV line (`seek,write,sync,total`) to `writer`.
    ///
    /// Each median is divided by 1000 and printed with three decimal places, which turns
    /// the nanosecond samples recorded by this program into microseconds.
    fn write_csv_line<W: Write>(&mut self, writer: &mut W) -> io::Result<()> {
        let scaled = |raw: u128| raw as f64 / 1e3;
        let (seek, write, sync, total) = self.medians();

        writeln!(
            writer,
            "{:.03},{:.03},{:.03},{:.03}",
            scaled(seek),
            scaled(write),
            scaled(sync),
            scaled(total)
        )
    }
}
/// Returns the median of `x`, which must already be sorted in ascending order.
///
/// For an odd number of elements this is the middle element; for an even number it is the
/// mean of the two middle elements, rounded down. An empty slice yields `0` instead of
/// panicking.
fn median(x: &[u128]) -> u128 {
    if x.is_empty() {
        return 0;
    }

    let mid = x.len() / 2;
    if x.len() % 2 == 1 {
        return x[mid];
    }

    let (lower, upper) = (x[mid - 1], x[mid]);
    // Equivalent to floor((lower + upper) / 2) but cannot overflow: halve each operand first
    // and add back the carry produced when both are odd.
    lower / 2 + upper / 2 + (lower % 2 + upper % 2) / 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment