Created
June 16, 2024 04:11
-
-
Save jmeggitt/f6f8ee093ae291144d00c19bb0576739 to your computer and use it in GitHub Desktop.
Benchmark for https://stackoverflow.com/q/78626405/5987669
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use itertools::Itertools; | |
use rand::RngCore; | |
use std::collections::HashMap; | |
use std::fs::File; | |
use std::hint::black_box; | |
use std::io::{Seek, SeekFrom, Write}; | |
use std::time::{Duration, Instant}; | |
use std::{fs, io}; | |
fn main() -> io::Result<()> { | |
// Log for storing the benchmark data | |
let mut log = File::create("benchmark.log")?; | |
// Folder for all the files we will be creating | |
let _ = fs::create_dir("tmp"); | |
// Choose buffer sizes to run the benchmark for who's logs are evenly distributed | |
let buffer_sizes = (0..i32::MAX) | |
.map(|k| (k as f64 / 4.0).exp2() as usize) | |
.dedup() | |
.take(112) | |
.collect::<Vec<_>>(); | |
let mut benchmark_aggregate = HashMap::<usize, ResultAggregate>::new(); | |
for round in 0..30 { | |
println!("Running round {round} of benchmark"); | |
writeln!(&mut log, "\nRun {round}:")?; | |
writeln!(&mut log, "buffer size,seek,write,sync,total")?; | |
for &buffer_size in &buffer_sizes { | |
let start_time = Instant::now(); | |
let file_name = random_test_file_name(); | |
let mut file = File::create(&file_name).expect("Failed to open file"); | |
// Perform one warmup round before recording the times | |
black_box(run_benchmark(&mut file, buffer_size)); | |
let mut data = run_benchmark(&mut file, buffer_size); | |
fs::remove_file(file_name)?; | |
// Add CSV line to log | |
write!(&mut log, "{},", buffer_size)?; | |
data.write_csv_line(&mut log)?; | |
// Update aggregate of benchmark runs with our result. This aggregate will help to | |
// counteract the effects of dynamic CPU frequency changes. | |
let aggregate = benchmark_aggregate.entry(buffer_size).or_default(); | |
let (seek, write, sync, total) = data.medians(); | |
aggregate.add_sample(seek, write, sync, total); | |
println!( | |
"Completed benchmark for buffer size {} in {:?}", | |
buffer_size, | |
start_time.elapsed() | |
); | |
} | |
writeln!(&mut log, "\nMedians of previous {round} benchmarks:")?; | |
for &buffer_size in &buffer_sizes { | |
let aggregate = benchmark_aggregate.entry(buffer_size).or_default(); | |
write!(&mut log, "{},", buffer_size)?; | |
aggregate.write_csv_line(&mut log)?; | |
} | |
} | |
Ok(()) | |
} | |
fn run_benchmark(file: &mut File, buffer_size: usize) -> ResultAggregate { | |
let mut run_times = ResultAggregate::default(); | |
let start_time = Instant::now(); | |
while start_time.elapsed() < Duration::from_millis(500) || run_times.sync.len() < 100 { | |
// If the benchmark is taking too long, exit even if we don't have the desired number of | |
// samples. | |
if start_time.elapsed() > Duration::from_secs(5) { | |
break; | |
} | |
let mut buffer = vec![0u8; buffer_size]; | |
rand::thread_rng().fill_bytes(&mut buffer); | |
let t0 = Instant::now(); | |
file.seek(SeekFrom::Start(0)) | |
.expect("Failed to seek to the beginning of the file"); | |
let t1 = Instant::now(); | |
file.write_all(&buffer).expect("Failed to write to file"); | |
let t2 = Instant::now(); | |
file.sync_all().expect("Failed to sync to file"); | |
let t3 = Instant::now(); | |
let seek = t1.duration_since(t0).as_nanos(); | |
let write = t2.duration_since(t1).as_nanos(); | |
let sync = t3.duration_since(t2).as_nanos(); | |
let total = t3.duration_since(t0).as_nanos(); | |
run_times.add_sample(seek, write, sync, total); | |
} | |
run_times | |
} | |
/// Get a random test file name | |
fn random_test_file_name() -> String { | |
let mut file_name_buffer = [0u8; 8]; | |
rand::thread_rng().fill_bytes(&mut file_name_buffer); | |
format!( | |
"tmp/{}.dat", | |
file_name_buffer.map(|x| format!("{:02x}", x)).join("") | |
) | |
} | |
/// Per-phase timing samples (in nanoseconds) collected over a benchmark run.
#[derive(Default)]
struct ResultAggregate {
    seek: Vec<u128>,
    write: Vec<u128>,
    sync: Vec<u128>,
    total: Vec<u128>,
}

impl ResultAggregate {
    /// Records one timing sample for each of the four phases.
    fn add_sample(&mut self, seek: u128, write: u128, sync: u128, total: u128) {
        for (samples, value) in [
            (&mut self.seek, seek),
            (&mut self.write, write),
            (&mut self.sync, sync),
            (&mut self.total, total),
        ] {
            samples.push(value);
        }
    }

    /// Sorts every sample vector in place and returns the medians as
    /// `(seek, write, sync, total)`. Panics if no samples were recorded.
    fn medians(&mut self) -> (u128, u128, u128, u128) {
        for samples in [
            &mut self.seek,
            &mut self.write,
            &mut self.sync,
            &mut self.total,
        ] {
            samples.sort_unstable();
        }
        (
            median(&self.seek),
            median(&self.write),
            median(&self.sync),
            median(&self.total),
        )
    }

    /// Writes the four medians as one CSV line of microseconds with three
    /// decimal places (samples are stored in nanoseconds).
    fn write_csv_line<W: Write>(&mut self, writer: &mut W) -> io::Result<()> {
        let (seek, write, sync, total) = self.medians();
        let as_micros = |nanos: u128| nanos as f64 / 1e3;
        writeln!(
            writer,
            "{:.03},{:.03},{:.03},{:.03}",
            as_micros(seek),
            as_micros(write),
            as_micros(sync),
            as_micros(total)
        )
    }
}

/// Median of a sorted slice; the mean of the two middle elements when the
/// length is even. Panics if `x` is empty.
fn median(x: &[u128]) -> u128 {
    let mid = x.len() / 2;
    if x.len() % 2 == 0 {
        (x[mid - 1] + x[mid]) / 2
    } else {
        x[mid]
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment