Skip to content

Instantly share code, notes, and snippets.

@bduffany
Created March 12, 2023 22:39
Show Gist options
  • Save bduffany/f407a7e076d3c573ed8106662cc70638 to your computer and use it in GitHub Desktop.
Save bduffany/f407a7e076d3c573ed8106662cc70638 to your computer and use it in GitHub Desktop.
use fastcdc::v2016::FastCDC as FastCDC2016;
use fastcdc::v2020::FastCDC as FastCDC2020;
use std::env;
use std::fs;
use std::process;
use std::thread;
use std::time::SystemTime;
use walkdir::WalkDir;
/// Number of worker threads that `main` spawns to read and chunk files.
const NUM_THREADS: u8 = 8;
/// Chunks `contents` with the FastCDC 2016 implementation and returns the
/// sum of the lengths of all chunks produced.
///
/// The chunker is configured with sizes of 256 KiB, 1 MiB and 4 MiB, passed
/// to `FastCDC2016::new` in that order.
fn compute_size_2016(contents: &[u8]) -> usize {
    FastCDC2016::new(contents, 256 * 1024, 1024 * 1024, 4 * 1024 * 1024)
        .map(|piece| piece.length)
        .sum()
}
/// Chunks `contents` with the FastCDC 2020 implementation and returns the
/// sum of the lengths of all chunks produced.
///
/// The chunker is configured with sizes of 256 KiB, 1 MiB and 4 MiB, passed
/// to `FastCDC2020::new` in that order.
fn compute_size_2020(contents: &[u8]) -> usize {
    FastCDC2020::new(contents, 256 * 1024, 1024 * 1024, 4 * 1024 * 1024)
        .map(|piece| piece.length)
        .sum()
}
fn main() {
let args: Vec<String> = env::args().collect();
if args.len() < 2 {
eprintln!("usage: fastcdc <directory>");
process::exit(1);
}
let version = env::var("VERSION").unwrap_or("2016".to_owned());
let compute_size = match version.as_str() {
"2016" => compute_size_2016,
_ => compute_size_2020,
};
let path = &args[1];
let mut file_paths: Vec<std::path::PathBuf> = vec![];
for entry in WalkDir::new(path) {
let e = entry.unwrap();
if !e.file_type().is_file() {
continue;
}
file_paths.push(e.path().to_owned());
}
let start = SystemTime::now();
let (path_tx, path_rx) = crossbeam_channel::unbounded::<std::path::PathBuf>();
let (result_tx, result_rx) = crossbeam_channel::unbounded::<usize>();
for _ in 0..NUM_THREADS {
let rx = path_rx.clone();
let tx = result_tx.clone();
thread::spawn(move || {
for path in rx {
let contents = fs::read(path).unwrap();
let size = compute_size(&contents);
tx.send(size).unwrap();
}
});
}
for path in &file_paths {
path_tx.send(path.to_path_buf()).unwrap();
}
let mut total_size: usize = 0;
for _ in &file_paths {
let size = result_rx.recv().unwrap();
total_size += size;
}
let duration = start.elapsed().unwrap();
println!(
"processed {total_size:.2} GB ({num_files} files) in {duration:.3}s ({throughput:.3} GB/s)",
total_size = (total_size as f64) / 1e9,
num_files = file_paths.len(),
duration = duration.as_secs_f64(),
throughput = (total_size as f64) / 1e9 / duration.as_secs_f64(),
);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment