Created
March 12, 2023 22:39
-
-
Save bduffany/f407a7e076d3c573ed8106662cc70638 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use fastcdc::v2016::FastCDC as FastCDC2016; | |
use fastcdc::v2020::FastCDC as FastCDC2020; | |
use std::env; | |
use std::fs; | |
use std::process; | |
use std::thread; | |
use std::time::SystemTime; | |
use walkdir::WalkDir; | |
/// Number of worker threads that `main` spawns to read and chunk files.
const NUM_THREADS: u8 = 8;
fn compute_size_2016(contents: &[u8]) -> usize { | |
let mut size: usize = 0; | |
let cdc = FastCDC2016::new(&contents, 256 * 1024, 1 * 1024 * 1024, 4 * 1024 * 1024); | |
for chunk in cdc { | |
size += chunk.length; | |
} | |
return size; | |
} | |
fn compute_size_2020(contents: &[u8]) -> usize { | |
let mut size: usize = 0; | |
let cdc = FastCDC2020::new(&contents, 256 * 1024, 1 * 1024 * 1024, 4 * 1024 * 1024); | |
for chunk in cdc { | |
size += chunk.length; | |
} | |
return size; | |
} | |
fn main() { | |
let args: Vec<String> = env::args().collect(); | |
if args.len() < 2 { | |
eprintln!("usage: fastcdc <directory>"); | |
process::exit(1); | |
} | |
let version = env::var("VERSION").unwrap_or("2016".to_owned()); | |
let compute_size = match version.as_str() { | |
"2016" => compute_size_2016, | |
_ => compute_size_2020, | |
}; | |
let path = &args[1]; | |
let mut file_paths: Vec<std::path::PathBuf> = vec![]; | |
for entry in WalkDir::new(path) { | |
let e = entry.unwrap(); | |
if !e.file_type().is_file() { | |
continue; | |
} | |
file_paths.push(e.path().to_owned()); | |
} | |
let start = SystemTime::now(); | |
let (path_tx, path_rx) = crossbeam_channel::unbounded::<std::path::PathBuf>(); | |
let (result_tx, result_rx) = crossbeam_channel::unbounded::<usize>(); | |
for _ in 0..NUM_THREADS { | |
let rx = path_rx.clone(); | |
let tx = result_tx.clone(); | |
thread::spawn(move || { | |
for path in rx { | |
let contents = fs::read(path).unwrap(); | |
let size = compute_size(&contents); | |
tx.send(size).unwrap(); | |
} | |
}); | |
} | |
for path in &file_paths { | |
path_tx.send(path.to_path_buf()).unwrap(); | |
} | |
let mut total_size: usize = 0; | |
for _ in &file_paths { | |
let size = result_rx.recv().unwrap(); | |
total_size += size; | |
} | |
let duration = start.elapsed().unwrap(); | |
println!( | |
"processed {total_size:.2} GB ({num_files} files) in {duration:.3}s ({throughput:.3} GB/s)", | |
total_size = (total_size as f64) / 1e9, | |
num_files = file_paths.len(), | |
duration = duration.as_secs_f64(), | |
throughput = (total_size as f64) / 1e9 / duration.as_secs_f64(), | |
); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment