Skip to content

Instantly share code, notes, and snippets.

@xxiz
Created January 28, 2023 18:06
Show Gist options
  • Save xxiz/e7d9b667fbe3083d420ffdd2ef421852 to your computer and use it in GitHub Desktop.
Save xxiz/e7d9b667fbe3083d420ffdd2ef421852 to your computer and use it in GitHub Desktop.
fast way to read line count of csv
/// Chooses a read-buffer size in bytes based on the size of the file.
///
/// * files smaller than 1 MiB get an 8 KiB buffer,
/// * files smaller than 1 GiB get a 1 MiB buffer,
/// * anything larger gets a 1 GiB buffer.
fn calculate_buffer_size(file_size: u64) -> usize {
    const ONE_MIB: u64 = 1 << 20;
    const ONE_GIB: u64 = 1 << 30;

    match file_size {
        size if size < ONE_MIB => 8192,
        size if size < ONE_GIB => 1 << 20,
        _ => 1 << 30,
    }
}
// Count the newline bytes in the file at `path` by streaming it through a
// single reusable buffer. A final line without a trailing '\n' is not counted.

// Open the file once: the same handle serves the metadata query and the reads.
let mut file = File::open(path).unwrap();
let file_size = file.metadata().unwrap().len();
let buffer_size = calculate_buffer_size(file_size);
let mut buffer = vec![0u8; buffer_size];
let mut line_count: usize = 0;
loop {
    let bytes_read = match file.read(&mut buffer) {
        // Zero bytes read means end of file.
        Ok(0) => break,
        Ok(n) => n,
        // An interrupted read is retryable, not a failure.
        Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
        Err(e) => panic!("failed to read file: {}", e),
    };
    // Count raw '\n' bytes instead of decoding the chunk as UTF-8: a chunk can
    // end in the middle of a multi-byte character (or the file may not be
    // UTF-8 at all), and validating such a chunk would fail. The byte 0x0A
    // never occurs inside a multi-byte UTF-8 sequence, so for valid UTF-8 the
    // count is the same.
    line_count += buffer[..bytes_read]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
}
println!("Line count: {}", line_count);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment