Skip to content

Instantly share code, notes, and snippets.

@yiunsr
Created December 2, 2021 12:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yiunsr/c0b0768d9e3938461214ec073f053b44 to your computer and use it in GitHub Desktop.
Save yiunsr/c0b0768d9e3938461214ec073f053b44 to your computer and use it in GitHub Desktop.
ss_csv vs csv-core
# Manifest for a small benchmark binary that compares the ss-csv parser
# against csv-core on the same input.
# NOTE: timings are only meaningful from an optimized build (`cargo run --release`).
[package]
name = "ss-csv-diff"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Reference implementation used as the comparison point.
csv-core = "0.1.10"
# Parser under test, pinned to a git tag so the measured code does not drift.
ss-csv = { git = "https://github.com/yiunsr/ss-csv.git", tag="v0.2.0" }
use std::time::Instant;
use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
use std::result::Result;
use ss_csv::ss_csv::{CoreBuilder, FieldResult};
use csv_core::{Reader, ReadFieldResult};
/// Number of full passes each benchmark makes over the input; the reported
/// time is the total elapsed time divided by this count.
static TEST_COUNT: i32 = 100;

/// CSV input embedded into the binary at compile time, so no file I/O happens
/// while the benchmarks run. The path is resolved relative to this source file.
static CSV_HAYSTACK: &[u8] = include_bytes!("../data/WPP2019_TotalPopulationBySex.csv");
/// Baseline pass: scans the raw bytes and tallies delimiters without doing any
/// CSV parsing (quoting is not considered).
///
/// Every `,` counts as one field; every `\n` counts as one field and one
/// record. The scan is repeated `TEST_COUNT` times; the tallies of the final
/// pass and the average wall-clock time per pass are printed to stdout.
///
/// Always returns `Ok(())`.
fn test_loop(buf: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
    let started = Instant::now();
    println!("======== Start prepare ========");

    // (records, fields) of the most recent pass; stays (0, 0) if no pass runs.
    let mut totals = (0, 0);
    for _ in 0..TEST_COUNT {
        let (mut records, mut fields) = (0, 0);
        for &byte in buf {
            match byte {
                b',' => fields += 1,
                b'\n' => {
                    fields += 1;
                    records += 1;
                }
                _ => {}
            }
        }
        totals = (records, fields);
    }

    let elapsed = started.elapsed().as_secs_f64() / f64::from(TEST_COUNT);
    println!("row : {}, col : {}", totals.0, totals.1);
    println!("result time: {:.6?}", elapsed);
    println!("======== End prepare ========");
    Ok(())
}
/// Benchmarks the `ss_csv` parser over `buf`.
///
/// For each of the `TEST_COUNT` passes a fresh parser is built with
/// `CoreBuilder::new().from_buffer(buf)` and `next()` is called until it
/// yields something other than `Field` or `FieldEnd`. A `Field` result counts
/// one field; a `FieldEnd` result counts one field and one record
/// (presumably it marks the last field of a row — confirm against the
/// ss_csv documentation).
///
/// Prints the tallies of the final pass and the average wall-clock time per
/// pass to stdout. Always returns `Ok(())`.
fn test_ss_csv(buf: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
    println!("======== Start ss_csv ========");
    let started = Instant::now();

    // (records, fields) of the most recent pass; stays (0, 0) if no pass runs.
    let mut totals = (0, 0);
    for _ in 0..TEST_COUNT {
        let mut parser = CoreBuilder::new().from_buffer(buf);
        let (mut records, mut fields) = (0, 0);
        loop {
            let (kind, _) = parser.next();
            match kind {
                FieldResult::Field => fields += 1,
                FieldResult::FieldEnd => {
                    fields += 1;
                    records += 1;
                }
                // Any other result ends this pass.
                _ => break,
            }
        }
        totals = (records, fields);
    }

    let elapsed = started.elapsed().as_secs_f64() / f64::from(TEST_COUNT);
    println!("row : {}, col : {}", totals.0, totals.1);
    println!("result time: {:.6?}", elapsed);
    println!("======== End ss_csv ========");
    Ok(())
}
/// Benchmarks `csv_core::Reader` over `buf`.
///
/// For each of the `TEST_COUNT` passes a fresh `Reader` is created and
/// `read_field` is called repeatedly, advancing past the consumed input, until
/// it reports `End`. Every `Field` result counts one field, and one record as
/// well when `record_end` is set.
///
/// Prints the tallies of the final pass and the average wall-clock time per
/// pass to stdout. Always returns `Ok(())`.
///
/// # Panics
///
/// Panics with "field too large" if the reader reports `OutputFull`, i.e. the
/// 1024-byte scratch buffer could not hold the field being read.
fn test_csv_core(buf: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
    println!("======== Start csv_core ========");
    let before = Instant::now();
    let mut count_fields = 0;
    let mut count_records = 0;
    // One scratch buffer shared by every `read_field` call. The field contents
    // are never inspected, so stale bytes are harmless; creating it once keeps
    // a per-field 1 KiB zero-fill out of the timed loop.
    let mut scratch = [0u8; 1024];
    for _ in 0..TEST_COUNT {
        let mut rdr = Reader::new();
        count_fields = 0;
        count_records = 0;
        let mut remaining = buf;
        loop {
            // The number of bytes written to `scratch` is ignored: only the
            // field and record counts matter here.
            let (result, nin, _) = rdr.read_field(remaining, &mut scratch);
            remaining = &remaining[nin..];
            match result {
                // Nothing to do: the next call is made with the (possibly
                // empty) rest of the input, which lets the reader finish.
                ReadFieldResult::InputEmpty => {}
                ReadFieldResult::OutputFull => panic!("field too large"),
                ReadFieldResult::Field { record_end } => {
                    count_fields += 1;
                    if record_end {
                        count_records += 1;
                    }
                }
                ReadFieldResult::End => break,
            }
        }
    }
    let elapsed = before.elapsed().as_secs_f64() / TEST_COUNT as f64;
    println!("row : {}, col : {}", count_records, count_fields);
    println!("result time: {:.6?}", elapsed);
    println!("======== End csv_core ========");
    Ok(())
}
/// Runs the three benchmarks in order — raw byte scan, `ss_csv`, `csv_core` —
/// over the embedded CSV data, printing each one's report to stdout.
///
/// # Errors
///
/// Returns the first error reported by a benchmark instead of discarding it;
/// later benchmarks are not run in that case.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("================ Start Program ================");
    test_loop(CSV_HAYSTACK)?;
    test_ss_csv(CSV_HAYSTACK)?;
    test_csv_core(CSV_HAYSTACK)?;
    println!("================ End Program ================");
    Ok(())
}
# sample csv file
* https://github.com/yiunsr/files/blob/main/WPP2019_TotalPopulationBySex.csv
```
================ Start Program ================
======== Start prepare ========
row : 280934, col : 2815239
result time: 0.022518
======== End prepare ========
======== Start ss_csv ========
row : 280934, col : 2809331
result time: 0.041631
======== End ss_csv ========
======== Start csv_core ========
row : 280934, col : 2809331
result time: 0.150060
======== End csv_core ========
================ End Program ================
```
* In this run, ss_csv (about 0.042 s per pass) is roughly 3.6 times faster than csv-core (about 0.150 s per pass).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment