Created
December 2, 2021 12:48
-
-
Save yiunsr/c0b0768d9e3938461214ec073f053b44 to your computer and use it in GitHub Desktop.
ss_csv vs csv-core
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "ss-csv-diff" | |
version = "0.1.0" | |
edition = "2021" | |
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
[dependencies] | |
csv-core = "0.1.10" | |
ss-csv = { git = "https://github.com/yiunsr/ss-csv.git", tag="v0.2.0" } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::time::Instant; | |
use std::io::prelude::*; | |
use std::io::BufReader; | |
use std::fs::File; | |
use std::result::Result; | |
use ss_csv::ss_csv::{CoreBuilder, FieldResult}; | |
use csv_core::{Reader, ReadFieldResult}; | |
static TEST_COUNT:i32 = 100; | |
static CSV_HAYSTACK: &'static [u8] = include_bytes!("../data/WPP2019_TotalPopulationBySex.csv"); | |
fn test_loop(buf:&[u8]) -> Result<(), Box<dyn std::error::Error>> { | |
let before = Instant::now(); | |
println!("======== Start prepare ========"); | |
let mut count_fields = 0; | |
let mut count_records = 0; | |
for _ in 0..TEST_COUNT { | |
count_fields = 0; | |
count_records = 0; | |
for ch in buf.into_iter() { | |
if *ch == b','{ | |
count_fields += 1; | |
} | |
else if *ch == b'\n'{ | |
count_fields += 1; | |
count_records += 1; | |
} | |
} | |
} | |
let elapsed = before.elapsed().as_secs_f64() / TEST_COUNT as f64; | |
println!("row : {}, col : {}", count_records, count_fields); | |
println!("result time: {:.6?}", elapsed); | |
println!("======== End prepare ========"); | |
Ok(()) | |
} | |
fn test_ss_csv(buf:&[u8]) -> Result<(), Box<dyn std::error::Error>> { | |
println!("======== Start ss_csv ========"); | |
let before = Instant::now(); | |
let mut count_fields = 0; | |
let mut count_records = 0; | |
for _ in 0..TEST_COUNT { | |
let mut csv_parser = CoreBuilder::new().from_buffer(buf); | |
count_fields = 0; | |
count_records = 0; | |
loop{ | |
let (csv_type, _) = csv_parser.next(); | |
match csv_type{ | |
FieldResult::Field => { | |
count_fields += 1; | |
}, | |
FieldResult::FieldEnd =>{ | |
count_fields += 1; | |
count_records += 1; | |
}, | |
_ =>{ | |
break; | |
} | |
} | |
} | |
} | |
let elapsed = before.elapsed().as_secs_f64() / TEST_COUNT as f64; | |
println!("row : {}, col : {}", count_records, count_fields); | |
println!("result time: {:.6?}", elapsed); | |
println!("======== End ss_csv ========"); | |
Ok(()) | |
} | |
fn test_csv_core(buf:&[u8]) -> Result<(), Box<dyn std::error::Error>> { | |
println!("======== Start csv_core ========"); | |
let before = Instant::now(); | |
let mut count_fields = 0; | |
let mut count_records = 0; | |
for _ in 0..TEST_COUNT { | |
let mut rdr = Reader::new(); | |
count_fields = 0; | |
count_records = 0; | |
let mut buffer = buf; | |
loop { | |
// We skip handling the output since we don't need it for counting. | |
let (result, nin, _) = rdr.read_field(buffer, &mut [0; 1024]); | |
buffer = &buffer[nin..]; | |
match result { | |
ReadFieldResult::InputEmpty => {}, | |
ReadFieldResult::OutputFull => panic!("field too large"), | |
ReadFieldResult::Field { record_end } => { | |
count_fields += 1; | |
if record_end { | |
count_records += 1; | |
} | |
} | |
ReadFieldResult::End => break, | |
} | |
} | |
} | |
let elapsed = before.elapsed().as_secs_f64() / TEST_COUNT as f64; | |
println!("row : {}, col : {}", count_records, count_fields); | |
println!("result time: {:.6?}", elapsed); | |
println!("======== End csv_core ========"); | |
Ok(()) | |
} | |
fn main() -> Result<(), Box<dyn std::error::Error>> { | |
println!("================ Start Program ================"); | |
let _ = test_loop(CSV_HAYSTACK); | |
let _ = test_ss_csv(CSV_HAYSTACK); | |
let _ = test_csv_core(CSV_HAYSTACK); | |
println!("================ End Program ================"); | |
Ok(()) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# sample csv file | |
* https://github.com/yiunsr/files/blob/main/WPP2019_TotalPopulationBySex.csv | |
``` | |
================ Start Program ================ | |
======== Start prepare ======== | |
row : 280934, col : 2815239 | |
result time: 0.022518 | |
======== End prepare ======== | |
======== Start ss_csv ======== | |
row : 280934, col : 2809331 | |
result time: 0.041631 | |
======== End ss_csv ======== | |
======== Start csv_core ======== | |
row : 280934, col : 2809331 | |
result time: 0.150060 | |
======== End csv_core ======== | |
================ End Program ================ | |
``` | |
* In this run ss_csv is about 3.6× faster than csv-core (0.042 s vs 0.150 s per pass over the sample file).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment