Skip to content

Instantly share code, notes, and snippets.

@frol
Created May 6, 2019 14:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save frol/547f36c5f736f3651dfeac53d8fde34e to your computer and use it in GitHub Desktop.
Save frol/547f36c5f736f3651dfeac53d8fde34e to your computer and use it in GitHub Desktop.
My refactored version of the Rust implementation from the article "Swift против Rust — бенчмаркинг на Linux с (не)понятным финалом" https://habr.com/en/post/450512/
//[dependencies]
//serde_json = "1.0"
use serde_json::Value;
use std::collections::{HashMap, HashSet};
/// Size in bytes of the buffer that `process_file` fills on each read from the input file.
const FILE_BUFFER_SIZE: usize = 50000;
/// Source data: one debt record as extracted from a single JSON object.
#[derive(Default)]
struct DebtRec {
    /// Company name taken from the record's `company` field.
    company: String,
    /// Every phone found in the record's `phones` and `phone` fields.
    phones: Vec<String>,
    /// Debt amount of this record (0.0 when it is absent or unparsable).
    debt: f64,
}
/// Result data: one aggregated debtor, built up from all records that share a phone.
#[derive(Default)]
struct Debtor {
    /// Distinct company names seen in the records merged into this debtor.
    companies: HashSet<String>,
    /// Distinct phones seen in the records merged into this debtor.
    phones: HashSet<String>,
    /// Sum of the debts of the merged records.
    debt: f64,
}
/// The whole aggregation result together with its phone lookup index.
#[derive(Default)]
struct Debtors {
    /// All debtors, in the order they were first encountered.
    all: Vec<Debtor>,
    /// Maps a phone to the position in `all` of the debtor it was last assigned to.
    index_by_phone: HashMap<String, usize>,
}
/// Entry point: processes every file passed as `-f <file>`, prints per-file
/// statistics, then prints all aggregated debtors. A usage hint is printed at
/// the end unless the most recent `-f` was followed by a file name.
fn main() {
    let mut debtors = Debtors::default();
    // Argument-scanning state:
    //   0 - no "-f" seen yet,
    //   1 - the previous argument was "-f", so a file name is expected next,
    //   2 - the most recent "-f" was followed by a file name.
    let mut state = 0;
    for arg in std::env::args() {
        if arg == "-f" {
            state = 1;
            continue;
        }
        if state != 1 {
            // Not preceded by "-f": ignore this argument.
            continue;
        }
        state = 2;
        println!("{}:", &arg);
        let started = std::time::Instant::now();
        let (processed, failed) = process_file(&arg, &mut debtors);
        println!(
            "PROCESSED: {} objects in {:?}, {} errors found",
            processed,
            started.elapsed(),
            failed
        );
    }

    for (position, debtor) in debtors.all.iter().enumerate() {
        println!("-------------------------------");
        println!("#{}: debt: {}", position, &debtor.debt);
        println!(
            "companies: {:?}\nphones: {:?}",
            &debtor.companies, &debtor.phones
        );
    }

    if state < 2 {
        println!("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...");
    }
}
fn process_file(fname: &str, res: &mut Debtors) -> (i32, i32) {
use std::io::prelude::*;
let mut count = 0;
let mut errcount = 0;
let mut file = match std::fs::File::open(fname) {
Ok(file) => file,
Err(e) => {
println!("ERROR: {}", e);
return (0, 0);
}
};
let mut obj = vec![];
let mut braces = 0;
let mut buf = [0; FILE_BUFFER_SIZE];
loop {
let blen = match file.read(&mut buf) {
Err(ref error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
Ok(0) | Err(_) => break,
Ok(blen) => blen,
};
for b in &buf[..blen] {
if *b == b'{' {
braces += 1;
obj.push(*b);
} else if *b == b'}' {
braces -= 1;
obj.push(*b);
if braces == 0 {
//object formed !
match serde_json::from_slice(&obj) {
Ok(o) => {
process_object(&o, res);
}
Err(e) => {
println!("JSON ERROR: {}:\n{:?}", e, &obj);
errcount += 1;
}
}
count += 1;
obj.clear();
}
} else if braces > 0 {
obj.push(*b);
}
}
}
(count, errcount)
}
/// Folds one parsed JSON record into the accumulated debtors.
///
/// The record is attached to the debtor already indexed under one of its
/// phones (the first phone with an index entry wins); if none of the phones is
/// known yet, a new debtor is appended. Every phone of the record is then
/// (re)indexed to that debtor.
fn process_object(o: &Value, res: &mut Debtors) {
    let record = extract_data(o);

    let known = record
        .phones
        .iter()
        .find_map(|phone| res.index_by_phone.get(phone).copied());
    let slot = match known {
        Some(slot) => slot,
        None => {
            res.all.push(Debtor::default());
            res.all.len() - 1
        }
    };

    let debtor = &mut res.all[slot];
    debtor.companies.insert(record.company);
    debtor.debt += record.debt;
    for phone in record.phones {
        res.index_by_phone.insert(phone.clone(), slot);
        debtor.phones.insert(phone);
    }
}
fn val2str(v: &Value) -> String {
match v {
Value::String(vs) => vs.to_owned(), //to avoid additional quotes
_ => v.to_string(),
}
}
fn extract_data(o: &Value) -> DebtRec {
let mut dr = DebtRec::default();
dr.company = match &o["company"] {
Value::Object(c1) => match &c1["name"] {
Value::String(c2) => c2.to_owned(),
c => val2str(c),
},
c => val2str(c),
};
match &o["phones"] {
Value::Null => {}
Value::Array(pp) => dr.phones.extend(pp.iter().map(|p| val2str(p))),
pp => dr.phones.push(val2str(&pp)),
}
let p = &o["phone"];
if !p.is_null() {
dr.phones.push(val2str(&p));
}
dr.debt = match &o["debt"] {
Value::Number(d) => d.as_f64().unwrap_or(0.0),
Value::String(d) => d.parse().unwrap_or(0.0),
_ => 0.0,
};
dr
}
@technic
Copy link

technic commented May 7, 2019

There is a library to parse command line arguments.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment