Skip to content

Instantly share code, notes, and snippets.

@red75prime
Created May 12, 2019 09:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save red75prime/676ba0f8b8e830861679f03f6824cb77 to your computer and use it in GitHub Desktop.
Save red75prime/676ba0f8b8e830861679f03f6824cb77 to your computer and use it in GitHub Desktop.
// [dependencies]
// serde = "1.0"
// serde_derive = "1.0"
// serde_json = { version = "1.0", features = ["raw_value"] }
// fnv = "1.0.5"
// memmap = "0.7"
use fnv::{FnvHashSet as HashSet, FnvHashMap as HashMap};
use memmap::Mmap;
use serde_derive::{Deserialize};
use serde_json::{value::RawValue};
// NOTE(review): this constant is not referenced anywhere in the visible source
// (files are memory-mapped rather than read through a buffer); it looks like a
// leftover and can probably be removed — confirm nothing outside this file uses it.
const FILE_BUFFER_SIZE: usize = 50000;
/// The `company` field of an input record.
///
/// Because the enum is `untagged`, the field may be written either as a bare
/// JSON string or as an object carrying a `name` field; both forms borrow the
/// name directly from the input buffer.
// NOTE(review): borrowed `&str` fields presumably only deserialize from JSON
// strings without escape sequences — confirm against the serde_json docs.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Company<'a> {
/// Bare string form: `"company": "ACME"`.
Name(&'a str),
/// Object form: `"company": {"name": "ACME"}`.
NameRec{ name: &'a str},
}
/// One input JSON object exactly as deserialized, borrowing from the input buffer.
///
/// `debt`, `phones` and `phone` are kept as raw JSON text (`RawValue`) because
/// their JSON type varies between records; `extract_data` interprets them.
#[derive(Debug, Deserialize)]
struct Rec<'a> {
/// Company name, in either of the shapes accepted by `Company`.
#[serde(borrow)]
company: Company<'a>,
/// Debt amount as raw JSON (parsed to `f64` in `extract_data`).
debt: &'a RawValue,
/// Either a JSON array of phones or a single phone value, as raw JSON.
phones: &'a RawValue,
/// Optional additional single phone, as raw JSON.
phone: Option<&'a RawValue>,
}
// Source data: one input record after normalisation by `extract_data`.
// All string fields still borrow from the input buffer.
struct DebtRec<'a> {
/// Company name taken from the record.
pub company: &'a str,
/// Every phone found in the record's `phones` and `phone` fields.
pub phones: Vec<&'a str>,
/// Debt amount; `extract_data` falls back to 0.0 when it cannot be parsed.
pub debt: f64
}
// Result data: one aggregated debtor built up by `process_object`.
struct Debtor {
/// Names of all companies seen in the records attributed to this debtor.
companies: HashSet<String>,
/// All phones seen in the records attributed to this debtor.
phones: HashSet<String>,
/// Sum of the debts of the records attributed to this debtor.
debt: f64
}
/// The full aggregation result plus the lookup index used while building it.
struct Debtors {
/// All debtors, in the order they were first encountered.
all: Vec<Debtor>,
/// Maps a phone to the position in `all` of the debtor it was last assigned to.
index_by_phone: HashMap<String, usize>
}
impl Debtor {
    /// Creates a debtor with no companies, no phones and a debt of zero.
    fn new() -> Self {
        let companies = HashSet::default();
        let phones = HashSet::default();
        Self {
            companies,
            phones,
            debt: 0.0,
        }
    }
}
impl Debtors {
    /// Creates an empty result set: no debtors and an empty phone index.
    fn new() -> Self {
        let all = Vec::new();
        let index_by_phone = HashMap::default();
        Self {
            all,
            index_by_phone,
        }
    }
}
/// Entry point: processes every file given as `-f <file>`, then prints the
/// aggregated debtors, and finally prints the usage line if the last `-f`
/// seen (if any) was never followed by a file name.
fn main() {
    let mut debtors = Debtors::new();
    // Argument state:
    //   0 - no "-f" seen yet,
    //   1 - the previous argument was "-f" (a file name is expected next),
    //   2 - the most recent "-f" has received its file name.
    let mut state = 0;
    for arg in std::env::args() {
        match (arg.as_str(), state) {
            ("-f", _) => state = 1,
            (path, 1) => {
                state = 2;
                println!("{}:", path);
                let started = std::time::Instant::now();
                let (count, errcount) = process_file(path, &mut debtors);
                println!(
                    "PROCESSED: {} objects in {:?}, {} errors found",
                    count,
                    started.elapsed(),
                    errcount
                );
            }
            // Anything not preceded by "-f" is ignored.
            _ => {}
        }
    }

    for (number, debtor) in debtors.all.iter().enumerate() {
        println!("-------------------------------");
        println!("#{}: debt: {}", number, debtor.debt);
        println!(
            "companies: {:?}\nphones: {:?}",
            debtor.companies, debtor.phones
        );
    }

    if state < 2 {
        println!("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...");
    }
}
fn process_file(fname: &str, res: &mut Debtors) -> (i32, i32) {
use std::io::prelude::*;
let mut count = 0;
let mut errcount = 0;
match std::fs::File::open(fname) {
Ok(file) => {
let mmap = match unsafe{ Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(e) => {
println!("Cannot open '{}': {:?}", fname, e);
return (0, 0);
}
};
let mut braces = 0;
let mut start_idx = 0;
for (idx, &b) in mmap.iter().enumerate() {
if b == b'{' {
if braces == 0 {
start_idx = idx;
}
braces += 1;
}
else if b == b'}' {
braces -= 1;
if braces == 0 { //object formed !
let obj = &mmap[start_idx ..= idx];
match serde_json::from_slice(obj) {
Ok(o) => {
process_object(o, res);
}
Err(e) => {
println!("JSON ERROR: {}:\n{}", e, String::from_utf8_lossy(obj));
errcount +=1;
}
}
count += 1;
}
}
}
}
Err(e) => {
println!("ERROR: {}", e);
}
}
return (count, errcount);
}
fn process_object(o: Rec, res: &mut Debtors) {
let dr = extract_data(o);
//println!("{} - {:?} - {}", &dr.company, &dr.phones, &dr.debt,);
let mut di: Option<usize> = Option::None; //debtor index search result
for &p in &dr.phones {
if let Some(i) = res.index_by_phone.get(p) {
di = Some(*i);
break;
}
}
match di {
Some(i) => { //existing debtor
let d = &mut res.all[i];
d.companies.insert(dr.company.to_string());
for p in &dr.phones {
d.phones.insert(p.to_string());
res.index_by_phone.insert(p.to_string(), i);
}
d.debt += dr.debt;
}
None => { //new debtor
let mut d = Debtor::new();
let i = res.all.len();
d.companies.insert(dr.company.to_string());
for p in &dr.phones {
d.phones.insert(p.to_string());
res.index_by_phone.insert(p.to_string(), i);
}
d.debt = dr.debt;
res.all.push(d);
}
}
}
/// Returns the textual content of a raw JSON scalar, borrowed from `raw`.
///
/// * JSON string: the text between the quotes. Escape sequences are NOT
///   decoded; the slice is returned exactly as written in the input.
/// * JSON number, including negative numbers: the number's text as-is.
/// * Anything else (`null`, `true`, `false`, arrays, objects): `""`.
fn raw2str(raw: &RawValue) -> &str {
    let payload = raw.get();
    match payload.as_bytes().first() {
        // Assumes `RawValue` holds well-formed JSON, so a value that starts
        // with a quote also ends with one and is at least two bytes long.
        Some(b'"') => &payload[1..payload.len() - 1],
        // A JSON number starts with a digit or with '-'; without the '-' case
        // negative values would be reported as "".
        Some(b'-') | Some(b'0'..=b'9') => payload,
        _ => "",
    }
}
/// Normalises a deserialized record into a `DebtRec`.
///
/// * `company` is taken from whichever shape the input used.
/// * `phones` collects the elements of the `phones` array (or the single
///   `phones` value when it is not an array) followed by the optional
///   `phone` value. Values that `raw2str` turns into `""` are dropped, so
///   that records without a usable phone are not linked to each other
///   through an empty-string phone.
/// * `debt` is parsed as `f64`; an unparsable value becomes `0.0`.
fn extract_data(o: Rec) -> DebtRec {
    let company = match o.company {
        Company::Name(c) | Company::NameRec { name: c } => c,
    };

    let mut phones = vec![];
    let payload = o.phones.get();
    if payload.starts_with('[') {
        // Best effort: an array that cannot be split into raw elements
        // contributes no phones.
        let ps: Vec<&RawValue> = serde_json::from_str(payload).unwrap_or_else(|_| vec![]);
        phones.extend(ps.into_iter().map(raw2str));
    } else {
        phones.push(raw2str(o.phones));
    }
    if let Some(p) = o.phone {
        phones.push(raw2str(p));
    }
    // "" is what `raw2str` yields for unsupported values; it is not a phone.
    phones.retain(|p| !p.is_empty());

    let debt = raw2str(o.debt).parse::<f64>().unwrap_or(0.0);

    DebtRec {
        company,
        phones,
        debt,
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment