Skip to content

Instantly share code, notes, and snippets.

@carlomilanesi
Created May 27, 2017 22:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carlomilanesi/083b12112f860a33c8ae78e7f97c47ec to your computer and use it in GitHub Desktop.
Save carlomilanesi/083b12112f860a33c8ae78e7f97c47ec to your computer and use it in GitHub Desktop.
max_column_sum_by_key
extern crate ordermap;
extern crate memmap;
extern crate num;
/// Prints the key whose values have the largest sum in a tab-separated file.
///
/// Command-line arguments:
/// 1. path of the data file;
/// 2. zero-based index of the key field;
/// 3. zero-based index of the value field.
///
/// Every row is split on tab characters; the value field is added to the
/// running sum kept for the row's key field. When the whole file has been
/// read, the key with the largest sum is printed as `max_key: K sum: S`.
///
/// Rows are terminated by `\n`; a final row without a terminator is still
/// counted, a `\r` right before the terminator is ignored, and empty rows
/// are skipped.
///
/// # Panics
///
/// Panics with a descriptive message when an argument is missing or is not
/// a number, when the file cannot be mapped, when a row has too few fields,
/// when a key or value field is not an in-range decimal number, when a sum
/// overflows `ValueType`, or when the file contains no data rows.
fn main() {
    type KeyType = u32;
    type ValueType = u32;

    /// Parses an unsigned decimal number made only of ASCII digits.
    ///
    /// Returns `None` when `s` contains a byte that is not an ASCII digit or
    /// when the number does not fit in `T`. An empty slice parses as zero.
    fn parse_bytes_to_int<T>(s: &[u8]) -> Option<T>
        where T: num::PrimInt + num::FromPrimitive
    {
        let ten = T::from_u8(10).expect("10 fits in every primitive integer type");
        let mut result = T::zero();
        for &ch in s {
            // Bytes below b'0' wrap around to large values, so one comparison
            // rejects everything outside b'0'..=b'9'.
            let digit = ch.wrapping_sub(b'0');
            if digit > 9 {
                return None;
            }
            let digit = T::from_u8(digit)
                .expect("a decimal digit fits in every primitive integer type");
            result = match result.checked_mul(&ten).and_then(|n| n.checked_add(&digit)) {
                Some(n) => n,
                None => return None,
            };
        }
        Some(result)
    }

    /// Parses one field of a row, panicking with the row position on failure.
    ///
    /// `role` names the field ("key" or "value") in the panic message.
    fn parse_field<T>(field: &[u8], role: &str, line_number: usize) -> T
        where T: num::PrimInt + num::FromPrimitive
    {
        parse_bytes_to_int(field).unwrap_or_else(|| {
            panic!(
                "line {}: {} field is not a decimal number in range",
                line_number,
                role
            )
        })
    }

    let mut args = std::env::args().skip(1);
    let path = args.next().expect("supply path of data file");
    let key_field_index: usize = args
        .next()
        .expect("supply key field index")
        .parse()
        .expect("key field index must be a natural number");
    let value_field_index: usize = args
        .next()
        .expect("supply value field index")
        .parse()
        .expect("value field index must be a natural number");

    // Alternative container with the same API surface used below:
    // std::collections::HashMap::<KeyType, ValueType>::new()
    let mut sum_by_key = ordermap::OrderMap::<KeyType, ValueType>::new();

    // Alternative to memory-mapping: read the whole file into a `Vec<u8>`
    // (`std::fs::File::open` + `std::io::Read::read_to_end`) and borrow it.
    let mmap = memmap::Mmap::open_path(path, memmap::Protection::Read)
        .expect("cannot memory-map the data file");
    // SAFETY: the mapping is read-only and `mmap` outlives every use of `s`.
    // NOTE(review): this presumably still requires that no other process
    // modifies or truncates the file while the program runs, which cannot be
    // enforced from here; confirm against the memmap documentation.
    let s = unsafe { mmap.as_slice() };

    // Fields after the last one of interest never need to be looked at.
    let n_fields_needed = std::cmp::max(key_field_index, value_field_index).saturating_add(1);

    for (i_line, raw_line) in s.split(|&b| b == b'\n').enumerate() {
        let line_number = i_line + 1;

        // Tolerate CRLF line endings.
        let line = match raw_line.split_last() {
            Some((&b'\r', rest)) => rest,
            _ => raw_line,
        };
        // Also covers the empty slice produced after a trailing newline.
        if line.is_empty() {
            continue;
        }

        // Per-row state: nothing can leak in from a previous row.
        let mut key: Option<KeyType> = None;
        let mut value: Option<ValueType> = None;
        let fields = line.split(|&b| b == b'\t').take(n_fields_needed);
        for (i_field, field) in fields.enumerate() {
            // Two independent checks so that key and value may share a column.
            if i_field == key_field_index {
                key = Some(parse_field(field, "key", line_number));
            }
            if i_field == value_field_index {
                value = Some(parse_field(field, "value", line_number));
            }
        }
        let (key, value) = match (key, value) {
            (Some(k), Some(v)) => (k, v),
            _ => panic!(
                "line {}: row has fewer than {} fields",
                line_number,
                n_fields_needed
            ),
        };

        let sum = sum_by_key.entry(key).or_insert(0);
        *sum = sum.checked_add(value).unwrap_or_else(|| {
            panic!("line {}: sum for key {} overflows", line_number, key)
        });
    }

    let (max_key, max_sum) = sum_by_key
        .iter()
        .max_by_key(|&(_, sum)| sum)
        .expect("data file contains no data rows");
    println!("max_key: {} sum: {}", max_key, max_sum);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment