Created
May 27, 2017 22:06
-
-
Save carlomilanesi/083b12112f860a33c8ae78e7f97c47ec to your computer and use it in GitHub Desktop.
max_column_sum_by_key
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate ordermap; | |
extern crate memmap; | |
extern crate num; | |
fn main() { | |
type KeyType = u32; | |
type ValueType = u32; | |
let path = std::env::args().nth(1).expect("supply path of data file"); | |
let key_field_index: usize = std::env::args() | |
.nth(2) | |
.expect("supply key field index") | |
.parse() | |
.expect("key field index must be a natural number"); | |
let value_field_index: usize = std::env::args() | |
.nth(3) | |
.expect("supply value field index") | |
.parse() | |
.expect("value field index must be a natural number"); | |
// let mut sum_by_key = std::collections::HashMap::<KeyType, ValueType>::new(); | |
let mut sum_by_key = ordermap::OrderMap::<KeyType, ValueType>::new(); | |
let mut value = 0; | |
let mut key = 0; | |
// This code loads all the file in a vector. | |
// let mut file = std::fs::File::open(path).unwrap(); | |
// let mut file_contents = Vec::<u8>::new(); | |
// use std::io::Read; | |
// file.read_to_end(&mut file_contents).unwrap(); | |
// let s = &file_contents; | |
// This code maps all the file as a slice. | |
let mmap = memmap::Mmap::open_path(path, memmap::Protection::Read).unwrap(); | |
let s = unsafe { mmap.as_slice() }; | |
fn parse_bytes_to_int<T>(s: &[u8]) -> T | |
where T: num::PrimInt + num::FromPrimitive | |
{ | |
s.iter().fold(T::zero(), |result, ch| { | |
(result * T::from_isize(10).unwrap()) + | |
T::from_u8(ch - b'0').unwrap() | |
}) | |
} | |
fn process_field( | |
s: &[u8], | |
i_byte: usize, | |
i_field_begin_byte: usize, | |
i_field: usize, | |
key_field_index: usize, | |
value_field_index: usize, | |
key: &mut KeyType, | |
value: &mut ValueType | |
) { | |
if i_field == key_field_index { | |
*key = parse_bytes_to_int(&s[i_field_begin_byte..i_byte]); | |
} else if i_field == value_field_index { | |
*value = parse_bytes_to_int(&s[i_field_begin_byte..i_byte]); | |
} | |
} | |
let mut i_field = 0; | |
let mut i_field_begin_byte = 0; | |
for (i_byte, &ch) in s.iter().enumerate() { | |
match ch { | |
b'\t' => { | |
process_field( | |
s, | |
i_byte, | |
i_field_begin_byte, | |
i_field, | |
key_field_index, | |
value_field_index, | |
&mut key, | |
&mut value); | |
i_field += 1; | |
i_field_begin_byte = i_byte + 1; | |
} | |
b'\n' => { | |
process_field( | |
s, | |
i_byte, | |
i_field_begin_byte, | |
i_field, | |
key_field_index, | |
value_field_index, | |
&mut key, | |
&mut value); | |
*sum_by_key.entry(key).or_insert(0) += value; | |
i_field = 0; | |
i_field_begin_byte = i_byte + 1; | |
} | |
_ => {} | |
} | |
} | |
let result = sum_by_key.iter().max_by_key(|&(_, v)| v).unwrap(); | |
println!("max_key: {} sum: {}", result.0, result.1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment