[package]
name = "indexed_hash_map"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
mongodb = "2.3.0"
rustc-hash = "1.1.0"
rand = "0.8.5"
criterion = "0.4.0"

[profile.dev]
opt-level = 0

[profile.release]
opt-level = 3
use core::cell::RefCell;
use mongodb::bson::oid;
use rand::prelude::*;
use rustc_hash::FxHashMap;
use std::rc::Rc;
// use std::hint::black_box;
use criterion::black_box;
use std::time::Instant;
pub struct Item {
    pub id: oid::ObjectId,
    pub idx: usize,
    pub out_id: Vec<oid::ObjectId>,
    pub out_idx: Vec<usize>,
}

pub struct Edge {
    pub id: oid::ObjectId,
    pub idx: usize,
    pub from_id: oid::ObjectId,
    pub from_idx: usize,
    pub to_id: oid::ObjectId,
    pub to_idx: usize,
}

pub struct ItemRef {
    pub id: oid::ObjectId,
    pub idx: usize,
    pub out: Vec<Rc<RefCell<EdgeRef>>>,
}

pub struct EdgeRef {
    pub id: oid::ObjectId,
    pub from_idx: usize,
    pub from: Rc<RefCell<ItemRef>>,
    pub to_idx: usize,
    pub to: Rc<RefCell<ItemRef>>,
}

const TOTAL: usize = 3000;
const NODE_EDGES: usize = 3;
const READS: usize = 1 + NODE_EDGES * 2; // lookups per item per pass: the item itself, plus edge + target item for each edge = 7
fn main() {
    hash();
    vec();
    cell();
}
fn hash() {
    let mut item_map = FxHashMap::default(); // HashMap with rustc_hash, should be fast?
    let mut edge_map = FxHashMap::default(); // HashMap with rustc_hash, should be fast?
    let mut item_keys = Vec::new(); // remember used keys
    let mut rng = rand::thread_rng();
    for idx in 0..TOTAL {
        // generate TOTAL records
        let item = Item {
            id: oid::ObjectId::new(),
            idx,
            out_idx: Vec::new(),
            out_id: Vec::new(),
        };
        item_keys.push(item.id); // remember the key used
        item_map.insert(item.id, item); // push the record into the map this time
    }
    let mut ridx = 0;
    for idx in 0..TOTAL {
        // generate NODE_EDGES random edges per item
        for _ in 0..NODE_EDGES {
            let from_idx = idx;
            let to_idx = rng.gen_range(0..TOTAL);
            let from_id = &item_keys[from_idx];
            let to_id = &item_keys[to_idx];
            let edge = Edge {
                id: oid::ObjectId::new(), // new random ObjectId
                idx: ridx,                // index
                from_idx: item_map[from_id].idx,
                to_idx: item_map[to_id].idx,
                from_id: item_map[from_id].id,
                to_id: item_map[to_id].id,
            };
            let from = item_map.get_mut(from_id).unwrap();
            from.out_id.push(edge.id);
            from.out_idx.push(edge.idx);
            edge_map.insert(edge.id, edge);
            ridx += 1;
        }
    }
    let now = Instant::now();
    for _ in 0..TOTAL {
        // TOTAL passes x TOTAL items x READS lookups each = 63M reads
        for id in &item_keys {
            let item = &item_map[id];
            for ri in 0..NODE_EDGES {
                black_box(&item_map[&edge_map[&item.out_id[ri]].to_id]); // just so the compiler can't skip this code
            }
        }
    }
    let elapsed = now.elapsed();
    println!(
        "{:.2?}M HashMap (rustc_hash) read time: {:.2?}",
        READS * TOTAL * TOTAL / 1000000,
        elapsed
    );
}
fn vec() {
    let mut item_vec = Vec::new(); // items registry
    let mut edge_vec = Vec::new(); // edges registry
    let mut item_indexes = Vec::new(); // list of used item indexes (so the benchmark iterates over a Vec of keys, same as for the HashMap)
    let mut rng = rand::thread_rng();
    for idx in 0..TOTAL {
        // generate TOTAL records
        let item = Item {
            id: oid::ObjectId::new(), // new random ObjectId
            idx,                      // index
            out_id: Vec::new(),
            out_idx: Vec::new(),
        };
        item_vec.push(item); // push to Vec as main storage
        item_indexes.push(idx); // remember indexes used
    }
    let mut ridx = 0;
    for idx in 0..TOTAL {
        // generate NODE_EDGES random edges per item
        for _ in 0..NODE_EDGES {
            let from_idx = idx;
            let to_idx = rng.gen_range(0..TOTAL);
            let edge = Edge {
                id: oid::ObjectId::new(), // new random ObjectId
                idx: ridx,                // index
                from_idx,
                to_idx,
                from_id: item_vec[from_idx].id,
                to_id: item_vec[to_idx].id,
            };
            item_vec[from_idx].out_id.push(edge.id);
            item_vec[from_idx].out_idx.push(edge.idx);
            edge_vec.push(edge);
            ridx += 1;
        }
    }
    item_indexes.shuffle(&mut rng); // randomize access order, outside the timed section
    let now = Instant::now();
    for _ in 0..TOTAL {
        // TOTAL passes x TOTAL items x READS lookups each = 63M reads
        for idx in &item_indexes {
            let item = &item_vec[*idx]; // noob here, not sure if doing nice here with ref/deref
            for ri in 0..NODE_EDGES {
                black_box(&item_vec[edge_vec[item.out_idx[ri]].to_idx]); // just so the compiler can't skip this code
            }
        }
    }
    let elapsed = now.elapsed();
    println!(
        "{:.2?}M Vec read time: {:.2?}",
        READS * TOTAL * TOTAL / 1000000,
        elapsed
    );
}
fn cell() {
    let mut item_vec = Vec::new(); // items registry
    let mut edge_vec = Vec::new(); // edges registry
    let mut item_indexes = Vec::new(); // remember used indexes
    let mut rng = rand::thread_rng();
    for idx in 0..TOTAL {
        // generate TOTAL records the same way
        let item = Rc::new(RefCell::new(ItemRef {
            id: oid::ObjectId::new(),
            idx,
            out: Vec::new(),
        }));
        item_indexes.push(item.borrow().idx); // remember the index used
        item_vec.push(item); // push to Vec as main storage
    }
    for idx in 0..TOTAL {
        // generate NODE_EDGES random edges per item
        for _ in 0..NODE_EDGES {
            let from_idx = idx;
            let to_idx = rng.gen_range(0..TOTAL);
            let edge = Rc::new(RefCell::new(EdgeRef {
                id: oid::ObjectId::new(), // new random ObjectId
                from_idx,
                to_idx,
                from: item_vec[from_idx].clone(),
                to: item_vec[to_idx].clone(),
            }));
            edge.borrow_mut().from.borrow_mut().out.push(edge.clone());
            edge_vec.push(edge);
        }
    }
    let now = Instant::now();
    for _ in 0..TOTAL {
        // TOTAL passes x TOTAL items x READS lookups each = 63M reads
        for idx in &item_indexes {
            let item = &item_vec[*idx];
            for ri in 0..NODE_EDGES {
                black_box(&item.borrow().out[ri].borrow().to); // just so the compiler can't skip this code
            }
        }
    }
    let elapsed = now.elapsed();
    println!(
        "{:.2?}M Vec/Rc/RefCell read time: {:.2?}",
        READS * TOTAL * TOTAL / 1000000,
        elapsed
    );
}
63M HashMap (rustc_hash) read time: 22.645s
63M Vec read time: 103.53ms
63M Vec/Rc/RefCell read time: 65.88ms
Thanks, I switched to criterion, but with no visible effect.
I also added a third approach using Rc+RefCell - it's even faster and more readable, and it's also the kind of thing the Rust docs recommend for graphs, so it looks like I'll pick it in the end.
Oh, I meant not black_box itself, but the whole library/harness used to perform the measurements.
It is really smart: it measures each function hundreds of times and then calculates an average result, and it even applies statistical methods to throw out weird results that don't match.
Say you have 98 measurements with similar times and two outliers because your computer was doing something in the background - it would throw out those 2 measurements.
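For reference, a rough sketch of what such a criterion bench for the read loops above could look like. This is only a sketch under assumptions: the setup and read phases of hash()/vec() are split into hypothetical build_*/read_* helpers exposed from a lib target, criterion is moved to [dev-dependencies], and Cargo.toml gets a [[bench]] entry with name = "reads" and harness = false.

// benches/reads.rs -- sketch only; build_hash/read_hash and build_vec/read_vec are
// hypothetical helpers holding the setup and read phases of hash() and vec() above.
use criterion::{criterion_group, criterion_main, Criterion};
use indexed_hash_map::{build_hash, build_vec, read_hash, read_vec};

fn bench_reads(c: &mut Criterion) {
    // build the maps/vectors once, outside the measured closure
    let (item_map, edge_map, item_keys) = build_hash();
    c.bench_function("hashmap reads", |b| {
        b.iter(|| read_hash(&item_map, &edge_map, &item_keys))
    });

    let (item_vec, edge_vec, item_indexes) = build_vec();
    c.bench_function("vec reads", |b| {
        b.iter(|| read_vec(&item_vec, &edge_vec, &item_indexes))
    });
}

criterion_group!(benches, bench_reads);
criterion_main!(benches);

criterion then reports mean/median timings and flags outliers for each bench_function, which is exactly the statistics described above.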
I can try uploading my test project with criterion somewhere tomorrow, if you like.
It's late today, and the electricity just went out.
Feel free to ping me if I forget to upload it tomorrow.
This exact case is ultimately simple and isolated, so my approach is good enough, but I will keep this crate in mind, thanks.
Take care of the electricity and of yourself!
I've uploaded this as an example.
But I don't know, perhaps it's too complex an example. You could try the examples from the criterion documentation linked in my original comment.
"This exact case is ultimately simple and isolated, so my approach is good enough."
That's a bold claim.
You can't guarantee that it will give the same results on each execution.
You should use criterion for benchmarks.