Last active
February 3, 2023 15:08
-
-
Save YuriGor/5e0dc379a340e5d13b53c5501892ab14 to your computer and use it in GitHub Desktop.
Vec vs HashMap read performance (ObjectId keys + rustc_hash)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "indexed_hash_map" | |
version = "0.1.0" | |
edition = "2021" | |
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
[dependencies] | |
mongodb = "2.3.0" | |
rustc-hash = "1.1.0" | |
rand = "0.8.5" | |
criterion = "0.4.0" | |
[profile.dev] | |
opt-level = 0 | |
[profile.release] | |
opt-level = 3 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use core::cell::RefCell; | |
use mongodb::bson::oid; | |
use rand::prelude::*; | |
use rustc_hash::FxHashMap; | |
use std::rc::Rc; | |
// use std::hint::black_box; | |
use criterion::black_box; | |
use std::time::Instant; | |
pub struct Item { | |
pub id: oid::ObjectId, | |
pub idx: usize, | |
pub out_id: Vec<oid::ObjectId>, | |
pub out_idx: Vec<usize>, | |
} | |
pub struct Edge { | |
pub id: oid::ObjectId, | |
pub idx: usize, | |
pub from_id: oid::ObjectId, | |
pub from_idx: usize, | |
pub to_id: oid::ObjectId, | |
pub to_idx: usize, | |
} | |
pub struct ItemRef { | |
pub id: oid::ObjectId, | |
pub idx: usize, | |
pub out: Vec<Rc<RefCell<EdgeRef>>>, | |
} | |
pub struct EdgeRef { | |
pub id: oid::ObjectId, | |
pub from_idx: usize, | |
pub from: Rc<RefCell<ItemRef>>, | |
pub to_idx: usize, | |
pub to: Rc<RefCell<ItemRef>>, | |
} | |
const TOTAL: usize = 3000; | |
const NODE_EDGES: usize = 3; | |
const READS: usize = 1 + NODE_EDGES * 2; | |
fn main() { | |
hash(); | |
vec(); | |
cell(); | |
} | |
fn hash() { | |
let mut item_map = FxHashMap::default(); // HashMap with rustc_hash, should be fast? | |
let mut edge_map = FxHashMap::default(); // HashMap with rustc_hash, should be fast? | |
let mut item_keys = Vec::new(); // remember used keys | |
let mut rng = rand::thread_rng(); | |
for idx in 0..TOTAL { | |
// generate 1K records same way | |
let item = Item { | |
id: oid::ObjectId::new(), | |
idx, | |
out_idx: Vec::new(), | |
out_id: Vec::new(), | |
}; | |
item_keys.push(item.id); // push rec to map this time | |
item_map.insert(item.id, item); // remember key used | |
} | |
let mut ridx = 0; | |
for idx in 0..TOTAL { | |
// generate total random edges | |
for _ in 0..NODE_EDGES { | |
let from_idx = idx; | |
let to_idx = rng.gen_range(0..TOTAL); | |
let from_id = &item_keys[from_idx]; | |
let to_id = &item_keys[to_idx]; | |
let edge = Edge { | |
id: oid::ObjectId::new(), // new random UUID | |
idx: ridx, // index | |
from_idx: item_map[from_id].idx, | |
to_idx: item_map[to_id].idx, | |
from_id: item_map[from_id].id, | |
to_id: item_map[to_id].id, | |
}; | |
let from = item_map.get_mut(from_id).unwrap(); | |
from.out_id.push(edge.id); | |
from.out_idx.push(edge.idx); | |
edge_map.insert(edge.id, edge); | |
ridx += 1; | |
} | |
} | |
let now = Instant::now(); | |
for _ in 0..TOTAL { | |
// total runs x total records = 1M reads | |
for idx in &item_keys { | |
let item = &item_map[&idx]; | |
for ri in 0..NODE_EDGES { | |
black_box(&item_map[&edge_map[&item.out_id[ri]].to_id]); // just to not let compiler skip this code | |
} | |
} | |
} | |
let elapsed = now.elapsed(); | |
println!( | |
"{:.2?}M HashMap (rustc_hash) read time: {:.2?}", | |
READS * TOTAL * TOTAL / 1000000, | |
elapsed | |
); | |
} | |
fn vec() { | |
let mut item_vec = Vec::new(); // items registry | |
let mut edge_vec = Vec::new(); // items registry | |
let mut item_indexes = Vec::new(); // list of used item indexes (just to iterate over Vec of key when benching same as for HashMap) | |
let mut rng = rand::thread_rng(); | |
for idx in 0..TOTAL { | |
// generate total records | |
let item = Item { | |
id: oid::ObjectId::new(), // new random UUID | |
idx, // index | |
out_id: Vec::new(), | |
out_idx: Vec::new(), | |
}; | |
item_vec.push(item); //push to Vec as main storage | |
item_indexes.push(idx); // remember indexes used | |
} | |
let mut ridx = 0; | |
for idx in 0..TOTAL { | |
// generate total random edges | |
for _ in 0..NODE_EDGES { | |
let from_idx = idx; | |
let to_idx = rng.gen_range(0..TOTAL); | |
let edge = Edge { | |
id: oid::ObjectId::new(), // new random UUID | |
idx: ridx, // index | |
from_idx, | |
to_idx, | |
from_id: item_vec[from_idx].id, | |
to_id: item_vec[to_idx].id, | |
}; | |
item_vec[from_idx].out_id.push(edge.id); | |
item_vec[from_idx].out_idx.push(edge.idx); | |
edge_vec.push(edge); | |
ridx += 1; | |
} | |
} | |
let now = Instant::now(); | |
item_indexes.shuffle(&mut rng); | |
for _ in 0..TOTAL { | |
// total runs x total records = 1M reads | |
for idx in &item_indexes { | |
let item = &item_vec[*idx]; // noob here, not sure if doing nice here with ref/deref | |
for ri in 0..NODE_EDGES { | |
black_box(&item_vec[edge_vec[item.out_idx[ri]].to_idx]); // just to not let compiler skip this code | |
} | |
} | |
} | |
let elapsed = now.elapsed(); | |
println!( | |
"{:.2?}M Vec read time: {:.2?}", | |
READS * TOTAL * TOTAL / 1000000, | |
elapsed | |
); | |
} | |
fn cell() { | |
let mut item_vec = Vec::new(); | |
let mut edge_vec = Vec::new(); // HashMap with rustc_hash, should be fast? | |
let mut item_indexes = Vec::new(); // remember used keys | |
let mut rng = rand::thread_rng(); | |
for idx in 0..TOTAL { | |
// generate 1K records same way | |
let item = Rc::new(RefCell::new(ItemRef { | |
id: oid::ObjectId::new(), | |
idx, | |
out: Vec::new(), | |
})); | |
item_indexes.push(item.borrow().idx); // push rec to map this time | |
item_vec.push(item); // remember key used | |
} | |
for idx in 0..TOTAL { | |
// generate total random edges | |
for _ in 0..NODE_EDGES { | |
let from_idx = idx; | |
let to_idx = rng.gen_range(0..TOTAL); | |
let edge = Rc::new(RefCell::new(EdgeRef { | |
id: oid::ObjectId::new(), // new random UUID | |
from_idx, | |
to_idx, | |
from: item_vec[from_idx].clone(), | |
to: item_vec[to_idx].clone(), | |
})); | |
edge.borrow_mut().from.borrow_mut().out.push(edge.clone()); | |
edge_vec.push(edge); | |
} | |
} | |
let now = Instant::now(); | |
for _ in 0..TOTAL { | |
// total runs x total records = 1M reads | |
for idx in &item_indexes { | |
let item = &item_vec[*idx]; | |
for ri in 0..NODE_EDGES { | |
black_box(&item.borrow().out[ri].borrow().to); // just to not let compiler skip this code | |
} | |
} | |
} | |
let elapsed = now.elapsed(); | |
println!( | |
"{:.2?}M Vec/Rc/RefCell read time: {:.2?}", | |
READS * TOTAL * TOTAL / 1000000, | |
elapsed | |
); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
63M HashMap (rustc_hash) read time: 22,645s | |
63M Vec read time: 103,53ms | |
63M Vec/Rc/RefCell read time: 65.88ms |
This exact case is ultimately simple and isolated, so my approach is good enough, but will pay attention to this crate, thanks.
Take care of electricity and yourself!
I've uploaded this as example
But I don't know perhaps it's to complex example. You could try examples from documtation of criterion from link of my original comment
This exact case is ultimately simple and isolated, so my approach is good enough.
That's bold claim.
You can't guarante that it would give same results on each execution.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I can try uploading my test project with criterion somewhere tomorrow, if you like.
Today is late, and electricity was just disabled.
Feel free to ping me, if I forget to upload it tomorrow.