Skip to content

Instantly share code, notes, and snippets.

@matklad
Last active April 3, 2020 22:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matklad/44ba1a5a6168bc0c26c995131c007907 to your computer and use it in GitHub Desktop.
Save matklad/44ba1a5a6168bc0c26c995131c007907 to your computer and use it in GitHub Desktop.
Concurrent Interner for Rust
use std::{
cell::RefCell,
hash::{BuildHasher, Hash, Hasher},
sync::Arc,
};
// hashbrown = "0.7"
use hashbrown::{hash_map::DefaultHashBuilder, HashMap};
// parking_lot = "0.10"
use parking_lot::{Mutex, RwLock};
#[derive(PartialEq, Eq, Clone, Copy)]
pub struct Symbol(u32);
pub struct GlobalInterner<K: 'static> {
map_arena: Mutex<(HashMap<&'static K, Symbol>, Arena<K>)>,
vec: RwLock<Vec<&'static K>>,
}
pub struct LocalInterner<K: 'static> {
global: Arc<GlobalInterner<K>>,
map: HashMap<&'static K, Symbol>,
vec: RefCell<Vec<Option<&'static K>>>,
}
impl<K: 'static + Hash + Eq> GlobalInterner<K> {
pub fn with_capacity(cap: usize) -> Arc<GlobalInterner<K>> {
Arc::new(GlobalInterner {
map_arena: Mutex::new((Default::default(), Arena::with_capacity(cap))),
vec: Default::default(),
})
}
pub fn fork(self: &Arc<GlobalInterner<K>>) -> LocalInterner<K> {
LocalInterner {
global: Arc::clone(self),
map: Default::default(),
vec: Default::default(),
}
}
pub fn intern(&self, key: K) -> Symbol {
self.intern_hashed(hash(&key), key).1
}
fn intern_hashed(&self, hash: u64, key: K) -> (&K, Symbol) {
let mut map_arena = self.map_arena.lock();
let (map, arena) = &mut *map_arena;
let (&mut interned, &mut sym) = map
.raw_entry_mut()
.from_key_hashed_nocheck(hash, &key)
.or_insert_with(|| {
let interned = arena.push(key);
let interned: &'static K = unsafe { &*interned };
let mut vec = self.vec.write();
let sym = Symbol(vec.len() as u32);
vec.push(interned);
(interned, sym)
});
(interned, sym)
}
pub fn lookup(&self, symbol: Symbol) -> &K {
self.vec.read()[symbol.0 as usize]
}
}
impl<K: Hash + Eq + 'static> LocalInterner<K> {
pub fn intern(&mut self, key: K) -> Symbol {
let global = &*self.global;
let hash = hash(&key);
let (&mut interned, &mut sym) = self
.map
.raw_entry_mut()
.from_key_hashed_nocheck(hash, &key)
.or_insert_with(|| {
let (interned, sym) = global.intern_hashed(hash, key);
let interned: &'static K = unsafe { &*(interned as *const _) };
(interned, sym)
});
self.record(interned, sym);
sym
}
pub fn lookup(&self, sym: Symbol) -> &K {
match self.vec.borrow().get(sym.0 as usize) {
Some(Some(interned)) => return interned,
_ => (),
}
let interned = self.global.lookup(sym);
let interned: &'static K = unsafe { &*(interned as *const _) };
self.record(interned, sym);
interned
}
fn record(&self, interned: &'static K, sym: Symbol) {
let mut vec = self.vec.borrow_mut();
let len = vec.len().max(sym.0 as usize + 1);
vec.resize(len, None);
vec[sym.0 as usize] = Some(interned);
}
}
struct Arena<K> {
buf: Vec<K>,
full: Vec<Vec<K>>,
}
impl<K> Arena<K> {
fn with_capacity(cap: usize) -> Arena<K> {
let cap = cap.next_power_of_two();
Arena {
buf: Vec::with_capacity(cap),
full: Vec::new(),
}
}
fn push(&mut self, value: K) -> *const K {
if self.buf.len() == self.buf.capacity() {
let new_buf = Vec::with_capacity(self.buf.capacity() * 2);
let old_buf = std::mem::replace(&mut self.buf, new_buf);
self.full.push(old_buf);
}
self.buf.push(value);
self.buf.last().unwrap()
}
}
fn hash<K: Hash>(key: &K) -> u64 {
let mut hasher = DefaultHashBuilder::new().build_hasher();
key.hash(&mut hasher);
hasher.finish()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment