Skip to content

Instantly share code, notes, and snippets.

@ayende
Created January 24, 2017 18:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ayende/13dc2d15563ba04638d2e5c223e4bbe4 to your computer and use it in GitHub Desktop.
Save ayende/13dc2d15563ba04638d2e5c223e4bbe4 to your computer and use it in GitHub Desktop.
extern crate alloc;
extern crate libc;
use std::mem;
use std::ptr;
pub struct Trie {
data: alloc::raw_vec::RawVec<u8>,
}
/*
The trie format relies on a single allocation of 32KB in size (using RawVec).
We manipulate the memory there directly, using unsafe code.
The format is:
[Trie Header - global information about the trie, number of items, allocations, etc]
[Root node (first one after the trie header)
Contains the key offset for the portion of the key it owns, this contains just {key_size} bytes with the key
Contains the offset to its children
This contains first a u16 number of children
Then an array of that size of NodeHeader for the children (unsorted right now)
]
No attempt is made to reclaim memory, we always allocate from the end, and if needed, we'll defrag.
*/
#[derive(Debug)]
pub enum TrieErrors {
NewVal,
UpdatedVal,
NotEnoughSpace,
ValNotFound,
RequiresDefrag,
KeyTooBig,
KeyPrefixOnly,
}
enum NodeErrors<'a> {
ValNotFound,
PartialMatch { closest : &'a NodeHeader }
}
#[derive(Debug)]
struct TrieHeader {
items: u16,
next_alloc: u16,
size_used: u16,
reserved: u16,
}
#[derive(Debug)]
struct NodeHeader {
key_offset: u16,
key_size: u8,
has_value: bool,
children_offset: u16,
}
struct NodeAndParent<'a> {
parent: Option<&'a NodeHeader> ,
node : &'a NodeHeader,
node_index_at_parent: u16
}
const TRIE_SIZE: usize = 32 * 1024;
impl NodeHeader {
fn number_of_children(self: &NodeHeader) -> u16 {
unsafe{
let ptr = ((self as *const NodeHeader) as *const u8)
.offset(self.children_offset as isize) as *const u16;
*ptr
}
}
fn set_number_of_children(self: &NodeHeader, val: u16) {
unsafe{
let ptr = ((self as *const NodeHeader) as *const u8)
.offset(self.children_offset as isize) as *mut u16;
*ptr = val
}
}
fn nth_child(self: &NodeHeader, n: u16) -> &NodeHeader {
unsafe{
let child_offset : isize = self.children_offset as isize + (mem::size_of::<u16>() +
(mem::size_of::<NodeHeader>() * n as usize) ) as isize;
&*(((self as *const NodeHeader) as *const u8).offset(child_offset) as *const NodeHeader)
}
}
fn key(self: &NodeHeader) -> *const u8 {
unsafe{
((self as *const NodeHeader) as *const u8).offset(self.key_offset as isize)
}
}
fn val(self: &NodeHeader) -> Option<i64> {
if self.has_value == false {
return None;
}
unsafe{
Some(*(((self as *const NodeHeader) as *const u8).offset(mem::size_of::<NodeHeader>() as isize) as *const i64))
}
}
fn find_match<'a>(self: &'a NodeHeader, parent: Option<&'a NodeHeader>, parent_index: u16, key: &str) -> Result<NodeAndParent, NodeErrors>{
unsafe {
let ptr = ((self as *const NodeHeader) as *const u8)
.offset(mem::size_of::<NodeHeader>() as isize);
if self.key_size as usize > key.len() {
// if the key is longer, this will obviously not match
return Err(NodeErrors::ValNotFound);
}
for i in 0..self.key_size {
if *ptr.offset(i as isize) != key.as_bytes()[i as usize] {
return Err(NodeErrors::ValNotFound);
}
}
// found an exact match, can return this node
if self.key_size as usize == key.len() {
return Ok(NodeAndParent{parent: parent, node_index_at_parent: parent_index, node: self})
}
// found partial match, now need to find to see we can recurse
for i in 0..self.number_of_children() {
let child = self.nth_child(i);
let child_key = child.key();
if *child_key == key.as_bytes()[self.key_size as usize] {
return child.find_match(Some(self), i , &key[self.key_size as usize..]);
}
}
Err(NodeErrors::PartialMatch{closest: self})
}
}
}
impl Trie {
pub fn new() -> Trie {
let vec = alloc::raw_vec::RawVec::with_capacity(TRIE_SIZE);
let mut header = vec.ptr() as *mut TrieHeader;
unsafe {
(*header).items = 0;
(*header).next_alloc = mem::size_of::<TrieHeader>() as u16;
(*header).size_used = mem::size_of::<TrieHeader>() as u16;
}
Trie { data: vec }
}
pub fn write(self: &mut Trie, key: &str, val: i64) -> Result<(), TrieErrors> {
if key.len() > u8::max_value() as usize {
return Err(TrieErrors::KeyTooBig);
}
let header = self.trie_header();
let required_size = mem::size_of::<NodeHeader>() + key.len() + mem::size_of::<i64>();
if required_size > TRIE_SIZE {
return Err(TrieErrors::NotEnoughSpace);
}
if (required_size + header.size_used as usize) > TRIE_SIZE {
return Err(TrieErrors::NotEnoughSpace);
}
if (required_size + header.next_alloc as usize) > TRIE_SIZE {
return Err(TrieErrors::RequiresDefrag);
}
unimplemented!()
}
pub fn read(self: &Trie, key: &str) -> Result<i64, TrieErrors> {
let result = try! (self.find_node(key));
match result.node.val() {
None => Err(TrieErrors::KeyPrefixOnly),
Some(a) => Ok(a)
}
}
fn find_node(self: &Trie, key: &str) -> Result<NodeAndParent, TrieErrors> {
let header = self.trie_header();
if header.items == 0 {
return Err(TrieErrors::ValNotFound);
}
let node = self.node_header(mem::size_of::<TrieHeader>() as isize);
match node.find_match(None, 0, key) {
Err(NodeErrors::ValNotFound) |
Err(NodeErrors::PartialMatch{ closest: _})
=> Err(TrieErrors::ValNotFound),
Ok(matching_node) => Ok(matching_node)
}
}
pub fn delete(self: &mut Trie, key: &str) -> Result<(), TrieErrors> {
{
let result = try! (self.find_node(key));
if result.node.has_value == false{
return Err(TrieErrors::KeyPrefixOnly);
}
}
self.delete_internal(key);
Ok(())
}
fn delete_internal(self: &mut Trie, key: &str) {
let parent_key_size: usize;
let state : u32;
{
let result = self.find_node(key).unwrap(); // we are ensured that this cannot fail from our callers
parent_key_size = key.len() - result.node.key_size as usize;
match result.parent {
None => {
state = 0;
},
Some(parent) => {
if parent.number_of_children() == 1 {
state = 1;
}
else {
// we need to move it over
state = 2
}
}
}
}
match state {
0 => {
// there is just one entry in the trie
let mut header = self.mut_trie_header();
header.items = 0;
header.next_alloc = mem::size_of::<TrieHeader>() as u16;
header.size_used = mem::size_of::<TrieHeader>() as u16;
},
1 => {
// this has just one entry, so need to remove it as well
self.delete_internal(&key[ ..parent_key_size]);
}
_ => unimplemented!()
}
}
fn mut_trie_header(self: &mut Trie) -> &mut TrieHeader {
unsafe {
let header = self.data.ptr() as *mut TrieHeader;
return &mut *header;
}
}
fn trie_header(self: &Trie) -> &TrieHeader {
unsafe {
let header = self.data.ptr() as *const TrieHeader;
return &*header;
}
}
fn node_header_mut(self: &mut Trie, offset: isize) -> &mut NodeHeader {
unsafe {
return &mut *(self.data.ptr().offset(offset) as *mut NodeHeader);
}
}
fn node_header(self: &Trie, offset: isize) -> &NodeHeader {
unsafe {
return &*(self.data.ptr().offset(offset) as *const NodeHeader);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment