Skip to content

Instantly share code, notes, and snippets.

@leecade
Forked from sevagh/fs_trie.rs
Created February 28, 2024 13:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leecade/9aff037756d432456f107fbb9109b467 to your computer and use it in GitHub Desktop.
Save leecade/9aff037756d432456f107fbb9109b467 to your computer and use it in GitHub Desktop.
Rust trie that serializes to filesystem
//! # A trie that can be saved to and loaded from a file
//!
//! This crate implements a Trie with char keys.
//! The trie can be saved to and loaded from a file on the local filesystem.
//! This allows the user to persist the trie between executions.
//!
//! Basic example:
//!
//! ```ignore
//! let trie_file = "/path/to/trie-file";
//! let mut trie = fs_trie::Trie::default();
//! trie.insert("abc", String::from("contents1"));
//! trie.insert("abd", String::from("contents2"));
//! trie.insert("hello", String::from("world"));
//! trie.save_to_file(trie_file).expect(
//! "Couldn't save trie to file",
//! );
//! let trie2 = fs_trie::Trie::load_from_file(trie_file).expect("Couldn't load trie from file");
//! assert_eq!(trie, trie2);
//! ```
#![deny(missing_docs)]
#![feature(box_patterns)]
#![feature(test)]
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
#[cfg(test)]
extern crate test;
#[cfg(test)]
extern crate tempfile;
extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate bincode;
use std::fs::OpenOptions;
use std::io::{self, BufReader, BufWriter};
use std::collections::HashMap;
use bincode::{serialize_into, deserialize_from, Infinite};
pub use bincode::Error as BincodeError;
type Result<T> = std::result::Result<T, BincodeError>;
/// The Trie struct. The children are a `std::collections::HashMap` of other Tries.
#[derive(Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct Trie<V> {
key: Option<char>,
children: HashMap<Option<char>, Trie<V>>,
contents: Option<V>,
}
impl<V> Trie<V> {
/// Returns a Trie<V> from a file - can be empty or a previously saved trie.
/// The type of V (the Trie values) must be known
pub fn load_from_file(path: &str) -> Result<Self>
where
for<'de> V: serde::Serialize + serde::Deserialize<'de>,
{
let f = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(path)
.expect("Couldn't open trie file");
let mut br = BufReader::new(f);
match deserialize_from(&mut br, Infinite) {
Ok(x) => Ok(x),
Err(box bincode::ErrorKind::IoError(e)) => {
if e.kind() == io::ErrorKind::UnexpectedEof {
return Ok(Trie {
key: None,
children: HashMap::new(),
contents: None,
});
}
Err(Box::new(bincode::ErrorKind::IoError(e)))
}
Err(e) => Err(e),
}
}
/// Inserts a Trie entry. The &str key is split into its chars to generate
/// the children.
pub fn insert(&mut self, key: &str, contents: V) -> Option<V> {
let mut chars = key.chars();
let mut key_i_need = chars.next();
if self.key == key_i_need {
if chars.size_hint().0 == 0 {
let ret = self.contents.take();
self.contents = Some(contents);
return ret;
}
key_i_need = chars.next();
}
if let Some(c) = self.children.get_mut(&key_i_need) {
return c.insert(chars.as_str(), contents);
}
let mut trie = Trie {
key: key_i_need,
children: HashMap::new(),
contents: None,
};
trie.insert(chars.as_str(), contents);
self.children.insert(key_i_need, trie);
None
}
/// Get an entry from the Trie. Traverses the trie with the chars in the key.
pub fn get(&self, key: &str) -> Option<&V> {
let mut chars = key.chars();
let mut key_i_need = chars.next();
if self.key == key_i_need {
if chars.size_hint().0 == 0 {
return self.contents.as_ref();
}
key_i_need = chars.next();
}
if let Some(c) = self.children.get(&key_i_need) {
return c.get(chars.as_str());
}
None
}
/// Get a mutable reference to the Trie entry. Use this to modify entries.
pub fn get_mut(&mut self, key: &str) -> Option<&mut V> {
let mut chars = key.chars();
let mut key_i_need = chars.next();
if self.key == key_i_need {
if chars.size_hint().0 == 0 {
return self.contents.as_mut();
}
key_i_need = chars.next();
}
if let Some(c) = self.children.get_mut(&key_i_need) {
return c.get_mut(chars.as_str());
}
None
}
/// Save the Trie to a file.
pub fn save_to_file(&mut self, path: &str) -> Result<()>
where
for<'de> V: serde::Serialize + serde::Deserialize<'de>,
{
let f = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(path)
.expect("Couldn't open trie file");
let mut bw = BufWriter::new(f);
serialize_into(&mut bw, self, Infinite)
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::NamedTempFile;
use std::collections::BTreeMap;
use quickcheck::TestResult;
use test::Bencher;
fn insertion_test_helper(mut v: Vec<(String, String)>, replace: bool) -> TestResult {
let v = v.iter_mut()
.map(|&mut (ref i, ref j)| {
let v = if !replace { j } else { "this_will_be_replaced" };
(i, v)
})
.collect::<Vec<_>>();
let mut t = Trie::default();
let mut bt = BTreeMap::new();
for &(i, j) in v.iter() {
assert_eq!(t.insert(i, j), bt.insert(i, j));
}
for &(i, _) in v.iter() {
assert_eq!(t.get(i), bt.get(i));
}
TestResult::from_bool(true)
}
#[test]
fn basic_insertion() -> () {
let testcases = vec![
(String::from("def"), String::from("contents1")),
(String::from("abc"), String::from("contents2")),
(String::from("abf"), String::from("contents3")),
];
insertion_test_helper(testcases, false);
}
#[test]
fn test_get_mut() -> () {
let mut trie = Trie::default();
trie.insert("abc", vec!["test1"]);
{
let thing_to_modify = trie.get_mut("abc").unwrap();
thing_to_modify.push("test2");
}
assert_eq!(*trie.get("abc").unwrap(), vec!["test1", "test2"]);
}
quickcheck! {
fn random_insertion(v: Vec<(String, String)>) -> TestResult {
insertion_test_helper(v, false)
}
fn replace_insertion(v: Vec<(String, String)>) -> TestResult {
insertion_test_helper(v, true)
}
}
#[test]
fn save_to_file_roundtrip() -> () {
let trie_file = NamedTempFile::new().expect("failed to create temporary file");
let trie_file_name = trie_file.path().to_str().unwrap();
let mut trie = Trie::default();
trie.insert("abc", String::from("contents1"));
trie.insert("abd", String::from("contents2"));
trie.insert("hello", String::from("world"));
trie.save_to_file(trie_file_name).expect(
"Couldn't save trie to file",
);
let trie2 = Trie::load_from_file(trie_file_name).expect("Couldn't load trie from file");
assert_eq!(trie, trie2);
}
#[test]
fn load_from_empty_file() -> () {
let trie_file = NamedTempFile::new().expect("failed to create temporary file");
let trie_file_name = trie_file.path().to_str().unwrap();
let trie = Trie::<Trie<String>>::load_from_file(trie_file_name)
.expect("Couldn't load trie from file");
assert_eq!(trie.key, None);
assert_eq!(trie.children.len(), 0);
assert_eq!(trie.contents, None);
}
#[bench]
fn bench_many_children(b: &mut Bencher) {
let mut trie = Trie::default();
let utf8_max_char = 128;
let last_child = &String::from_utf8(vec![utf8_max_char - 1]).unwrap();
for i in 0..utf8_max_char {
trie.insert(&String::from_utf8(vec![i]).unwrap(), "");
}
assert_eq!((utf8_max_char) as usize, trie.children.len());
b.iter(|| trie.get(&last_child));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment