Last active
February 10, 2024 00:24
-
-
Save PurpleMyst/9fd70736ffb29218a4de1f004b1175e9 to your computer and use it in GitHub Desktop.
Simple prefix tree (AKA Trie) implementation in C using 60SLOC (+ a C++ implementation of the same size for fun)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> // assert | |
#include <stdlib.h> // calloc, free | |
/** A prefix tree | |
* An efficient key-value storage with string keys | |
* | |
* There exist two type of nodes: | |
* 1. node->symbol == 0 -> node where a key ended, has node->value | |
* 2. node->symbol != 0 -> node representing a character in a key, has node->child | |
* | |
* Each node potentially has a node->next, which represents a node in the same | |
* key position but with a different symbol | |
*/ | |
struct Trie { | |
char symbol; | |
struct Trie *next; | |
union { | |
void *value; | |
struct Trie *child; | |
}; | |
}; | |
/** Create a new Trie struct | |
* This function assumes NULL == 0, but that's probably true in every machine | |
* I'm not sure it is true in C++ but eh | |
*/ | |
struct Trie *trie_new() { | |
return calloc(1, sizeof(struct Trie)); | |
} | |
/** Walk through a Trie and call a function on every value | |
* This *must* be used to free values | |
* | |
* Example: | |
* struct Trie *trie = trie_new(); | |
* | |
* int *x = malloc(sizeof *x); | |
* *x = 1; | |
* trie_set(trie, "a", x); | |
* | |
* int *y = malloc(sizeof *y); | |
* *y = 2; | |
* trie_set(trie, "b", y); | |
* | |
* assert(trie_get(trie, "a") == x); | |
* assert(trie_get(trie, "b") == y); | |
* | |
* trie_foreach_value(struct Trie *root, free); | |
* trie_free(trie); | |
*/ | |
void trie_foreach_value(struct Trie *root, void (*f)(void*)) { | |
if (root == NULL) return; | |
trie_foreach_value(root->next, f); | |
if (root->symbol != 0) trie_foreach_value(root->child, f); | |
else f(root->value); | |
} | |
/** Free a Trie | |
* This function does not try to free values, for that utilize | |
* trie_foreach_value with a custom free function | |
*/ | |
void trie_free(struct Trie *root) { | |
if (root == NULL) return; | |
trie_free(root->next); | |
if (root->symbol != 0) trie_free(root->child); | |
free(root); | |
} | |
/** Set a key to a value in a Trie | |
* This function requires that `value != NULL` so that missing keys can return | |
* NULL in `trie_get` | |
* | |
* It also requires that the key is non-empty (i.e. strlen(key) != 0) for ease | |
* of implementation | |
*/ | |
void trie_set(struct Trie *root, const char *key, void *value) { | |
assert(value != NULL); | |
assert(*key != 0); | |
struct Trie *conductor = root; | |
/* For each character of the key */ | |
for (; *key != 0; ++key) { | |
/* Find a suitable node at this level */ | |
for (;; conductor = conductor->next) { | |
/* If there already is a node at this level with the key's current | |
* character, just use that and move on to the next level */ | |
if (conductor->symbol == *key) { | |
conductor = conductor->child; | |
break; | |
} | |
/* If we reach this case, it means there are no more nodes in the | |
* current level */ | |
if (conductor->next == NULL) { | |
/* So we create a new node and set it to be the last node's | |
* successor */ | |
struct Trie *next = trie_new(); | |
next->symbol = *key; | |
conductor->next = next; | |
conductor = next; | |
/* Then we move on to the next level as usual */ | |
struct Trie *child = trie_new(); | |
conductor->child = child; | |
conductor = child; | |
break; | |
} | |
} | |
} | |
/* When we're here, we're at the last character of the key and so we just | |
* set the current node's value */ | |
conductor->value = value; | |
} | |
/** Get a key's value in a Trie | |
* This function returns NULL if a key is not present | |
*/ | |
void *trie_get(struct Trie *root, const char *key) { | |
assert(*key != 0); | |
if (root == NULL) return NULL; | |
struct Trie *conductor = root; | |
for (; *key != 0; ++key) { | |
for (; conductor->symbol != *key; conductor = conductor->next) { | |
if (conductor->next == NULL) return NULL; | |
} | |
if (conductor->child == NULL) return NULL; | |
conductor = conductor->child; | |
} | |
return conductor->value; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert> // assert | |
#include <memory> // std::unique_ptr | |
#include <optional> // std::optional | |
#include <string> // std::string | |
#include <variant> // std::variant | |
/** A prefix tree | |
* An efficient key-value storage with string keys | |
* | |
* There exist two type of nodes: | |
* 1. node->symbol == 0 -> node where a key ended, has node->value() | |
* 2. node->symbol != 0 -> node representing a character in a key, has node->child() | |
* | |
* Each node potentially has a node->next, which represents a node in the same | |
* key position but with a different symbol | |
*/ | |
template <typename V> | |
class Trie { | |
char symbol; | |
std::unique_ptr<Trie<V>> next; | |
std::variant<std::monostate, std::unique_ptr<Trie<V>>, V> value_or_child; | |
std::unique_ptr<Trie<V>>& child() { | |
return std::get<std::unique_ptr<Trie<V>>>(this->value_or_child); | |
} | |
V& value() { | |
return std::get<V>(this->value_or_child); | |
} | |
public: | |
std::optional<std::reference_wrapper<V>> get(std::string const& key) { | |
assert(!key.empty()); | |
auto conductor = this; | |
for (char c : key) { | |
for (; conductor->symbol != c; conductor = conductor->next.get()) { | |
if (!conductor->next) return std::nullopt; | |
} | |
if (!conductor->child()) return std::nullopt; | |
conductor = conductor->child().get(); | |
} | |
return std::ref(conductor->value()); | |
} | |
void set(std::string const& key, V && value) { | |
assert(!key.empty()); | |
auto conductor = this; | |
for (char c : key) { | |
/* Iterate over the current level */ | |
for (;; conductor = conductor->next.get()) { | |
/* If we find a node with the current character, use it */ | |
if (conductor->symbol == c) { | |
conductor = conductor->child().get(); | |
break; | |
} | |
/* If we don't find any nodes before we reach the last one */ | |
if (!conductor->next) { | |
/* Create a new node and assign the current character to it */ | |
conductor->next = std::make_unique<Trie>(); | |
conductor->next->symbol = c; | |
conductor = conductor->next.get(); | |
/* Create a new level too and move to it */ | |
conductor->value_or_child = std::make_unique<Trie>(); | |
conductor = conductor->child().get(); | |
break; | |
} | |
} | |
} | |
/* We've iterated over all the characters and hopefully found a leaf | |
* node to terminate on, so we check that the value/child union is in | |
* an unitialized state and if it is assign the value to it */ | |
assert(std::holds_alternative<std::monostate>(conductor->value_or_child)); | |
conductor->value_or_child = value; | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment