Skip to content

Instantly share code, notes, and snippets.

@PurpleMyst
Last active February 10, 2024 00:24
Show Gist options
  • Save PurpleMyst/9fd70736ffb29218a4de1f004b1175e9 to your computer and use it in GitHub Desktop.
Save PurpleMyst/9fd70736ffb29218a4de1f004b1175e9 to your computer and use it in GitHub Desktop.
Simple prefix tree (AKA Trie) implementation in C using 60SLOC (+ a C++ implementation of the same size for fun)
#include <assert.h> // assert
#include <stdlib.h> // calloc, free
/** A prefix tree
* An efficient key-value storage with string keys
*
* There exist two type of nodes:
* 1. node->symbol == 0 -> node where a key ended, has node->value
* 2. node->symbol != 0 -> node representing a character in a key, has node->child
*
* Each node potentially has a node->next, which represents a node in the same
* key position but with a different symbol
*/
struct Trie {
char symbol;
struct Trie *next;
union {
void *value;
struct Trie *child;
};
};
/** Create a new Trie struct
* This function assumes NULL == 0, but that's probably true in every machine
* I'm not sure it is true in C++ but eh
*/
struct Trie *trie_new() {
return calloc(1, sizeof(struct Trie));
}
/** Walk through a Trie and call a function on every value
* This *must* be used to free values
*
* Example:
* struct Trie *trie = trie_new();
*
* int *x = malloc(sizeof *x);
* *x = 1;
* trie_set(trie, "a", x);
*
* int *y = malloc(sizeof *y);
* *y = 2;
* trie_set(trie, "b", y);
*
* assert(trie_get(trie, "a") == x);
* assert(trie_get(trie, "b") == y);
*
* trie_foreach_value(struct Trie *root, free);
* trie_free(trie);
*/
void trie_foreach_value(struct Trie *root, void (*f)(void*)) {
if (root == NULL) return;
trie_foreach_value(root->next, f);
if (root->symbol != 0) trie_foreach_value(root->child, f);
else f(root->value);
}
/** Free a Trie
* This function does not try to free values, for that utilize
* trie_foreach_value with a custom free function
*/
void trie_free(struct Trie *root) {
if (root == NULL) return;
trie_free(root->next);
if (root->symbol != 0) trie_free(root->child);
free(root);
}
/** Set a key to a value in a Trie
* This function requires that `value != NULL` so that missing keys can return
* NULL in `trie_get`
*
* It also requires that the key is non-empty (i.e. strlen(key) != 0) for ease
* of implementation
*/
void trie_set(struct Trie *root, const char *key, void *value) {
assert(value != NULL);
assert(*key != 0);
struct Trie *conductor = root;
/* For each character of the key */
for (; *key != 0; ++key) {
/* Find a suitable node at this level */
for (;; conductor = conductor->next) {
/* If there already is a node at this level with the key's current
* character, just use that and move on to the next level */
if (conductor->symbol == *key) {
conductor = conductor->child;
break;
}
/* If we reach this case, it means there are no more nodes in the
* current level */
if (conductor->next == NULL) {
/* So we create a new node and set it to be the last node's
* successor */
struct Trie *next = trie_new();
next->symbol = *key;
conductor->next = next;
conductor = next;
/* Then we move on to the next level as usual */
struct Trie *child = trie_new();
conductor->child = child;
conductor = child;
break;
}
}
}
/* When we're here, we're at the last character of the key and so we just
* set the current node's value */
conductor->value = value;
}
/** Get a key's value in a Trie
* This function returns NULL if a key is not present
*/
void *trie_get(struct Trie *root, const char *key) {
assert(*key != 0);
if (root == NULL) return NULL;
struct Trie *conductor = root;
for (; *key != 0; ++key) {
for (; conductor->symbol != *key; conductor = conductor->next) {
if (conductor->next == NULL) return NULL;
}
if (conductor->child == NULL) return NULL;
conductor = conductor->child;
}
return conductor->value;
}
#include <cassert> // assert
#include <memory> // std::unique_ptr
#include <optional> // std::optional
#include <string> // std::string
#include <variant> // std::variant
/** A prefix tree
* An efficient key-value storage with string keys
*
* There exist two type of nodes:
* 1. node->symbol == 0 -> node where a key ended, has node->value()
* 2. node->symbol != 0 -> node representing a character in a key, has node->child()
*
* Each node potentially has a node->next, which represents a node in the same
* key position but with a different symbol
*/
template <typename V>
class Trie {
char symbol;
std::unique_ptr<Trie<V>> next;
std::variant<std::monostate, std::unique_ptr<Trie<V>>, V> value_or_child;
std::unique_ptr<Trie<V>>& child() {
return std::get<std::unique_ptr<Trie<V>>>(this->value_or_child);
}
V& value() {
return std::get<V>(this->value_or_child);
}
public:
std::optional<std::reference_wrapper<V>> get(std::string const& key) {
assert(!key.empty());
auto conductor = this;
for (char c : key) {
for (; conductor->symbol != c; conductor = conductor->next.get()) {
if (!conductor->next) return std::nullopt;
}
if (!conductor->child()) return std::nullopt;
conductor = conductor->child().get();
}
return std::ref(conductor->value());
}
void set(std::string const& key, V && value) {
assert(!key.empty());
auto conductor = this;
for (char c : key) {
/* Iterate over the current level */
for (;; conductor = conductor->next.get()) {
/* If we find a node with the current character, use it */
if (conductor->symbol == c) {
conductor = conductor->child().get();
break;
}
/* If we don't find any nodes before we reach the last one */
if (!conductor->next) {
/* Create a new node and assign the current character to it */
conductor->next = std::make_unique<Trie>();
conductor->next->symbol = c;
conductor = conductor->next.get();
/* Create a new level too and move to it */
conductor->value_or_child = std::make_unique<Trie>();
conductor = conductor->child().get();
break;
}
}
}
/* We've iterated over all the characters and hopefully found a leaf
* node to terminate on, so we check that the value/child union is in
* an unitialized state and if it is assign the value to it */
assert(std::holds_alternative<std::monostate>(conductor->value_or_child));
conductor->value_or_child = value;
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment