Skip to content

Instantly share code, notes, and snippets.

@jacksmith15
Last active January 18, 2021 15:47
Show Gist options
  • Save jacksmith15/a55c76432ea1406f457202f3a157d693 to your computer and use it in GitHub Desktop.
Save jacksmith15/a55c76432ea1406f457202f3a157d693 to your computer and use it in GitHub Desktop.
Searchable dictionary, allowing fast substring matching on keys.
"""Searchable dictionary, allowing fast substring matching on keys.
Requires:
- Python 3.8 or greater
- pytrie 0.4.0 or greater
Example:
>>> dictionary = SearchableDict({
... "foo bar": 1,
... "foo baz": 2,
... "bar baz": 3,
... })
>>> dictionary.search("foo")
{
"foo bar": 1,
"foo baz": 2,
}
>>> dictionary.search("baz")
{
"foo baz": 2,
"bar baz": 3,
}
"""
from __future__ import annotations
from typing import Dict, Set, TypeVar
from pytrie import Trie
ValueT = TypeVar("ValueT")
class SearchableDict(Dict[str, ValueT]):
"""Fast substring matching on dictionary keys.
Based on a suffix tree:
https://en.wikipedia.org/wiki/Suffix_tree
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._trie = Trie()
for key in self:
self._index_key(key)
def _index_key(self, key: str):
suffixes = [key[-i:] for i in range(len(key), 0, -1)]
for suffix in suffixes:
if suffix not in self._trie:
self._trie[suffix] = set()
self._trie[suffix].add(key)
def __setitem__(self, key: str, value: ValueT):
if key not in self:
self._index_key(key)
super().__setitem__(key, value)
def __delitem__(self, key: str):
for key_set in self._trie.values():
if key in key_set:
key_set.remove(key)
super().__delitem__(key)
def search(self, query: str) -> SearchableDict[ValueT]:
keys: Set[str] = set.union(set(), *self._trie.values(query))
return SearchableDict({key: self[key] for key in keys})
def example():
dictionary: SearchableDict[int] = SearchableDict()
print(dictionary)
dictionary["pork green curry"] = 1
dictionary["chicken red curry"] = 2
dictionary["lemon chicken"] = 3
print(dictionary)
assert dictionary.search("curry") == {
"pork green curry": 1,
"chicken red curry": 2,
}
assert dictionary.search("chicken") == {
"chicken red curry": 2,
"lemon chicken": 3,
}
assert dictionary.search("chicken").search("lemon") == {
"lemon chicken": 3,
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment