Last active
January 18, 2021 15:47
-
-
Save jacksmith15/a55c76432ea1406f457202f3a157d693 to your computer and use it in GitHub Desktop.
Searchable dictionary, allowing fast substring matching on keys.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Searchable dictionary, allowing fast substring matching on keys. | |
Requires: | |
- Python 3.8 or greater | |
- pytrie 0.4.0 or greater | |
Example: | |
>>> dictionary = SearchableDict({ | |
... "foo bar": 1, | |
... "foo baz": 2, | |
... "bar baz": 3, | |
... }) | |
>>> dictionary.search("foo") | |
{ | |
"foo bar": 1, | |
"foo baz": 2, | |
} | |
>>> dictionary.search("baz") | |
{ | |
"foo baz": 2, | |
"bar baz": 3, | |
} | |
""" | |
from __future__ import annotations | |
from typing import Dict, Set, TypeVar | |
from pytrie import Trie | |
ValueT = TypeVar("ValueT") | |
class SearchableDict(Dict[str, ValueT]): | |
"""Fast substring matching on dictionary keys. | |
Based on a suffix tree: | |
https://en.wikipedia.org/wiki/Suffix_tree | |
""" | |
def __init__(self, *args, **kwargs): | |
super().__init__(*args, **kwargs) | |
self._trie = Trie() | |
for key in self: | |
self._index_key(key) | |
def _index_key(self, key: str): | |
suffixes = [key[-i:] for i in range(len(key), 0, -1)] | |
for suffix in suffixes: | |
if suffix not in self._trie: | |
self._trie[suffix] = set() | |
self._trie[suffix].add(key) | |
def __setitem__(self, key: str, value: ValueT): | |
if key not in self: | |
self._index_key(key) | |
super().__setitem__(key, value) | |
def __delitem__(self, key: str): | |
for key_set in self._trie.values(): | |
if key in key_set: | |
key_set.remove(key) | |
super().__delitem__(key) | |
def search(self, query: str) -> SearchableDict[ValueT]: | |
keys: Set[str] = set.union(set(), *self._trie.values(query)) | |
return SearchableDict({key: self[key] for key in keys}) | |
def example(): | |
dictionary: SearchableDict[int] = SearchableDict() | |
print(dictionary) | |
dictionary["pork green curry"] = 1 | |
dictionary["chicken red curry"] = 2 | |
dictionary["lemon chicken"] = 3 | |
print(dictionary) | |
assert dictionary.search("curry") == { | |
"pork green curry": 1, | |
"chicken red curry": 2, | |
} | |
assert dictionary.search("chicken") == { | |
"chicken red curry": 2, | |
"lemon chicken": 3, | |
} | |
assert dictionary.search("chicken").search("lemon") == { | |
"lemon chicken": 3, | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment