Last active
February 4, 2016 08:50
-
-
Save wassname/2621b30eea1322693bf8 to your computer and use it in GitHub Desktop.
Visualize a list of search suggestions as an ascii trie.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Visualize a list of search suggestions as an ascii trie. | |
Inputs: | |
- searches: a list of strings, each representing a sentance | |
Usage: | |
>>>searches=[ | |
'online calculator', | |
'online photo editor', | |
'online photoshop', | |
'online shopping sites', | |
'online stopwatch' | |
] | |
>>>ascii_trie(searches) | |
/calculator | |
| | |
| / editor | |
- online photo | |
| \shop | |
| | |
| /topwatch | |
\s | |
\hopping sites | |
This component is GPL as it inherits it's licence from the code it's based on: etetoolkit | |
Ref: https://github.com/etetoolkit/ete/blob/3.0.0b29/ete3/coretype/tree.py#L1415 | |
""" | |
from ete3 import Tree | |
def search_to_trie(searches): | |
"""Convert a list of search string to a ete trie""" | |
#Creates a empty tree | |
tree = Tree() | |
tree.name = "" | |
# Lets keep tree structure indexed | |
name2node = {} | |
# Make sure there are no duplicates | |
searches = set(searches) | |
# Populate tree | |
for wd in searches: | |
# If no similar words exist, add it to the base of tree | |
target = tree | |
# Find relatives in the tree | |
for pos in range(len(wd), -1, -1): | |
root = wd[:pos] | |
if root in name2node: | |
target = name2node[root] | |
break | |
# Add new nodes as necessary | |
fullname = root | |
for letter in wd[pos:]: | |
fullname += letter | |
new_node = target.add_child(name=letter, dist=1.0) | |
name2node[fullname] = new_node | |
target = new_node | |
return tree | |
def _asciiArt(self, char1='-', show_internal=True, compact=False, attributes=None): | |
""" | |
Returns the ASCII representation of the tree. | |
Code based on the PyCogent GPL project. | |
""" | |
if not attributes: | |
attributes = ["name"] | |
node_name = ', '.join(map(str, [getattr(self, v) for v in attributes if hasattr(self, v)])) | |
MIN=2 | |
LEN = max(MIN, len(node_name) if not self.children or show_internal else 0) | |
PAD = ' ' * LEN | |
PA = ' ' * (LEN-1) | |
if not self.is_leaf(): | |
mids = [] | |
result = [] | |
for c in self.children: | |
if len(self.children) == 1: | |
char2 = '' | |
elif c is self.children[0]: | |
char2 = '/' | |
elif c is self.children[-1]: | |
char2 = '\\' | |
else: | |
char2 = '-' | |
(clines, mid) = _asciiArt(c, char2, show_internal, compact, attributes) | |
mids.append(mid+len(result)) | |
result.extend(clines) | |
if not compact: | |
result.append('') | |
if not compact: | |
result.pop() | |
(lo, hi, end) = (mids[0], mids[-1], len(result)) | |
prefixes = [PAD] * (lo+1) + [PA+'|'] * (hi-lo-1) + [PAD] * (end-hi) | |
mid = int((lo + hi) / 2) | |
if char1 in ['/','\\','-']: | |
prefixes[mid] = char1 + '-'*(LEN-2) | |
else: | |
prefixes[mid] = char1 + '-'*(LEN-2) + prefixes[mid][-1] | |
result = [p+l for (p,l) in zip(prefixes, result)] | |
if show_internal: | |
stem = result[mid] | |
result[mid] = stem[0] + node_name + stem[len(node_name)+1:] | |
return (result, mid) | |
else: | |
return ([char1 + '-' + node_name], 0) | |
def ascii_trie(searches): | |
""" | |
Visualize a list of search suggestions as an ascii trie. | |
Inputs: | |
- searches: a list of strings, each representing a sentance | |
Usage: | |
>>>searches=[ | |
'online calculator', | |
'online photo editor', | |
'online photoshop', | |
'online shopping sites', | |
'online stopwatch' | |
] | |
>>>ascii_trie(searches) | |
/calculator | |
| | |
| / editor | |
- online photo | |
| \shop | |
| | |
| /topwatch | |
\s | |
\hopping sites | |
This component is GPL as it inherits it's licence from the code it's based on: etetoolkit | |
Ref: https://github.com/etetoolkit/ete/blob/3.0.0b29/ete3/coretype/tree.py#L1415 | |
""" | |
tree = search_to_trie(searches) | |
(lines,mid)=_asciiArt(tree) | |
asciik = '\n'+'\n'.join(lines) | |
return asciik | |
if __name__=="__main__": | |
searches=[ | |
'online calculator', | |
'online photo editor', | |
'online photoshop', | |
'online shopping sites', | |
'online stopwatch' | |
] | |
print(ascii_trie(searches)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment