Skip to content

Instantly share code, notes, and snippets.

@wassname
Last active February 4, 2016 08:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wassname/2621b30eea1322693bf8 to your computer and use it in GitHub Desktop.
Save wassname/2621b30eea1322693bf8 to your computer and use it in GitHub Desktop.
Visualize a list of search suggestions as an ascii trie.
"""
Visualize a list of search suggestions as an ascii trie.
Inputs:
- searches: a list of strings, each representing a sentance
Usage:
>>>searches=[
'online calculator',
'online photo editor',
'online photoshop',
'online shopping sites',
'online stopwatch'
]
>>>ascii_trie(searches)
/calculator
|
| / editor
- online photo
| \shop
|
| /topwatch
\s
\hopping sites
This component is GPL as it inherits it's licence from the code it's based on: etetoolkit
Ref: https://github.com/etetoolkit/ete/blob/3.0.0b29/ete3/coretype/tree.py#L1415
"""
from ete3 import Tree
def search_to_trie(searches):
"""Convert a list of search string to a ete trie"""
#Creates a empty tree
tree = Tree()
tree.name = ""
# Lets keep tree structure indexed
name2node = {}
# Make sure there are no duplicates
searches = set(searches)
# Populate tree
for wd in searches:
# If no similar words exist, add it to the base of tree
target = tree
# Find relatives in the tree
for pos in range(len(wd), -1, -1):
root = wd[:pos]
if root in name2node:
target = name2node[root]
break
# Add new nodes as necessary
fullname = root
for letter in wd[pos:]:
fullname += letter
new_node = target.add_child(name=letter, dist=1.0)
name2node[fullname] = new_node
target = new_node
return tree
def _asciiArt(self, char1='-', show_internal=True, compact=False, attributes=None):
"""
Returns the ASCII representation of the tree.
Code based on the PyCogent GPL project.
"""
if not attributes:
attributes = ["name"]
node_name = ', '.join(map(str, [getattr(self, v) for v in attributes if hasattr(self, v)]))
MIN=2
LEN = max(MIN, len(node_name) if not self.children or show_internal else 0)
PAD = ' ' * LEN
PA = ' ' * (LEN-1)
if not self.is_leaf():
mids = []
result = []
for c in self.children:
if len(self.children) == 1:
char2 = ''
elif c is self.children[0]:
char2 = '/'
elif c is self.children[-1]:
char2 = '\\'
else:
char2 = '-'
(clines, mid) = _asciiArt(c, char2, show_internal, compact, attributes)
mids.append(mid+len(result))
result.extend(clines)
if not compact:
result.append('')
if not compact:
result.pop()
(lo, hi, end) = (mids[0], mids[-1], len(result))
prefixes = [PAD] * (lo+1) + [PA+'|'] * (hi-lo-1) + [PAD] * (end-hi)
mid = int((lo + hi) / 2)
if char1 in ['/','\\','-']:
prefixes[mid] = char1 + '-'*(LEN-2)
else:
prefixes[mid] = char1 + '-'*(LEN-2) + prefixes[mid][-1]
result = [p+l for (p,l) in zip(prefixes, result)]
if show_internal:
stem = result[mid]
result[mid] = stem[0] + node_name + stem[len(node_name)+1:]
return (result, mid)
else:
return ([char1 + '-' + node_name], 0)
def ascii_trie(searches):
"""
Visualize a list of search suggestions as an ascii trie.
Inputs:
- searches: a list of strings, each representing a sentance
Usage:
>>>searches=[
'online calculator',
'online photo editor',
'online photoshop',
'online shopping sites',
'online stopwatch'
]
>>>ascii_trie(searches)
/calculator
|
| / editor
- online photo
| \shop
|
| /topwatch
\s
\hopping sites
This component is GPL as it inherits it's licence from the code it's based on: etetoolkit
Ref: https://github.com/etetoolkit/ete/blob/3.0.0b29/ete3/coretype/tree.py#L1415
"""
tree = search_to_trie(searches)
(lines,mid)=_asciiArt(tree)
asciik = '\n'+'\n'.join(lines)
return asciik
if __name__=="__main__":
searches=[
'online calculator',
'online photo editor',
'online photoshop',
'online shopping sites',
'online stopwatch'
]
print(ascii_trie(searches))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment