Skip to content

Instantly share code, notes, and snippets.

@maheshakya
Created April 8, 2014 12:31
Show Gist options
  • Save maheshakya/10117227 to your computer and use it in GitHub Desktop.
Save maheshakya/10117227 to your computer and use it in GitHub Desktop.
Implements a simple prefix tree. This has a function which accepts an array of values and returns matching indices of that array for a query.
# a: array of hashes
# h: number of hash bits to be considered
import numpy as np
class Node():
""""
Defines a node in the prefix tree
"""
def __init__(self, value):
self.children = list()
self.value = value
self.indecies = list()
class PrefixTree():
"""
Prefix tree
"""
def __init__(self):
self.root = Node(None)
self.curr = self.root
def __insert_bit(self, head, l):
"""
Inserts a single bit into the prefix tree
"""
for c in head.children:
if c.value == l:
return c
else:
n = Node(l)
head.children.append(n)
return n
def insert(self, hash_array, index):
"""
Insert an array of hashes into the prefix tree
"""
self.curr = self.root
for l in hash_array:
self.curr = self.__insert_bit(self.curr, l)
self.curr.indecies.append(index)
def has_prefix(self, w):
"""
Checks whether there is a maching prefix in the prefix tree
to the given array
"""
self.curr = self.root
for l in w:
for x in self.curr.children:
if x.value == l:
self.curr = x
break
else:
return list()
return self.curr.indecies
def matching_indecies(hashes, h, query):
"""
Given a sorted array of binary hashes ,
number h of hash bits to use, and a query hash q,
returns the array og indices of matching entries
in the array.
"""
gona = PrefixTree()
for i in range(len(hashes)):
gona.insert(hashes[i], i)
req = query[:h]
indecies = gona.has_prefix(req)
return indecies
def exhaustive_match(a, h, query):
"""
Performs an exhaustive search for query prefix match
in the array a
"""
matches = []
for i in range(len(a)):
count = 0
for j in range(h):
if query[j] != a[i][j]:
break
count = count + 1
if count == h:
matches.append(i)
return matches
def prefix_match(trie, h, query):
"""
Performs prefix match in a prebuild prefix tree
for a query
"""
return trie.has_prefix(query[:h])
def test_insert_trie(a, trie):
"""
Insertes hashes in the array `a` into prefix tree `trie`
"""
for i in range(len(a)):
trie.insert(a[i],i)
##Example on matching entries for query in the given array of hashes
a = []
#Builds array of random binary hashes
for i in range(20):
a.append(np.random.randint(0,2, size=np.random.randint(5,33)))
print 'Hash array'
for i in range(len(a)):
print str(i) + ': ' + str(a[i])
query = np.array([1,1,1,1,0])
#prints the maching indices of the hash array
print 'Macthing indecies: ' + str(matching_indecies(a, 3, query))
print
##Tests time measure time
print "Tests to compare time"
print "---------------------"
trie = PrefixTree()
for i in range(len(a)):
trie.insert(a[i], i)
print "Exhaustive search in the array of hashes:"
%timeit exhastive_match(a, 3, query)
print "Create prefix tree and search for matching entries:"
%timeit matching_indecies(a, 3, query)
print "Search for matching entries in the prebuilt trie:"
%timeit prefix_match(trie, 3, query)
print "Insert values into a trie:"
trie = PrefixTree()
%timeit test_insert_trie(a, trie)
@maheshakya
Copy link
Author

This is a simple implementation of a prefix tree in order to perform LSH based approximate neighbor search. The prefix tree is build using lists in Python.

According to the tests to compare time, following conclusions can be drawn:
(Note: These conclusions are valid only for this implementation)

  1. Searching in the prefix tree for matching entries is faster than the exhaustive search.
  2. Inserting values into the prefix tree is significantly slow.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment