Created
April 8, 2014 12:31
-
-
Save maheshakya/10117227 to your computer and use it in GitHub Desktop.
Implements a simple prefix tree. This has a function which accepts an array of values and returns matching indices of that array for a query.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# a: array of hashes | |
# h: number of hash bits to be considered | |
import numpy as np | |
class Node(): | |
"""" | |
Defines a node in the prefix tree | |
""" | |
def __init__(self, value): | |
self.children = list() | |
self.value = value | |
self.indecies = list() | |
class PrefixTree(): | |
""" | |
Prefix tree | |
""" | |
def __init__(self): | |
self.root = Node(None) | |
self.curr = self.root | |
def __insert_bit(self, head, l): | |
""" | |
Inserts a single bit into the prefix tree | |
""" | |
for c in head.children: | |
if c.value == l: | |
return c | |
else: | |
n = Node(l) | |
head.children.append(n) | |
return n | |
def insert(self, hash_array, index): | |
""" | |
Insert an array of hashes into the prefix tree | |
""" | |
self.curr = self.root | |
for l in hash_array: | |
self.curr = self.__insert_bit(self.curr, l) | |
self.curr.indecies.append(index) | |
def has_prefix(self, w): | |
""" | |
Checks whether there is a maching prefix in the prefix tree | |
to the given array | |
""" | |
self.curr = self.root | |
for l in w: | |
for x in self.curr.children: | |
if x.value == l: | |
self.curr = x | |
break | |
else: | |
return list() | |
return self.curr.indecies | |
def matching_indecies(hashes, h, query): | |
""" | |
Given a sorted array of binary hashes , | |
number h of hash bits to use, and a query hash q, | |
returns the array og indices of matching entries | |
in the array. | |
""" | |
gona = PrefixTree() | |
for i in range(len(hashes)): | |
gona.insert(hashes[i], i) | |
req = query[:h] | |
indecies = gona.has_prefix(req) | |
return indecies | |
def exhaustive_match(a, h, query): | |
""" | |
Performs an exhaustive search for query prefix match | |
in the array a | |
""" | |
matches = [] | |
for i in range(len(a)): | |
count = 0 | |
for j in range(h): | |
if query[j] != a[i][j]: | |
break | |
count = count + 1 | |
if count == h: | |
matches.append(i) | |
return matches | |
def prefix_match(trie, h, query): | |
""" | |
Performs prefix match in a prebuild prefix tree | |
for a query | |
""" | |
return trie.has_prefix(query[:h]) | |
def test_insert_trie(a, trie): | |
""" | |
Insertes hashes in the array `a` into prefix tree `trie` | |
""" | |
for i in range(len(a)): | |
trie.insert(a[i],i) | |
##Example on matching entries for query in the given array of hashes | |
a = [] | |
#Builds array of random binary hashes | |
for i in range(20): | |
a.append(np.random.randint(0,2, size=np.random.randint(5,33))) | |
print 'Hash array' | |
for i in range(len(a)): | |
print str(i) + ': ' + str(a[i]) | |
query = np.array([1,1,1,1,0]) | |
#prints the maching indices of the hash array | |
print 'Macthing indecies: ' + str(matching_indecies(a, 3, query)) | |
##Tests time measure time | |
print "Tests to compare time" | |
print "---------------------" | |
trie = PrefixTree() | |
for i in range(len(a)): | |
trie.insert(a[i], i) | |
print "Exhaustive search in the array of hashes:" | |
%timeit exhastive_match(a, 3, query) | |
print "Create prefix tree and search for matching entries:" | |
%timeit matching_indecies(a, 3, query) | |
print "Search for matching entries in the prebuilt trie:" | |
%timeit prefix_match(trie, 3, query) | |
print "Insert values into a trie:" | |
trie = PrefixTree() | |
%timeit test_insert_trie(a, trie) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a simple implementation of a prefix tree in order to perform LSH based approximate neighbor search. The prefix tree is build using lists in Python.
According to the tests to compare time, following conclusions can be drawn:
(Note: These conclusions are valid only for this implementation)