Skip to content

Instantly share code, notes, and snippets.

@abbot
Created October 4, 2011 11:52
Show Gist options
  • Save abbot/1261449 to your computer and use it in GitHub Desktop.
Save abbot/1261449 to your computer and use it in GitHub Desktop.
complex key lookup using k-d tree
from scipy.spatial import kdtree
import itertools
import random
import time
random.seed(1)
data = list(itertools.permutations(range(30), 4))
random.shuffle(data)
print "Data contains", len(data), "elements."
print "building tree"
now = time.time()
tree = kdtree.KDTree(data)
print "done in %.2f sec" % (time.time()-now)
def match(a, b):
assert len(a) == len(b)
for i, v in enumerate(a):
if v != b[i] and (v is not None) and (b[i] is not None):
return False
return True
def find_like(kdtree, needle):
assert len(needle) == kdtree.m
def do_find(tree, needle):
if hasattr(tree, 'idx'):
return list(itertools.ifilter(lambda x: match(needle, x),
kdtree.data[tree.idx]))
if needle[tree.split_dim] is None:
return do_find(tree.less, needle) + do_find(tree.greater, needle)
if needle[tree.split_dim] <= tree.split:
return do_find(tree.less, needle)
else:
return do_find(tree.greater, needle)
return do_find(kdtree.tree, needle)
def find_like_bf(kdtree, needle):
assert len(needle) == kdtree.m
return list(itertools.ifilter(lambda x: match(needle, x),
kdtree.data))
import sqlite3
db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE a (x1 INTEGER, x2 INTEGER, x3 INTEGER, x4 INTEGER)")
db.execute("CREATE INDEX x1 ON a(x1)")
db.execute("CREATE INDEX x2 ON a(x2)")
db.execute("CREATE INDEX x3 ON a(x3)")
db.execute("CREATE INDEX x4 ON a(x4)")
print "building db"
now = time.time()
db.executemany("INSERT INTO a VALUES (?, ?, ?, ?)",
[[int(x) for x in value] for value in tree.data])
print "done in %.2f sec" % (time.time()-now)
def db_test():
cur = db.cursor()
cur.execute("SELECT * FROM a WHERE x1=? AND x3=?", (1, 2))
return cur.fetchall()
import timeit
print "k-d tree:"
print "%.2f sec" % timeit.timeit("find_like(tree, (1, None, 2, None))",
"from __main__ import find_like, tree",
number=100)
# print "brute force:"
# print "%.2f sec" % timeit.timeit("find_like_bf(tree, (1, None, 2, None))",
# "from __main__ import find_like_bf, tree",
# number=1000)
print "sqlite db:"
print "%.2f sec" % timeit.timeit("db_test()",
"from __main__ import db_test",
number=100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment