Skip to content

Instantly share code, notes, and snippets.

@sevagh
Created December 7, 2019 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sevagh/9497cb5a4fcf88c51b05a08006a1bb7a to your computer and use it in GitHub Desktop.
Save sevagh/9497cb5a4fcf88c51b05a08006a1bb7a to your computer and use it in GitHub Desktop.
scipy-quadtree-sparse-matrix
'''
quadtree.py
A matrix implementation based on a pointer region quadtree, implemented in pure Python.
The script takes a scipy sparse matrix format as the only argument and calls 'eval' to use that format as the control matrix.
See: https://docs.scipy.org/doc/scipy/reference/sparse.html
Several sizes and densities are tested (all matrices are square). Measured:
* Sets
* Gets
* Memory used (using guppy/hpy)
E.g. quadtree vs control dok (dictionary-of-keys):
for size 8192, density 0.5
quadtree mem: 11.317016MB
control mem: 0.797296MB
quadtree avg set: 1.598156640780779e-05
control avg set: 4.696345214827424e-06
quadtree avg get: 6.536757081176958e-06
control avg get: 6.89310375656671e-06
'''
import sys
import random
import time
import numpy
import scipy
import scipy.sparse as scipy_sparse
from guppy import hpy; h=hpy()
class _QuadTreeNode:
def __init__(self, datum):
self.datum = datum
def ne(self):
try:
return self.ne_node
except AttributeError:
return None
def nw(self):
try:
return self.nw_node
except AttributeError:
return None
def se(self):
try:
return self.se_node
except AttributeError:
return None
def sw(self):
try:
return self.sw_node
except AttributeError:
return None
def get_val(self, x, y, M, N):
if M == 0 and N == 0:
return self.val
m = M//2
n = N//2
if x < m and y < n:
nw = self.nw()
if nw:
return nw.get_val(x, y, m, n)
return 0.0
if x >= m and y < n:
ne = self.ne()
if ne:
return ne.get_val(x-m, y, m, n)
return 0.0
if x < m and y >= n:
sw = self.sw()
if sw:
return sw.get_val(x, y-n, m, n)
return 0.0
if x >= m and y >= n:
se = self.se()
if se:
return se.get_val(x-m, y-n, m, n)
return 0.0
return self.val
def set_val(self, x, y, M, N, val):
if M == 0 and N == 0:
self.val = val
return
m = M//2
n = N//2
if x < m and y < n:
nw = self.nw()
if not nw:
self.nw_node = _QuadTreeNode(0.0)
self.nw().set_val(x, y, m, n, val)
if x >= m and y < n:
ne = self.ne()
if not ne:
self.ne_node = _QuadTreeNode(0.0)
self.ne().set_val(x-m, y, m, n, val)
if x < m and y >= n:
sw = self.sw()
if not sw:
self.sw_node = _QuadTreeNode(0.0)
self.sw().set_val(x, y-n, m, n, val)
if x >= m and y >= n:
se = self.se()
if not se:
self.se_node = _QuadTreeNode(0.0)
self.se().set_val(x-m, y-n, m, n, val)
self.val = val
class QuadTree:
def __init__(self, M, N):
self.root = _QuadTreeNode(0.0)
self.M = M
self.N = N
def __getitem__(self, index):
if not isinstance(index, tuple):
raise ValueError('can only index via x,y')
x = int(index[0])
y = int(index[1])
if x >= self.M or y >= self.N:
raise IndexError('index {0} is out of bounds for {1}x{2} matrix'.format(index, self.M, self.N))
return self.root.get_val(x, y, self.M, self.N)
def __setitem__(self, index, value):
if not isinstance(index, tuple):
raise ValueError('can only index via x,y')
x = int(index[0])
y = int(index[1])
if x >= self.M or y >= self.N:
raise IndexError('index {0} is out of bounds for {1}x{2} matrix'.format(index, self.M, self.N))
self.root.set_val(x, y, self.M, self.N, value)
if __name__ == '__main__':
M = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
density = [0.001, 0.01, 0.05, 0.1, 0.25, 0.5]
control_spm_func = 'scipy_sparse.{0}_matrix'.format(sys.argv[1])
for m in M:
for d in density:
if int(d*m) == 0:
print('skipping size {0}, density {1}'.format(m, d))
continue
print('for size {0}, density {1}'.format(m, d))
l_orig = [(random.uniform(-100, 100), *numpy.random.choice(a=numpy.arange(m), size=2, replace=False)) for _ in range(int(d*m))]
l_seen = set()
l = []
for l_ in l_orig:
if (l_[1], l_[2]) in l_seen:
continue
l.append(l_)
l_seen.add((l_[1], l_[2]))
qt_set_time = 0.0
ct_set_time = 0.0
h.setrelheap()
control = eval(control_spm_func)((m, m))
for tup in l:
val, x, y = tup
ct1 = time.perf_counter()
control[x, y] = val
ct_set_time += time.perf_counter() - ct1
quadtree = QuadTree(m, m)
for tup in l:
val, x, y = tup
qt1 = time.perf_counter()
quadtree[x, y] = val
qt_set_time += time.perf_counter() - qt1
print('quadtree mem: {0}MB'.format(h.iso(quadtree.root).domisize/1000000.0))
print('control mem: {0}MB'.format(h.iso(control).domisize/1000000.0))
qt_get_time = 0.0
ct_get_time = 0.0
for tup in l:
val, x, y = tup
qt1 = time.perf_counter()
assert(quadtree[x, y] == val)
qt_get_time += time.perf_counter() - qt1
ct1 = time.perf_counter()
assert(control[x, y] == val)
ct_get_time += time.perf_counter() - ct1
qt_get_time /= len(l)
qt_set_time /= len(l)
ct_get_time /= len(l)
ct_set_time /= len(l)
print('quadtree avg set: {0}'.format(qt_set_time))
print('control avg set: {0}'.format(ct_set_time))
print('quadtree avg get: {0}'.format(qt_get_time))
print('control avg get: {0}'.format(ct_get_time))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment