Created
December 7, 2019 14:41
-
-
Save sevagh/9497cb5a4fcf88c51b05a08006a1bb7a to your computer and use it in GitHub Desktop.
scipy-quadtree-sparse-matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
quadtree.py | |
A matrix implementation based on a pointer region quadtree, implemented in pure Python. | |
The script takes a scipy sparse matrix format as the only argument and calls 'eval' to use that format as the control matrix. | |
See: https://docs.scipy.org/doc/scipy/reference/sparse.html | |
Several sizes and densities are tested (all matrices are square). Measured: | |
* Sets | |
* Gets | |
* Memory used (using guppy/hpy) | |
E.g. quadtree vs control dok (dictionary-of-keys): | |
for size 8192, density 0.5 | |
quadtree mem: 11.317016MB | |
control mem: 0.797296MB | |
quadtree avg set: 1.598156640780779e-05 | |
control avg set: 4.696345214827424e-06 | |
quadtree avg get: 6.536757081176958e-06 | |
control avg get: 6.89310375656671e-06 | |
''' | |
import sys | |
import random | |
import time | |
import numpy | |
import scipy | |
import scipy.sparse as scipy_sparse | |
from guppy import hpy; h=hpy() | |
class _QuadTreeNode: | |
def __init__(self, datum): | |
self.datum = datum | |
def ne(self): | |
try: | |
return self.ne_node | |
except AttributeError: | |
return None | |
def nw(self): | |
try: | |
return self.nw_node | |
except AttributeError: | |
return None | |
def se(self): | |
try: | |
return self.se_node | |
except AttributeError: | |
return None | |
def sw(self): | |
try: | |
return self.sw_node | |
except AttributeError: | |
return None | |
def get_val(self, x, y, M, N): | |
if M == 0 and N == 0: | |
return self.val | |
m = M//2 | |
n = N//2 | |
if x < m and y < n: | |
nw = self.nw() | |
if nw: | |
return nw.get_val(x, y, m, n) | |
return 0.0 | |
if x >= m and y < n: | |
ne = self.ne() | |
if ne: | |
return ne.get_val(x-m, y, m, n) | |
return 0.0 | |
if x < m and y >= n: | |
sw = self.sw() | |
if sw: | |
return sw.get_val(x, y-n, m, n) | |
return 0.0 | |
if x >= m and y >= n: | |
se = self.se() | |
if se: | |
return se.get_val(x-m, y-n, m, n) | |
return 0.0 | |
return self.val | |
def set_val(self, x, y, M, N, val): | |
if M == 0 and N == 0: | |
self.val = val | |
return | |
m = M//2 | |
n = N//2 | |
if x < m and y < n: | |
nw = self.nw() | |
if not nw: | |
self.nw_node = _QuadTreeNode(0.0) | |
self.nw().set_val(x, y, m, n, val) | |
if x >= m and y < n: | |
ne = self.ne() | |
if not ne: | |
self.ne_node = _QuadTreeNode(0.0) | |
self.ne().set_val(x-m, y, m, n, val) | |
if x < m and y >= n: | |
sw = self.sw() | |
if not sw: | |
self.sw_node = _QuadTreeNode(0.0) | |
self.sw().set_val(x, y-n, m, n, val) | |
if x >= m and y >= n: | |
se = self.se() | |
if not se: | |
self.se_node = _QuadTreeNode(0.0) | |
self.se().set_val(x-m, y-n, m, n, val) | |
self.val = val | |
class QuadTree: | |
def __init__(self, M, N): | |
self.root = _QuadTreeNode(0.0) | |
self.M = M | |
self.N = N | |
def __getitem__(self, index): | |
if not isinstance(index, tuple): | |
raise ValueError('can only index via x,y') | |
x = int(index[0]) | |
y = int(index[1]) | |
if x >= self.M or y >= self.N: | |
raise IndexError('index {0} is out of bounds for {1}x{2} matrix'.format(index, self.M, self.N)) | |
return self.root.get_val(x, y, self.M, self.N) | |
def __setitem__(self, index, value): | |
if not isinstance(index, tuple): | |
raise ValueError('can only index via x,y') | |
x = int(index[0]) | |
y = int(index[1]) | |
if x >= self.M or y >= self.N: | |
raise IndexError('index {0} is out of bounds for {1}x{2} matrix'.format(index, self.M, self.N)) | |
self.root.set_val(x, y, self.M, self.N, value) | |
if __name__ == '__main__': | |
M = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192] | |
density = [0.001, 0.01, 0.05, 0.1, 0.25, 0.5] | |
control_spm_func = 'scipy_sparse.{0}_matrix'.format(sys.argv[1]) | |
for m in M: | |
for d in density: | |
if int(d*m) == 0: | |
print('skipping size {0}, density {1}'.format(m, d)) | |
continue | |
print('for size {0}, density {1}'.format(m, d)) | |
l_orig = [(random.uniform(-100, 100), *numpy.random.choice(a=numpy.arange(m), size=2, replace=False)) for _ in range(int(d*m))] | |
l_seen = set() | |
l = [] | |
for l_ in l_orig: | |
if (l_[1], l_[2]) in l_seen: | |
continue | |
l.append(l_) | |
l_seen.add((l_[1], l_[2])) | |
qt_set_time = 0.0 | |
ct_set_time = 0.0 | |
h.setrelheap() | |
control = eval(control_spm_func)((m, m)) | |
for tup in l: | |
val, x, y = tup | |
ct1 = time.perf_counter() | |
control[x, y] = val | |
ct_set_time += time.perf_counter() - ct1 | |
quadtree = QuadTree(m, m) | |
for tup in l: | |
val, x, y = tup | |
qt1 = time.perf_counter() | |
quadtree[x, y] = val | |
qt_set_time += time.perf_counter() - qt1 | |
print('quadtree mem: {0}MB'.format(h.iso(quadtree.root).domisize/1000000.0)) | |
print('control mem: {0}MB'.format(h.iso(control).domisize/1000000.0)) | |
qt_get_time = 0.0 | |
ct_get_time = 0.0 | |
for tup in l: | |
val, x, y = tup | |
qt1 = time.perf_counter() | |
assert(quadtree[x, y] == val) | |
qt_get_time += time.perf_counter() - qt1 | |
ct1 = time.perf_counter() | |
assert(control[x, y] == val) | |
ct_get_time += time.perf_counter() - ct1 | |
qt_get_time /= len(l) | |
qt_set_time /= len(l) | |
ct_get_time /= len(l) | |
ct_set_time /= len(l) | |
print('quadtree avg set: {0}'.format(qt_set_time)) | |
print('control avg set: {0}'.format(ct_set_time)) | |
print('quadtree avg get: {0}'.format(qt_get_time)) | |
print('control avg get: {0}'.format(ct_get_time)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment