Last active
August 29, 2015 14:23
-
-
Save hypirion/74403afb516a47127f63 to your computer and use it in GitHub Desktop.
Small (unoptimised) persistent vector in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## This is not an optimised persistent vector, just an implementation "straight | |
## off" http://hypirion.com/thesis.pdf. However, compared to Clojure's | |
## persistent vector, it is interesting in that this implementation | |
## | |
## - has O(n) runtime on iteration/reduce and map | |
## -> Clojure's implementations run in O(n log n) time | |
## - has submap and subiterators that runs in O(min(log n, m)) time, | |
## where m is the amount of elements to walk over | |
## -> Clojure doesn't support submap, although has subiterator indirectly | |
## through subvec, which runs in O(min(log n, m log n)) time | |
## - has O(log n) take | |
## -> In constrast to O(m log n) in Clojure, where m is the amount of | |
## elements to take. However, subvec can give you take in O(1) time if | |
## you can accept semantic garbage. | |
## | |
## For large branching factors (M), 'log n' gets smaller, and can in practice be | |
## considered effectively constant ('~1'). | |
## What could be useful for some is that the root is easily printable. Try to | |
## make a persistent vector, then print out p.root. It will give you the actual | |
## internal trie. | |
## branching = 2, but you can change this value to any integer >= 2. This must | |
## be done before you've made any vectors, however. (Usually change it here) | |
M = 2 | |
## Silly example usage: | |
## | |
## p = PVec() | |
## for i in range(100): | |
## p = p.push(i) | |
## | |
## p = p.submap(lambda x: x * 2, 50, 70) | |
## partial_sum = sum(p.iter(20, 80)) # -> 4160 | |
## total_sum = sum(p) # -> 6140 | |
## | |
## for i in p: | |
## print i | |
def set_child(node, idx, val): | |
"""Replaces the child at index idx with val. Mutable""" | |
node[idx] = val | |
def child(node, idx): | |
"""Returns the child at index idx""" | |
return node[idx] | |
def node_clone(node): | |
"""Returns a shallow copy of this node""" | |
return node[:] | |
def node_create(): | |
"""Creates an empty node""" | |
return [None]*M | |
def strip_right_of(node, idx): | |
"""Replaces all elements to the right of idx in this node with None.""" | |
for i in range(idx + 1, M): | |
node[i] = None | |
class PVec: | |
def __init__(self, vals=None): | |
"""Creates an empty persistent vector if not val is provided, otherwise | |
returns a persistent vector with vals in it""" | |
self.size = 0 | |
self.root = node_create() | |
self.height = 0 | |
if vals != None: ## hack to get eval(__repr__(p)) == p working | |
inj = self.into(vals) | |
self.size = inj.size | |
self.root = inj.root | |
self.height = inj.height | |
def __clone(self): | |
"""Performs a shallow copy of a persistent vector""" | |
theClone = PVec() | |
theClone.size = self.size | |
theClone.root = self.root | |
theClone.height = self.height | |
return theClone | |
def get(self, idx): | |
"""Returns the element at index idx""" | |
assert idx < self.size | |
node = self.root | |
for h in range(self.height, -1, -1): | |
subidx = (idx / (M ** h)) % M | |
node = child(node, subidx) | |
return node | |
def update(self, idx, val): | |
"""Returns a new vector with element at index idx replaced with val""" | |
assert idx < self.size | |
updated = self.__clone() | |
node = node_clone(self.root) | |
updated.root = node | |
for h in range(self.height, 0, -1): | |
subidx = (idx / (M ** h)) % M | |
new_child = node_clone(child(node, subidx)) | |
set_child(node, subidx, new_child) | |
node = new_child | |
set_child(node, idx % M, val) | |
return updated | |
def __capacity(self): | |
"""Returns the capacity of this vector. The capacity is the amount of | |
elements this vector can contain without increasing its height.""" | |
return M ** (self.height + 1) | |
def __full(self): | |
"""Returns true if we don't have more space for a new element, and have | |
to increase height""" | |
return self.__capacity() == self.size | |
def push(self, val): | |
"""Returns a new vector with val appended at the end.""" | |
updated = self.__clone() | |
idx = self.size | |
updated.size = self.size + 1 | |
if self.__full(): | |
n = node_create() | |
set_child(n, 0, self.root) | |
updated.root = n | |
updated.height = self.height + 1 | |
else: | |
updated.root = node_clone(self.root) | |
node = updated.root | |
for h in range(updated.height, 0, -1): | |
subidx = (idx / (M ** h)) % M | |
if child(node, subidx) == None: | |
set_child(node, subidx, node_create()) | |
else: | |
set_child(node, subidx, node_clone(child(node, subidx))) | |
node = child(node, subidx) | |
subidx = idx % M | |
set_child(node, subidx, val) | |
return updated | |
def pop(self): | |
"""Returns a new vector with the last element removed.""" | |
assert self.size > 0 | |
popped = self.__clone() | |
idx = self.size - 1 | |
popped.size = self.size - 1 | |
if popped.size == self.__capacity()/M and popped.size != 1: | |
popped.height = self.height - 1 | |
popped.root = child(popped.root, 0) | |
else: | |
node = node_clone(popped.root) | |
popped.root = node | |
for h in range(popped.height, -1, -1): | |
subidx = (idx / (M ** h)) % M | |
if popped.size % (M ** h) == 0: | |
set_child(node, subidx, None) | |
return popped | |
set_child(node, subidx, node_clone(child(node, subidx))) | |
node = child(node, subidx) | |
return popped | |
def submap(self, f, start_idx=0, stop_idx=None): | |
"""Maps part of a vector, from start_idx to, but not including, stop_idx.""" | |
if stop_idx == None: | |
stop_idx = self.size | |
assert 0 <= start_idx and stop_idx <= self.size and stop_idx >= start_idx | |
mapped = self.__clone() | |
jump = start_idx + M - (start_idx % M) | |
stack = [None]*(self.height+1) | |
stack[self.height] = node_clone(self.root) | |
for h in range(self.height, 0, -1): | |
subidx = (start_idx / (M ** h)) % M | |
stack[h-1] = node_clone(child(stack[h], subidx)) | |
for idx in range(start_idx, stop_idx): | |
if idx == jump: | |
prev_idx = idx - 1 | |
h = 1 | |
while (prev_idx / (M ** h)) % M != (idx / (M ** h)) % M: | |
prev_subidx = (prev_idx / (M ** h)) % M | |
set_child(stack[h], prev_subidx, stack[h-1]) | |
h += 1 | |
for h in range(h - 1, 0, -1): | |
subidx = (idx / (M ** h)) % M | |
stack[h-1] = node_clone(child(stack[h], subidx)) | |
jump += M | |
subidx = idx % M | |
set_child(stack[0], subidx, f(child(stack[0], subidx))) | |
last_idx = stop_idx - 1 | |
for h in range(1, self.height + 1): | |
subidx = (last_idx / (M ** h)) % M | |
set_child(stack[h], subidx, stack[h-1]) | |
mapped.root = stack[self.height] | |
return mapped | |
def map(self, f): | |
"""Returns a new vector where all elements are replaced with f(elt)""" | |
return self.submap(f, 0, self.size) | |
def take(self, count): | |
"""Returns a new vector with only the first count elements. If count is | |
higher than the total amount of elements in this vector, the original | |
vector is returned.""" | |
if count <= 0: | |
return PVec() | |
if count >= self.size: | |
return self | |
taken = self.__clone() | |
idx = count - 1 | |
taken.size = count | |
## while the path taken to rightmost element is 0, we can shrink its height | |
h = self.height | |
root = self.root | |
while (idx / (M ** h)) % M == 0 and h > 0: | |
h -= 1 | |
root = child(root, 0) | |
taken.height = h | |
## Special case where root is exactly what we need | |
if count == M ** (h+1): | |
taken.root = root | |
return taken | |
root = node_clone(root) | |
taken.root = root | |
node = root | |
for h in range(h, 0, -1): | |
subidx = (idx / (M ** h)) % M | |
strip_right_of(node, subidx) | |
if idx % (M ** h) == (M ** h) - 1: # short circuit when subtrie is perfect | |
return taken | |
set_child(node, subidx, node_clone(child(node, subidx))) | |
node = child(node, subidx) | |
strip_right_of(node, idx % M) | |
return taken | |
def iter(self, start_idx=0, stop_idx=None): | |
"""Returns an iterator over a slice of this vector""" | |
if stop_idx == None: | |
stop_idx = self.size | |
assert 0 <= start_idx and stop_idx <= self.size and stop_idx >= start_idx | |
jump = start_idx + M - (start_idx % M) | |
stack = [None]*(self.height+1) | |
stack[self.height] = self.root | |
for h in range(self.height, 0, -1): | |
subidx = (start_idx / (M ** h)) % M | |
stack[h-1] = child(stack[h], subidx) | |
for idx in range(start_idx, stop_idx): | |
if idx == jump: | |
prev_idx = idx - 1 | |
h = 1 | |
while (prev_idx / (M ** h)) % M != (idx / (M ** h)) % M: | |
h += 1 | |
for h in range(h - 1, 0, -1): | |
subidx = (idx / (M ** h)) % M | |
stack[h-1] = child(stack[h], subidx) | |
jump += M | |
subidx = idx % M | |
yield child(stack[0], subidx) | |
def __iter__(self): | |
"""Returns an iterator over this vector""" | |
return self.iter(0, self.size) | |
def into(self, it): | |
"""Returns a new vector with all elements in it appended""" | |
intoed = self.__clone() | |
start_idx = self.size - 1 | |
jump = start_idx + M - (start_idx % M) | |
if jump == 0: # edge case for empty pvec | |
jump = M | |
stack = [None]*(self.height+1) | |
stack[self.height] = node_clone(self.root) | |
for h in range(self.height, 0, -1): | |
subidx = (start_idx / (M ** h)) % M | |
stack[h-1] = node_clone(child(stack[h], subidx)) | |
prev_idx = -1 | |
for (prev_idx, val) in enumerate(it, start_idx): | |
idx = prev_idx + 1 | |
if idx == jump: | |
h = 1 | |
while (prev_idx / (M ** h)) % M != (idx / (M ** h)) % M: | |
prev_subidx = (prev_idx / (M ** h)) % M | |
if len(stack) == h: # increase stack size | |
new_root = node_create() | |
set_child(new_root, 0, stack[h-1]) | |
stack.append(new_root) | |
else: | |
set_child(stack[h], prev_subidx, stack[h-1]) | |
h += 1 | |
for h in range(h - 1, 0, -1): | |
stack[h-1] = node_create() | |
jump += M | |
subidx = idx % M | |
set_child(stack[0], subidx, val) | |
if prev_idx == -1: ## Edge case for empty iterator | |
return self | |
last_idx = prev_idx + 1 | |
intoed.height = len(stack)-1 | |
for h in range(1, intoed.height + 1): | |
subidx = (last_idx / (M ** h)) % M | |
set_child(stack[h], subidx, stack[h-1]) | |
intoed.root = stack[intoed.height] | |
intoed.size = last_idx + 1 | |
return intoed | |
def __add__(self, other): | |
return self.into(other) | |
def __repr__(self): | |
return 'PVec('+list(self).__repr__()+')' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment