Skip to content

Instantly share code, notes, and snippets.

@hypirion
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hypirion/74403afb516a47127f63 to your computer and use it in GitHub Desktop.
Save hypirion/74403afb516a47127f63 to your computer and use it in GitHub Desktop.
Small (unoptimised) persistent vector in Python
## This is not an optimised persistent vector, just an implementation "straight
## off" http://hypirion.com/thesis.pdf. However, compared to Clojure's
## persistent vector, it is interesting in that this implementation
##
## - has O(n) runtime on iteration/reduce and map
## -> Clojure's implementations run in O(n log n) time
## - has submap and subiterators that runs in O(min(log n, m)) time,
## where m is the amount of elements to walk over
## -> Clojure doesn't support submap, although has subiterator indirectly
## through subvec, which runs in O(min(log n, m log n)) time
## - has O(log n) take
## -> In constrast to O(m log n) in Clojure, where m is the amount of
## elements to take. However, subvec can give you take in O(1) time if
## you can accept semantic garbage.
##
## For large branching factors (M), 'log n' gets smaller, and can in practice be
## considered effectively constant ('~1').
## What could be useful for some is that the root is easily printable. Try to
## make a persistent vector, then print out p.root. It will give you the actual
## internal trie.
## branching = 2, but you can change this value to any integer >= 2. This must
## be done before you've made any vectors, however. (Usually change it here)
M = 2
## Silly example usage:
##
## p = PVec()
## for i in range(100):
## p = p.push(i)
##
## p = p.submap(lambda x: x * 2, 50, 70)
## partial_sum = sum(p.iter(20, 80)) # -> 4160
## total_sum = sum(p) # -> 6140
##
## for i in p:
## print i
def set_child(node, idx, val):
"""Replaces the child at index idx with val. Mutable"""
node[idx] = val
def child(node, idx):
"""Returns the child at index idx"""
return node[idx]
def node_clone(node):
"""Returns a shallow copy of this node"""
return node[:]
def node_create():
"""Creates an empty node"""
return [None]*M
def strip_right_of(node, idx):
"""Replaces all elements to the right of idx in this node with None."""
for i in range(idx + 1, M):
node[i] = None
class PVec:
def __init__(self, vals=None):
"""Creates an empty persistent vector if not val is provided, otherwise
returns a persistent vector with vals in it"""
self.size = 0
self.root = node_create()
self.height = 0
if vals != None: ## hack to get eval(__repr__(p)) == p working
inj = self.into(vals)
self.size = inj.size
self.root = inj.root
self.height = inj.height
def __clone(self):
"""Performs a shallow copy of a persistent vector"""
theClone = PVec()
theClone.size = self.size
theClone.root = self.root
theClone.height = self.height
return theClone
def get(self, idx):
"""Returns the element at index idx"""
assert idx < self.size
node = self.root
for h in range(self.height, -1, -1):
subidx = (idx / (M ** h)) % M
node = child(node, subidx)
return node
def update(self, idx, val):
"""Returns a new vector with element at index idx replaced with val"""
assert idx < self.size
updated = self.__clone()
node = node_clone(self.root)
updated.root = node
for h in range(self.height, 0, -1):
subidx = (idx / (M ** h)) % M
new_child = node_clone(child(node, subidx))
set_child(node, subidx, new_child)
node = new_child
set_child(node, idx % M, val)
return updated
def __capacity(self):
"""Returns the capacity of this vector. The capacity is the amount of
elements this vector can contain without increasing its height."""
return M ** (self.height + 1)
def __full(self):
"""Returns true if we don't have more space for a new element, and have
to increase height"""
return self.__capacity() == self.size
def push(self, val):
"""Returns a new vector with val appended at the end."""
updated = self.__clone()
idx = self.size
updated.size = self.size + 1
if self.__full():
n = node_create()
set_child(n, 0, self.root)
updated.root = n
updated.height = self.height + 1
else:
updated.root = node_clone(self.root)
node = updated.root
for h in range(updated.height, 0, -1):
subidx = (idx / (M ** h)) % M
if child(node, subidx) == None:
set_child(node, subidx, node_create())
else:
set_child(node, subidx, node_clone(child(node, subidx)))
node = child(node, subidx)
subidx = idx % M
set_child(node, subidx, val)
return updated
def pop(self):
"""Returns a new vector with the last element removed."""
assert self.size > 0
popped = self.__clone()
idx = self.size - 1
popped.size = self.size - 1
if popped.size == self.__capacity()/M and popped.size != 1:
popped.height = self.height - 1
popped.root = child(popped.root, 0)
else:
node = node_clone(popped.root)
popped.root = node
for h in range(popped.height, -1, -1):
subidx = (idx / (M ** h)) % M
if popped.size % (M ** h) == 0:
set_child(node, subidx, None)
return popped
set_child(node, subidx, node_clone(child(node, subidx)))
node = child(node, subidx)
return popped
def submap(self, f, start_idx=0, stop_idx=None):
"""Maps part of a vector, from start_idx to, but not including, stop_idx."""
if stop_idx == None:
stop_idx = self.size
assert 0 <= start_idx and stop_idx <= self.size and stop_idx >= start_idx
mapped = self.__clone()
jump = start_idx + M - (start_idx % M)
stack = [None]*(self.height+1)
stack[self.height] = node_clone(self.root)
for h in range(self.height, 0, -1):
subidx = (start_idx / (M ** h)) % M
stack[h-1] = node_clone(child(stack[h], subidx))
for idx in range(start_idx, stop_idx):
if idx == jump:
prev_idx = idx - 1
h = 1
while (prev_idx / (M ** h)) % M != (idx / (M ** h)) % M:
prev_subidx = (prev_idx / (M ** h)) % M
set_child(stack[h], prev_subidx, stack[h-1])
h += 1
for h in range(h - 1, 0, -1):
subidx = (idx / (M ** h)) % M
stack[h-1] = node_clone(child(stack[h], subidx))
jump += M
subidx = idx % M
set_child(stack[0], subidx, f(child(stack[0], subidx)))
last_idx = stop_idx - 1
for h in range(1, self.height + 1):
subidx = (last_idx / (M ** h)) % M
set_child(stack[h], subidx, stack[h-1])
mapped.root = stack[self.height]
return mapped
def map(self, f):
"""Returns a new vector where all elements are replaced with f(elt)"""
return self.submap(f, 0, self.size)
def take(self, count):
"""Returns a new vector with only the first count elements. If count is
higher than the total amount of elements in this vector, the original
vector is returned."""
if count <= 0:
return PVec()
if count >= self.size:
return self
taken = self.__clone()
idx = count - 1
taken.size = count
## while the path taken to rightmost element is 0, we can shrink its height
h = self.height
root = self.root
while (idx / (M ** h)) % M == 0 and h > 0:
h -= 1
root = child(root, 0)
taken.height = h
## Special case where root is exactly what we need
if count == M ** (h+1):
taken.root = root
return taken
root = node_clone(root)
taken.root = root
node = root
for h in range(h, 0, -1):
subidx = (idx / (M ** h)) % M
strip_right_of(node, subidx)
if idx % (M ** h) == (M ** h) - 1: # short circuit when subtrie is perfect
return taken
set_child(node, subidx, node_clone(child(node, subidx)))
node = child(node, subidx)
strip_right_of(node, idx % M)
return taken
def iter(self, start_idx=0, stop_idx=None):
"""Returns an iterator over a slice of this vector"""
if stop_idx == None:
stop_idx = self.size
assert 0 <= start_idx and stop_idx <= self.size and stop_idx >= start_idx
jump = start_idx + M - (start_idx % M)
stack = [None]*(self.height+1)
stack[self.height] = self.root
for h in range(self.height, 0, -1):
subidx = (start_idx / (M ** h)) % M
stack[h-1] = child(stack[h], subidx)
for idx in range(start_idx, stop_idx):
if idx == jump:
prev_idx = idx - 1
h = 1
while (prev_idx / (M ** h)) % M != (idx / (M ** h)) % M:
h += 1
for h in range(h - 1, 0, -1):
subidx = (idx / (M ** h)) % M
stack[h-1] = child(stack[h], subidx)
jump += M
subidx = idx % M
yield child(stack[0], subidx)
def __iter__(self):
"""Returns an iterator over this vector"""
return self.iter(0, self.size)
def into(self, it):
"""Returns a new vector with all elements in it appended"""
intoed = self.__clone()
start_idx = self.size - 1
jump = start_idx + M - (start_idx % M)
if jump == 0: # edge case for empty pvec
jump = M
stack = [None]*(self.height+1)
stack[self.height] = node_clone(self.root)
for h in range(self.height, 0, -1):
subidx = (start_idx / (M ** h)) % M
stack[h-1] = node_clone(child(stack[h], subidx))
prev_idx = -1
for (prev_idx, val) in enumerate(it, start_idx):
idx = prev_idx + 1
if idx == jump:
h = 1
while (prev_idx / (M ** h)) % M != (idx / (M ** h)) % M:
prev_subidx = (prev_idx / (M ** h)) % M
if len(stack) == h: # increase stack size
new_root = node_create()
set_child(new_root, 0, stack[h-1])
stack.append(new_root)
else:
set_child(stack[h], prev_subidx, stack[h-1])
h += 1
for h in range(h - 1, 0, -1):
stack[h-1] = node_create()
jump += M
subidx = idx % M
set_child(stack[0], subidx, val)
if prev_idx == -1: ## Edge case for empty iterator
return self
last_idx = prev_idx + 1
intoed.height = len(stack)-1
for h in range(1, intoed.height + 1):
subidx = (last_idx / (M ** h)) % M
set_child(stack[h], subidx, stack[h-1])
intoed.root = stack[intoed.height]
intoed.size = last_idx + 1
return intoed
def __add__(self, other):
return self.into(other)
def __repr__(self):
return 'PVec('+list(self).__repr__()+')'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment