Last active
January 9, 2020 19:37
-
-
Save zzzeek/caa4a7ed94f326fbbc031acecb9d7a44 to your computer and use it in GitHub Desktop.
Copy-on-operate vs. copy on evaluate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This demonstration illustrates how a library like pandas could
theoretically (or maybe it does already with some flag?) not require
a copy of the data when an operation takes place on the structure.

It is based on the article https://pythonspeed.com/articles/minimizing-copying/
which illustrates specific programming techniques that can be used with a numpy
array in order to minimize data copying; this gist presents an alternative by
which the library could perhaps shield this implementation detail from the
end-user.
"""
import operator | |
import random | |
class Array:
    """An array class that implements a few simplified numpy-ish operations.

    The arithmetic operators do no work themselves: each one forwards the
    matching function from the ``operator`` module, together with the
    right-hand operand, to ``self.operate``.  ``operate`` is not defined on
    this class, so it has to be supplied by whoever uses these operators
    (in practice, a subclass).
    """

    def __init__(self, values):
        # Announce every construction so the output shows how many array
        # objects get built.
        announcement = "New %s class being created" % self.__class__
        print(announcement)
        self.values = values

    def __str__(self):
        """Render the elements as a bracketed, comma-separated list."""
        rendered = map(str, self.values)
        return "[%s]" % ", ".join(rendered)

    def min(self):
        """Return the smallest element of ``self.values``."""
        smallest = min(self.values)
        return smallest

    def max(self):
        """Return the largest element of ``self.values``."""
        largest = max(self.values)
        return largest

    def __add__(self, other):
        """``self + other``, delegated to ``operate``."""
        return self.operate(operator.add, other)

    def __sub__(self, other):
        """``self - other``, delegated to ``operate``."""
        return self.operate(operator.sub, other)

    def __mul__(self, other):
        """``self * other``, delegated to ``operate``."""
        return self.operate(operator.mul, other)

    def __truediv__(self, other):
        """``self / other``, delegated to ``operate``."""
        return self.operate(operator.truediv, other)
class CopyOnOperateArray(Array):
    """An array that carries out each operation immediately, on a new copy."""

    def operate(self, operator, other):
        """Apply ``operator(element, other)`` to every element.

        The results are collected into a brand-new list, which is wrapped
        in a new ``CopyOnOperateArray``; one full copy per operation.

        Note that the ``operator`` parameter shadows the ``operator``
        module inside this method.
        """
        copied = [operator(item, other) for item in self.values]
        return CopyOnOperateArray(copied)
class CopyOnEvaluateArray(Array):
    """A class that implements the operations by accumulating intent and
    running them all when needed.

    ``operate`` does not touch the data: it returns a lightweight clone
    whose ``_operations`` tuple has one more ``(function, operand)`` entry.
    The queued operations are only run when ``values`` is read, and they
    are re-run on every read (nothing is cached).
    """

    # Pending (function, operand) pairs, oldest first.  The class-level
    # empty tuple serves as the default for instances that queued nothing.
    _operations = ()

    def _clone(self):
        """Return a new object sharing this one's attributes."""
        # Shallow copy: ``__new__`` bypasses ``__init__`` and only the
        # attribute dict is copied, so the underlying ``_values`` object is
        # shared between the original and the clone, not duplicated.
        s = self.__class__.__new__(self.__class__)
        s.__dict__ = self.__dict__.copy()
        return s

    def operate(self, operator, other):
        """Queue ``operator(element, other)`` and return the new array.

        The receiver is left unchanged.  Note that the ``operator``
        parameter shadows the ``operator`` module inside this method.
        """
        new = self._clone()
        # Tuples are immutable, so ``+=`` binds a new tuple on the clone
        # only; the tuple seen by ``self`` is not modified.
        new._operations += ((operator, other),)
        return new

    @property
    def values(self):
        """The data with every queued operation applied, as a new list."""
        return self._evaluate()

    @values.setter
    def values(self, values):
        # Keep the raw data as given; no copy is taken here.
        self._values = values

    def _evaluate(self):
        """Apply all queued operations and return the results as a list.

        Each element is run through the whole operation chain in a single
        pass, so exactly one output list is allocated regardless of how
        many operations are queued (no per-operation temporary list).
        """
        operations = self._operations
        evaluated = []
        for value in self._values:
            for op, other in operations:
                value = op(value, other)
            evaluated.append(value)
        return evaluated
def normalize(array):
    """
    Takes a floating point array.

    Returns a normalized array with values between 0 and 1.

    ``array`` must provide ``min()`` and ``max()`` and support subtracting
    a scalar and dividing by a scalar.  When every element is equal the
    range is zero; in that case the range is treated as 1, so the result
    is all zeros instead of a division-by-zero error.
    """
    low = array.min()
    high = array.max()
    span = high - low
    if span == 0:
        # Constant input: (array - low) is already all zeros, so dividing
        # by 1 keeps the result well-defined.
        span = 1
    return (array - low) / span
# Ten integers drawn with replacement from -100..99.
data = random.choices(range(-100, 100), k=10)
print("original data: ", data)

# Normalize the same data with each strategy; the "New ... class being
# created" lines printed along the way show how many arrays each one builds.
for label, array_cls in (
    ("normalize using COO:", CopyOnOperateArray),
    ("normalize using COE:", CopyOnEvaluateArray),
):
    print(label, normalize(array_cls(data)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output below. Note that three CopyOnOperateArray objects are created, but only one CopyOnEvaluateArray object.