Skip to content

Instantly share code, notes, and snippets.

@zzzeek
Last active January 9, 2020 19:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zzzeek/caa4a7ed94f326fbbc031acecb9d7a44 to your computer and use it in GitHub Desktop.
Save zzzeek/caa4a7ed94f326fbbc031acecb9d7a44 to your computer and use it in GitHub Desktop.
Copy-on-operate vs. copy on evaluate
"""
this demonstration illustrates how a library like pandas could
theoretically (or maybe it does already with some flag?) not require
a copy of the data when an operation takes place on the structure.
this is based on the article https://pythonspeed.com/articles/minimizing-copying/
which illustrates specific programming techniques that can be used with a numpy
array in order to minimize data copying; this gist presents an alternative by
which the library could perhaps shield this implementation detail from the
end-user.
"""
import operator
import random
class Array:
    """An array class that implements a few simplified numpy-ish operations.

    This base class supplies the reductions (``min`` / ``max``), string
    formatting, and the arithmetic operator hooks.  Every arithmetic
    operator is routed through ``operate``, which subclasses must
    implement; that single hook is where a subclass decides whether an
    operation is applied immediately or deferred.
    """

    def __init__(self, values):
        # Announce each construction so the demo output shows how many
        # array objects get built through the constructor.
        print("New %s class being created" % self.__class__)
        self.values = values

    def operate(self, operator, other):
        """Apply the binary callable ``operator`` with operand ``other``.

        Subclasses override this and return a new array object.  The base
        class has no evaluation strategy of its own, so it raises
        ``NotImplementedError`` instead of failing with an opaque
        ``AttributeError`` when an arithmetic operator is used on it.
        """
        raise NotImplementedError(
            "%s does not implement operate()" % self.__class__.__name__
        )

    def min(self):
        """Return the smallest element of ``self.values``."""
        return min(self.values)

    def max(self):
        """Return the largest element of ``self.values``."""
        return max(self.values)

    def __sub__(self, other):
        return self.operate(operator.sub, other)

    def __truediv__(self, other):
        return self.operate(operator.truediv, other)

    def __add__(self, other):
        return self.operate(operator.add, other)

    def __mul__(self, other):
        return self.operate(operator.mul, other)

    def __str__(self):
        return "[%s]" % (", ".join(str(v) for v in self.values))
class CopyOnOperateArray(Array):
    """A class that implements the operations by copying the data each time.

    Every arithmetic operation eagerly builds a brand-new list of results
    and wraps it in a new array object, so a chain of N operations
    produces N intermediate arrays.
    """

    def operate(self, operator, other):
        """Return a new array holding ``operator(elem, other)`` per element.

        The result is built with ``type(self)`` rather than a hard-coded
        class name so that subclasses get instances of their own type
        back, matching how ``CopyOnEvaluateArray._clone`` uses
        ``self.__class__``.
        """
        # copy for each operation
        return type(self)([operator(elem, other) for elem in self.values])
class CopyOnEvaluateArray(Array):
    """A class that implements the operations by accumulating intent and
    running them all when needed.

    Arithmetic operations do not touch the data.  Each one returns a
    shallow clone that shares the underlying ``_values`` and carries one
    more ``(operator, operand)`` pair in ``_operations``.  The pending
    operations are applied only when ``values`` is read, and they are
    re-applied on every read because nothing is cached.
    """

    # Pending (operator, operand) pairs, oldest first.  A tuple is used so
    # that ``+=`` in ``operate`` rebinds a new tuple on the clone instead
    # of mutating one shared with the source object or the class.
    _operations = ()

    def _clone(self):
        """Return a shallow copy of this object without calling ``__init__``."""
        # shallow copy.  while there's a new CopyOnEvaluateArray object
        # here, we aren't copying the underlying _values, it is being shared.
        s = self.__class__.__new__(self.__class__)
        s.__dict__ = self.__dict__.copy()
        return s

    def operate(self, operator, other):
        """Return a clone with ``(operator, other)`` queued for later."""
        new = self._clone()
        new._operations += ((operator, other),)
        return new

    @property
    def values(self):
        """The data with all queued operations applied (computed per access)."""
        return self._evaluate()

    @values.setter
    def values(self, values):
        # Store the raw, un-evaluated data; ``Array.__init__`` lands here.
        self._values = values

    def _evaluate(self):
        """Apply every queued operation and return the results as a new list.

        All operations are applied to each element in turn, so exactly one
        output list is built no matter how many operations are queued.  The
        shared ``_values`` is never modified.
        """
        operations = self._operations
        evaluated = []
        for value in self._values:
            # Run the whole operation chain on this element, oldest first.
            for op, other in operations:
                value = op(value, other)
            evaluated.append(value)
        return evaluated
def normalize(array):
    """
    Rescale an array so that its values lie between 0 and 1.

    ``array`` must provide ``min()`` and ``max()`` and support ``-`` and
    ``/`` with a scalar operand.  The result is ``(array - low) / span``
    where ``span`` is ``max - min``, so the smallest element maps to 0
    and the largest to 1.

    When every element is equal the span is zero.  The divisor then falls
    back to 1, so the result is all zeros instead of a division-by-zero
    failure.
    """
    low = array.min()
    high = array.max()
    # ``or 1`` only kicks in for a zero span (constant input).
    span = (high - low) or 1
    return (array - low) / span
# Demo driver: draw ten integers (with replacement) from -100..99 and
# normalize the same data with both array implementations.  The
# "New ... class being created" lines printed by the constructors show how
# many array objects each strategy builds along the way.
data = random.choices(range(-100, 100), k=10)
print("original data: ", data)

# Eager strategy: every arithmetic step constructs a new array.
print("normalize using COO:", normalize(CopyOnOperateArray(data)))

# Deferred strategy: arithmetic steps are queued and run on evaluation.
print("normalize using COE:", normalize(CopyOnEvaluateArray(data)))
@zzzeek
Copy link
Author

zzzeek commented Jan 9, 2020

Output below. Note that three CopyOnOperateArray instances are created, but only one CopyOnEvaluateArray instance.

$ python test3.py 
original data:  [-90, 90, -39, -16, 76, 25, -13, 16, -66, 11]
New <class '__main__.CopyOnOperateArray'> class being created
New <class '__main__.CopyOnOperateArray'> class being created
New <class '__main__.CopyOnOperateArray'> class being created
normalize using COO: [0.0, 1.0, 0.2833333333333333, 0.4111111111111111, 0.9222222222222223, 0.6388888888888888, 0.42777777777777776, 0.5888888888888889, 0.13333333333333333, 0.5611111111111111]
New <class '__main__.CopyOnEvaluateArray'> class being created
normalize using COE: [0.0, 1.0, 0.2833333333333333, 0.4111111111111111, 0.9222222222222223, 0.6388888888888888, 0.42777777777777776, 0.5888888888888889, 0.13333333333333333, 0.5611111111111111]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment