Last active
April 20, 2018 09:06
-
-
Save ogrisel/0e7b3282c84ae4a581f3b9ec1d84b45a to your computer and use it in GitHub Desktop.
Memory profiling for Python pickling of large buffers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pickle import Pickler, _Pickler, Unpickler, _Unpickler, HIGHEST_PROTOCOL | |
import os | |
import time | |
import sys | |
import gc | |
from multiprocessing import get_context | |
# --- Benchmark configuration -------------------------------------------------
# Pickle protocol used when dumping the payload.
PROTOCOL = HIGHEST_PROTOCOL

# File the pickled payload is written to and then read back from.
out_filename = 'output.pkl'

# Multiprocessing context used to create the memory-monitoring child process
# and the queue/event it communicates through.
ctx = get_context('spawn')

# Passing --use-pypickle on the command line selects the pickle module's
# `_Pickler` / `_Unpickler` classes instead of `Pickler` / `Unpickler`.
PicklerFactory, UnpicklerFactory = (
    (_Pickler, _Unpickler)
    if '--use-pypickle' in sys.argv
    else (Pickler, Unpickler)
)
def monitor_worker(pid, queue, stop_event, delay=0.05):
    """Track the peak resident set size of process `pid` and report it.

    The RSS of the target process is sampled after every wait on
    `stop_event` (each wait lasts at most `delay` seconds). Once the event
    is set, the highest value observed is put on `queue`.

    Parameters
    ----------
    pid : process id of the process to observe.
    queue : object with a `put` method that receives the peak RSS.
    stop_event : event whose `wait(timeout=...)` returns true once
        monitoring should end.
    delay : maximum number of seconds between two samples.
    """
    from psutil import Process

    target = Process(pid)
    highest = 0
    stopped = False
    while not stopped:
        # wait() returns True as soon as the stop event is set; a sample is
        # taken after every wait, so the last one happens right after the
        # stop request and catches a late increase in memory usage.
        stopped = stop_event.wait(timeout=delay)
        highest = max(highest, target.memory_info().rss)
    queue.put(highest)
class PeakMemoryMonitor:
    """Context manager that prints the peak RSS of this process over a block.

    On entry a child process running `monitor_worker` is started from the
    module-level `ctx`; it observes the current process (by pid). On a clean
    exit the peak value reported by the worker is stored in `self.peak`
    (bytes) and printed in GB. If the block raised, the worker is terminated
    and the exception propagates.
    """

    # Seconds to wait for the worker's result between liveness checks.
    _poll_interval = 0.5

    def __enter__(self):
        pid = os.getpid()
        self.queue = q = ctx.Queue()
        self.stop_event = e = ctx.Event()
        self.worker = ctx.Process(target=monitor_worker, args=(pid, q, e))
        self.worker.start()
        return self

    def _collect_peak(self):
        """Return the worker's measurement, or raise if the worker died.

        A plain blocking `get()` would hang forever when the worker exits
        without reporting (for instance because it failed on startup), so
        the queue is polled with a timeout and the worker's liveness is
        checked between polls.

        Raises
        ------
        RuntimeError
            If the worker has exited and no measurement is available.
        """
        from queue import Empty

        while True:
            try:
                return self.queue.get(timeout=self._poll_interval)
            except Empty:
                if not self.worker.is_alive():
                    break
        # The worker is gone; give a result that was put just before it
        # exited one last chance to arrive.
        try:
            return self.queue.get(timeout=self._poll_interval)
        except Empty:
            raise RuntimeError(
                "memory monitor worker exited without reporting a "
                "measurement (is psutil installed?)") from None

    def __exit__(self, exc_type, exc_value, tb):
        self.stop_event.set()
        if exc_type is not None:
            # The measurement is irrelevant when the block failed: stop the
            # worker right away, reap it, and let the exception propagate.
            self.worker.terminate()
            self.worker.join()
            return False
        try:
            # Fetch the result before joining: the item must be taken off
            # the queue before waiting for the process that put it there.
            self.peak = self._collect_peak()
        finally:
            # Always reap the child so it does not linger after the block.
            self.worker.join()
        print("=> peak memory usage: {:.3f} GB".format(self.peak / 1e9))
        # No exception is in flight on this path, so the return value has no
        # suppressing effect; True is kept for backward compatibility.
        return True
if __name__ == "__main__":
    # Benchmark driver: allocate a large buffer, pickle it to `out_filename`,
    # load it back, and print the peak memory usage of each phase.
    size = int(2e9)

    # Phase 1: allocate the payload.
    print('Allocating source data...')
    with PeakMemoryMonitor():
        # The payload is a bytearray; a str payload ('0' * size) is the
        # alternative that was previously tried here.
        data = bytearray(size)

    # Phase 2: pickle the payload to disk with the selected pickler.
    print('Dumping to disk...')
    with PeakMemoryMonitor():
        started = time.time()
        with open(out_filename, 'wb') as sink:
            pickler = PicklerFactory(sink, protocol=PROTOCOL)
            pickler.dump(data)
        elapsed = time.time() - started
        print('done in {:0.3f}s'.format(elapsed))

    # Drop the payload and the pickler before loading, then force a
    # collection, so the next phase is measured without them.
    del data, pickler
    gc.collect()

    # Phase 3: unpickle the payload from disk.
    print('Loading back from disk...')
    with PeakMemoryMonitor():
        started = time.time()
        with open(out_filename, 'rb') as source:
            data = UnpicklerFactory(source).load()
        elapsed = time.time() - started
        print('done in {:0.3f}s'.format(elapsed))

    # Sanity check: the reloaded payload has the expected length.
    print('Checking data...')
    assert len(data) == size
    print('ok')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is the output on Python master (C pickler and Python pickler):
Here is the output with the Python pickler fixed in python/cpython#4353: