Skip to content

Instantly share code, notes, and snippets.

@braingram
Last active April 28, 2023 21:43
Show Gist options
  • Save braingram/ba388b166bc08b2c42ce1abee317f1b0 to your computer and use it in GitHub Desktop.
Save braingram/ba388b166bc08b2c42ce1abee317f1b0 to your computer and use it in GitHub Desktop.
Performance of memmapping vs chunking (zarr)
import os
import shutil
import time
import numpy
import zarr
# Number of timed repetitions for each (chunk shape, accessor) combination.
n_times = 10
# Shape of the test image; used for both the memmap file and the zarr array.
image_size = (4000, 4000, 50)
# Element type handed to numpy.zeros and zarr.open when creating the data.
dtype = 'uint8'
# Path of the raw binary file that backs the memmap.
mm_fn = 'test_memmap.bin'
# Directory of the zarr store; deleted and recreated by setup_zarr.
zarr_dir = 'test_zarr'
def setup_memmap():
    """Create the zero-filled backing file and return a memmap over it.

    Writes ``image_size`` zeros of type ``dtype`` to ``mm_fn`` (replacing any
    existing file of that name) and then maps the file with ``numpy.memmap``
    using its default mode.

    Returns:
        numpy.memmap: array of shape ``image_size`` backed by ``mm_fn``.
    """
    # generate the memmap file
    arr = numpy.zeros(image_size, dtype)
    # tofile writes the array buffer straight to disk; going through
    # tobytes() would first build a second full-size in-memory copy.
    arr.tofile(mm_fn)
    # Map with the same dtype that was written so the two cannot drift apart.
    return numpy.memmap(mm_fn, shape=image_size, dtype=dtype)
def re_memmap():
    """Return a fresh ``numpy.memmap`` over the existing ``mm_fn`` file.

    The file must already exist with ``image_size`` elements of type
    ``dtype`` (``setup_memmap`` creates it).

    Returns:
        numpy.memmap: array of shape ``image_size`` backed by ``mm_fn``.
    """
    # Use the module-level dtype rather than a hard-coded 'uint8' so the
    # mapping always matches the element type the file was written with.
    return numpy.memmap(mm_fn, shape=image_size, dtype=dtype)
def setup_zarr(chunk_shape):
    """Build a fresh zarr array under ``zarr_dir`` and fill it with zeros.

    Args:
        chunk_shape: chunk dimensions passed to ``zarr.open`` as ``chunks``.

    Returns:
        The array object returned by ``zarr.open``, of shape ``image_size``
        and type ``dtype``, after every element has been assigned 0.
    """
    # Start from a clean slate: drop whatever a previous run left behind.
    stale_store_present = os.path.exists(zarr_dir)
    if stale_store_present:
        shutil.rmtree(zarr_dir)

    store = zarr.open(
        zarr_dir,
        mode='w',
        shape=image_size,
        chunks=chunk_shape,
        dtype=dtype,
    )
    store[:] = 0
    return store
# Chunk layouts to benchmark; each entry is passed to setup_zarr in turn.
chunk_shapes = [
    image_size,  # the entire image stored as a single chunk
    [1000, 1000, 50],  # split along the first two axes only
    [4000, 4000, 10],  # split along the z axis only
    [1000, 1000, 10],  # split along all three axes
    [300, 300, 1],  # many very small chunks
]
# Access patterns to benchmark. Each entry is (function, label): the function
# takes a 3-D array-like and returns the selection to be read; the label is
# printed next to the timings for that selection.
accessors = [
    ((lambda arr: arr[:]), "whole array"),
    ((lambda arr: arr[:, 0, 0]), "first dimension slice"),
    ((lambda arr: arr[0, :, 0]), "second dimension slice"),
    ((lambda arr: arr[0, 0, :]), "third dimension slice"),
    ((lambda arr: arr[10:40, 20:50, 0]), "cutout for 1 z"),
    ((lambda arr: arr[:30, :30, 0]), "cutout aligned to one chunk"),
    # Label previously read "chutout"; corrected so the printed report is
    # spelled consistently with the other cutout entries.
    ((lambda arr: arr[10:40, 20:50, :]), "cutout for all z"),
    ((lambda arr: arr[100:400, 200:500, 0]), "larger cutout for 1 z"),
    ((lambda arr: arr[:300, :300, 0]), "larger cutout aligned to one chunk"),
    ((lambda arr: arr[100:400, 200:500, :]), "larger cutout for all z"),
]
def time_it(arr, accessor, n_times, mean=True):
    """Time ``n_times`` evaluations of ``numpy.sum(accessor(arr))``.

    The selection is summed so that the selected data is actually read
    rather than only sliced.

    Args:
        arr: array-like object handed to ``accessor``.
        accessor: callable taking ``arr`` and returning the selection to sum.
        n_times: number of timed repetitions.
        mean: when true (the default) return the mean duration; otherwise
            return the list of individual durations.

    Returns:
        ``numpy.mean`` of the per-call durations in seconds, or the list of
        durations itself when ``mean`` is false.
    """
    ts = []
    for _ in range(n_times):
        # perf_counter is the highest-resolution clock available; several of
        # the accesses take only microseconds, which a coarser monotonic
        # clock may be unable to resolve.
        t0 = time.perf_counter()
        numpy.sum(accessor(arr))
        t1 = time.perf_counter()
        ts.append(t1 - t0)
    if not mean:
        return ts
    return numpy.mean(ts)
if __name__ == '__main__':
    # Benchmark driver: for every chunk layout, rebuild the zarr store and
    # time each access pattern against both the zarr array and the memmap.
    print(f"Running each test {n_times} times")
    print(f"Testing image shape: {image_size}")

    # Creates the backing file; the mapping itself is re-made per accessor.
    memmap_arr = setup_memmap()

    for cs in chunk_shapes:
        print(f"\tchunk shape: {cs}")
        zarr_arr = setup_zarr(cs)

        for accessor, an in accessors:
            # to make this 'fair' make a new memmap every time
            memmap_arr = re_memmap()

            zt = time_it(zarr_arr, accessor, n_times)
            mmt = time_it(memmap_arr, accessor, n_times)

            # memmap only wins when strictly faster; ties go to chunking.
            if mmt < zt:
                winner = 'memmap'
            else:
                winner = 'chunking'
            print(f"\t\t{winner} win! {an}: chunking={zt}, memmap={mmt}")
@braingram
Copy link
Author

braingram commented Apr 28, 2023

Results for a run with n_times = 1

Running each test 1 times
Testing image shape: (4000, 4000, 50)
	chunk shape: (4000, 4000, 50)
		memmap win! whole array: chunking=0.21326945899636485, memmap=0.202122041999246
		memmap win! first dimension slice: chunking=0.05284383399703074, memmap=0.003369375001057051
		memmap win! second dimension slice: chunking=0.051087416999507695, memmap=4.366700886748731e-05
		memmap win! third dimension slice: chunking=0.05022858399024699, memmap=3.1291987397708e-05
		memmap win! cutout for 1 z: chunking=0.05552812499809079, memmap=6.087501242291182e-05
		memmap win! cutout aligned to one chunk: chunking=0.05585804200381972, memmap=6.279099034145474e-05
		memmap win! chutout for all z: chunking=0.05316399999719579, memmap=7.2750000981614e-05
		memmap win! larger cutout for 1 z: chunking=0.05350100000214297, memmap=0.0007666250021429732
		memmap win! larger cutout aligned to one chunk: chunking=0.05246841699408833, memmap=0.0006311250035651028
		memmap win! larger cutout for all z: chunking=0.05274008400738239, memmap=0.001432582997949794
	chunk shape: [1000, 1000, 50]
		memmap win! whole array: chunking=0.2975324580038432, memmap=0.1921742090053158
		memmap win! first dimension slice: chunking=0.016103000001749024, memmap=0.0031840830051805824
		memmap win! second dimension slice: chunking=0.017106999992392957, memmap=4.279200220480561e-05
		memmap win! third dimension slice: chunking=0.0031345830066129565, memmap=2.8416005079634488e-05
		memmap win! cutout for 1 z: chunking=0.0030621659971075132, memmap=6.0999998822808266e-05
		memmap win! cutout aligned to one chunk: chunking=0.0033969170035561547, memmap=6.09580019954592e-05
		memmap win! chutout for all z: chunking=0.006794417000492103, memmap=7.933301094453782e-05
		memmap win! larger cutout for 1 z: chunking=0.00476850000268314, memmap=0.0008481660042889416
		memmap win! larger cutout aligned to one chunk: chunking=0.005172499993932433, memmap=0.0008740420016692951
		memmap win! larger cutout for all z: chunking=0.006961416002013721, memmap=0.0020638749992940575
	chunk shape: [4000, 4000, 10]
		memmap win! whole array: chunking=0.5640181249909801, memmap=0.1960677500028396
		memmap win! first dimension slice: chunking=0.009735750005347654, memmap=0.0032849169947439805
		memmap win! second dimension slice: chunking=0.015671082990593277, memmap=4.616699879989028e-05
		memmap win! third dimension slice: chunking=0.05057274999853689, memmap=3.2625001040287316e-05
		memmap win! cutout for 1 z: chunking=0.010244542005239055, memmap=6.0000005760230124e-05
		memmap win! cutout aligned to one chunk: chunking=0.014627208991441876, memmap=5.395899643190205e-05
		memmap win! chutout for all z: chunking=0.04832499999611173, memmap=8.091700146906078e-05
		memmap win! larger cutout for 1 z: chunking=0.009592207992682233, memmap=0.0006277909997152165
		memmap win! larger cutout aligned to one chunk: chunking=0.009771666998858564, memmap=0.0006600000051548705
		memmap win! larger cutout for all z: chunking=0.05056187500304077, memmap=0.001403082991600968
	chunk shape: [1000, 1000, 10]
		memmap win! whole array: chunking=0.5623051669972483, memmap=0.19121000000450294
		chunking win! first dimension slice: chunking=0.002491374994860962, memmap=0.002904458000557497
		memmap win! second dimension slice: chunking=0.0023209999926621094, memmap=5.154100654181093e-05
		memmap win! third dimension slice: chunking=0.0031024999916553497, memmap=2.333299198653549e-05
		memmap win! cutout for 1 z: chunking=0.0007400829927064478, memmap=7.183299749158323e-05
		memmap win! cutout aligned to one chunk: chunking=0.0008918749954318628, memmap=6.516701250802726e-05
		memmap win! chutout for all z: chunking=0.0040137920004781336, memmap=9.458299609832466e-05
		memmap win! larger cutout for 1 z: chunking=0.001043583994032815, memmap=0.0010238329996354878
		memmap win! larger cutout aligned to one chunk: chunking=0.0011161250004079193, memmap=0.0009834999946178868
		memmap win! larger cutout for all z: chunking=0.010943916990072466, memmap=0.002653999996255152
	chunk shape: [300, 300, 1]
		memmap win! whole array: chunking=1.595065750007052, memmap=0.19998262499575503
		chunking win! first dimension slice: chunking=0.001296250004088506, memmap=0.0033537500130478293
		memmap win! second dimension slice: chunking=0.0013369590014917776, memmap=4.5958004193380475e-05
		memmap win! third dimension slice: chunking=0.004903499997453764, memmap=3.095899592153728e-05
		memmap win! cutout for 1 z: chunking=0.0002564169990364462, memmap=6.795799708925188e-05
		memmap win! cutout aligned to one chunk: chunking=0.0002527920005377382, memmap=6.558300810866058e-05
		memmap win! chutout for all z: chunking=0.005408583005191758, memmap=9.845799650065601e-05
		chunking win! larger cutout for 1 z: chunking=0.0006278750079218298, memmap=0.0008047919982345775
		chunking win! larger cutout aligned to one chunk: chunking=0.0002954170049633831, memmap=0.0007959579961607233
		memmap win! larger cutout for all z: chunking=0.029081792003125884, memmap=0.001844792001065798

@perrygreenfield
Copy link

Is the mmap accessor actually loading any data into memory?

@braingram
Copy link
Author

Is the mmap accessor actually loading any data into memory?

Good point! I updated it to sum the array (and updated the results above).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment