belltailjp/bench.py

## bench.py
import argparse
import glob
import os
import time
import numpy as np
from torch.utils.data import DataLoader
from pfio.cache import MultiprocessFileCache
from pfio.cache import FileCache


class CachedDataset:
    """Map-style dataset adapter around a cache object.

    The wrapped object only needs to support ``len()`` and provide a
    ``get(idx)`` method; indexing this dataset forwards to that method.
    """

    def __init__(self, cache):
        """Keep a reference to ``cache`` as the backing store."""
        self.cache = cache

    def __len__(self):
        """Return the length reported by the wrapped cache."""
        n_items = len(self.cache)
        return n_items

    def __getitem__(self, idx):
        """Return whatever ``cache.get(idx)`` yields for ``idx``."""
        sample = self.cache.get(idx)
        return sample


def main():
    """Benchmark reading a preloaded MultiprocessFileCache via DataLoader.

    For every (number of samples, sample size) configuration:

    1. A ``FileCache`` is filled with random byte strings and preserved
       under the name ``cache_data`` in the cache directory.
    2. For each worker count, the preserved data is preloaded into a
       ``MultiprocessFileCache`` and read through a shuffled ``DataLoader``
       ``n_trials`` times; the per-sample mean and standard deviation of
       the elapsed time are printed as one markdown table row.
    3. Files matching ``cache_data*`` in the cache directory are removed,
       even if the benchmark raised part-way through.

    Command-line options:
        --cache-dir: directory for the cache files (default ``/tmp``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--cache-dir', default='/tmp')
    args = parser.parse_args()

    cache_dir = args.cache_dir

    num_workers = [16, 32, 64, 128]
    n_trials = 5
    # (number of samples, bytes per sample); both add up to 32 GiB of data.
    configs = [
        (32768, 1024 ** 2),
        (1024 ** 2, 32768),
    ]

    # Escape the directory so glob metacharacters in the user-supplied path
    # are matched literally; only the trailing '*' acts as a wildcard.
    preserved_pattern = os.path.join(glob.escape(cache_dir), 'cache_data*')

    print('| # samples | sample size | # workers | mean time per sample (us) | stddev (us) |')
    print('|:---|:---|:---|:---|:---|')
    for n_samples, sample_size in configs:
        try:
            # build the cache
            cache = FileCache(n_samples, do_pickle=False, dir=cache_dir)
            for j in range(n_samples):
                cache.put(j, np.random.bytes(sample_size))
            cache.preserve('cache_data')

            # Load cache
            for n_worker in num_workers:
                cache = MultiprocessFileCache(n_samples, do_pickle=False,
                                              dir=cache_dir)
                cache.preload('cache_data')
                ds = CachedDataset(cache)

                times = []
                for _ in range(n_trials):
                    loader = DataLoader(ds, collate_fn=lambda x: x,
                                        batch_size=128,
                                        num_workers=n_worker, shuffle=True)
                    # perf_counter is monotonic, so the measured interval
                    # cannot be distorted by system clock adjustments.
                    before = time.perf_counter()
                    for samples in loader:
                        # Sanity check: every sample has the stored size.
                        assert all(len(s) == sample_size for s in samples)
                    after = time.perf_counter()
                    times.append((after - before) / n_samples)
                mean, std = np.mean(times), np.std(times)
                # Times are collected in seconds and reported in microseconds.
                print('| {} | {} | {} | {:.2f} | {:.2f} |'
                      .format(n_samples, sample_size, n_worker,
                              1e+6 * mean, 1e+6 * std))
        finally:
            # Always drop the preserved files so a failed run does not leave
            # them behind for the next configuration or the next invocation.
            for f in glob.glob(preserved_pattern):
                os.remove(f)


# Run the benchmark only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == '__main__':
    main()

## result.md

      
    Raw
  

              result.md
            
          
    Environment

Xeon 6254 x 2
DDR4 384GB
OS: Ubuntu 18.04x64 (4.15.0-58-generic)
Local storage: local SSD
Python: 3.8.6

master


| # samples | sample size | # workers | mean time per sample (us) | stddev (us) |
|:---|:---|:---|:---|:---|
| 32768 | 1048576 | 16 | 2030.02 | 307.05 |
| 32768 | 1048576 | 32 | 1876.47 | 271.42 |
| 32768 | 1048576 | 64 | 2151.52 | 238.03 |
| 32768 | 1048576 | 128 | 2059.07 | 149.95 |
| 1048576 | 32768 | 16 | 33.73 | 0.39 |
| 1048576 | 32768 | 32 | 36.07 | 0.56 |
| 1048576 | 32768 | 64 | 35.81 | 0.19 |
| 1048576 | 32768 | 128 | 38.54 | 0.63 |


single cache


| # samples | sample size | # workers | mean time per sample (us) | stddev (us) |
|:---|:---|:---|:---|:---|
| 32768 | 1048576 | 16 | 2276.69 | 221.75 |
| 32768 | 1048576 | 32 | 2253.60 | 197.82 |
| 32768 | 1048576 | 64 | 1975.38 | 121.20 |
| 32768 | 1048576 | 128 | 1903.31 | 77.03 |
| 1048576 | 32768 | 16 | 33.59 | 0.21 |
| 1048576 | 32768 | 32 | 35.94 | 0.36 |
| 1048576 | 32768 | 64 | 35.97 | 0.21 |
| 1048576 | 32768 | 128 | 38.96 | 0.14 |
	import argparse
	import glob
	import os
	import time
	import numpy as np
	from torch.utils.data import DataLoader
	from pfio.cache import MultiprocessFileCache
	from pfio.cache import FileCache


	class CachedDataset:
	def __init__(self, cache):
	self.cache = cache

	def __len__(self):
	return len(self.cache)

	def __getitem__(self, idx):
	return self.cache.get(idx)


	def main():
	args = argparse.ArgumentParser()
	args.add_argument('--cache-dir', default='/tmp')
	args = args.parse_args()

	cache_dir = args.cache_dir

	num_workers = [16, 32, 64, 128]
	n_trials = 5
	all_N_l = [
	(32768, 1024 ** 2),
	(1024 ** 2, 32768),
	]

	print('\| # samples \| sample size \| # workers \| mean time per sample (us) \| stddev (us) \|')
	print('\|:---\|:---\|:---\|:---\|:---\|')
	for i, (N, l) in enumerate(all_N_l):
	# build the cache
	cache = FileCache(N, do_pickle=False, dir=cache_dir)
	for j in range(N):
	buf = np.random.bytes(l)
	cache.put(j, buf)
	cache.preserve('cache_data')

	# Load cache
	for n_worker in num_workers:
	cache = MultiprocessFileCache(N, do_pickle=False,
	dir=cache_dir)
	cache.preload('cache_data')
	ds = CachedDataset(cache)

	times = []
	for _ in range(n_trials):
	loader = DataLoader(ds, collate_fn=lambda x: x,
	batch_size=128,
	num_workers=n_worker, shuffle=True)
	before = time.time()
	for samples in loader:
	assert all(len(s) == l for s in samples)
	after = time.time()
	times.append((after - before) / N)
	mean, std = np.mean(times), np.std(times)
	print('\| {} \| {} \| {} \| {:.2f} \| {:.2f} \|'
	.format(N, l, n_worker, 1e+6 * mean, 1e+6 * std))

	for f in glob.glob('{}/cache_data*'.format(cache_dir)):
	os.remove(f)


	if __name__ == '__main__':
	main()
# samples	sample size	# workers	mean time per sample (us)	stddev (us)
32768	1048576	16	2030.02	307.05
32768	1048576	32	1876.47	271.42
32768	1048576	64	2151.52	238.03
32768	1048576	128	2059.07	149.95
1048576	32768	16	33.73	0.39
1048576	32768	32	36.07	0.56
1048576	32768	64	35.81	0.19
1048576	32768	128	38.54	0.63
# samples	sample size	# workers	mean time per sample (us)	stddev (us)
32768	1048576	16	2276.69	221.75
32768	1048576	32	2253.60	197.82
32768	1048576	64	1975.38	121.20
32768	1048576	128	1903.31	77.03
1048576	32768	16	33.59	0.21
1048576	32768	32	35.94	0.36
1048576	32768	64	35.97	0.21
1048576	32768	128	38.96	0.14