Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Last active May 27, 2016 13:33
Show Gist options
  • Save ogrisel/7fa242ea267028828d03dac887cf8e71 to your computer and use it in GitHub Desktop.
Save ogrisel/7fa242ea267028828d03dac887cf8e71 to your computer and use it in GitHub Desktop.
Utility script to time the effective memory bandwidth of CPUs
model name : Intel(R) Xeon(R) CPU @ 2.30GHz
Loading arrays to memory
Loading speed: 0.670GB/s
timing bandwidth for sequential memory access
bandwidth: 9.0 GB/s
n_workers=2 (threads)
bandwidth: 17.4 GB/s (1.9x)
n_workers=4 (threads)
bandwidth: 29.5 GB/s (3.3x)
n_workers=8 (threads)
bandwidth: 36.6 GB/s (4.1x)
n_workers=16 (threads)
bandwidth: 44.8 GB/s (5.0x)
n_workers=32 (threads)
bandwidth: 60.4 GB/s (6.7x)
model name : Intel(R) Core(TM) i7-6560U CPU @ 2.20GHz
Loading arrays to memory
Loading speed: 0.530GB/s
timing bandwidth for sequential memory access
bandwidth: 5.8 GB/s
n_workers=2 (threads)
bandwidth: 8.5 GB/s (1.5x)
n_workers=4 (threads)
bandwidth: 8.3 GB/s (1.4x)
n_workers=8 (threads)
bandwidth: 7.9 GB/s (1.4x)
from time import time
import os
import os.path as op
import numpy as np
from concurrent.futures import ThreadPoolExecutor
def prepare_data_files(n=8, array_bytes=int(1e9), data_dir=''):
    """Make sure ``n`` ``.npy`` files of random data exist and return their names.

    Files are named ``random_data_<i>.npy`` for ``i`` in ``range(n)``. A file
    that already exists is reused as is (its content and size are not
    checked); a missing one is generated with ``np.random.randn`` and written
    with ``np.save``.

    Parameters
    ----------
    n : int
        Number of data files to prepare.
    array_bytes : int
        Approximate payload size of each generated array in bytes. The
        default (1e9) yields 1GB arrays of 8-byte floats.
    data_dir : str
        Directory holding the files. The default ``''`` keeps the bare file
        names, i.e. files live in the current working directory.

    Returns
    -------
    list of str
        The ``n`` file names, in index order.
    """
    # np.random.randn produces float64 samples: 8 bytes per item.
    n_items = int(array_bytes) // 8
    fnames = []
    for i in range(n):
        fname = op.join(data_dir, 'random_data_%d.npy' % i)
        if not op.exists(fname):
            print('generating %s' % fname)
            np.save(fname, np.random.randn(n_items))
        # Existing and freshly generated files are both reported.
        fnames.append(fname)
    return fnames
def run_bench_bandwidth(data_size_gb=8):
    """Print memory-bandwidth estimates for sequential and threaded reads.

    Steps:

    1. Print the CPU model name(s) found in ``/proc/cpuinfo`` (shell
       pipeline; only meaningful where that file exists).
    2. Load ``data_size_gb`` arrays prepared by ``prepare_data_files`` and
       report the loading speed.
    3. Time ``np.max`` over every array, first sequentially, then with
       thread pools of 2, 4, 8, ... workers (doubling while the worker
       count does not exceed ``data_size_gb``), printing the bandwidth and
       the speed-up relative to the sequential pass.

    Bandwidth figures are computed as ``data_size_gb / elapsed_seconds``,
    which assumes each data file holds about 1GB.

    Parameters
    ----------
    data_size_gb : int
        Number of data files to load and scan.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() can jump when the system clock is adjusted.
    from time import perf_counter

    os.system("cat /proc/cpuinfo | grep 'model name' | uniq")
    fnames = prepare_data_files(n=data_size_gb)

    print('Loading arrays to memory')
    t0 = perf_counter()
    arrays = [np.load(fname) for fname in fnames]
    duration = perf_counter() - t0
    print("Loading speed: %0.3fGB/s\n" % (data_size_gb / duration))

    # sequential access
    print("timing bandwidth for sequential memory access")
    t0 = perf_counter()
    list(map(np.max, arrays))
    sequential_access_time = perf_counter() - t0
    sequential_bandwidth = data_size_gb / sequential_access_time
    print("bandwidth: %0.1f GB/s" % sequential_bandwidth)
    print()

    # threaded access: one np.max task per array, doubling the pool size
    n_workers = 2
    while n_workers <= data_size_gb:
        print('n_workers=%d (threads)' % n_workers)
        with ThreadPoolExecutor(n_workers) as e:
            t0 = perf_counter()
            # list() drains the lazy result iterator so every task finishes
            # before the clock is stopped.
            list(e.map(np.max, arrays))
            access_time = perf_counter() - t0
        bandwidth = data_size_gb / access_time
        print("bandwidth: %0.1f GB/s (%0.1fx)" %
              (bandwidth, bandwidth / sequential_bandwidth))
        n_workers *= 2
        print()
if __name__ == "__main__":
    import sys

    # Exactly one command-line argument selects the number of GB to scan;
    # any other argument count falls back to 8.
    cli_args = sys.argv[1:]
    n_gb = int(cli_args[0]) if len(cli_args) == 1 else 8
    run_bench_bandwidth(n_gb)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment