Skip to content

Instantly share code, notes, and snippets.

@asi1024
Created December 4, 2019 09:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save asi1024/01081416f54d28cb7043f6d1fa755764 to your computer and use it in GitHub Desktop.
Save asi1024/01081416f54d28cb7043f6d1fa755764 to your computer and use it in GitHub Desktop.
import time
import cupy
import numpy
class _PerfCaseResult(object):
    """Timing samples for one benchmark case, printable as a summary line.

    Args:
        name: Label printed at the start of the summary line.
        ts: 2-D array of shape ``(2, n_samples)`` with ``n_samples > 0``.
            Values are scaled by 1e6 and labelled "us" when formatted, so
            they are expected to be in seconds. ``run`` stores CPU time in
            row 0 and GPU time in row 1.
    """

    def __init__(self, name, ts):
        assert ts.ndim == 2 and ts.shape[0] == 2 and ts.shape[1] > 0
        self.name = name
        self._ts = ts

    @staticmethod
    def _to_str_per_item(t):
        """Format one row of samples as mean (and spread) in microseconds.

        The spread (std / min / max) is only appended when the row holds
        more than one sample.
        """
        assert t.size > 0
        # Scale into a new array: ``t`` may be a view into the stored
        # samples, and an in-place ``*=`` would rescale them on every call,
        # making repeated ``str(result)`` calls print different numbers.
        t_us = t * 1e6
        s = ' {:9.03f} us'.format(t_us.mean())
        if t_us.size > 1:
            s += ' +/-{:6.03f} (min:{:9.03f} / max:{:9.03f}) us'.format(
                t_us.std(), t_us.min(), t_us.max())
        return s

    def to_str(self, show_gpu=False):
        """Return the summary line for this case.

        Args:
            show_gpu: When true, both rows of samples are reported;
                otherwise only row 0 is.
        """
        ts = self._ts if show_gpu else self._ts[[0]]
        return '{:<20s}:{}'.format(
            self.name, ' '.join([self._to_str_per_item(t) for t in ts]))

    def __str__(self):
        return self.to_str(show_gpu=True)
def run(name, func, args=(), n=10000, *, n_warmup=10):
    """Time ``func(*args)`` on the host and on the GPU over ``n`` calls.

    Args:
        name: Label passed through to the returned result.
        func: Callable to benchmark.
        args: Positional arguments unpacked into every call of ``func``.
        n: Number of timed calls.
        n_warmup: Number of untimed calls made before measurement starts.

    Returns:
        A ``_PerfCaseResult`` whose sample array has shape ``(2, n)``:
        row 0 holds ``time.perf_counter`` deltas around each call, row 1
        holds the elapsed time between the two CUDA events, in seconds.
    """
    samples = numpy.empty((2, n), dtype=numpy.float64)
    start_event = cupy.cuda.stream.Event()
    stop_event = cupy.cuda.stream.Event()

    # Untimed calls before the measured loop.
    for _ in range(n_warmup):
        func(*args)

    for k in range(n):
        start_event.synchronize()
        start_event.record()
        cpu_begin = time.perf_counter()
        func(*args)
        cpu_end = time.perf_counter()
        stop_event.record()
        # Wait for the stop event so the elapsed time can be queried.
        stop_event.synchronize()

        samples[0, k] = cpu_end - cpu_begin
        # get_elapsed_time reports milliseconds; store seconds.
        samples[1, k] = cupy.cuda.get_elapsed_time(
            start_event, stop_event) * 1e-3

    return _PerfCaseResult(name, samples)
def main(log_size=24, n_repeat=500):
    """Benchmark ``cupy.sum`` along each axis over a sweep of 2-D shapes.

    For each axis in (0, 1) and each ``i`` in ``0..log_size``, an array of
    shape ``(2**i, 2**(log_size - i))`` is created, so every case has
    ``2**log_size`` elements. One summary line is printed per case.

    Args:
        log_size: Base-2 logarithm of the element count of each array.
        n_repeat: Number of timed calls per case.
    """
    for axis in (0, 1):
        for i in range(log_size + 1):
            dim1, dim2 = 2 ** i, 2 ** (log_size - i)
            name = 'cupy.sum (shape = (%8d, %8d), axis=%d)' % (
                dim1, dim2, axis)
            x = cupy.testing.shaped_random((dim1, dim2))
            perf = run(name, cupy.sum, (x, axis), n_repeat)
            print(perf)
# Runs the full benchmark sweep whenever this module is loaded; there is no
# ``if __name__ == '__main__'`` guard, so importing the file also triggers it.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment