Skip to content

Instantly share code, notes, and snippets.

@asi1024
Created January 8, 2020 09:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save asi1024/ee62c50fd1254acb0e9431473862a014 to your computer and use it in GitHub Desktop.
Save asi1024/ee62c50fd1254acb0e9431473862a014 to your computer and use it in GitHub Desktop.
import numpy
import cupy
from cupy import cutensor
import cupyx
# Fail fast when a required back end is unavailable; otherwise the "cub" and
# "cutensor" timings printed by this script would not measure what their
# labels claim. Explicit raises are used instead of ``assert`` so the checks
# are not stripped when Python runs with ``-O``.
if not cupy.cuda.cub_enabled:
    raise RuntimeError(
        'cupy.cuda.cub_enabled is False; CUB is required for this benchmark')
if not cupy.cuda.cutensor_enabled:
    raise RuntimeError(
        'cupy.cuda.cutensor_enabled is False; '
        'cuTENSOR is required for this benchmark')
# Benchmark cases: every entry pairs the input array shape with the tuple of
# axes that is summed over. All cases share the same 3-D input shape.
shape_axes = [
    ((200, 300, 400), reduced_axes)
    for reduced_axes in (
        (0, 1, 2),
        (0,),  # CUB is not used.
        (1,),  # CUB is not used.
        (2,),
    )
]
# For every (shape, axes) case, time the same float32 sum three ways and print
# each result: cupy.sum with CUB disabled, cupy.sum with CUB enabled, and
# cutensor.reduction.
for shape, axes in shape_axes:
    size = int(numpy.prod(shape))
    x = cupy.random.uniform(0, 1, size, dtype='float32').reshape(shape)

    # The output buffer keeps only the dimensions that are not reduced.
    out_shape = [dim for i, dim in enumerate(shape) if i not in axes]
    out = cupy.zeros(out_shape, dtype='float32')

    # Positional arguments forwarded to cupy.sum for both cupy.sum runs.
    sum_args = (x, axes, None, out)

    # Baseline run with CUB switched off. The flag is a module-level global,
    # so it is restored in ``finally`` even if the measurement raises.
    # NOTE(review): only this run passes n=100; the other two rely on the
    # library default repeat count -- confirm this is intentional.
    cupy.cuda.cub_enabled = False
    try:
        perf = cupyx.time.repeat(
            cupy.sum,
            sum_args,
            n=100,
            name=('basic (axes: %10s)' % str(axes)),
        )
        print(perf)
    finally:
        cupy.cuda.cub_enabled = True

    # Same reduction with CUB switched on.
    perf = cupyx.time.repeat(
        cupy.sum,
        sum_args,
        name=('cub (axes: %10s)' % str(axes)),
    )
    print(perf)

    # cuTENSOR run: one integer mode label per input dimension; the output
    # keeps the labels of the dimensions that are not reduced.
    desc_x = cutensor.create_tensor_descriptor(x)
    desc_out = cutensor.create_tensor_descriptor(out)
    mode_x = tuple(range(len(shape)))
    mode_out = [i for i in mode_x if i not in axes]
    # presumably the leading 1 and the 0 are the alpha/beta scaling factors of
    # the reduction -- TODO confirm against the cutensor.reduction signature.
    perf = cupyx.time.repeat(
        cutensor.reduction,
        (1, x, desc_x, mode_x, 0, out, desc_out, mode_out),
        name=('cutensor (axes: %10s)' % str(axes)),
    )
    print(perf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment