Skip to content

Instantly share code, notes, and snippets.

@bonprosoft
Last active September 11, 2017 08:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bonprosoft/26065f41c9a4ab5c05c8620ca1606833 to your computer and use it in GitHub Desktop.
Save bonprosoft/26065f41c9a4ab5c05c8620ca1606833 to your computer and use it in GitHub Desktop.
import cProfile
import random
import cupy
from cupy import cudnn
# Short alias for the low-level binding module exposed as ``cudnn.cudnn``;
# the hand-written benchmark below calls its dropout functions directly.
libcudnn = cudnn.cudnn
# Number of dropout layers exercised per forward/backward sweep.
N_LAYER = 10
# Number of times each benchmark repeats the full sweep.
N_TESTCASE = 1000
# One dropout ratio per layer, each drawn uniformly from [0.0, 1.0).
# Built with a comprehension so no loop index leaks into the module namespace.
RATIO = [random.random() for _ in range(N_LAYER)]
# Seed handed to the dropout descriptor / transaction constructors.
SEED = 0
# Shared benchmark inputs: X feeds every forward pass, DY every backward pass.
X = cupy.random.random_sample(size=(10, 10))
DY = cupy.random.random_sample(size=(10, 10))
def test_cudnn_singleton():
    """Benchmark dropout using one descriptor that is re-configured per layer.

    For each of ``N_TESTCASE`` runs, a dropout-state buffer and a single
    dropout descriptor are created, then ``N_LAYER`` forward passes and
    ``N_LAYER`` backward passes (in reverse layer order) are issued through
    the low-level ``libcudnn`` bindings.  Before every pass the shared
    descriptor is updated with that layer's ratio from ``RATIO``.

    Nothing is returned; the function exists to be run under ``cProfile``.

    NOTE(review): no explicit device synchronization is performed here, so
    the profile presumably reflects host-side call overhead only -- confirm.
    """
    handle = cudnn.get_handle()

    def initialize(env):
        """Create the dropout states and the shared descriptor for one run."""
        d_states = cudnn.create_dropout_states(handle)
        env['d_states'] = d_states
        env['desc'] = cudnn.create_dropout_descriptor(
            handle, 0.0, d_states.data.ptr, d_states.size, SEED)

    def forward(env, states):
        """Run dropout forward once per layer, appending each reserve space."""
        for i in range(N_LAYER):
            x = cudnn._as4darray(cupy.ascontiguousarray(X))
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cupy.empty_like(x)
            cudnn.set_dropout_descriptor(env['desc'], handle, RATIO[i])
            reserve_size = libcudnn.getDropoutReserveSpaceSize(x_desc.value)
            # The reserve size is a byte count, so allocate a byte array.
            # The default dtype (float64) would allocate 8x the memory needed.
            reserve_space = cupy.empty((reserve_size,), dtype='b')
            # Pass the pointer of the array the descriptor was built from,
            # not the raw module-level input.
            libcudnn.dropoutForward(
                handle, env['desc'].value,
                x_desc.value, x.data.ptr,
                x_desc.value, y.data.ptr,
                reserve_space.data.ptr, reserve_size)
            states.append(reserve_space)

    def backward(env, states):
        """Run dropout backward once per layer, last layer first."""
        for i in reversed(range(N_LAYER)):
            dx = cupy.empty_like(DY)
            dy = cudnn._as4darray(cupy.ascontiguousarray(DY))
            dy_desc = cudnn.create_tensor_descriptor(dy)
            cudnn.set_dropout_descriptor(env['desc'], handle, RATIO[i])
            # states[i] is a byte array, so .size equals the reserve size in
            # bytes that the matching forward pass used.
            libcudnn.dropoutBackward(
                handle, env['desc'].value,
                dy_desc.value, dy.data.ptr,
                dy_desc.value, dx.data.ptr,
                states[i].data.ptr, states[i].size)

    for _ in range(N_TESTCASE):
        env = {}
        states = []
        initialize(env)
        forward(env, states)
        backward(env, states)
def test_cudnn_new_api():
    """Benchmark dropout through the ``cudnn.DropoutTransaction`` wrapper.

    Each of the ``N_TESTCASE`` runs builds a fresh transaction, performs
    ``N_LAYER`` forward calls (keeping the first element of every result),
    and then ``N_LAYER`` backward calls in reverse layer order, handing
    each one the value saved by the matching forward call.

    Nothing is returned; the function exists to be run under ``cProfile``.
    """
    handle = cudnn.get_handle()

    def initialize(env):
        """Store a new transaction, seeded with ``SEED``, under ``'desc'``."""
        env['desc'] = cudnn.DropoutTransaction(handle, SEED)

    def forward(env, states):
        """Call ``forward`` once per layer and collect the saved values."""
        transaction = env['desc']
        for layer in range(N_LAYER):
            result = transaction.forward(handle, X, RATIO[layer])
            # The result is a pair; only its first element is kept.
            reserve_space, _ = result
            states.append(reserve_space)

    def backward(env, states):
        """Call ``backward`` once per layer, last layer first."""
        transaction = env['desc']
        for layer in reversed(range(N_LAYER)):
            transaction.backward(handle, DY, RATIO[layer], states[layer])

    for _ in range(N_TESTCASE):
        env, states = {}, []
        initialize(env)
        forward(env, states)
        backward(env, states)
if __name__ == '__main__':
    # Create the cuDNN handle before profiling.  Otherwise the one-time
    # creation cost is charged to whichever benchmark runs first, which
    # skews the comparison between the two approaches.
    cudnn.get_handle()
    cProfile.run('test_cudnn_singleton()')
    cProfile.run('test_cudnn_new_api()')
383006 function calls in 1.824 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 1.824 1.824 <string>:1(<module>)
20000 0.221 0.000 0.221 0.000 basic.py:22(empty_like)
11000 0.202 0.000 0.202 0.000 basic.py:4(empty)
1 0.000 0.000 0.000 0.000 cudnn.py:19(get_handle)
20000 0.014 0.000 0.054 0.000 cudnn.py:207(_as4darray)
1000 0.002 0.000 0.009 0.000 cudnn.py:256(create_dropout_descriptor)
20000 0.008 0.000 0.015 0.000 cudnn.py:265(set_dropout_descriptor)
1000 0.001 0.000 0.095 0.000 cudnn.py:310(create_dropout_states)
1000 0.002 0.000 0.106 0.000 cudnn.py:333(__init__)
10000 0.036 0.000 0.414 0.000 cudnn.py:339(forward)
10000 0.028 0.000 0.265 0.000 cudnn.py:360(backward)
21000 0.005 0.000 0.005 0.000 cudnn.py:46(__init__)
21000 0.011 0.000 0.015 0.000 cudnn.py:50(__del__)
20000 0.011 0.000 0.011 0.000 cudnn.py:56(get_data_type)
20000 0.054 0.000 0.082 0.000 cudnn.py:71(create_tensor_descriptor)
20000 0.006 0.000 0.009 0.000 from_data.py:63(ascontiguousarray)
1 0.104 0.104 1.824 1.824 perf_cudnn.py:67(test_cudnn_new_api)
1000 0.002 0.000 0.108 0.000 perf_cudnn.py:70(initialize)
1000 0.833 0.001 1.256 0.001 perf_cudnn.py:73(forward)
1000 0.084 0.000 0.355 0.000 perf_cudnn.py:78(backward)
1 0.000 0.000 1.824 1.824 {built-in method builtins.exec}
20000 0.004 0.000 0.004 0.000 {built-in method builtins.isinstance}
20000 0.003 0.000 0.003 0.000 {built-in method cupy.core.core.ascontiguousarray}
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.createDropoutDescriptor}
20000 0.005 0.000 0.005 0.000 {built-in method cupy.cuda.cudnn.createTensorDescriptor}
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.destroyDropoutDescriptor}
20000 0.003 0.000 0.003 0.000 {built-in method cupy.cuda.cudnn.destroyTensorDescriptor}
10000 0.059 0.000 0.059 0.000 {built-in method cupy.cuda.cudnn.dropoutBackward}
10000 0.062 0.000 0.062 0.000 {built-in method cupy.cuda.cudnn.dropoutForward}
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.dropoutGetStatesSize}
10000 0.002 0.000 0.002 0.000 {built-in method cupy.cuda.cudnn.getDropoutReserveSpaceSize}
21000 0.014 0.000 0.014 0.000 {built-in method cupy.cuda.cudnn.setDropoutDescriptor}
20000 0.007 0.000 0.007 0.000 {built-in method cupy.cuda.cudnn.setTensor4dDescriptor}
1 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.device.get_device_id}
10000 0.001 0.000 0.001 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
20000 0.040 0.000 0.040 0.000 {method 'reshape' of 'cupy.core.core.ndarray' objects}
342007 function calls in 1.992 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 1.992 1.992 <string>:1(<module>)
20000 0.253 0.000 0.253 0.000 basic.py:22(empty_like)
11000 0.203 0.000 0.203 0.000 basic.py:4(empty)
1 0.000 0.000 0.137 0.137 cudnn.py:19(get_handle)
20000 0.016 0.000 0.058 0.000 cudnn.py:207(_as4darray)
1000 0.002 0.000 0.010 0.000 cudnn.py:256(create_dropout_descriptor)
20000 0.008 0.000 0.017 0.000 cudnn.py:265(set_dropout_descriptor)
1000 0.001 0.000 0.095 0.000 cudnn.py:310(create_dropout_states)
21000 0.006 0.000 0.006 0.000 cudnn.py:46(__init__)
21000 0.009 0.000 0.012 0.000 cudnn.py:50(__del__)
20000 0.011 0.000 0.011 0.000 cudnn.py:56(get_data_type)
20000 0.054 0.000 0.083 0.000 cudnn.py:71(create_tensor_descriptor)
20000 0.007 0.000 0.011 0.000 from_data.py:63(ascontiguousarray)
1 0.117 0.117 1.992 1.992 perf_cudnn.py:21(test_cudnn_singleton)
1000 0.002 0.000 0.107 0.000 perf_cudnn.py:24(initialize)
1000 0.877 0.001 1.272 0.001 perf_cudnn.py:29(forward)
1000 0.091 0.000 0.355 0.000 perf_cudnn.py:45(backward)
1 0.000 0.000 1.992 1.992 {built-in method builtins.exec}
20000 0.004 0.000 0.004 0.000 {built-in method cupy.core.core.ascontiguousarray}
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.createDropoutDescriptor}
20000 0.005 0.000 0.005 0.000 {built-in method cupy.cuda.cudnn.createTensorDescriptor}
1 0.137 0.137 0.137 0.137 {built-in method cupy.cuda.cudnn.create}
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.destroyDropoutDescriptor}
20000 0.003 0.000 0.003 0.000 {built-in method cupy.cuda.cudnn.destroyTensorDescriptor}
10000 0.056 0.000 0.056 0.000 {built-in method cupy.cuda.cudnn.dropoutBackward}
10000 0.060 0.000 0.060 0.000 {built-in method cupy.cuda.cudnn.dropoutForward}
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.dropoutGetStatesSize}
10000 0.002 0.000 0.002 0.000 {built-in method cupy.cuda.cudnn.getDropoutReserveSpaceSize}
21000 0.017 0.000 0.017 0.000 {built-in method cupy.cuda.cudnn.setDropoutDescriptor}
20000 0.007 0.000 0.007 0.000 {built-in method cupy.cuda.cudnn.setTensor4dDescriptor}
1 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.device.get_device_id}
10000 0.001 0.000 0.001 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
20000 0.042 0.000 0.042 0.000 {method 'reshape' of 'cupy.core.core.ndarray' objects}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment