Last active
September 11, 2017 08:00
-
-
Save bonprosoft/26065f41c9a4ab5c05c8620ca1606833 to your computer and use it in GitHub Desktop.
cuDNN dropout performance using cupy fcd8f02 https://github.com/bonprosoft/cupy/tree/fcd8f02044972ffe5ada716fa9355badf3ebe674
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cProfile | |
import random | |
import cupy | |
from cupy import cudnn | |
# Alias for the `cudnn` attribute of `cupy.cudnn`; the benchmark below calls
# the raw dropout entry points through it.
libcudnn = cudnn.cudnn

# Benchmark dimensions: dropout layers per pass, and number of repeated passes.
N_LAYER = 10
N_TESTCASE = 1000

# One dropout ratio per layer, each drawn from random.random().
RATIO = [random.random() for _ in range(N_LAYER)]

# Fixed input and upstream-gradient arrays shared by every benchmark run.
X = cupy.random.random_sample((10, 10))
DY = cupy.random.random_sample((10, 10))

# Seed handed to the dropout descriptor / transaction constructors.
SEED = 0
def test_cudnn_singleton():
    """Benchmark dropout driven directly through the raw cuDNN bindings.

    Runs ``N_TESTCASE`` iterations. Each iteration creates fresh dropout
    states and a dropout descriptor, performs ``N_LAYER`` forward dropout
    calls on ``X`` (one per entry of ``RATIO``), then performs the matching
    backward calls on ``DY`` in reverse layer order, reusing the reserve
    space produced by the corresponding forward call.

    The function returns nothing; it exists to be timed by ``cProfile``.
    """
    handle = cudnn.get_handle()

    def initialize(env):
        # Keep the states array in ``env`` so it stays referenced for as long
        # as the descriptor that was given its device pointer.
        d_states = cudnn.create_dropout_states(handle)
        env['d_states'] = d_states
        env['desc'] = cudnn.create_dropout_descriptor(
            handle, 0.0, d_states.data.ptr, d_states.size, SEED)

    def forward(env, states):
        for i in range(N_LAYER):
            x = cupy.ascontiguousarray(X)
            x = cudnn._as4darray(x)
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cupy.empty_like(x)
            cudnn.set_dropout_descriptor(env['desc'], handle, RATIO[i])
            reserve_size = libcudnn.getDropoutReserveSpaceSize(x_desc.value)
            # The reserve size is a byte count, so allocate a byte buffer.
            # The default float64 dtype would allocate 8x more than needed.
            reserve_space = cupy.empty((reserve_size,), dtype=cupy.int8)
            # Pass the pointer of the array the descriptor was built from
            # (``x``), not the raw global, so that the descriptor and the
            # buffer always agree even if ``X`` were not contiguous.
            libcudnn.dropoutForward(handle, env['desc'].value,
                                    x_desc.value, x.data.ptr,
                                    x_desc.value, y.data.ptr,
                                    reserve_space.data.ptr, reserve_size)
            states.append(reserve_space)

    def backward(env, states):
        for i in reversed(range(N_LAYER)):
            dy = cupy.ascontiguousarray(DY)
            dy = cudnn._as4darray(dy)
            dy_desc = cudnn.create_tensor_descriptor(dy)
            # Output buffer shaped like the array that ``dy_desc`` describes.
            dx = cupy.empty_like(dy)
            cudnn.set_dropout_descriptor(env['desc'], handle, RATIO[i])
            reserve_space = states[i]
            libcudnn.dropoutBackward(handle, env['desc'].value,
                                     dy_desc.value, dy.data.ptr,
                                     dy_desc.value, dx.data.ptr,
                                     reserve_space.data.ptr,
                                     reserve_space.nbytes)

    for _ in range(N_TESTCASE):
        env = {}
        states = []
        initialize(env)
        forward(env, states)
        backward(env, states)
def test_cudnn_new_api():
    """Benchmark dropout driven through ``cudnn.DropoutTransaction``.

    Runs ``N_TESTCASE`` iterations. Each iteration builds a new
    ``DropoutTransaction``, calls its ``forward`` once per layer on ``X``
    while collecting the first element of each result, then calls its
    ``backward`` on ``DY`` for every layer in reverse order with the
    collected value for that layer.

    The function returns nothing; it exists to be timed by ``cProfile``.
    """
    handle = cudnn.get_handle()

    def initialize(env):
        env['desc'] = cudnn.DropoutTransaction(handle, SEED)

    def forward(env, states):
        transaction = env['desc']
        for layer in range(N_LAYER):
            # Only the first element of the returned pair is kept.
            reserve_space, _ = transaction.forward(handle, X, RATIO[layer])
            states.append(reserve_space)

    def backward(env, states):
        transaction = env['desc']
        # Walk the layers last-to-first, mirroring a backward pass.
        for layer in reversed(range(N_LAYER)):
            transaction.backward(handle, DY, RATIO[layer], states[layer])

    for _ in range(N_TESTCASE):
        env, states = {}, []
        initialize(env)
        forward(env, states)
        backward(env, states)
if __name__ == '__main__':
    # Profile each benchmark in its own cProfile run so that every variant
    # prints a separate report, in the same order as before.
    for statement in ('test_cudnn_singleton()', 'test_cudnn_new_api()'):
        cProfile.run(statement)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
383006 function calls in 1.824 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 1.824 1.824 <string>:1(<module>) | |
20000 0.221 0.000 0.221 0.000 basic.py:22(empty_like) | |
11000 0.202 0.000 0.202 0.000 basic.py:4(empty) | |
1 0.000 0.000 0.000 0.000 cudnn.py:19(get_handle) | |
20000 0.014 0.000 0.054 0.000 cudnn.py:207(_as4darray) | |
1000 0.002 0.000 0.009 0.000 cudnn.py:256(create_dropout_descriptor) | |
20000 0.008 0.000 0.015 0.000 cudnn.py:265(set_dropout_descriptor) | |
1000 0.001 0.000 0.095 0.000 cudnn.py:310(create_dropout_states) | |
1000 0.002 0.000 0.106 0.000 cudnn.py:333(__init__) | |
10000 0.036 0.000 0.414 0.000 cudnn.py:339(forward) | |
10000 0.028 0.000 0.265 0.000 cudnn.py:360(backward) | |
21000 0.005 0.000 0.005 0.000 cudnn.py:46(__init__) | |
21000 0.011 0.000 0.015 0.000 cudnn.py:50(__del__) | |
20000 0.011 0.000 0.011 0.000 cudnn.py:56(get_data_type) | |
20000 0.054 0.000 0.082 0.000 cudnn.py:71(create_tensor_descriptor) | |
20000 0.006 0.000 0.009 0.000 from_data.py:63(ascontiguousarray) | |
1 0.104 0.104 1.824 1.824 perf_cudnn.py:67(test_cudnn_new_api) | |
1000 0.002 0.000 0.108 0.000 perf_cudnn.py:70(initialize) | |
1000 0.833 0.001 1.256 0.001 perf_cudnn.py:73(forward) | |
1000 0.084 0.000 0.355 0.000 perf_cudnn.py:78(backward) | |
1 0.000 0.000 1.824 1.824 {built-in method builtins.exec} | |
20000 0.004 0.000 0.004 0.000 {built-in method builtins.isinstance} | |
20000 0.003 0.000 0.003 0.000 {built-in method cupy.core.core.ascontiguousarray} | |
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.createDropoutDescriptor} | |
20000 0.005 0.000 0.005 0.000 {built-in method cupy.cuda.cudnn.createTensorDescriptor} | |
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.destroyDropoutDescriptor} | |
20000 0.003 0.000 0.003 0.000 {built-in method cupy.cuda.cudnn.destroyTensorDescriptor} | |
10000 0.059 0.000 0.059 0.000 {built-in method cupy.cuda.cudnn.dropoutBackward} | |
10000 0.062 0.000 0.062 0.000 {built-in method cupy.cuda.cudnn.dropoutForward} | |
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.dropoutGetStatesSize} | |
10000 0.002 0.000 0.002 0.000 {built-in method cupy.cuda.cudnn.getDropoutReserveSpaceSize} | |
21000 0.014 0.000 0.014 0.000 {built-in method cupy.cuda.cudnn.setDropoutDescriptor} | |
20000 0.007 0.000 0.007 0.000 {built-in method cupy.cuda.cudnn.setTensor4dDescriptor} | |
1 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.device.get_device_id} | |
10000 0.001 0.000 0.001 0.000 {method 'append' of 'list' objects} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
20000 0.040 0.000 0.040 0.000 {method 'reshape' of 'cupy.core.core.ndarray' objects} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
342007 function calls in 1.992 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 1.992 1.992 <string>:1(<module>) | |
20000 0.253 0.000 0.253 0.000 basic.py:22(empty_like) | |
11000 0.203 0.000 0.203 0.000 basic.py:4(empty) | |
1 0.000 0.000 0.137 0.137 cudnn.py:19(get_handle) | |
20000 0.016 0.000 0.058 0.000 cudnn.py:207(_as4darray) | |
1000 0.002 0.000 0.010 0.000 cudnn.py:256(create_dropout_descriptor) | |
20000 0.008 0.000 0.017 0.000 cudnn.py:265(set_dropout_descriptor) | |
1000 0.001 0.000 0.095 0.000 cudnn.py:310(create_dropout_states) | |
21000 0.006 0.000 0.006 0.000 cudnn.py:46(__init__) | |
21000 0.009 0.000 0.012 0.000 cudnn.py:50(__del__) | |
20000 0.011 0.000 0.011 0.000 cudnn.py:56(get_data_type) | |
20000 0.054 0.000 0.083 0.000 cudnn.py:71(create_tensor_descriptor) | |
20000 0.007 0.000 0.011 0.000 from_data.py:63(ascontiguousarray) | |
1 0.117 0.117 1.992 1.992 perf_cudnn.py:21(test_cudnn_singleton) | |
1000 0.002 0.000 0.107 0.000 perf_cudnn.py:24(initialize) | |
1000 0.877 0.001 1.272 0.001 perf_cudnn.py:29(forward) | |
1000 0.091 0.000 0.355 0.000 perf_cudnn.py:45(backward) | |
1 0.000 0.000 1.992 1.992 {built-in method builtins.exec} | |
20000 0.004 0.000 0.004 0.000 {built-in method cupy.core.core.ascontiguousarray} | |
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.createDropoutDescriptor} | |
20000 0.005 0.000 0.005 0.000 {built-in method cupy.cuda.cudnn.createTensorDescriptor} | |
1 0.137 0.137 0.137 0.137 {built-in method cupy.cuda.cudnn.create} | |
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.destroyDropoutDescriptor} | |
20000 0.003 0.000 0.003 0.000 {built-in method cupy.cuda.cudnn.destroyTensorDescriptor} | |
10000 0.056 0.000 0.056 0.000 {built-in method cupy.cuda.cudnn.dropoutBackward} | |
10000 0.060 0.000 0.060 0.000 {built-in method cupy.cuda.cudnn.dropoutForward} | |
1000 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.cudnn.dropoutGetStatesSize} | |
10000 0.002 0.000 0.002 0.000 {built-in method cupy.cuda.cudnn.getDropoutReserveSpaceSize} | |
21000 0.017 0.000 0.017 0.000 {built-in method cupy.cuda.cudnn.setDropoutDescriptor} | |
20000 0.007 0.000 0.007 0.000 {built-in method cupy.cuda.cudnn.setTensor4dDescriptor} | |
1 0.000 0.000 0.000 0.000 {built-in method cupy.cuda.device.get_device_id} | |
10000 0.001 0.000 0.001 0.000 {method 'append' of 'list' objects} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
20000 0.042 0.000 0.042 0.000 {method 'reshape' of 'cupy.core.core.ndarray' objects} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment