Activation |
{'data': (1024, 1024), 'act_type': 'relu'} |
1564139.25 |
0.0566 |
0.0686 |
Activation |
{'data': (1024, 1024), 'act_type': 'sigmoid'} |
1566236.375 |
0.506 |
0.8792 |
Activation |
{'data': (1024, 1024), 'act_type': 'softrelu'} |
1568333.5 |
0.1783 |
0.8208 |
Activation |
{'data': (1024, 1024), 'act_type': 'softsign'} |
1570430.625 |
0.3635 |
0.0677 |
Activation |
{'data': (1024, 1024), 'act_type': 'tanh'} |
1572527.75 |
0.2232 |
1.7112 |
BatchNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-08, 'axis': 1} |
416853.1562 |
2.6452 |
1.378 |
BatchNorm |
{'data': (32, 3, 10000, 10), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-08, 'axis': 1} |
442670.25 |
6.2914 |
1.9779 |
BilinearSampler |
{'data': (32, 2, 256, 256), 'grid': (32, 2, 256, 256)} |
25165.8242 |
103.9467 |
116.2548 |
BlockGrad |
{'data': (1024, 1024)} |
4194.3042 |
0.4 |
--- |
BlockGrad |
{'data': (10000, 1)} |
40.0 |
0.0101 |
--- |
BlockGrad |
{'data': (10000, 100)} |
2000.0 |
0.3253 |
--- |
CTCLoss |
{'data': (1024, 100, 100), 'label': (100, 100)} |
5406962.5 |
60.1745 |
--- |
Convolution |
{'data': (32, 3, 64, 64), 'weight': (64, 3, 3, 3), 'bias': (64,), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (0, 0), 'num_filter': 64, 'layout': 'NCHW'} |
4707539.5 |
2.1701 |
4.1398 |
Correlation |
{'data1': (32, 3, 256, 256), 'data2': (32, 3, 256, 256), 'kernel_size': 3, 'max_displacement': 2, 'stride1': 2, 'stride2': 2} |
475636.0625 |
491.1135 |
2688.7466 |
Correlation |
{'data1': (32, 3, 10000, 10), 'data2': (32, 3, 10000, 10), 'kernel_size': 3, 'max_displacement': 2, 'stride1': 2, 'stride2': 2} |
491383.3438 |
353.6813 |
1713.1298 |
Custom |
{'args': [(1024, 1024)], 'op_type': 'CustomAddOne'} |
5530295.0 |
0.0097 |
0.0099 |
Custom |
{'args': [(10000, 1)], 'op_type': 'CustomAddOne'} |
5530295.0 |
0.0113 |
0.0093 |
Custom |
{'args': [(10000, 10)], 'op_type': 'CustomAddOne'} |
5530295.0 |
0.0101 |
0.0106 |
Deconvolution |
{'data': (32, 3, 64, 64), 'weight': (3, 64, 3, 3), 'bias': (64,), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (0, 0), 'num_filter': 64, 'no_bias': False, 'layout': 'NCHW'} |
5386482.0 |
25.3685 |
24.9116 |
Dropout |
{'data': (32, 3, 256, 256), 'p': 1, 'mode': 'always', 'axes': [0, 1]} |
485106.5938 |
1.4163 |
0.94 |
Dropout |
{'data': (10000, 10), 'p': 1, 'mode': 'always', 'axes': [0, 1]} |
472792.625 |
0.1068 |
0.0812 |
ElementWiseSum |
{'args': (1024, 1024)} |
2097.1521 |
0.0934 |
--- |
Embedding |
{'data': (1024, 1024), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'float32', 'sparse_grad': False} |
497758.4375 |
0.4161 |
--- |
Embedding |
{'data': (10000, 1), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'int32', 'sparse_grad': False} |
489529.8125 |
0.0385 |
--- |
Embedding |
{'data': (10000, 100), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'float32', 'sparse_grad': False} |
505449.8125 |
0.4026 |
--- |
FullyConnected |
{'data': (32, 3, 256, 256), 'weight': (64, 196608), 'bias': (64,), 'num_hidden': 64, 'flatten': True} |
497462.125 |
1.7125 |
4.8088 |
FullyConnected |
{'data': (32, 3, 10000, 10), 'weight': (64, 10), 'bias': (64,), 'num_hidden': 64, 'flatten': False} |
743222.125 |
44.2965 |
76.7286 |
GridGenerator |
{'data': (32, 2, 256, 256), 'transform_type': 'warp', 'target_shape': (256, 6)} |
17039.3594 |
10.911 |
8.744 |
GridGenerator |
{'data': (256, 6), 'transform_type': 'affine', 'target_shape': (256, 6)} |
1582.08 |
0.5582 |
1.5901 |
GroupNorm |
{'data': (32, 3, 256, 256), 'gamma': (1,), 'beta': (1,), 'num_groups': 1, 'eps': 1e-08} |
658091.0 |
11.7595 |
175.6284 |
GroupNorm |
{'data': (32, 10, 10000, 10), 'gamma': (10,), 'beta': (10,), 'num_groups': 10, 'eps': 1e-08} |
895288.6875 |
52.0266 |
255.2737 |
InstanceNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'eps': 1e-08} |
869036.5 |
11.2944 |
184.461 |
InstanceNorm |
{'data': (32, 3, 10000, 10), 'gamma': (3,), 'beta': (3,), 'eps': 1e-08} |
894853.625 |
24.1254 |
281.2478 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-08, 'mode': 'channel'} |
917596.25 |
4.2096 |
9.7036 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-08, 'mode': 'instance'} |
925984.9375 |
0.7249 |
1.4459 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-08, 'mode': 'spatial'} |
938568.0 |
0.6508 |
7.5462 |
LRN |
{'data': (32, 3, 256, 256), 'alpha': 0.001, 'beta': 0.2, 'nsize': 3} |
963733.625 |
22.1928 |
53.8383 |
LRN |
{'data': (32, 3, 10000, 10), 'alpha': 0.001, 'beta': 0.2, 'nsize': 3} |
989550.6875 |
46.5718 |
94.5082 |
LayerNorm |
{'data': (32, 3, 256, 256), 'gamma': (32,), 'beta': (32,), 'axis': 0, 'eps': 1e-08} |
1008885.875 |
13.4946 |
22.2397 |
LayerNorm |
{'data': (32, 3, 10000, 10), 'gamma': (32,), 'beta': (32,), 'axis': 0, 'eps': 1e-08} |
1048485.875 |
20.7723 |
33.9548 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'leaky'} |
1576722.125 |
0.0538 |
0.0649 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'elu'} |
1578819.25 |
0.1205 |
0.76 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'selu'} |
1580916.375 |
0.7919 |
0.3222 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'gelu'} |
1583013.625 |
0.296 |
2.1755 |
LinearRegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1065834.625 |
1.5131 |
--- |
LinearRegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1091651.625 |
5.034 |
--- |
LogisticRegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1110200.375 |
4.4785 |
--- |
LogisticRegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1136017.5 |
9.3099 |
--- |
MAERegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1154566.25 |
1.2484 |
--- |
MAERegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1180383.375 |
4.9788 |
--- |
MakeLoss |
{'data': (1024, 1024), 'grad_scale': 0.5, 'normalization': 'batch'} |
5392774.0 |
0.0569 |
0.0411 |
MakeLoss |
{'data': (10000, 1), 'grad_scale': 0.5, 'normalization': 'batch'} |
5390716.5 |
0.0356 |
0.031 |
MakeLoss |
{'data': (10000, 100), 'grad_scale': 0.5, 'normalization': 'batch'} |
5394696.5 |
0.0611 |
0.0398 |
Pooling |
{'data': (32, 3, 64, 64), 'kernel': (3, 3), 'pool_type': 'sum', 'global_pool': 1, 'stride': (1, 1), 'pad': (0, 0)} |
1749846.0 |
0.5543 |
0.2783 |
RNN |
{'data': (32, 4, 4), 'parameters': (7,), 'state': (1, 4, 1), 'state_size': 1, 'num_layers': 1, 'mode': 'rnn_relu', 'p': 0.5} |
1161186.0 |
0.9437 |
1.3606 |
RNN |
{'data': (512, 10000, 10), 'parameters': (104,), 'state': (2, 10000, 4), 'state_size': 4, 'num_layers': 2, 'mode': 'rnn_tanh', 'p': 0.5} |
1284088.375 |
434.3607 |
753.2392 |
ROIPooling |
{'data': (32, 3, 64, 64), 'rois': (32, 5), 'pooled_size': (2, 2), 'spatial_scale': 0.5} |
1749851.875 |
0.541 |
0.0435 |
SVMOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256), 'margin': 0.5, 'regularization_coefficient': 0.5} |
1321837.125 |
2.5928 |
29.7243 |
SVMOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000), 'margin': 0.5, 'regularization_coefficient': 0.5} |
1347654.25 |
5.5232 |
64.2004 |
SequenceLast |
{'data': (1024, 1024), 'axis': 0} |
5530301.0 |
0.049 |
--- |
SequenceLast |
{'data': (10000, 1), 'axis': 0} |
5530299.0 |
0.0483 |
--- |
SequenceLast |
{'data': (10000, 100), 'axis': 0} |
5530299.5 |
0.0477 |
--- |
SequenceMask |
{'data': (1024, 1024), 'axis': 0} |
5536590.5 |
0.5358 |
0.2289 |
SequenceMask |
{'data': (10000, 1), 'axis': 0} |
5534533.5 |
0.0406 |
0.0344 |
SequenceMask |
{'data': (10000, 100), 'axis': 0} |
5538513.5 |
0.3242 |
0.223 |
SequenceReverse |
{'data': (1024, 1024), 'axis': 0} |
5542805.0 |
1.085 |
1.0869 |
SequenceReverse |
{'data': (10000, 1), 'axis': 0} |
5540747.5 |
0.0704 |
0.0329 |
SequenceReverse |
{'data': (10000, 100), 'axis': 0} |
5544727.5 |
1.3211 |
1.2445 |
Softmax |
{'data': (1024, 1024), 'label': (1024, 1024), 'grad_scale': 0.5, 'normalization': 'batch'} |
1587207.875 |
0.101 |
--- |
Softmax |
{'data': (10000, 1), 'label': (10000, 1), 'grad_scale': 0.5, 'normalization': 'batch'} |
1585150.75 |
0.1121 |
--- |
Softmax |
{'data': (10000, 100), 'label': (10000, 100), 'grad_scale': 0.5, 'normalization': 'batch'} |
1589130.75 |
0.2125 |
--- |
SoftmaxActivation |
{'data': (1024, 1024)} |
1593422.125 |
0.7383 |
1.1597 |
SoftmaxActivation |
{'data': (10000, 1)} |
1591365.0 |
0.0376 |
0.0678 |
SoftmaxActivation |
{'data': (10000, 100)} |
1595345.0 |
0.7143 |
1.1253 |
SoftmaxOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256), 'grad_scale': 0.5, 'normalization': 'batch'} |
1366203.0 |
4.5316 |
0.5743 |
SoftmaxOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000), 'grad_scale': 0.5, 'normalization': 'batch'} |
1392020.0 |
9.957 |
1.5397 |
SpatialTransformer |
{'data': (32, 3, 256, 6), 'loc': (32, 6), 'target_shape': (32, 6), 'transform_type': 'affine', 'sampler_type': 'bilinear'} |
1372961.0 |
2.8351 |
0.5095 |
SpatialTransformer |
{'data': (256, 3, 10000, 6), 'loc': (256, 6), 'target_shape': (256, 6), 'transform_type': 'affine', 'sampler_type': 'bilinear'} |
1379257.0 |
182.0544 |
53.164 |
SwapAxis |
{'data': (1024, 1024), 'dim1': 0, 'dim2': 1} |
248309.8594 |
1.4315 |
1.5278 |
SwapAxis |
{'data': (10000, 1), 'dim1': 0, 'dim2': 1} |
246252.7188 |
0.0397 |
0.0377 |
SwapAxis |
{'data': (10000, 100), 'dim1': 0, 'dim2': 1} |
250232.7188 |
1.2698 |
1.264 |
UpSampling |
{'args': (32, 3, 256, 256), 'scale': 2, 'sample_type': 'nearest'} |
5549906.5 |
16.3392 |
3.3291 |
UpSampling |
{'args': (32, 3, 10000, 1), 'scale': 4, 'sample_type': 'nearest'} |
5561015.0 |
10.6011 |
2.3157 |
abs |
{'data': (1024, 1024)} |
2097.1521 |
0.0434 |
0.2535 |
abs |
{'data': (10000, 1)} |
20.0 |
0.0139 |
0.0177 |
abs |
{'data': (10000, 100)} |
4000.0 |
0.0526 |
0.3456 |
adam_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mean': (1024, 1024), 'var': (1024, 1024), 'lr': 0.1, 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-08, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
4698086.0 |
0.4284 |
--- |
adam_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mean': (10000, 1), 'var': (10000, 1), 'lr': 0.5, 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-08, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
4696028.5 |
0.0406 |
--- |
adam_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mean': (10000, 100), 'var': (10000, 100), 'lr': 0.9, 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-08, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
4700008.5 |
0.7697 |
--- |
add_n |
{'args': [(1024, 1024)]} |
5398911.5 |
0.0352 |
--- |
add_n |
{'args': [(10000, 1)]} |
5398911.5 |
0.0364 |
--- |
add_n |
{'args': [(10000, 10)]} |
5398911.5 |
0.0353 |
--- |
all_finite |
{'data': (1024, 1024)} |
5542727.5 |
0.092 |
--- |
all_finite |
{'data': (10000, 1)} |
5542727.5 |
0.0499 |
--- |
all_finite |
{'data': (10000, 100)} |
5542727.5 |
0.0926 |
--- |
amp_cast |
{'data': (1024, 1024), 'dtype': 'float32'} |
2097.1521 |
0.0847 |
0.0733 |
amp_cast |
{'data': (10000, 1), 'dtype': 'int32'} |
20.0 |
0.352 |
0.3344 |
amp_cast |
{'data': (10000, 100), 'dtype': 'float32'} |
2000.0 |
0.0872 |
0.0756 |
amp_multicast |
{'args': [(1024, 1024)], 'num_outputs': 1} |
0.004 |
0.0913 |
0.0808 |
amp_multicast |
{'args': [(10000, 1)], 'num_outputs': 1} |
0.008 |
0.0538 |
0.0504 |
arccos |
{'data': (1024, 1024)} |
4194.3042 |
1.1368 |
0.9571 |
arccos |
{'data': (10000, 1)} |
20.0 |
0.0376 |
0.0342 |
arccos |
{'data': (10000, 100)} |
4000.0 |
0.813 |
0.6066 |
arccosh |
{'data': (1024, 1024)} |
4194.3042 |
0.6973 |
0.7103 |
arccosh |
{'data': (10000, 1)} |
40.0 |
0.0459 |
0.0333 |
arccosh |
{'data': (10000, 100)} |
4000.0 |
0.7189 |
0.6841 |
arcsin |
{'data': (1024, 1024)} |
4194.3042 |
0.7536 |
0.6546 |
arcsin |
{'data': (10000, 1)} |
20.0 |
0.0469 |
0.0325 |
arcsin |
{'data': (10000, 100)} |
4000.0 |
0.7938 |
0.7792 |
arcsinh |
{'data': (1024, 1024)} |
4194.3042 |
1.7109 |
0.5158 |
arcsinh |
{'data': (10000, 1)} |
40.0 |
0.0449 |
0.035 |
arcsinh |
{'data': (10000, 100)} |
2000.0 |
1.5514 |
0.4768 |
arctan |
{'data': (1024, 1024)} |
2097.1521 |
0.9648 |
0.083 |
arctan |
{'data': (10000, 1)} |
20.0 |
0.0377 |
0.036 |
arctan |
{'data': (10000, 100)} |
4000.0 |
0.9344 |
0.0813 |
arctanh |
{'data': (1024, 1024)} |
2097.1521 |
1.3064 |
0.0814 |
arctanh |
{'data': (10000, 1)} |
20.0 |
0.042 |
0.0356 |
arctanh |
{'data': (10000, 100)} |
2000.0 |
1.2438 |
0.0809 |
argmax |
{'data': (1024, 1024), 'axis': 0} |
223297.25 |
7.6787 |
--- |
argmax |
{'data': (10000, 1), 'axis': 0} |
223295.2031 |
0.0867 |
--- |
argmax |
{'data': (10000, 100), 'axis': 0} |
223295.6094 |
6.6238 |
--- |
argmax_channel |
{'data': (1024, 1024)} |
2.048 |
0.2884 |
--- |
argmax_channel |
{'data': (10000, 1)} |
20.0 |
0.0385 |
--- |
argmax_channel |
{'data': (10000, 100)} |
20.0 |
0.303 |
--- |
argmin |
{'data': (1024, 1024), 'axis': 0} |
223301.5469 |
7.6812 |
--- |
argmin |
{'data': (10000, 1), 'axis': 0} |
223299.5 |
0.0861 |
--- |
argmin |
{'data': (10000, 100), 'axis': 0} |
223299.9062 |
6.6216 |
--- |
argsort |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
229591.1562 |
35.6613 |
--- |
argsort |
{'data': (10000, 1), 'axis': 0, 'dtype': 'int32'} |
227534.0 |
1.1665 |
--- |
argsort |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float32'} |
231514.0 |
31.1643 |
--- |
batch_dot |
{'lhs': (32, 1024, 1024), 'rhs': (32, 1024, 1024)} |
134217.7344 |
550.3716 |
--- |
batch_dot |
{'lhs': (32, 1000, 10), 'rhs': (32, 1000, 10), 'transpose_b': True} |
128000.0 |
380.5898 |
--- |
batch_dot |
{'lhs': (32, 1000, 1), 'rhs': (32, 100, 1000), 'transpose_a': True, 'transpose_b': True} |
12.8 |
4.2182 |
--- |
broadcast_add |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0959 |
0.1302 |
broadcast_add |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0573 |
0.0481 |
broadcast_add |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0328 |
0.036 |
broadcast_axes |
{'data': (1, 1024), 'axis': 0, 'size': 2} |
293046.875 |
0.031 |
--- |
broadcast_axes |
{'data': (1, 1), 'axis': 0, 'size': 2} |
293042.7812 |
0.0306 |
--- |
broadcast_axes |
{'data': (1, 100), 'axis': 0, 'size': 2} |
293043.5938 |
0.0311 |
--- |
broadcast_axis |
{'data': (1, 1024), 'axis': 0, 'size': 2} |
293055.4688 |
0.0358 |
--- |
broadcast_axis |
{'data': (1, 1), 'axis': 0, 'size': 2} |
293051.375 |
0.0314 |
--- |
broadcast_axis |
{'data': (1, 100), 'axis': 0, 'size': 2} |
293052.1875 |
0.0331 |
--- |
broadcast_div |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.0857 |
0.1299 |
broadcast_div |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0335 |
0.0459 |
broadcast_div |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0268 |
0.0423 |
broadcast_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0742 |
--- |
broadcast_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0326 |
--- |
broadcast_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0263 |
--- |
broadcast_greater |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0766 |
--- |
broadcast_greater |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0328 |
--- |
broadcast_greater |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0283 |
--- |
broadcast_greater_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0753 |
--- |
broadcast_greater_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0326 |
--- |
broadcast_greater_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0271 |
--- |
broadcast_hypot |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.4796 |
1.0338 |
broadcast_hypot |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0735 |
0.1301 |
broadcast_hypot |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0393 |
0.0522 |
broadcast_lesser |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0756 |
--- |
broadcast_lesser |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0374 |
--- |
broadcast_lesser |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0317 |
--- |
broadcast_lesser_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.0995 |
--- |
broadcast_lesser_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0371 |
--- |
broadcast_lesser_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0311 |
--- |
broadcast_like |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
299343.25 |
0.7073 |
--- |
broadcast_like |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
297646.0938 |
0.1026 |
--- |
broadcast_like |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
297486.0938 |
0.0417 |
--- |
broadcast_logical_and |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0797 |
--- |
broadcast_logical_and |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0378 |
--- |
broadcast_logical_and |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0393 |
--- |
broadcast_logical_or |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.1025 |
--- |
broadcast_logical_or |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0363 |
--- |
broadcast_logical_or |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0415 |
--- |
broadcast_logical_xor |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.1622 |
--- |
broadcast_logical_xor |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0418 |
--- |
broadcast_logical_xor |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0463 |
--- |
broadcast_maximum |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0892 |
0.1256 |
broadcast_maximum |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0355 |
0.0498 |
broadcast_maximum |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0315 |
0.0506 |
broadcast_minimum |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.1152 |
0.1305 |
broadcast_minimum |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0375 |
0.051 |
broadcast_minimum |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0325 |
0.052 |
broadcast_minus |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0753 |
--- |
broadcast_minus |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0389 |
--- |
broadcast_minus |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0299 |
--- |
broadcast_mod |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
1.9691 |
0.2051 |
broadcast_mod |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.2143 |
0.0547 |
broadcast_mod |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.047 |
0.045 |
broadcast_mul |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0667 |
0.1019 |
broadcast_mul |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0382 |
0.0509 |
broadcast_mul |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0321 |
0.0404 |
broadcast_not_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.0955 |
--- |
broadcast_not_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0375 |
--- |
broadcast_not_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0326 |
--- |
broadcast_plus |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0713 |
--- |
broadcast_plus |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0376 |
--- |
broadcast_plus |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0289 |
--- |
broadcast_power |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
2.4365 |
5.7618 |
broadcast_power |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.3054 |
0.6323 |
broadcast_power |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0534 |
0.0925 |
broadcast_sub |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.0696 |
0.0863 |
broadcast_sub |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0386 |
0.0498 |
broadcast_sub |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0304 |
0.0364 |
broadcast_to |
{'data': (1, 1024), 'shape': (1024, 1024)} |
303757.5312 |
0.711 |
--- |
broadcast_to |
{'data': (1, 1), 'shape': (10000, 1)} |
301700.375 |
0.0369 |
--- |
broadcast_to |
{'data': (1, 100), 'shape': (10000, 100)} |
305680.375 |
0.4137 |
--- |
cast |
{'data': (1024, 1024), 'dtype': 'float32'} |
2097.1521 |
--- |
0.7171 |
cast |
{'data': (10000, 1), 'dtype': 'int32'} |
40.0 |
--- |
0.0616 |
cast |
{'data': (10000, 100), 'dtype': 'float32'} |
2000.0 |
--- |
1.3491 |
cast_storage |
{'data': (1024, 1024), 'stype': 'default'} |
5549019.0 |
0.4026 |
--- |
cast_storage |
{'data': (10000, 1), 'stype': 'csr'} |
5547122.0 |
0.1022 |
--- |
cast_storage |
{'data': (10000, 100), 'stype': 'row_sparse'} |
5551102.0 |
0.6637 |
--- |
cbrt |
{'data': (1024, 1024)} |
4194.3042 |
1.3934 |
0.0854 |
cbrt |
{'data': (10000, 1)} |
20.0 |
0.045 |
0.0364 |
cbrt |
{'data': (10000, 100)} |
4000.0 |
1.335 |
0.0852 |
ceil |
{'data': (1024, 1024)} |
341655.75 |
0.1613 |
--- |
ceil |
{'data': (10000, 1)} |
339598.5938 |
0.0319 |
--- |
ceil |
{'data': (10000, 100)} |
343578.5938 |
0.153 |
--- |
choose_element_0index |
{'data': (1024, 1024), 'index': (1, 1024), 'axis': 0} |
4.096 |
0.0371 |
--- |
choose_element_0index |
{'data': (10000, 1), 'index': (1, 1), 'axis': 0} |
0.004 |
0.0365 |
--- |
choose_element_0index |
{'data': (10000, 100), 'index': (1, 100), 'axis': 0} |
0.4 |
0.0362 |
--- |
clip |
{'data': (1024, 1024), 'a_min': 0.1, 'a_max': 0.9} |
5555353.5 |
0.2876 |
0.2823 |
clip |
{'data': (10000, 1), 'a_min': 0.1, 'a_max': 0.9} |
5553296.5 |
0.0356 |
0.0323 |
clip |
{'data': (10000, 100), 'a_min': 0.1, 'a_max': 0.9} |
5557276.5 |
0.2733 |
0.2671 |
col2im |
{'data': (32, 64, 256), 'output_size': (64, 16, 1), 'kernel': (1, 1, 1), 'stride': (2, 2, 2)} |
1387898.625 |
22.5543 |
21.4321 |
col2im |
{'data': (32, 64, 256), 'output_size': (32, 8, 1), 'kernel': (1, 1, 1), 'stride': (1, 1, 1)} |
1385801.375 |
21.7266 |
20.5925 |
concat |
{'args0': '<NDArray 100x100 @cpu(0)>', 'args1': '<NDArray 100x100 @cpu(0)>', 'args2': '<NDArray 100x100 @cpu(0)>'} |
372830.125 |
--- |
--- |
cos |
{'data': (1024, 1024)} |
2097.1521 |
0.5013 |
0.5392 |
cos |
{'data': (10000, 1)} |
20.0 |
0.0382 |
0.0324 |
cos |
{'data': (10000, 100)} |
2000.0 |
0.4034 |
0.369 |
cosh |
{'data': (1024, 1024)} |
2097.1521 |
0.6531 |
1.0635 |
cosh |
{'data': (10000, 1)} |
20.0 |
0.0364 |
0.0355 |
cosh |
{'data': (10000, 100)} |
4000.0 |
0.7704 |
1.3442 |
ctc_loss |
{'data': (1024, 100, 100), 'label': (100, 100)} |
5433657.5 |
62.4658 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
5561568.0 |
1.1224 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'int32'} |
5563665.0 |
1.576 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
5565762.0 |
1.1237 |
--- |
degrees |
{'data': (1024, 1024)} |
4194.3042 |
0.063 |
0.051 |
degrees |
{'data': (10000, 1)} |
20.0 |
0.0275 |
0.0263 |
degrees |
{'data': (10000, 100)} |
2000.0 |
0.0502 |
0.0455 |
depth_to_space |
{'data': (1, 4, 2, 4), 'block_size': 2} |
248232.9062 |
0.0708 |
--- |
depth_to_space |
{'data': (10, 25, 10, 100), 'block_size': 5} |
249232.8438 |
0.8308 |
--- |
diag |
{'data': (1024, 1024), 'k': 1} |
274096.3125 |
0.0311 |
0.3142 |
diag |
{'data': (10000, 1), 'k': 1} |
--- |
0.0294 |
0.0456 |
diag |
{'data': (10000, 100), 'k': 1} |
274094.6875 |
0.0375 |
0.3087 |
dot |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
5.3503 |
7.048 |
dot |
{'lhs': (1000, 10), 'rhs': (1000, 10), 'transpose_b': True} |
2000.0 |
0.423 |
4.6241 |
dot |
{'lhs': (1000, 1), 'rhs': (100, 1000), 'transpose_a': True, 'transpose_b': True} |
0.2 |
0.1632 |
0.2758 |
elemwise_add |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.1915 |
--- |
elemwise_add |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0545 |
--- |
elemwise_add |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0526 |
--- |
elemwise_div |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.17 |
--- |
elemwise_div |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0354 |
--- |
elemwise_div |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.033 |
--- |
elemwise_mul |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.2112 |
--- |
elemwise_mul |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0441 |
--- |
elemwise_mul |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0315 |
--- |
elemwise_sub |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
4194.3042 |
0.0944 |
--- |
elemwise_sub |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
400.0 |
0.0507 |
--- |
elemwise_sub |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
40.0 |
0.0308 |
--- |
erf |
{'data': (1024, 1024)} |
2097.1521 |
0.9296 |
0.7832 |
erf |
{'data': (10000, 1)} |
20.0 |
0.0399 |
0.0349 |
erf |
{'data': (10000, 100)} |
2000.0 |
0.9264 |
0.7504 |
erfinv |
{'data': (1024, 1024)} |
2097.1521 |
2.5779 |
0.5366 |
erfinv |
{'data': (10000, 1)} |
40.0 |
0.0601 |
0.0363 |
erfinv |
{'data': (10000, 100)} |
2000.0 |
2.9925 |
0.6673 |
exp |
{'data': (1024, 1024)} |
2097.1521 |
0.3765 |
--- |
exp |
{'data': (10000, 1)} |
20.0 |
0.0361 |
--- |
exp |
{'data': (10000, 100)} |
2000.0 |
0.3574 |
--- |
expand_dims |
{'data': (1024, 1024), 'axis': 0} |
309971.8438 |
0.0476 |
--- |
expand_dims |
{'data': (10000, 1), 'axis': 0} |
307914.6875 |
0.0466 |
--- |
expand_dims |
{'data': (10000, 100), 'axis': 0} |
311894.6875 |
0.0785 |
--- |
expm1 |
{'data': (1024, 1024)} |
2097.1521 |
0.7082 |
0.4433 |
expm1 |
{'data': (10000, 1)} |
20.0 |
0.0385 |
0.0339 |
expm1 |
{'data': (10000, 100)} |
4000.0 |
0.795 |
0.5046 |
fill_element_0index |
{'lhs': (1024, 1024), 'mhs': (1024,), 'rhs': (1024,)} |
5569956.5 |
0.105 |
--- |
fill_element_0index |
{'lhs': (10000, 1), 'mhs': (10000,), 'rhs': (10000,)} |
5567899.0 |
0.0326 |
--- |
fill_element_0index |
{'lhs': (10000, 100), 'mhs': (10000,), 'rhs': (10000,)} |
5571879.0 |
0.1058 |
--- |
fix |
{'data': (1024, 1024)} |
347870.0625 |
0.3468 |
--- |
fix |
{'data': (10000, 1)} |
345812.9062 |
0.0329 |
--- |
fix |
{'data': (10000, 100)} |
349792.9062 |
0.337 |
--- |
flatten |
{'data': (1024, 1024)} |
4194.3042 |
0.3266 |
--- |
flatten |
{'data': (10000, 1)} |
40.0 |
0.0516 |
--- |
flatten |
{'data': (10000, 100)} |
4000.0 |
0.0738 |
--- |
flip |
{'data': (1024, 1024), 'axis': 0} |
255024.2969 |
0.4731 |
--- |
flip |
{'data': (10000, 1), 'axis': 0} |
252967.1406 |
0.0365 |
--- |
flip |
{'data': (10000, 100), 'axis': 0} |
256947.1406 |
0.4615 |
--- |
floor |
{'data': (1024, 1024)} |
354084.3438 |
0.1206 |
--- |
floor |
{'data': (10000, 1)} |
352027.2188 |
0.0289 |
--- |
floor |
{'data': (10000, 100)} |
356007.2188 |
0.111 |
--- |
ftml_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'd': (1024, 1024), 'v': (1024, 1024), 'z': (1024, 1024), 'lr': 0.1, 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-08, 't': 1, 'wd': 0.1, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
4704300.0 |
1.5838 |
--- |
ftml_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'd': (10000, 1), 'v': (10000, 1), 'z': (10000, 1), 'lr': 0.5, 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-08, 't': 1, 'wd': 0.5, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
4702243.0 |
0.0561 |
--- |
ftml_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'd': (10000, 100), 'v': (10000, 100), 'z': (10000, 100), 'lr': 0.9, 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-08, 't': 1, 'wd': 0.9, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
4706223.0 |
1.5817 |
--- |
ftrl_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'z': (1024, 1024), 'n': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
4710514.5 |
1.2678 |
--- |
ftrl_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'z': (10000, 1), 'n': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
4708457.5 |
0.0425 |
--- |
ftrl_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'z': (10000, 100), 'n': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
4712437.5 |
1.2607 |
--- |
gamma |
{'data': (1024, 1024)} |
4194.3042 |
3.2805 |
5.8549 |
gamma |
{'data': (10000, 1)} |
20.0 |
0.0714 |
0.099 |
gamma |
{'data': (10000, 100)} |
4000.0 |
3.2982 |
5.7524 |
gammaln |
{'data': (1024, 1024)} |
4194.3042 |
25.025 |
3.1289 |
gammaln |
{'data': (10000, 1)} |
20.0 |
0.2754 |
0.0555 |
gammaln |
{'data': (10000, 100)} |
2000.0 |
24.0462 |
2.2594 |
gather_nd |
{'data': (1024, 1024), 'indices': (1, 1)} |
235738.75 |
0.0534 |
--- |
gather_nd |
{'data': (10000, 1), 'indices': (1, 1)} |
235736.7031 |
0.0534 |
--- |
gather_nd |
{'data': (10000, 100), 'indices': (1, 1)} |
235737.1094 |
0.0539 |
--- |
hard_sigmoid |
{'data': (1024, 1024)} |
1599636.5 |
0.1093 |
0.128 |
hard_sigmoid |
{'data': (10000, 1)} |
1597579.375 |
0.032 |
0.0269 |
hard_sigmoid |
{'data': (10000, 100)} |
1601559.375 |
0.1088 |
0.1215 |
identity |
{'data': (1024, 1024)} |
2097.1521 |
0.4072 |
--- |
identity |
{'data': (10000, 1)} |
20.0 |
0.0244 |
--- |
identity |
{'data': (10000, 100)} |
2000.0 |
0.3965 |
--- |
im2col |
{'data': (32, 3, 256, 256), 'kernel': (3,), 'stride': (1,), 'dilate': (1,), 'pad': (1,)} |
1385195.25 |
1.4523 |
1.353 |
im2col |
{'data': (32, 3, 10000, 10), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (1, 1)} |
1730647.75 |
342.6642 |
133.3937 |
khatri_rao |
{'args': [(32, 32), (32, 32)]} |
0.008 |
0.0505 |
--- |
khatri_rao |
{'args': [(64, 64), (64, 64)]} |
0.008 |
0.0522 |
--- |
lamb_update_phase1 |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mean': (1024, 1024), 'var': (1024, 1024), 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-08, 't': 1, 'wd': 0.1, 'rescale_grad': 0.4} |
4716729.0 |
0.9193 |
--- |
lamb_update_phase1 |
{'weight': (10000, 1), 'grad': (10000, 1), 'mean': (10000, 1), 'var': (10000, 1), 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-08, 't': 1, 'wd': 0.5, 'rescale_grad': 0.4} |
4714671.5 |
0.0428 |
--- |
lamb_update_phase1 |
{'weight': (10000, 100), 'grad': (10000, 100), 'mean': (10000, 100), 'var': (10000, 100), 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-08, 't': 1, 'wd': 0.9, 'rescale_grad': 0.4} |
4718651.5 |
2.0237 |
--- |
lamb_update_phase2 |
{'weight': (1024, 1024), 'g': (1024, 1024), 'r1': (1, 1024), 'r2': (1, 1024), 'lr': 0.1} |
4722943.0 |
0.236 |
--- |
lamb_update_phase2 |
{'weight': (10000, 1), 'g': (10000, 1), 'r1': (1, 1), 'r2': (1, 1), 'lr': 0.5} |
4720886.0 |
0.0414 |
--- |
lamb_update_phase2 |
{'weight': (10000, 100), 'g': (10000, 100), 'r1': (1, 100), 'r2': (1, 100), 'lr': 0.9} |
4724866.0 |
0.2239 |
--- |
linalg_det |
{'A': (1024, 1024)} |
5586581.5 |
15.4087 |
39.9563 |
linalg_extractdiag |
{'A': (1024, 1024)} |
5584488.5 |
0.0517 |
0.0562 |
linalg_extracttrian |
{'A': (1024, 1024)} |
5587635.0 |
0.6429 |
0.6641 |
linalg_gelqf |
{'A': (1024, 1024)} |
5599168.5 |
182.5085 |
--- |
linalg_gemm |
{'A': (1024, 1024), 'B': (1024, 1024), 'C': (1024, 1024), 'axis': 0} |
5601265.5 |
3.8929 |
6.7858 |
linalg_gemm2 |
{'A': (1024, 1024), 'B': (1024, 1024), 'axis': 0} |
5605460.0 |
2.8579 |
5.8484 |
linalg_inverse |
{'A': (1024, 1024)} |
5609654.0 |
58.9799 |
8.6117 |
linalg_makediag |
{'A': (1024, 1024)} |
12050008.0 |
558.6019 |
4.2716 |
linalg_maketrian |
{'A': (1024, 1035)} |
9914966.0 |
1.304 |
1.1431 |
linalg_potrf |
{'A': [[1, 0], [0, 1]]} |
5584482.0 |
0.0351 |
--- |
linalg_potrf |
{'A': [[2, -1, 0], [-1, 2, -1], [0, -1, 2]]} |
5584482.0 |
0.0917 |
--- |
linalg_potri |
{'A': (1024, 1024)} |
9917110.0 |
20.7041 |
12.2103 |
linalg_slogdet |
{'A': (1024, 1024)} |
9917112.0 |
17.7089 |
--- |
linalg_sumlogdiag |
{'A': (1024, 1024)} |
9915013.0 |
0.0604 |
0.4945 |
linalg_syrk |
{'A': (1024, 1024)} |
9921304.0 |
3.4626 |
5.865 |
linalg_trmm |
{'A': (1024, 1024), 'B': (1024, 1024)} |
9925499.0 |
2.5562 |
5.658 |
linalg_trsm |
{'A': (1024, 1024), 'B': (1024, 1024)} |
9929693.0 |
3.4078 |
7.5423 |
log |
{'data': (1024, 1024)} |
2097.1521 |
0.6218 |
0.044 |
log |
{'data': (10000, 1)} |
40.0 |
0.0416 |
0.031 |
log |
{'data': (10000, 100)} |
4000.0 |
0.8078 |
0.067 |
log10 |
{'data': (1024, 1024)} |
2097.1521 |
0.9951 |
0.0784 |
log10 |
{'data': (10000, 1)} |
20.0 |
0.0414 |
0.0299 |
log10 |
{'data': (10000, 100)} |
2000.0 |
0.9594 |
0.0769 |
log1p |
{'data': (1024, 1024)} |
2097.1521 |
0.9963 |
0.0909 |
log1p |
{'data': (10000, 1)} |
20.0 |
0.0398 |
0.0332 |
log1p |
{'data': (10000, 100)} |
2000.0 |
0.9476 |
0.0786 |
log2 |
{'data': (1024, 1024)} |
2097.1521 |
0.829 |
0.0758 |
log2 |
{'data': (10000, 1)} |
40.0 |
0.039 |
0.0291 |
log2 |
{'data': (10000, 100)} |
2000.0 |
0.7962 |
0.0781 |
log_softmax |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
1602705.0 |
3.269 |
2.0835 |
log_softmax |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
1601696.5 |
0.5576 |
0.268 |
log_softmax |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
1609676.5 |
5.5442 |
4.5522 |
logical_not |
{'data': (1024, 1024)} |
4194.3042 |
0.1906 |
--- |
logical_not |
{'data': (10000, 1)} |
20.0 |
0.0294 |
--- |
logical_not |
{'data': (10000, 100)} |
2000.0 |
0.0566 |
--- |
make_loss |
{'data': (1024, 1024)} |
2097.1521 |
0.4085 |
--- |
make_loss |
{'data': (10000, 1)} |
20.0 |
0.0203 |
--- |
make_loss |
{'data': (10000, 100)} |
2000.0 |
0.3917 |
--- |
max |
{'data': (1024, 1024), 'axis': 0} |
223247.9219 |
0.7283 |
0.7584 |
max |
{'data': (10000, 1), 'axis': 0} |
223245.875 |
0.1406 |
0.038 |
max |
{'data': (10000, 100), 'axis': 0} |
223246.2812 |
0.6682 |
0.7202 |
max_axis |
{'data': (1024, 1024), 'axis': 0} |
223252.2188 |
0.7647 |
--- |
max_axis |
{'data': (10000, 1), 'axis': 0} |
223250.1719 |
0.1397 |
--- |
max_axis |
{'data': (10000, 100), 'axis': 0} |
223250.5781 |
0.7048 |
--- |
mean |
{'data': (1024, 1024), 'axis': 0} |
223256.5156 |
0.8271 |
1.4 |
mean |
{'data': (10000, 1), 'axis': 0} |
223254.4688 |
0.242 |
0.0494 |
mean |
{'data': (10000, 100), 'axis': 0} |
223254.875 |
1.2087 |
1.3809 |
min |
{'data': (1024, 1024), 'axis': 0} |
223262.8594 |
0.7243 |
0.7596 |
min |
{'data': (10000, 1), 'axis': 0} |
223260.8125 |
0.1407 |
0.0372 |
min |
{'data': (10000, 100), 'axis': 0} |
223261.2188 |
0.6795 |
0.7245 |
min_axis |
{'data': (1024, 1024), 'axis': 0} |
223267.1562 |
0.7068 |
--- |
min_axis |
{'data': (10000, 1), 'axis': 0} |
223265.125 |
0.139 |
--- |
min_axis |
{'data': (10000, 100), 'axis': 0} |
223265.5156 |
0.6728 |
--- |
moments |
{'data': (1024, 1024), 'axes': [0, 1]} |
9927596.0 |
34.0742 |
--- |
moments |
{'data': (10000, 1), 'axes': [0, 1]} |
9927596.0 |
0.4383 |
--- |
moments |
{'data': (10000, 100), 'axes': [0, 1]} |
9927596.0 |
30.9714 |
--- |
mp_nag_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
4729157.5 |
0.3158 |
--- |
mp_nag_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
4727100.5 |
0.0407 |
--- |
mp_nag_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
4731080.5 |
0.2828 |
--- |
mp_sgd_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
4735371.5 |
0.2167 |
--- |
mp_sgd_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
4733314.5 |
0.0405 |
--- |
mp_sgd_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
4737294.5 |
0.2028 |
--- |
mp_sgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
4741586.0 |
0.49 |
--- |
mp_sgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
4739529.0 |
0.0438 |
--- |
mp_sgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
4743509.0 |
0.1801 |
--- |
multi_all_finite |
{'args': [(1024, 1024)], 'num_arrays': 1} |
5398911.5 |
0.0459 |
--- |
multi_all_finite |
{'args': [(10000, 1)], 'num_arrays': 1} |
5398911.5 |
0.0968 |
--- |
multi_all_finite |
{'args': [(10000, 10)], 'num_arrays': 1} |
5398911.5 |
0.0438 |
--- |
multi_lars |
{'lrs': (1024, 1024), 'weights_sum_sq': (1024, 1024), 'grads_sum_sq': (1024, 1024), 'wds': (1024, 1024), 'eta': 0.5, 'eps': 1e-08, 'rescale_grad': 0.4} |
5576170.5 |
0.9196 |
--- |
multi_lars |
{'lrs': (10000, 1), 'weights_sum_sq': (10000, 1), 'grads_sum_sq': (10000, 1), 'wds': (10000, 1), 'eta': 0.5, 'eps': 1e-08, 'rescale_grad': 0.4} |
5574113.5 |
0.039 |
--- |
multi_lars |
{'lrs': (10000, 100), 'weights_sum_sq': (10000, 100), 'grads_sum_sq': (10000, 100), 'wds': (10000, 100), 'eta': 0.5, 'eps': 1e-08, 'rescale_grad': 0.4} |
5578093.5 |
0.9514 |
--- |
multi_mp_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0403 |
--- |
multi_mp_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0332 |
--- |
multi_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0352 |
--- |
multi_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0364 |
--- |
multi_sum_sq |
{'args': [(1024, 1024)], 'num_arrays': 1} |
5398911.5 |
0.0298 |
--- |
multi_sum_sq |
{'args': [(10000, 1)], 'num_arrays': 1} |
5398911.5 |
0.0308 |
--- |
multi_sum_sq |
{'args': [(10000, 10)], 'num_arrays': 1} |
5398911.5 |
0.0309 |
--- |
nag_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
4747800.5 |
0.2863 |
--- |
nag_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
4745743.0 |
0.0655 |
--- |
nag_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
4749723.0 |
0.3044 |
--- |
nanprod |
{'data': (1024, 1024), 'axis': 0} |
223271.4531 |
0.7834 |
0.7836 |
nanprod |
{'data': (10000, 1), 'axis': 0} |
223269.4219 |
0.14 |
0.0384 |
nanprod |
{'data': (10000, 100), 'axis': 0} |
223269.8125 |
0.6744 |
0.7459 |
nansum |
{'data': (1024, 1024), 'axis': 0} |
223275.7656 |
0.796 |
0.8368 |
nansum |
{'data': (10000, 1), 'axis': 0} |
223273.7188 |
0.1565 |
0.0383 |
nansum |
{'data': (10000, 100), 'axis': 0} |
223274.1094 |
0.7841 |
0.7963 |
negative |
{'data': (1024, 1024)} |
2097.1521 |
0.0551 |
--- |
negative |
{'data': (10000, 1)} |
20.0 |
0.0271 |
--- |
negative |
{'data': (10000, 100)} |
2000.0 |
0.0585 |
--- |
norm |
{'data': (1024, 1024), 'axis': 0} |
223280.0625 |
0.9442 |
0.8252 |
norm |
{'data': (10000, 1), 'axis': 0} |
223278.0156 |
0.1732 |
0.0399 |
norm |
{'data': (10000, 100), 'axis': 0} |
223278.4062 |
0.8574 |
0.7871 |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'float32'} |
--- |
0.0137 |
--- |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'int32'} |
--- |
0.0136 |
--- |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'float32'} |
--- |
0.0139 |
--- |
ones_like |
{'data': (1024, 1024)} |
2097.1521 |
0.0543 |
--- |
ones_like |
{'data': (10000, 1)} |
20.0 |
0.0246 |
--- |
ones_like |
{'data': (10000, 100)} |
2000.0 |
0.0536 |
--- |
pad |
{'data': (1, 4, 2, 4), 'mode': 'constant', 'pad_width': (0, 0, 0, 0, 1, 1, 1, 1)} |
309895.2812 |
--- |
--- |
pad |
{'data': (10, 25, 10, 100), 'mode': 'constant', 'pad_width': (0, 0, 0, 0, 1, 1, 1, 1)} |
311119.0625 |
--- |
--- |
pick |
{'data': (1024, 1024), 'index': (1, 1024), 'axis': 0} |
235743.0469 |
0.0411 |
0.286 |
pick |
{'data': (10000, 1), 'index': (1, 1), 'axis': 0} |
235741.0 |
0.0372 |
0.0492 |
pick |
{'data': (10000, 100), 'index': (1, 100), 'axis': 0} |
235741.4062 |
0.0384 |
0.2822 |
preloaded_multi_mp_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 5x5 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'args5': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0353 |
--- |
preloaded_multi_mp_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 1 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0341 |
--- |
preloaded_multi_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 1 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0341 |
--- |
preloaded_multi_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'args5': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0328 |
--- |
prod |
{'data': (1024, 1024), 'axis': 0} |
223284.3594 |
0.7798 |
0.7656 |
prod |
{'data': (10000, 1), 'axis': 0} |
223282.3125 |
0.1594 |
0.0406 |
prod |
{'data': (10000, 100), 'axis': 0} |
223282.7031 |
0.7659 |
0.9068 |
radians |
{'data': (1024, 1024)} |
2097.1521 |
0.0504 |
0.0417 |
radians |
{'data': (10000, 1)} |
20.0 |
0.0301 |
0.0287 |
radians |
{'data': (10000, 100)} |
2000.0 |
0.0507 |
0.0403 |
random_exponential |
{'shape': (1024, 1024), 'dtype': 'float16'} |
2097.1521 |
2.5186 |
--- |
random_exponential |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.0514 |
--- |
random_exponential |
{'shape': (10000, 100), 'dtype': 'float64'} |
8000.0 |
2.2788 |
--- |
random_gamma |
{'shape': (1024, 1024), 'dtype': 'float16'} |
2097.1521 |
6.947 |
--- |
random_gamma |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.0989 |
--- |
random_gamma |
{'shape': (10000, 100), 'dtype': 'float64'} |
8000.0 |
8.3352 |
--- |
random_generalized_negative_binomial |
{'shape': (1024, 1024), 'dtype': 'float16'} |
2097.1521 |
9.38 |
--- |
random_generalized_negative_binomial |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.1396 |
--- |
random_generalized_negative_binomial |
{'shape': (10000, 100), 'dtype': 'float64'} |
8000.0 |
8.9768 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (1024, 1024), 'dtype': 'float16'} |
2097.1521 |
6.5638 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (10000, 1), 'dtype': 'float32'} |
40.0 |
0.1053 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (10000, 100), 'dtype': 'float64'} |
8000.0 |
7.8058 |
--- |
random_normal |
{'shape': (1024, 1024), 'dtype': 'float16'} |
1048.576 |
3.0816 |
--- |
random_normal |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.0512 |
--- |
random_normal |
{'shape': (10000, 100), 'dtype': 'float64'} |
4000.0 |
2.6179 |
--- |
random_pdf_dirichlet |
{'sample': (2,), 'alpha': [0.0, 2.5]} |
0.002 |
0.0372 |
--- |
random_pdf_exponential |
{'sample': (2,), 'lam': [1.0, 8.5]} |
0.004 |
0.0355 |
--- |
random_pdf_gamma |
{'sample': (2,), 'alpha': [0.0, 2.5], 'beta': [1.0, 0.7]} |
0.008 |
0.0411 |
--- |
random_pdf_generalized_negative_binomial |
{'sample': (2,), 'mu': [2.0, 2.5], 'alpha': [0.0, 2.5]} |
0.004 |
0.034 |
--- |
random_pdf_negative_binomial |
{'sample': (2,), 'k': [20, 49], 'p': [0.4, 0.77]} |
0.008 |
0.0347 |
--- |
random_pdf_normal |
{'sample': (2,), 'mu': [2.0, 2.5], 'sigma': [1.0, 3.7]} |
0.008 |
0.0355 |
--- |
random_pdf_poisson |
{'sample': (2,), 'lam': [1.0, 8.5]} |
0.004 |
0.0356 |
--- |
random_pdf_uniform |
{'sample': (2,), 'low': [0.0, 2.5], 'high': [1.0, 3.7]} |
0.004 |
0.0335 |
--- |
random_poisson |
{'shape': (1024, 1024), 'dtype': 'float16'} |
1048.576 |
2.0767 |
--- |
random_poisson |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.0537 |
--- |
random_poisson |
{'shape': (10000, 100), 'dtype': 'float64'} |
4000.0 |
1.5296 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (1024, 1024), 'dtype': 'int32'} |
4194.3042 |
1.3714 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (10000, 1), 'dtype': 'int64'} |
40.0 |
0.0434 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (10000, 100), 'dtype': 'int32'} |
2000.0 |
1.3065 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (1024, 1024), 'dtype': 'float16'} |
2097.1521 |
0.9942 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.0394 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (10000, 100), 'dtype': 'float64'} |
4000.0 |
0.9265 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (1024, 1024)} |
235747.3438 |
0.0381 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (10000, 1)} |
235749.3906 |
0.0368 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (10000, 100)} |
235751.4375 |
0.0378 |
--- |
rcbrt |
{'data': (1024, 1024)} |
4194.3042 |
1.3384 |
1.397 |
rcbrt |
{'data': (10000, 1)} |
20.0 |
0.0464 |
0.0381 |
rcbrt |
{'data': (10000, 100)} |
4000.0 |
1.2793 |
1.3368 |
reciprocal |
{'data': (1024, 1024)} |
2097.1521 |
0.0621 |
0.0617 |
reciprocal |
{'data': (10000, 1)} |
40.0 |
0.0297 |
0.0315 |
reciprocal |
{'data': (10000, 100)} |
2000.0 |
0.0643 |
0.0583 |
relu |
{'data': (1024, 1024)} |
2097.1521 |
0.081 |
0.1016 |
relu |
{'data': (10000, 1)} |
20.0 |
0.0316 |
0.0388 |
relu |
{'data': (10000, 100)} |
2000.0 |
0.0813 |
0.0965 |
repeat |
{'data': (1024, 1024), 'repeats': 2, 'axis': 0} |
323090.0 |
2.0962 |
2.9878 |
repeat |
{'data': (10000, 1), 'repeats': 2, 'axis': 0} |
318975.6875 |
0.0654 |
0.0628 |
repeat |
{'data': (10000, 100), 'repeats': 2, 'axis': 0} |
326935.6875 |
3.1059 |
4.5136 |
reset_arrays |
{'args': [(1024, 1024)], 'num_arrays': 1} |
--- |
0.0096 |
--- |
reset_arrays |
{'args': [(10000, 1)], 'num_arrays': 1} |
--- |
0.0098 |
--- |
reset_arrays |
{'args': [(10000, 10)], 'num_arrays': 1} |
--- |
0.0097 |
--- |
reshape |
{'data': (1024, 1024), 'shape': (1024, 1024), 'target_shape': (32, 6)} |
282483.0938 |
0.1375 |
--- |
reshape |
{'data': (10000, 1), 'shape': (10000, 1), 'target_shape': (32, 6)} |
280425.9375 |
0.0547 |
--- |
reshape |
{'data': (10000, 100), 'shape': (10000, 100), 'target_shape': (32, 6)} |
284405.9375 |
0.1234 |
--- |
reshape_like |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
288697.375 |
0.5433 |
--- |
reshape_like |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
287000.25 |
0.0601 |
--- |
reshape_like |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
286840.25 |
0.0195 |
--- |
reverse |
{'data': (1024, 1024), 'axis': 0} |
261238.6094 |
0.4752 |
0.4646 |
reverse |
{'data': (10000, 1), 'axis': 0} |
259181.4531 |
0.0375 |
0.0312 |
reverse |
{'data': (10000, 100), 'axis': 0} |
263161.4375 |
0.4917 |
0.4575 |
rint |
{'data': (1024, 1024)} |
360298.6562 |
0.1924 |
--- |
rint |
{'data': (10000, 1)} |
358241.5 |
0.0287 |
--- |
rint |
{'data': (10000, 100)} |
362221.5 |
0.3056 |
--- |
rmsprop_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'n': (1024, 1024), 'lr': 0.1, 'gamma1': 0.1, 'epsilon': 1e-08, 'wd': 0.1, 'rescale_grad': 0.4} |
4754014.5 |
0.4045 |
--- |
rmsprop_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'n': (10000, 1), 'lr': 0.5, 'gamma1': 0.5, 'epsilon': 1e-08, 'wd': 0.5, 'rescale_grad': 0.4} |
4751957.5 |
0.0384 |
--- |
rmsprop_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'n': (10000, 100), 'lr': 0.9, 'gamma1': 0.9, 'epsilon': 1e-08, 'wd': 0.9, 'rescale_grad': 0.4} |
4755937.5 |
0.4541 |
--- |
rmspropalex_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'n': (1024, 1024), 'g': (1024, 1024), 'delta': (1024, 1024), 'lr': 0.1, 'gamma1': 0.1, 'gamma2': 0.1, 'epsilon': 1e-08, 'wd': 0.1, 'rescale_grad': 0.4} |
4760229.0 |
0.6353 |
--- |
rmspropalex_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'n': (10000, 1), 'g': (10000, 1), 'delta': (10000, 1), 'lr': 0.5, 'gamma1': 0.5, 'gamma2': 0.5, 'epsilon': 1e-08, 'wd': 0.5, 'rescale_grad': 0.4} |
4758172.0 |
0.0677 |
--- |
rmspropalex_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'n': (10000, 100), 'g': (10000, 100), 'delta': (10000, 100), 'lr': 0.9, 'gamma1': 0.9, 'gamma2': 0.9, 'epsilon': 1e-08, 'wd': 0.9, 'rescale_grad': 0.4} |
4762152.0 |
1.1841 |
--- |
round |
{'data': (1024, 1024)} |
366512.9688 |
0.3535 |
--- |
round |
{'data': (10000, 1)} |
364455.8125 |
0.0269 |
--- |
round |
{'data': (10000, 100)} |
368435.8125 |
0.3296 |
--- |
rsqrt |
{'data': (1024, 1024)} |
2097.1521 |
0.6999 |
0.6844 |
rsqrt |
{'data': (10000, 1)} |
40.0 |
0.0391 |
0.0317 |
rsqrt |
{'data': (10000, 100)} |
2000.0 |
0.6705 |
0.6459 |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (1024, 1024), 'dtype': 'float16'} |
2097.1521 |
3.6229 |
--- |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (10000, 1), 'dtype': 'float32'} |
40.0 |
0.0661 |
--- |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (10000, 100), 'dtype': 'float64'} |
8000.0 |
3.2448 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'beta': [1.0, 0.7]} |
4194.3042 |
13.3519 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'beta': [1.0, 0.7]} |
40.0 |
0.1437 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'beta': [1.0, 0.7]} |
16000.0 |
13.662 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'alpha': [0.0, 2.5]} |
4194.3042 |
29.5582 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'alpha': [0.0, 2.5]} |
40.0 |
0.3035 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'alpha': [0.0, 2.5]} |
8000.0 |
26.279 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (1024, 1024), 'dtype': 'float16'} |
79218.9922 |
176.1481 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (10000, 1), 'dtype': 'float32'} |
46944.5625 |
1.7724 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (10000, 100), 'dtype': 'float64'} |
302304.5625 |
180.7354 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (1024, 1024), 'dtype': 'float16', 'p': [0.4, 0.77]} |
180596.0156 |
330.7904 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (10000, 1), 'dtype': 'float32', 'p': [0.4, 0.77]} |
178578.8594 |
3.191 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (10000, 100), 'dtype': 'float64', 'p': [0.4, 0.77]} |
194538.8594 |
315.403 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'sigma': [1.0, 3.7]} |
192830.3281 |
8.1756 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'sigma': [1.0, 3.7]} |
190813.1719 |
0.1012 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'sigma': [1.0, 3.7]} |
206773.1719 |
7.9631 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (1024, 1024), 'dtype': 'float16'} |
205064.625 |
9.5478 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (10000, 1), 'dtype': 'float32'} |
203047.4688 |
0.1273 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (10000, 100), 'dtype': 'float64'} |
219007.4688 |
7.2977 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'high': [1.0, 3.7]} |
217298.9375 |
1.9348 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'high': [1.0, 3.7]} |
215281.7812 |
0.0429 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'high': [1.0, 3.7]} |
231241.7812 |
1.7466 |
--- |
sgd_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
4766443.0 |
0.2115 |
--- |
sgd_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
4764386.0 |
0.0422 |
--- |
sgd_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
4768366.0 |
0.201 |
--- |
sgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
4772657.5 |
0.1861 |
--- |
sgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
4770600.5 |
0.0307 |
--- |
sgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
4774580.5 |
0.1693 |
--- |
shape_array |
{'data': (1024, 1024)} |
286820.25 |
0.0127 |
--- |
shape_array |
{'data': (10000, 1)} |
286820.2812 |
0.0124 |
--- |
shape_array |
{'data': (10000, 100)} |
286820.2812 |
0.0121 |
--- |
shuffle |
{'data': (1024, 1024)} |
2097.1521 |
1.1539 |
--- |
shuffle |
{'data': (10000, 1)} |
20.0 |
0.3627 |
--- |
shuffle |
{'data': (10000, 100)} |
2000.0 |
1.2481 |
--- |
sigmoid |
{'data': (1024, 1024)} |
4194.3042 |
0.9294 |
0.0746 |
sigmoid |
{'data': (10000, 1)} |
20.0 |
0.0371 |
0.03 |
sigmoid |
{'data': (10000, 100)} |
4000.0 |
0.7201 |
0.0713 |
sign |
{'data': (1024, 1024)} |
2097.1521 |
0.2711 |
0.0529 |
sign |
{'data': (10000, 1)} |
20.0 |
0.0828 |
0.0253 |
sign |
{'data': (10000, 100)} |
2000.0 |
0.3402 |
0.1345 |
signsgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
4778872.0 |
0.2178 |
--- |
signsgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
4776814.5 |
0.0362 |
--- |
signsgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
4780794.5 |
0.2093 |
--- |
signum_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
4785086.0 |
0.3259 |
--- |
signum_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
4783029.0 |
0.0391 |
--- |
signum_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
4787009.0 |
0.3102 |
--- |
sin |
{'data': (1024, 1024)} |
2097.1521 |
0.5525 |
0.5599 |
sin |
{'data': (10000, 1)} |
40.0 |
0.0829 |
0.0316 |
sin |
{'data': (10000, 100)} |
4000.0 |
0.5252 |
0.5173 |
sinh |
{'data': (1024, 1024)} |
4194.3042 |
1.5371 |
0.9416 |
sinh |
{'data': (10000, 1)} |
20.0 |
0.0453 |
0.0379 |
sinh |
{'data': (10000, 100)} |
4000.0 |
1.4809 |
0.9119 |
size_array |
{'data': (1024, 1024)} |
286820.2812 |
0.0124 |
--- |
size_array |
{'data': (10000, 1)} |
286820.2812 |
0.0124 |
--- |
size_array |
{'data': (10000, 100)} |
286820.2812 |
0.0125 |
--- |
slice |
{'data': (1024, 1024), 'begin': 0, 'end': 1} |
235755.5312 |
0.0577 |
0.0556 |
slice |
{'data': (10000, 1), 'begin': 0, 'end': 1} |
235753.5 |
0.0557 |
0.0377 |
slice |
{'data': (10000, 100), 'begin': 0, 'end': 1} |
235753.8906 |
0.0554 |
0.0558 |
slice_axis |
{'data': (1024, 1024), 'axis': 0, 'begin': 0, 'end': 1} |
235759.8438 |
0.0373 |
0.0551 |
slice_axis |
{'data': (10000, 1), 'axis': 0, 'begin': 0, 'end': 1} |
235757.7969 |
0.0371 |
0.0475 |
slice_axis |
{'data': (10000, 100), 'axis': 0, 'begin': 0, 'end': 1} |
235758.1875 |
0.0363 |
0.0554 |
slice_like |
{'data': (1024, 1024), 'shape_like': (100, 100), 'axes': [0, 1]} |
235817.9844 |
0.0431 |
0.0632 |
slice_like |
{'data': (10000, 1), 'shape_like': (10, 1), 'axes': [0, 1]} |
235798.0312 |
0.0405 |
0.04 |
slice_like |
{'data': (10000, 100), 'shape_like': (100, 10), 'axes': [0, 1]} |
235802.0156 |
0.067 |
0.0623 |
smooth_l1 |
{'data': (1024, 1024)} |
5398988.5 |
0.2996 |
0.2847 |
smooth_l1 |
{'data': (10000, 1)} |
5396931.5 |
0.0717 |
0.0648 |
smooth_l1 |
{'data': (10000, 100)} |
5400911.5 |
0.5835 |
0.2741 |
softmax |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
1608822.25 |
2.0278 |
0.9154 |
softmax |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
1607813.625 |
0.4857 |
0.0424 |
softmax |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
1615793.625 |
5.3102 |
2.2144 |
softmax_cross_entropy |
{'data': (1024, 1024), 'label': (1024,)} |
5398911.5 |
0.8294 |
--- |
softmin |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
1614939.375 |
3.4102 |
1.8065 |
softmin |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
1614979.375 |
0.4609 |
0.0443 |
softmin |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
1622959.375 |
4.6611 |
2.0522 |
softsign |
{'data': (1024, 1024)} |
4194.3042 |
0.0804 |
0.0684 |
softsign |
{'data': (10000, 1)} |
20.0 |
0.0359 |
0.0382 |
softsign |
{'data': (10000, 100)} |
2000.0 |
0.0681 |
0.0653 |
sort |
{'data': (1024, 1024), 'axis': 0} |
237902.6094 |
22.5737 |
--- |
sort |
{'data': (10000, 1), 'axis': 0} |
233768.3125 |
1.0186 |
--- |
sort |
{'data': (10000, 100), 'axis': 0} |
239728.3125 |
21.2701 |
--- |
space_to_depth |
{'data': (1, 4, 2, 4), 'block_size': 2} |
261161.6406 |
0.0546 |
--- |
space_to_depth |
{'data': (10, 25, 10, 100), 'block_size': 5} |
262161.5625 |
0.5257 |
--- |
split |
{'data': (1024, 1024), 'num_outputs': 2} |
379061.5625 |
--- |
--- |
split |
{'data': (10000, 1), 'num_outputs': 1} |
377004.4062 |
--- |
--- |
split |
{'data': (10000, 100), 'num_outputs': 10} |
380984.4062 |
--- |
--- |
sqrt |
{'data': (1024, 1024)} |
2097.1521 |
0.7633 |
0.0697 |
sqrt |
{'data': (10000, 1)} |
40.0 |
0.0369 |
0.032 |
sqrt |
{'data': (10000, 100)} |
4000.0 |
0.644 |
0.066 |
square |
{'data': (1024, 1024)} |
2097.1521 |
0.0591 |
0.063 |
square |
{'data': (10000, 1)} |
20.0 |
0.0268 |
0.0285 |
square |
{'data': (10000, 100)} |
2000.0 |
0.0578 |
0.0577 |
squeeze |
{'data': (1, 1024, 1024), 'axis': 0} |
5582385.0 |
0.447 |
0.6478 |
squeeze |
{'data': (32, 1, 256, 256), 'axis': 1} |
5588676.5 |
1.4974 |
1.2437 |
stack |
{'args0': '<NDArray 100x100 @cpu(0)>', 'args1': '<NDArray 100x100 @cpu(0)>', 'args2': '<NDArray 100x100 @cpu(0)>'} |
379164.4062 |
0.0672 |
--- |
stop_gradient |
{'data': (1024, 1024)} |
2097.1521 |
0.4951 |
--- |
stop_gradient |
{'data': (10000, 1)} |
40.0 |
0.0206 |
--- |
stop_gradient |
{'data': (10000, 100)} |
4000.0 |
0.5134 |
--- |
sum |
{'data': (1024, 1024), 'axis': 0} |
223288.6562 |
0.8658 |
0.6977 |
sum |
{'data': (10000, 1), 'axis': 0} |
223286.6094 |
0.1189 |
0.0306 |
sum |
{'data': (10000, 100), 'axis': 0} |
223287.0 |
0.6296 |
0.6669 |
sum_axis |
{'data': (1024, 1024), 'axis': 0} |
223292.9531 |
0.8614 |
--- |
sum_axis |
{'data': (10000, 1), 'axis': 0} |
223290.9062 |
0.148 |
--- |
sum_axis |
{'data': (10000, 100), 'axis': 0} |
223291.2969 |
0.5627 |
--- |
swapaxes |
{'data': (1024, 1024), 'dim1': 0, 'dim2': 1} |
267953.0312 |
1.442 |
--- |
swapaxes |
{'data': (10000, 1), 'dim1': 0, 'dim2': 1} |
265895.875 |
0.0415 |
--- |
swapaxes |
{'data': (10000, 100), 'dim1': 0, 'dim2': 1} |
269875.875 |
1.2189 |
--- |
take |
{'a': (1024, 1024), 'indices': (1, 1), 'axis': 0} |
235806.1562 |
0.038 |
0.0609 |
tan |
{'data': (1024, 1024)} |
2097.1521 |
1.1871 |
0.0752 |
tan |
{'data': (10000, 1)} |
20.0 |
0.0398 |
0.0308 |
tan |
{'data': (10000, 100)} |
2000.0 |
0.8581 |
0.0552 |
tanh |
{'data': (1024, 1024)} |
2097.1521 |
1.4491 |
0.0813 |
tanh |
{'data': (10000, 1)} |
20.0 |
0.0405 |
0.0272 |
tanh |
{'data': (10000, 100)} |
2000.0 |
1.379 |
0.0796 |
tile |
{'data': (1024, 1024), 'reps': 2} |
335518.5938 |
3.1205 |
4.5071 |
tile |
{'data': (10000, 1), 'reps': 2} |
331404.2812 |
0.0503 |
0.0451 |
tile |
{'data': (10000, 100), 'reps': 2} |
339364.2812 |
2.9696 |
4.3805 |
topk |
{'data': (1024, 1024), 'axis': 0, 'k': 1, 'dtype': 'float32'} |
235734.4531 |
17.0116 |
--- |
topk |
{'data': (10000, 1), 'axis': 0, 'k': 1, 'dtype': 'int32'} |
235732.4062 |
0.2139 |
--- |
topk |
{'data': (10000, 100), 'axis': 0, 'k': 1, 'dtype': 'float32'} |
235732.8125 |
15.8706 |
--- |
transpose |
{'data': (1024, 1024), 'axes': [0, 1]} |
274167.3438 |
0.0696 |
--- |
transpose |
{'data': (10000, 1), 'axes': [0, 1]} |
272110.1875 |
0.0499 |
--- |
transpose |
{'data': (10000, 100), 'axes': [0, 1]} |
276090.1875 |
0.0627 |
--- |
trunc |
{'data': (1024, 1024)} |
372727.2812 |
0.3051 |
--- |
trunc |
{'data': (10000, 1)} |
370670.125 |
0.0274 |
--- |
trunc |
{'data': (10000, 100)} |
374650.125 |
0.293 |
--- |
where |
{'condition': (1024,), 'x': (1024, 1024), 'y': (1024, 1024)} |
242095.5625 |
0.2336 |
0.4337 |
where |
{'condition': (10000,), 'x': (10000, 1), 'y': (10000, 1)} |
240038.4062 |
0.0409 |
0.0476 |
where |
{'condition': (10000,), 'x': (10000, 100), 'y': (10000, 100)} |
244018.4062 |
0.2264 |
0.4194 |
zeros_like |
{'data': (1024, 1024)} |
2097.1521 |
0.0485 |
--- |
zeros_like |
{'data': (10000, 1)} |
20.0 |
0.016 |
--- |
zeros_like |
{'data': (10000, 100)} |
4000.0 |
0.0672 |
--- |