Created
August 2, 2019 22:22
-
-
Save ChaiBapchya/b3c4d14d039daa9e72cafd92fcc3255f to your computer and use it in GitHub Desktop.
4D tensor fix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"BatchNorm": [ | |
{ | |
"avg_time_backward_BatchNorm": 156.6311, | |
"avg_time_forward_BatchNorm": 210.4633, | |
"inputs": { | |
"beta": [ | |
3 | |
], | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"gamma": [ | |
3 | |
], | |
"moving_mean": [ | |
3 | |
], | |
"moving_var": [ | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 25165.8359 | |
}, | |
{ | |
"avg_time_backward_BatchNorm": 122.4576, | |
"avg_time_forward_BatchNorm": 280.6864, | |
"inputs": { | |
"beta": [ | |
3 | |
], | |
"data": [ | |
32, | |
3, | |
10000, | |
10 | |
], | |
"gamma": [ | |
3 | |
], | |
"moving_mean": [ | |
3 | |
], | |
"moving_var": [ | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 38400.0117 | |
} | |
], | |
"BlockGrad": [ | |
{ | |
"avg_time_forward_BlockGrad": 0.0621, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_BlockGrad": 0.1483, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"Convolution": [ | |
{ | |
"avg_time_backward_Convolution": 2.6613, | |
"avg_time_forward_Convolution": 0.9554, | |
"inputs": { | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256 | |
], | |
"dilate": [ | |
1 | |
], | |
"kernel": [ | |
3 | |
], | |
"layout": "NCW", | |
"num_filter": 64, | |
"pad": [ | |
0 | |
], | |
"stride": [ | |
1 | |
], | |
"weight": [ | |
64, | |
3, | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2080.7681 | |
} | |
], | |
"Deconvolution": [ | |
{ | |
"avg_time_backward_Deconvolution": 11.0532, | |
"avg_time_forward_Deconvolution": 7.1124, | |
"inputs": { | |
"adj": [ | |
0 | |
], | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256 | |
], | |
"dilate": [ | |
1 | |
], | |
"kernel": [ | |
3 | |
], | |
"layout": "NCW", | |
"no_bias": false, | |
"num_filter": 64, | |
"pad": [ | |
0 | |
], | |
"stride": [ | |
1 | |
], | |
"weight": [ | |
3, | |
64, | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2674506.75 | |
} | |
], | |
"Dropout": [ | |
{ | |
"avg_time_backward_Dropout": 10.4086, | |
"avg_time_forward_Dropout": 10.9835, | |
"inputs": { | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"mode": "always", | |
"p": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 37748.7344 | |
}, | |
{ | |
"avg_time_backward_Dropout": 0.1794, | |
"avg_time_forward_Dropout": 0.5266, | |
"inputs": { | |
"data": [ | |
10000, | |
10 | |
], | |
"mode": "always", | |
"p": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 12582.9121 | |
} | |
], | |
"Flatten": [ | |
{ | |
"avg_time_forward_Flatten": 4.0294, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_Flatten": 0.7552, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2999.8081 | |
} | |
], | |
"FullyConnected": [ | |
{ | |
"avg_time_backward_FullyConnected": 168.3843, | |
"avg_time_forward_FullyConnected": 65.1918, | |
"inputs": { | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"flatten": true, | |
"num_hidden": 64, | |
"weight": [ | |
64, | |
196608 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8.192 | |
}, | |
{ | |
"avg_time_backward_FullyConnected": 56.8156, | |
"avg_time_forward_FullyConnected": 35.6103, | |
"inputs": { | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"flatten": false, | |
"num_hidden": 64, | |
"weight": [ | |
64, | |
256 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 6291.4561 | |
} | |
], | |
"LeakyReLU": [ | |
{ | |
"avg_time_backward_LeakyReLU": 7.6923, | |
"avg_time_forward_LeakyReLU": 5.2564, | |
"inputs": { | |
"act_type": "leaky", | |
"data": [ | |
1024, | |
1024 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 0.3185, | |
"avg_time_forward_LeakyReLU": 0.2896, | |
"inputs": { | |
"act_type": "leaky", | |
"data": [ | |
10000, | |
1 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 10.1505, | |
"avg_time_forward_LeakyReLU": 7.0816, | |
"inputs": { | |
"act_type": "leaky", | |
"data": [ | |
10000, | |
100 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 8.7645, | |
"avg_time_forward_LeakyReLU": 8.8864, | |
"inputs": { | |
"act_type": "elu", | |
"data": [ | |
1024, | |
1024 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 0.2357, | |
"avg_time_forward_LeakyReLU": 0.3665, | |
"inputs": { | |
"act_type": "elu", | |
"data": [ | |
10000, | |
1 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 8.5541, | |
"avg_time_forward_LeakyReLU": 7.6785, | |
"inputs": { | |
"act_type": "elu", | |
"data": [ | |
10000, | |
100 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 7.6104, | |
"avg_time_forward_LeakyReLU": 6.6837, | |
"inputs": { | |
"act_type": "selu", | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 0.2095, | |
"avg_time_forward_LeakyReLU": 0.3388, | |
"inputs": { | |
"act_type": "selu", | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 7.9922, | |
"avg_time_forward_LeakyReLU": 6.6564, | |
"inputs": { | |
"act_type": "selu", | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 42.7829, | |
"avg_time_forward_LeakyReLU": 7.4405, | |
"inputs": { | |
"act_type": "prelu", | |
"data": [ | |
1024, | |
1024 | |
], | |
"gamma": [ | |
1, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 5.061, | |
"avg_time_forward_LeakyReLU": 2.8498, | |
"inputs": { | |
"act_type": "prelu", | |
"data": [ | |
10000, | |
1 | |
], | |
"gamma": [ | |
1, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 45.0656, | |
"avg_time_forward_LeakyReLU": 9.3874, | |
"inputs": { | |
"act_type": "prelu", | |
"data": [ | |
10000, | |
100 | |
], | |
"gamma": [ | |
1, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"Pooling": [ | |
{ | |
"avg_time_backward_Pooling": 1.1285, | |
"avg_time_forward_Pooling": 0.9102, | |
"inputs": { | |
"data": [ | |
32, | |
3, | |
256 | |
], | |
"global_pool": 0, | |
"kernel": 3, | |
"pad": 1, | |
"pool_type": "avg", | |
"stride": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 98.304 | |
} | |
], | |
"abs": [ | |
{ | |
"avg_time_backward_abs": 0.0531, | |
"avg_time_forward_abs": 0.063, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_abs": 1.6817, | |
"avg_time_forward_abs": 1.7262, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"adam_update": [ | |
{ | |
"avg_time_forward_adam_update": 8.6561, | |
"inputs": { | |
"beta1": 0.1, | |
"beta2": 0.1, | |
"epsilon": 1e-08, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"mean": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"var": [ | |
1024, | |
1024 | |
], | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2621365.25 | |
}, | |
{ | |
"avg_time_forward_adam_update": 0.2118, | |
"inputs": { | |
"beta1": 0.5, | |
"beta2": 0.5, | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"mean": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"var": [ | |
10000, | |
1 | |
], | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2621405.25 | |
}, | |
{ | |
"avg_time_forward_adam_update": 7.1585, | |
"inputs": { | |
"beta1": 0.9, | |
"beta2": 0.9, | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"mean": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"var": [ | |
10000, | |
100 | |
], | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2625385.25 | |
} | |
], | |
"arccos": [ | |
{ | |
"avg_time_backward_arccos": 0.0535, | |
"avg_time_forward_arccos": 0.066, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_arccos": 1.4923, | |
"avg_time_forward_arccos": 1.5972, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"arccosh": [ | |
{ | |
"avg_time_backward_arccosh": 0.0498, | |
"avg_time_forward_arccosh": 0.0586, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_arccosh": 1.0051, | |
"avg_time_forward_arccosh": 0.629, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"arcsin": [ | |
{ | |
"avg_time_backward_arcsin": 0.0461, | |
"avg_time_forward_arcsin": 0.0553, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_arcsin": 0.897, | |
"avg_time_forward_arcsin": 0.826, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"arcsinh": [ | |
{ | |
"avg_time_backward_arcsinh": 0.0503, | |
"avg_time_forward_arcsinh": 0.0646, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_arcsinh": 0.9379, | |
"avg_time_forward_arcsinh": 1.9002, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"arctan": [ | |
{ | |
"avg_time_backward_arctan": 0.0476, | |
"avg_time_forward_arctan": 0.0577, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_arctan": 0.6256, | |
"avg_time_forward_arctan": 1.7448, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"arctanh": [ | |
{ | |
"avg_time_backward_arctanh": 0.0497, | |
"avg_time_forward_arctanh": 0.0629, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_arctanh": 2.7349, | |
"avg_time_forward_arctanh": 1.0067, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"argmax": [ | |
{ | |
"avg_time_forward_argmax": 0.0791, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_argmax": 9.6415, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"argmax_channel": [ | |
{ | |
"avg_time_forward_argmax_channel": 0.1709, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.016 | |
}, | |
{ | |
"avg_time_forward_argmax_channel": 0.8691, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
} | |
], | |
"argmin": [ | |
{ | |
"avg_time_forward_argmin": 0.0773, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_argmin": 7.6457, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"argsort": [ | |
{ | |
"avg_time_forward_argsort": 0.1387, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_argsort": 33.2482, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 5000.0 | |
} | |
], | |
"batch_dot": [ | |
{ | |
"avg_time_backward_batch_dot": 1389.9982, | |
"avg_time_forward_batch_dot": 616.2855, | |
"inputs": { | |
"lhs": [ | |
32, | |
1024, | |
1024 | |
], | |
"rhs": [ | |
32, | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 134217.7344 | |
}, | |
{ | |
"avg_time_backward_batch_dot": 168.2654, | |
"avg_time_forward_batch_dot": 429.7177, | |
"inputs": { | |
"lhs": [ | |
32, | |
1000, | |
10 | |
], | |
"rhs": [ | |
32, | |
1000, | |
10 | |
], | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 128000.0 | |
}, | |
{ | |
"avg_time_backward_batch_dot": 5.9127, | |
"avg_time_forward_batch_dot": 3.4453, | |
"inputs": { | |
"lhs": [ | |
32, | |
1000, | |
1 | |
], | |
"rhs": [ | |
32, | |
100, | |
1000 | |
], | |
"transpose_a": true, | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 6.4 | |
} | |
], | |
"broadcast_add": [ | |
{ | |
"avg_time_backward_broadcast_add": 0.0485, | |
"avg_time_forward_broadcast_add": 0.0611, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_div": [ | |
{ | |
"avg_time_backward_broadcast_div": 0.0665, | |
"avg_time_forward_broadcast_div": 0.0727, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_equal": [ | |
{ | |
"avg_time_forward_broadcast_equal": 0.0548, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_greater": [ | |
{ | |
"avg_time_forward_broadcast_greater": 0.0517, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_greater_equal": [ | |
{ | |
"avg_time_forward_broadcast_greater_equal": 0.0496, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_hypot": [ | |
{ | |
"avg_time_backward_broadcast_hypot": 0.054, | |
"avg_time_forward_broadcast_hypot": 0.0593, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_lesser": [ | |
{ | |
"avg_time_forward_broadcast_lesser": 0.0646, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_lesser_equal": [ | |
{ | |
"avg_time_forward_broadcast_lesser_equal": 0.0537, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_logical_and": [ | |
{ | |
"avg_time_forward_broadcast_logical_and": 0.0511, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_logical_or": [ | |
{ | |
"avg_time_forward_broadcast_logical_or": 0.0639, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_logical_xor": [ | |
{ | |
"avg_time_forward_broadcast_logical_xor": 0.0672, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_maximum": [ | |
{ | |
"avg_time_backward_broadcast_maximum": 0.0547, | |
"avg_time_forward_broadcast_maximum": 0.0624, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_minimum": [ | |
{ | |
"avg_time_backward_broadcast_minimum": 0.0547, | |
"avg_time_forward_broadcast_minimum": 0.0613, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_minus": [ | |
{ | |
"avg_time_forward_broadcast_minus": 0.0523, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_mod": [ | |
{ | |
"avg_time_backward_broadcast_mod": 0.0494, | |
"avg_time_forward_broadcast_mod": 0.0536, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_mul": [ | |
{ | |
"avg_time_backward_broadcast_mul": 0.0505, | |
"avg_time_forward_broadcast_mul": 0.0545, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_not_equal": [ | |
{ | |
"avg_time_forward_broadcast_not_equal": 0.0528, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_plus": [ | |
{ | |
"avg_time_forward_broadcast_plus": 0.0513, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_power": [ | |
{ | |
"avg_time_backward_broadcast_power": 0.051, | |
"avg_time_forward_broadcast_power": 0.0563, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_sub": [ | |
{ | |
"avg_time_backward_broadcast_sub": 0.0442, | |
"avg_time_forward_broadcast_sub": 0.0558, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"cbrt": [ | |
{ | |
"avg_time_backward_cbrt": 0.0503, | |
"avg_time_forward_cbrt": 0.0589, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_cbrt": 1.9752, | |
"avg_time_forward_cbrt": 1.9699, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"ceil": [ | |
{ | |
"avg_time_forward_ceil": 0.0503, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_ceil": 0.5654, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"cos": [ | |
{ | |
"avg_time_backward_cos": 0.0492, | |
"avg_time_forward_cos": 0.0581, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_cos": 0.9369, | |
"avg_time_forward_cos": 1.8058, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"cosh": [ | |
{ | |
"avg_time_backward_cosh": 0.0501, | |
"avg_time_forward_cosh": 0.0601, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_cosh": 1.929, | |
"avg_time_forward_cosh": 1.5808, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"degrees": [ | |
{ | |
"avg_time_backward_degrees": 0.0462, | |
"avg_time_forward_degrees": 0.0546, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_degrees": 1.3703, | |
"avg_time_forward_degrees": 1.8427, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"depth_to_space": [ | |
{ | |
"avg_time_forward_depth_to_space": 0.0843, | |
"inputs": { | |
"block_size": 2, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_depth_to_space": 1.6798, | |
"inputs": { | |
"block_size": 5, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"dot": [ | |
{ | |
"avg_time_backward_dot": 108.8924, | |
"avg_time_forward_dot": 52.1692, | |
"inputs": { | |
"lhs": [ | |
1024, | |
1024 | |
], | |
"rhs": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_dot": 7.3194, | |
"avg_time_forward_dot": 27.2215, | |
"inputs": { | |
"lhs": [ | |
1000, | |
10 | |
], | |
"rhs": [ | |
1000, | |
10 | |
], | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_dot": 0.3362, | |
"avg_time_forward_dot": 0.2043, | |
"inputs": { | |
"lhs": [ | |
1000, | |
1 | |
], | |
"rhs": [ | |
100, | |
1000 | |
], | |
"transpose_a": true, | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"elemwise_add": [ | |
{ | |
"avg_time_forward_elemwise_add": 17.4534, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"elemwise_div": [ | |
{ | |
"avg_time_forward_elemwise_div": 0.0624, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"elemwise_mul": [ | |
{ | |
"avg_time_forward_elemwise_mul": 0.0683, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"elemwise_sub": [ | |
{ | |
"avg_time_forward_elemwise_sub": 0.0518, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"erf": [ | |
{ | |
"avg_time_backward_erf": 0.0455, | |
"avg_time_forward_erf": 0.0543, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_erf": 1.2152, | |
"avg_time_forward_erf": 1.3477, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"erfinv": [ | |
{ | |
"avg_time_backward_erfinv": 0.0522, | |
"avg_time_forward_erfinv": 0.0592, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_erfinv": 2.9299, | |
"avg_time_forward_erfinv": 2.5378, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"exp": [ | |
{ | |
"avg_time_forward_exp": 0.0479, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_exp": 0.5052, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"expm1": [ | |
{ | |
"avg_time_backward_expm1": 0.0504, | |
"avg_time_forward_expm1": 0.0587, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_expm1": 0.7071, | |
"avg_time_forward_expm1": 0.8253, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"fix": [ | |
{ | |
"avg_time_forward_fix": 0.0582, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_fix": 2.5691, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"flatten": [ | |
{ | |
"avg_time_forward_flatten": 0.1343, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_flatten": 0.2836, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"flip": [ | |
{ | |
"avg_time_forward_flip": 0.0765, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_flip": 4.6748, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"floor": [ | |
{ | |
"avg_time_forward_floor": 0.0619, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_floor": 0.2315, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"ftml_update": [ | |
{ | |
"avg_time_forward_ftml_update": 3.1145, | |
"inputs": { | |
"beta1": 0.1, | |
"beta2": 0.1, | |
"clip_grad": -1.0, | |
"d": [ | |
1024, | |
1024 | |
], | |
"epsilon": 1e-08, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"t": 1, | |
"v": [ | |
1024, | |
1024 | |
], | |
"weight": [ | |
1024, | |
1024 | |
], | |
"z": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2625482.5 | |
}, | |
{ | |
"avg_time_forward_ftml_update": 0.1084, | |
"inputs": { | |
"beta1": 0.5, | |
"beta2": 0.5, | |
"clip_grad": -1.0, | |
"d": [ | |
10000, | |
1 | |
], | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"t": 1, | |
"v": [ | |
10000, | |
1 | |
], | |
"weight": [ | |
10000, | |
1 | |
], | |
"z": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2625522.5 | |
}, | |
{ | |
"avg_time_forward_ftml_update": 3.2331, | |
"inputs": { | |
"beta1": 0.9, | |
"beta2": 0.9, | |
"clip_grad": -1.0, | |
"d": [ | |
10000, | |
100 | |
], | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"t": 1, | |
"v": [ | |
10000, | |
100 | |
], | |
"weight": [ | |
10000, | |
100 | |
], | |
"z": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2629502.5 | |
} | |
], | |
"ftrl_update": [ | |
{ | |
"avg_time_forward_ftrl_update": 10.5369, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"n": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
], | |
"z": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2629599.75 | |
}, | |
{ | |
"avg_time_forward_ftrl_update": 0.2797, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"n": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
], | |
"z": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2629639.75 | |
}, | |
{ | |
"avg_time_forward_ftrl_update": 10.0023, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"n": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
], | |
"z": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2631619.75 | |
} | |
], | |
"gamma": [ | |
{ | |
"avg_time_backward_gamma": 0.0584, | |
"avg_time_forward_gamma": 0.0625, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_gamma": 5.0862, | |
"avg_time_forward_gamma": 2.8856, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"gammaln": [ | |
{ | |
"avg_time_backward_gammaln": 0.0536, | |
"avg_time_forward_gammaln": 0.114, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_gammaln": 3.2498, | |
"avg_time_forward_gammaln": 6.8876, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"hard_sigmoid": [ | |
{ | |
"avg_time_backward_hard_sigmoid": 8.3043, | |
"avg_time_forward_hard_sigmoid": 7.1415, | |
"inputs": { | |
"alpha": 0.25, | |
"beta": 0.5, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_hard_sigmoid": 2.2843, | |
"avg_time_forward_hard_sigmoid": 2.433, | |
"inputs": { | |
"alpha": 0.25, | |
"beta": 0.5, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_hard_sigmoid": 5.6079, | |
"avg_time_forward_hard_sigmoid": 4.9296, | |
"inputs": { | |
"alpha": 0.25, | |
"beta": 0.5, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"identity": [ | |
{ | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"log": [ | |
{ | |
"avg_time_backward_log": 0.0441, | |
"avg_time_forward_log": 0.0561, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_log": 0.8132, | |
"avg_time_forward_log": 1.5464, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"log10": [ | |
{ | |
"avg_time_backward_log10": 0.043, | |
"avg_time_forward_log10": 0.0525, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_log10": 1.2752, | |
"avg_time_forward_log10": 1.7511, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"log1p": [ | |
{ | |
"avg_time_backward_log1p": 0.0485, | |
"avg_time_forward_log1p": 0.0526, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_log1p": 0.8552, | |
"avg_time_forward_log1p": 1.7527, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"log2": [ | |
{ | |
"avg_time_backward_log2": 0.0496, | |
"avg_time_forward_log2": 0.06, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_log2": 2.221, | |
"avg_time_forward_log2": 0.533, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"log_softmax": [ | |
{ | |
"avg_time_backward_log_softmax": 2.4782, | |
"avg_time_forward_log_softmax": 3.4694, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
1024, | |
1024 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_log_softmax": 0.3533, | |
"avg_time_forward_log_softmax": 0.2447, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
1 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_log_softmax": 1.8645, | |
"avg_time_forward_log_softmax": 2.6784, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
100 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"logical_not": [ | |
{ | |
"avg_time_forward_logical_not": 0.06, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_logical_not": 1.7432, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"make_loss": [ | |
{ | |
"avg_time_forward_make_loss": 0.0604, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_make_loss": 0.139, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"max": [ | |
{ | |
"avg_time_backward_max": 2.6719, | |
"avg_time_forward_max": 1.5891, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_max": 1.278, | |
"avg_time_forward_max": 0.872, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"max_axis": [ | |
{ | |
"avg_time_forward_max_axis": 0.0922, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_max_axis": 0.6912, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"mean": [ | |
{ | |
"avg_time_backward_mean": 1.2062, | |
"avg_time_forward_mean": 2.3667, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_mean": 6.5703, | |
"avg_time_forward_mean": 1.7575, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"min": [ | |
{ | |
"avg_time_backward_min": 2.8996, | |
"avg_time_forward_min": 3.1496, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_min": 1.8376, | |
"avg_time_forward_min": 1.1606, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"min_axis": [ | |
{ | |
"avg_time_forward_min_axis": 0.3476, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_min_axis": 1.9567, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"mp_sgd_mom_update": [ | |
{ | |
"avg_time_forward_mp_sgd_mom_update": 1.377, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"mom": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
], | |
"weight32": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2633716.75 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_mom_update": 0.0897, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"mom": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
], | |
"weight32": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2633756.75 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_mom_update": 1.3166, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"mom": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
], | |
"weight32": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2637736.75 | |
} | |
], | |
"mp_sgd_update": [ | |
{ | |
"avg_time_forward_mp_sgd_update": 0.8036, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
], | |
"weight32": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2637834.0 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_update": 0.0848, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
], | |
"weight32": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2637874.0 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_update": 0.9158, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
], | |
"weight32": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2641854.0 | |
} | |
], | |
"nanprod": [ | |
{ | |
"avg_time_backward_nanprod": 1.4978, | |
"avg_time_forward_nanprod": 1.4161, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_nanprod": 1.9797, | |
"avg_time_forward_nanprod": 1.2056, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"nansum": [ | |
{ | |
"avg_time_backward_nansum": 1.9112, | |
"avg_time_forward_nansum": 1.9512, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_nansum": 2.8204, | |
"avg_time_forward_nansum": 2.0081, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"negative": [ | |
{ | |
"avg_time_forward_negative": 0.0467, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_negative": 1.3599, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"ones_like": [ | |
{ | |
"avg_time_forward_ones_like": 0.0564, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_ones_like": 1.0479, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"prod": [ | |
{ | |
"avg_time_backward_prod": 1.9021, | |
"avg_time_forward_prod": 2.4085, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_prod": 1.8727, | |
"avg_time_forward_prod": 1.2899, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"radians": [ | |
{ | |
"avg_time_backward_radians": 0.0442, | |
"avg_time_forward_radians": 0.0517, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_radians": 13.8761, | |
"avg_time_forward_radians": 1.8484, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"random_exponential": [ | |
{ | |
"avg_time_forward_random_exponential": 13.4686, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_exponential": 0.1969, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_exponential": 12.6015, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_gamma": [ | |
{ | |
"avg_time_forward_random_gamma": 46.56, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_gamma": 0.5503, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_gamma": 44.4213, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_generalized_negative_binomial": [ | |
{ | |
"avg_time_forward_random_generalized_negative_binomial": 83.2524, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_generalized_negative_binomial": 1.5213, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_generalized_negative_binomial": 78.4477, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_negative_binomial": [ | |
{ | |
"avg_time_forward_random_negative_binomial": 72.5323, | |
"inputs": { | |
"k": 1, | |
"p": 1, | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_negative_binomial": 1.0716, | |
"inputs": { | |
"k": 1, | |
"p": 1, | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_negative_binomial": 65.2265, | |
"inputs": { | |
"k": 1, | |
"p": 1, | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_normal": [ | |
{ | |
"avg_time_forward_random_normal": 26.0932, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_normal": 2.2621, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_normal": 25.5475, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_pdf_dirichlet": [ | |
{ | |
"avg_time_forward_random_pdf_dirichlet": 0.0717, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.002 | |
} | |
], | |
"random_pdf_exponential": [ | |
{ | |
"avg_time_forward_random_pdf_exponential": 0.0704, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_gamma": [ | |
{ | |
"avg_time_forward_random_pdf_gamma": 0.0795, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_generalized_negative_binomial": [ | |
{ | |
"avg_time_forward_random_pdf_generalized_negative_binomial": 0.0763, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_negative_binomial": [ | |
{ | |
"avg_time_forward_random_pdf_negative_binomial": 0.5033, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.008 | |
} | |
], | |
"random_pdf_normal": [ | |
{ | |
"avg_time_forward_random_pdf_normal": 0.5395, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_poisson": [ | |
{ | |
"avg_time_forward_random_pdf_poisson": 0.0779, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_uniform": [ | |
{ | |
"avg_time_forward_random_pdf_uniform": 0.0792, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_poisson": [ | |
{ | |
"avg_time_forward_random_poisson": 24.4973, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_random_poisson": 0.2956, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_poisson": 22.4235, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_randint": [ | |
{ | |
"avg_time_forward_random_randint": 3.446, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_randint": 0.2203, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_randint": 4.3555, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_uniform": [ | |
{ | |
"avg_time_forward_random_uniform": 12.4228, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_uniform": 0.178, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_random_uniform": 12.1362, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"rcbrt": [ | |
{ | |
"avg_time_backward_rcbrt": 0.0493, | |
"avg_time_forward_rcbrt": 0.0572, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_rcbrt": 14.8481, | |
"avg_time_forward_rcbrt": 12.7554, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"reciprocal": [ | |
{ | |
"avg_time_backward_reciprocal": 0.0423, | |
"avg_time_forward_reciprocal": 0.0499, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_reciprocal": 16.5206, | |
"avg_time_forward_reciprocal": 1.9734, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"relu": [ | |
{ | |
"avg_time_backward_relu": 0.0475, | |
"avg_time_forward_relu": 0.0536, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_relu": 14.5768, | |
"avg_time_forward_relu": 14.2648, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"rint": [ | |
{ | |
"avg_time_forward_rint": 0.0569, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_rint": 7.4647, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"rmsprop_update": [ | |
{ | |
"avg_time_forward_rmsprop_update": 5.8604, | |
"inputs": { | |
"epsilon": 1e-08, | |
"gamma1": 0.1, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"n": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2641951.25 | |
}, | |
{ | |
"avg_time_forward_rmsprop_update": 0.1754, | |
"inputs": { | |
"epsilon": 1e-08, | |
"gamma1": 0.5, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"n": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2641991.25 | |
}, | |
{ | |
"avg_time_forward_rmsprop_update": 5.4292, | |
"inputs": { | |
"epsilon": 1e-08, | |
"gamma1": 0.9, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"n": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2643971.25 | |
} | |
], | |
"rmspropalex_update": [ | |
{ | |
"avg_time_forward_rmspropalex_update": 9.5355, | |
"inputs": { | |
"delta": [ | |
1024, | |
1024 | |
], | |
"epsilon": 1e-08, | |
"g": [ | |
1024, | |
1024 | |
], | |
"gamma1": 0.1, | |
"gamma2": 0.1, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"n": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2646068.25 | |
}, | |
{ | |
"avg_time_forward_rmspropalex_update": 0.2481, | |
"inputs": { | |
"delta": [ | |
10000, | |
1 | |
], | |
"epsilon": 1e-08, | |
"g": [ | |
10000, | |
1 | |
], | |
"gamma1": 0.5, | |
"gamma2": 0.5, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"n": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2646108.25 | |
}, | |
{ | |
"avg_time_forward_rmspropalex_update": 8.3807, | |
"inputs": { | |
"delta": [ | |
10000, | |
100 | |
], | |
"epsilon": 1e-08, | |
"g": [ | |
10000, | |
100 | |
], | |
"gamma1": 0.9, | |
"gamma2": 0.9, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"n": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2650088.25 | |
} | |
], | |
"round": [ | |
{ | |
"avg_time_forward_round": 0.053, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_round": 5.844, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"rsqrt": [ | |
{ | |
"avg_time_backward_rsqrt": 0.0478, | |
"avg_time_forward_rsqrt": 0.0549, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_rsqrt": 14.4992, | |
"avg_time_forward_rsqrt": 15.9506, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"sample_exponential": [ | |
{ | |
"avg_time_forward_sample_exponential": 28.7887, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_sample_exponential": 1.7761, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_exponential": 33.5597, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_gamma": [ | |
{ | |
"avg_time_forward_sample_gamma": 137.7197, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_gamma": 1.6504, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 80.0 | |
}, | |
{ | |
"avg_time_forward_sample_gamma": 139.7798, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"sample_generalized_negative_binomial": [ | |
{ | |
"avg_time_forward_sample_generalized_negative_binomial": 156.509, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_generalized_negative_binomial": 2.0148, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_generalized_negative_binomial": 163.6763, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_negative_binomial": [ | |
{ | |
"avg_time_forward_sample_negative_binomial": 462.3102, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_negative_binomial": 3.1806, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_negative_binomial": 386.3756, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_normal": [ | |
{ | |
"avg_time_forward_sample_normal": 61.3393, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_normal": 0.5756, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_normal": 51.1177, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_poisson": [ | |
{ | |
"avg_time_forward_sample_poisson": 168.7623, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_poisson": 1.7241, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 80.0 | |
}, | |
{ | |
"avg_time_forward_sample_poisson": 169.5054, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_uniform": [ | |
{ | |
"avg_time_forward_sample_uniform": 28.5002, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_sample_uniform": 1.2548, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_uniform": 23.017, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"sgd_mom_update": [ | |
{ | |
"avg_time_forward_sgd_mom_update": 0.8894, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"mom": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2650185.5 | |
}, | |
{ | |
"avg_time_forward_sgd_mom_update": 0.0876, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"mom": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2650225.5 | |
}, | |
{ | |
"avg_time_forward_sgd_mom_update": 0.8818, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"mom": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2654205.5 | |
} | |
], | |
"sgd_update": [ | |
{ | |
"avg_time_forward_sgd_update": 0.6525, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2656399.75 | |
}, | |
{ | |
"avg_time_forward_sgd_update": 0.0803, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2654342.5 | |
}, | |
{ | |
"avg_time_forward_sgd_update": 0.6345, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2658322.5 | |
} | |
], | |
"shuffle": [ | |
{ | |
"avg_time_forward_shuffle": 0.0575, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_shuffle": 0.3129, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"sigmoid": [ | |
{ | |
"avg_time_backward_sigmoid": 0.0444, | |
"avg_time_forward_sigmoid": 0.0523, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_sigmoid": 16.9119, | |
"avg_time_forward_sigmoid": 16.1368, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"sign": [ | |
{ | |
"avg_time_backward_sign": 0.0464, | |
"avg_time_forward_sign": 0.0506, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_sign": 15.1003, | |
"avg_time_forward_sign": 15.7445, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"signsgd_update": [ | |
{ | |
"avg_time_forward_signsgd_update": 0.692, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2658419.75 | |
}, | |
{ | |
"avg_time_forward_signsgd_update": 0.0798, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2658459.75 | |
}, | |
{ | |
"avg_time_forward_signsgd_update": 0.6663, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2662439.75 | |
} | |
], | |
"signum_update": [ | |
{ | |
"avg_time_forward_signum_update": 1.0717, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"mom": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2662537.0 | |
}, | |
{ | |
"avg_time_forward_signum_update": 0.0932, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"mom": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2662577.0 | |
}, | |
{ | |
"avg_time_forward_signum_update": 1.0416, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"mom": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2666557.0 | |
} | |
], | |
"sin": [ | |
{ | |
"avg_time_backward_sin": 0.0544, | |
"avg_time_forward_sin": 0.0649, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_sin": 13.139, | |
"avg_time_forward_sin": 10.9879, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"sinh": [ | |
{ | |
"avg_time_backward_sinh": 0.0425, | |
"avg_time_forward_sinh": 0.0527, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_sinh": 16.1289, | |
"avg_time_forward_sinh": 17.3903, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"size_array": [ | |
{ | |
"avg_time_forward_size_array": 0.0547, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.008 | |
}, | |
{ | |
"avg_time_forward_size_array": 0.0457, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"softmax": [ | |
{ | |
"avg_time_backward_softmax": 7.6926, | |
"avg_time_forward_softmax": 8.6355, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
1024, | |
1024 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_softmax": 0.5735, | |
"avg_time_forward_softmax": 0.3881, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
1 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_softmax": 5.4585, | |
"avg_time_forward_softmax": 6.7907, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
100 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"softsign": [ | |
{ | |
"avg_time_backward_softsign": 0.0435, | |
"avg_time_forward_softsign": 0.0504, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_softsign": 15.6889, | |
"avg_time_forward_softsign": 15.2094, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"sort": [ | |
{ | |
"avg_time_forward_sort": 0.1562, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.192 | |
}, | |
{ | |
"avg_time_forward_sort": 36.9801, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"space_to_depth": [ | |
{ | |
"avg_time_forward_space_to_depth": 0.506, | |
"inputs": { | |
"block_size": 2, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_space_to_depth": 1.6777, | |
"inputs": { | |
"block_size": 5, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"sqrt": [ | |
{ | |
"avg_time_backward_sqrt": 0.0424, | |
"avg_time_forward_sqrt": 0.0539, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_sqrt": 13.8014, | |
"avg_time_forward_sqrt": 13.1733, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"square": [ | |
{ | |
"avg_time_backward_square": 0.0469, | |
"avg_time_forward_square": 0.0562, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_square": 15.1503, | |
"avg_time_forward_square": 15.8975, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"stop_gradient": [ | |
{ | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"sum": [ | |
{ | |
"avg_time_backward_sum": 0.2451, | |
"avg_time_forward_sum": 0.3055, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_backward_sum": 4.2928, | |
"avg_time_forward_sum": 2.9365, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"sum_axis": [ | |
{ | |
"avg_time_forward_sum_axis": 0.0915, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_sum_axis": 0.6606, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"swapaxes": [ | |
{ | |
"avg_time_forward_swapaxes": 0.0793, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
], | |
"dim1": 0, | |
"dim2": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_swapaxes": 0.0791, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
], | |
"dim1": 1, | |
"dim2": 2 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_swapaxes": 1.7991, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
], | |
"dim1": 2, | |
"dim2": 3 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_swapaxes": 0.076, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
], | |
"dim1": 3, | |
"dim2": 0 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
} | |
], | |
"tan": [ | |
{ | |
"avg_time_backward_tan": 0.049, | |
"avg_time_forward_tan": 0.0586, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_tan": 12.2302, | |
"avg_time_forward_tan": 13.2709, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"tanh": [ | |
{ | |
"avg_time_backward_tanh": 0.046, | |
"avg_time_forward_tanh": 0.0585, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_backward_tanh": 4.6273, | |
"avg_time_forward_tanh": 3.8714, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"topk": [ | |
{ | |
"avg_time_forward_topk": 0.1346, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
], | |
"k": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_topk": 33.4073, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
], | |
"k": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 100.0 | |
} | |
], | |
"transpose": [ | |
{ | |
"avg_time_forward_transpose": 0.0983, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_transpose": 0.4081, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"trunc": [ | |
{ | |
"avg_time_forward_trunc": 0.0536, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_trunc": 0.2106, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 500.0 | |
} | |
], | |
"zeros_like": [ | |
{ | |
"avg_time_forward_zeros_like": 0.0539, | |
"inputs": { | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_zeros_like": 1.0055, | |
"inputs": { | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"BatchNorm": [ | |
{ | |
"avg_time_backward_BatchNorm": 95.2271, | |
"avg_time_forward_BatchNorm": 183.2972, | |
"inputs": { | |
"beta": [ | |
3 | |
], | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"gamma": [ | |
3 | |
], | |
"moving_mean": [ | |
3 | |
], | |
"moving_var": [ | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 25165.8359 | |
}, | |
{ | |
"avg_time_backward_BatchNorm": 127.9307, | |
"avg_time_forward_BatchNorm": 261.9488, | |
"inputs": { | |
"beta": [ | |
3 | |
], | |
"data": [ | |
32, | |
3, | |
10000, | |
10 | |
], | |
"gamma": [ | |
3 | |
], | |
"moving_mean": [ | |
3 | |
], | |
"moving_var": [ | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 38400.0117 | |
} | |
], | |
"BlockGrad": [ | |
{ | |
"avg_time_forward_BlockGrad": 0.7681, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_BlockGrad": 0.0706, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_BlockGrad": 0.5783, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"Convolution": [ | |
{ | |
"avg_time_backward_Convolution": 56.0465, | |
"avg_time_forward_Convolution": 10.6884, | |
"inputs": { | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256 | |
], | |
"dilate": [ | |
1 | |
], | |
"kernel": [ | |
3 | |
], | |
"layout": "NCW", | |
"num_filter": 64, | |
"pad": [ | |
0 | |
], | |
"stride": [ | |
1 | |
], | |
"weight": [ | |
64, | |
3, | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40360.293 | |
} | |
], | |
"Deconvolution": [ | |
{ | |
"avg_time_backward_Deconvolution": 12.1316, | |
"avg_time_forward_Deconvolution": 10.0797, | |
"inputs": { | |
"adj": [ | |
0 | |
], | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256 | |
], | |
"dilate": [ | |
1 | |
], | |
"kernel": [ | |
3 | |
], | |
"layout": "NCW", | |
"no_bias": false, | |
"num_filter": 64, | |
"pad": [ | |
0 | |
], | |
"stride": [ | |
1 | |
], | |
"weight": [ | |
3, | |
64, | |
3 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3057381.0 | |
} | |
], | |
"Dropout": [ | |
{ | |
"avg_time_backward_Dropout": 16.4491, | |
"avg_time_forward_Dropout": 16.4969, | |
"inputs": { | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"mode": "always", | |
"p": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 37748.7344 | |
}, | |
{ | |
"avg_time_backward_Dropout": 1.0126, | |
"avg_time_forward_Dropout": 1.7015, | |
"inputs": { | |
"data": [ | |
10000, | |
10 | |
], | |
"mode": "always", | |
"p": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 12582.9121 | |
} | |
], | |
"Flatten": [ | |
{ | |
"avg_time_forward_Flatten": 0.5707, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_Flatten": 0.1555, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_Flatten": 0.5317, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"FullyConnected": [ | |
{ | |
"avg_time_backward_FullyConnected": 145.4304, | |
"avg_time_forward_FullyConnected": 58.8149, | |
"inputs": { | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"flatten": true, | |
"num_hidden": 64, | |
"weight": [ | |
64, | |
196608 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8.192 | |
}, | |
{ | |
"avg_time_backward_FullyConnected": 53.5, | |
"avg_time_forward_FullyConnected": 33.3896, | |
"inputs": { | |
"bias": [ | |
64 | |
], | |
"data": [ | |
32, | |
3, | |
256, | |
256 | |
], | |
"flatten": false, | |
"num_hidden": 64, | |
"weight": [ | |
64, | |
256 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 6291.4561 | |
} | |
], | |
"LeakyReLU": [ | |
{ | |
"avg_time_backward_LeakyReLU": 2.8337, | |
"avg_time_forward_LeakyReLU": 2.3635, | |
"inputs": { | |
"act_type": "leaky", | |
"data": [ | |
1024, | |
1024 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 0.4687, | |
"avg_time_forward_LeakyReLU": 0.5069, | |
"inputs": { | |
"act_type": "leaky", | |
"data": [ | |
10000, | |
1 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 1.8752, | |
"avg_time_forward_LeakyReLU": 1.6388, | |
"inputs": { | |
"act_type": "leaky", | |
"data": [ | |
10000, | |
100 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 2.4101, | |
"avg_time_forward_LeakyReLU": 2.8367, | |
"inputs": { | |
"act_type": "elu", | |
"data": [ | |
1024, | |
1024 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 0.2929, | |
"avg_time_forward_LeakyReLU": 0.3591, | |
"inputs": { | |
"act_type": "elu", | |
"data": [ | |
10000, | |
1 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 2.6405, | |
"avg_time_forward_LeakyReLU": 2.45, | |
"inputs": { | |
"act_type": "elu", | |
"data": [ | |
10000, | |
100 | |
], | |
"slope": 0.1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 2.5475, | |
"avg_time_forward_LeakyReLU": 2.0243, | |
"inputs": { | |
"act_type": "selu", | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 0.0738, | |
"avg_time_forward_LeakyReLU": 0.2485, | |
"inputs": { | |
"act_type": "selu", | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 1.4574, | |
"avg_time_forward_LeakyReLU": 1.6605, | |
"inputs": { | |
"act_type": "selu", | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 25.2435, | |
"avg_time_forward_LeakyReLU": 3.0502, | |
"inputs": { | |
"act_type": "prelu", | |
"data": [ | |
1024, | |
1024 | |
], | |
"gamma": [ | |
1, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 1.624, | |
"avg_time_forward_LeakyReLU": 0.1082, | |
"inputs": { | |
"act_type": "prelu", | |
"data": [ | |
10000, | |
1 | |
], | |
"gamma": [ | |
1, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_LeakyReLU": 20.608, | |
"avg_time_forward_LeakyReLU": 2.3458, | |
"inputs": { | |
"act_type": "prelu", | |
"data": [ | |
10000, | |
100 | |
], | |
"gamma": [ | |
1, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"Pooling": [ | |
{ | |
"avg_time_backward_Pooling": 0.8776, | |
"avg_time_forward_Pooling": 0.7215, | |
"inputs": { | |
"data": [ | |
32, | |
3, | |
256 | |
], | |
"global_pool": 0, | |
"kernel": 3, | |
"pad": 1, | |
"pool_type": "avg", | |
"stride": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 98.304 | |
} | |
], | |
"abs": [ | |
{ | |
"avg_time_backward_abs": 0.9423, | |
"avg_time_forward_abs": 0.4321, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_abs": 0.1067, | |
"avg_time_forward_abs": 0.1808, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_abs": 0.8742, | |
"avg_time_forward_abs": 0.6225, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"adam_update": [ | |
{ | |
"avg_time_forward_adam_update": 9.3625, | |
"inputs": { | |
"beta1": 0.1, | |
"beta2": 0.1, | |
"epsilon": 1e-08, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"mean": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"var": [ | |
1024, | |
1024 | |
], | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3008922.0 | |
}, | |
{ | |
"avg_time_forward_adam_update": 0.3305, | |
"inputs": { | |
"beta1": 0.5, | |
"beta2": 0.5, | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"mean": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"var": [ | |
10000, | |
1 | |
], | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3006864.75 | |
}, | |
{ | |
"avg_time_forward_adam_update": 7.6324, | |
"inputs": { | |
"beta1": 0.9, | |
"beta2": 0.9, | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"mean": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"var": [ | |
10000, | |
100 | |
], | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3010844.75 | |
} | |
], | |
"arccos": [ | |
{ | |
"avg_time_backward_arccos": 1.2768, | |
"avg_time_forward_arccos": 1.2732, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_backward_arccos": 0.085, | |
"avg_time_forward_arccos": 0.1107, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_arccos": 1.1336, | |
"avg_time_forward_arccos": 1.3349, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"arccosh": [ | |
{ | |
"avg_time_backward_arccosh": 1.4726, | |
"avg_time_forward_arccosh": 1.1241, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_arccosh": 0.0812, | |
"avg_time_forward_arccosh": 0.3352, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_arccosh": 1.7807, | |
"avg_time_forward_arccosh": 1.4921, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"arcsin": [ | |
{ | |
"avg_time_backward_arcsin": 1.3955, | |
"avg_time_forward_arcsin": 1.499, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_arcsin": 0.1301, | |
"avg_time_forward_arcsin": 0.4643, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_arcsin": 2.1067, | |
"avg_time_forward_arcsin": 1.7313, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"arcsinh": [ | |
{ | |
"avg_time_backward_arcsinh": 1.4938, | |
"avg_time_forward_arcsinh": 2.2061, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_arcsinh": 0.0824, | |
"avg_time_forward_arcsinh": 0.1017, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_arcsinh": 0.9831, | |
"avg_time_forward_arcsinh": 2.0505, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"arctan": [ | |
{ | |
"avg_time_backward_arctan": 0.7033, | |
"avg_time_forward_arctan": 1.4872, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_arctan": 0.1824, | |
"avg_time_forward_arctan": 0.0892, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_arctan": 0.6338, | |
"avg_time_forward_arctan": 1.4923, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"arctanh": [ | |
{ | |
"avg_time_backward_arctanh": 0.9058, | |
"avg_time_forward_arctanh": 1.908, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_arctanh": 0.1889, | |
"avg_time_forward_arctanh": 0.4439, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_arctanh": 1.3421, | |
"avg_time_forward_arctanh": 2.2115, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"argmax": [ | |
{ | |
"avg_time_forward_argmax": 40.214, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_forward_argmax": 0.5357, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.002 | |
}, | |
{ | |
"avg_time_forward_argmax": 28.7662, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"argmax_channel": [ | |
{ | |
"avg_time_forward_argmax_channel": 1.8297, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_forward_argmax_channel": 0.099, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_argmax_channel": 0.9669, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
} | |
], | |
"argmin": [ | |
{ | |
"avg_time_forward_argmin": 53.4402, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2.048 | |
}, | |
{ | |
"avg_time_forward_argmin": 0.6823, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_forward_argmin": 29.9348, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"argsort": [ | |
{ | |
"avg_time_forward_argsort": 140.3303, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_argsort": 5.3644, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_argsort": 144.9115, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"batch_dot": [ | |
{ | |
"avg_time_backward_batch_dot": 332.8731, | |
"avg_time_forward_batch_dot": 197.5804, | |
"inputs": { | |
"lhs": [ | |
32, | |
1024, | |
1024 | |
], | |
"rhs": [ | |
32, | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 134217.7344 | |
}, | |
{ | |
"avg_time_backward_batch_dot": 126.3203, | |
"avg_time_forward_batch_dot": 133.655, | |
"inputs": { | |
"lhs": [ | |
32, | |
1000, | |
10 | |
], | |
"rhs": [ | |
32, | |
1000, | |
10 | |
], | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 128000.0 | |
}, | |
{ | |
"avg_time_backward_batch_dot": 5.9293, | |
"avg_time_forward_batch_dot": 3.4447, | |
"inputs": { | |
"lhs": [ | |
32, | |
1000, | |
1 | |
], | |
"rhs": [ | |
32, | |
100, | |
1000 | |
], | |
"transpose_a": true, | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 6.4 | |
} | |
], | |
"broadcast_add": [ | |
{ | |
"avg_time_backward_broadcast_add": 0.0484, | |
"avg_time_forward_broadcast_add": 0.0557, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_div": [ | |
{ | |
"avg_time_backward_broadcast_div": 0.0608, | |
"avg_time_forward_broadcast_div": 0.0636, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_equal": [ | |
{ | |
"avg_time_forward_broadcast_equal": 0.0532, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_greater": [ | |
{ | |
"avg_time_forward_broadcast_greater": 0.0541, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_greater_equal": [ | |
{ | |
"avg_time_forward_broadcast_greater_equal": 0.0542, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_hypot": [ | |
{ | |
"avg_time_backward_broadcast_hypot": 0.0641, | |
"avg_time_forward_broadcast_hypot": 0.0713, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_lesser": [ | |
{ | |
"avg_time_forward_broadcast_lesser": 0.0558, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_lesser_equal": [ | |
{ | |
"avg_time_forward_broadcast_lesser_equal": 0.0489, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_logical_and": [ | |
{ | |
"avg_time_forward_broadcast_logical_and": 0.0622, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_logical_or": [ | |
{ | |
"avg_time_forward_broadcast_logical_or": 0.0523, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_logical_xor": [ | |
{ | |
"avg_time_forward_broadcast_logical_xor": 0.0608, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_maximum": [ | |
{ | |
"avg_time_backward_broadcast_maximum": 0.0626, | |
"avg_time_forward_broadcast_maximum": 0.0655, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_minimum": [ | |
{ | |
"avg_time_backward_broadcast_minimum": 0.0614, | |
"avg_time_forward_broadcast_minimum": 0.0651, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_minus": [ | |
{ | |
"avg_time_forward_broadcast_minus": 0.0462, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"broadcast_mod": [ | |
{ | |
"avg_time_backward_broadcast_mod": 0.0601, | |
"avg_time_forward_broadcast_mod": 0.0626, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_mul": [ | |
{ | |
"avg_time_backward_broadcast_mul": 0.0594, | |
"avg_time_forward_broadcast_mul": 0.0594, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_not_equal": [ | |
{ | |
"avg_time_forward_broadcast_not_equal": 0.0422, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_plus": [ | |
{ | |
"avg_time_forward_broadcast_plus": 0.0434, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_power": [ | |
{ | |
"avg_time_backward_broadcast_power": 0.0553, | |
"avg_time_forward_broadcast_power": 0.0719, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"broadcast_sub": [ | |
{ | |
"avg_time_backward_broadcast_sub": 0.0527, | |
"avg_time_forward_broadcast_sub": 0.0685, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"cbrt": [ | |
{ | |
"avg_time_backward_cbrt": 0.7282, | |
"avg_time_forward_cbrt": 1.8219, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_cbrt": 0.1883, | |
"avg_time_forward_cbrt": 0.0923, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_cbrt": 0.5885, | |
"avg_time_forward_cbrt": 1.7147, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"ceil": [ | |
{ | |
"avg_time_forward_ceil": 0.4358, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_ceil": 0.1552, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_ceil": 0.4328, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"cos": [ | |
{ | |
"avg_time_backward_cos": 1.157, | |
"avg_time_forward_cos": 1.1354, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_cos": 0.0709, | |
"avg_time_forward_cos": 0.0829, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_cos": 1.0878, | |
"avg_time_forward_cos": 0.9599, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"cosh": [ | |
{ | |
"avg_time_backward_cosh": 2.0996, | |
"avg_time_forward_cosh": 1.4742, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_cosh": 0.0948, | |
"avg_time_forward_cosh": 0.0986, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_cosh": 1.9718, | |
"avg_time_forward_cosh": 1.342, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"degrees": [ | |
{ | |
"avg_time_backward_degrees": 0.4313, | |
"avg_time_forward_degrees": 0.3758, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_backward_degrees": 0.1448, | |
"avg_time_forward_degrees": 0.1297, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_degrees": 0.4842, | |
"avg_time_forward_degrees": 0.3721, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"depth_to_space": [ | |
{ | |
"avg_time_forward_depth_to_space": 0.0882, | |
"inputs": { | |
"block_size": 2, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.064 | |
}, | |
{ | |
"avg_time_forward_depth_to_space": 0.8643, | |
"inputs": { | |
"block_size": 5, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"dot": [ | |
{ | |
"avg_time_backward_dot": 5.6333, | |
"avg_time_forward_dot": 2.7367, | |
"inputs": { | |
"lhs": [ | |
1024, | |
1024 | |
], | |
"rhs": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_dot": 4.555, | |
"avg_time_forward_dot": 0.4004, | |
"inputs": { | |
"lhs": [ | |
1000, | |
10 | |
], | |
"rhs": [ | |
1000, | |
10 | |
], | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
}, | |
{ | |
"avg_time_backward_dot": 0.2897, | |
"avg_time_forward_dot": 0.1823, | |
"inputs": { | |
"lhs": [ | |
1000, | |
1 | |
], | |
"rhs": [ | |
100, | |
1000 | |
], | |
"transpose_a": true, | |
"transpose_b": true | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"elemwise_add": [ | |
{ | |
"avg_time_forward_elemwise_add": 0.3133, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.024 | |
} | |
], | |
"elemwise_div": [ | |
{ | |
"avg_time_forward_elemwise_div": 0.0578, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"elemwise_mul": [ | |
{ | |
"avg_time_forward_elemwise_mul": 0.0411, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"elemwise_sub": [ | |
{ | |
"avg_time_forward_elemwise_sub": 0.0413, | |
"inputs": { | |
"lhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
], | |
"rhs": [ | |
[ | |
1024, | |
1024 | |
], | |
[ | |
10000, | |
10 | |
], | |
[ | |
10000, | |
1 | |
] | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.012 | |
} | |
], | |
"erf": [ | |
{ | |
"avg_time_backward_erf": 1.33, | |
"avg_time_forward_erf": 1.4088, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_erf": 0.0894, | |
"avg_time_forward_erf": 0.1045, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_erf": 1.2803, | |
"avg_time_forward_erf": 1.3425, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"erfinv": [ | |
{ | |
"avg_time_backward_erfinv": 5.5242, | |
"avg_time_forward_erfinv": 4.2845, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_erfinv": 0.1148, | |
"avg_time_forward_erfinv": 0.1154, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_erfinv": 5.7577, | |
"avg_time_forward_erfinv": 4.3094, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"exp": [ | |
{ | |
"avg_time_forward_exp": 1.1224, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_exp": 0.0909, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_exp": 1.4205, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"expm1": [ | |
{ | |
"avg_time_backward_expm1": 1.3468, | |
"avg_time_forward_expm1": 1.5319, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_expm1": 0.079, | |
"avg_time_forward_expm1": 0.0913, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_expm1": 1.2242, | |
"avg_time_forward_expm1": 1.4577, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"fix": [ | |
{ | |
"avg_time_forward_fix": 1.0888, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_fix": 0.0849, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_fix": 1.0383, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"flatten": [ | |
{ | |
"avg_time_forward_flatten": 0.4907, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_flatten": 0.1151, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_flatten": 0.5251, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"flip": [ | |
{ | |
"avg_time_forward_flip": 1.5371, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_flip": 0.1048, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_flip": 1.5261, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"floor": [ | |
{ | |
"avg_time_forward_floor": 0.5271, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_floor": 0.156, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_floor": 0.49, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"ftml_update": [ | |
{ | |
"avg_time_forward_ftml_update": 3.3126, | |
"inputs": { | |
"beta1": 0.1, | |
"beta2": 0.1, | |
"clip_grad": -1.0, | |
"d": [ | |
1024, | |
1024 | |
], | |
"epsilon": 1e-08, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"t": 1, | |
"v": [ | |
1024, | |
1024 | |
], | |
"weight": [ | |
1024, | |
1024 | |
], | |
"z": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3013039.0 | |
}, | |
{ | |
"avg_time_forward_ftml_update": 0.1471, | |
"inputs": { | |
"beta1": 0.5, | |
"beta2": 0.5, | |
"clip_grad": -1.0, | |
"d": [ | |
10000, | |
1 | |
], | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"t": 1, | |
"v": [ | |
10000, | |
1 | |
], | |
"weight": [ | |
10000, | |
1 | |
], | |
"z": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3010982.0 | |
}, | |
{ | |
"avg_time_forward_ftml_update": 2.8423, | |
"inputs": { | |
"beta1": 0.9, | |
"beta2": 0.9, | |
"clip_grad": -1.0, | |
"d": [ | |
10000, | |
100 | |
], | |
"epsilon": 1e-08, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"t": 1, | |
"v": [ | |
10000, | |
100 | |
], | |
"weight": [ | |
10000, | |
100 | |
], | |
"z": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3014962.0 | |
} | |
], | |
"ftrl_update": [ | |
{ | |
"avg_time_forward_ftrl_update": 11.8802, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"n": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
], | |
"z": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3017156.25 | |
}, | |
{ | |
"avg_time_forward_ftrl_update": 0.2245, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"n": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
], | |
"z": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3015079.0 | |
}, | |
{ | |
"avg_time_forward_ftrl_update": 10.9385, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"n": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
], | |
"z": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3019079.0 | |
} | |
], | |
"gamma": [ | |
{ | |
"avg_time_backward_gamma": 9.9033, | |
"avg_time_forward_gamma": 4.2656, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_gamma": 0.1661, | |
"avg_time_forward_gamma": 0.117, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_gamma": 9.2612, | |
"avg_time_forward_gamma": 4.1659, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"gammaln": [ | |
{ | |
"avg_time_backward_gammaln": 6.0389, | |
"avg_time_forward_gammaln": 22.732, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_gammaln": 0.1272, | |
"avg_time_forward_gammaln": 0.3116, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_gammaln": 5.6179, | |
"avg_time_forward_gammaln": 22.2864, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"hard_sigmoid": [ | |
{ | |
"avg_time_backward_hard_sigmoid": 0.5842, | |
"avg_time_forward_hard_sigmoid": 0.6976, | |
"inputs": { | |
"alpha": 0.25, | |
"beta": 0.5, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_hard_sigmoid": 0.0724, | |
"avg_time_forward_hard_sigmoid": 0.0759, | |
"inputs": { | |
"alpha": 0.25, | |
"beta": 0.5, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_hard_sigmoid": 0.5495, | |
"avg_time_forward_hard_sigmoid": 0.6032, | |
"inputs": { | |
"alpha": 0.25, | |
"beta": 0.5, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"identity": [ | |
{ | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"log": [ | |
{ | |
"avg_time_backward_log": 0.6327, | |
"avg_time_forward_log": 1.2952, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_log": 0.1579, | |
"avg_time_forward_log": 0.0922, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_log": 0.541, | |
"avg_time_forward_log": 1.3412, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"log10": [ | |
{ | |
"avg_time_backward_log10": 0.776, | |
"avg_time_forward_log10": 1.3889, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_log10": 0.1432, | |
"avg_time_forward_log10": 0.0857, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_log10": 0.4572, | |
"avg_time_forward_log10": 1.3198, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"log1p": [ | |
{ | |
"avg_time_backward_log1p": 0.6042, | |
"avg_time_forward_log1p": 1.5714, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_log1p": 0.1638, | |
"avg_time_forward_log1p": 0.2938, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_log1p": 0.6855, | |
"avg_time_forward_log1p": 1.3824, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"log2": [ | |
{ | |
"avg_time_backward_log2": 0.534, | |
"avg_time_forward_log2": 1.3171, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_log2": 0.1581, | |
"avg_time_forward_log2": 0.1102, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_log2": 0.4463, | |
"avg_time_forward_log2": 1.1821, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"log_softmax": [ | |
{ | |
"avg_time_backward_log_softmax": 1.4435, | |
"avg_time_forward_log_softmax": 2.533, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
1024, | |
1024 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_log_softmax": 0.1089, | |
"avg_time_forward_log_softmax": 0.265, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
1 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_log_softmax": 1.4182, | |
"avg_time_forward_log_softmax": 2.4533, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
100 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"logical_not": [ | |
{ | |
"avg_time_forward_logical_not": 0.3543, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_logical_not": 0.1576, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_logical_not": 0.3748, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"make_loss": [ | |
{ | |
"avg_time_forward_make_loss": 0.6687, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_make_loss": 0.0771, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_make_loss": 0.5108, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"max": [ | |
{ | |
"avg_time_backward_max": 4.4873, | |
"avg_time_forward_max": 2.7886, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_max": 0.1446, | |
"avg_time_forward_max": 0.6231, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_backward_max": 9.1947, | |
"avg_time_forward_max": 7.9147, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"max_axis": [ | |
{ | |
"avg_time_forward_max_axis": 6.7093, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_forward_max_axis": 0.6153, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_forward_max_axis": 2.5613, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"mean": [ | |
{ | |
"avg_time_backward_mean": 29.7162, | |
"avg_time_forward_mean": 10.6726, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_mean": 1.2068, | |
"avg_time_forward_mean": 1.5761, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_backward_mean": 26.4074, | |
"avg_time_forward_mean": 7.6467, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"min": [ | |
{ | |
"avg_time_backward_min": 6.7506, | |
"avg_time_forward_min": 5.0006, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_min": 0.1243, | |
"avg_time_forward_min": 0.6418, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_backward_min": 5.9705, | |
"avg_time_forward_min": 4.6656, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"min_axis": [ | |
{ | |
"avg_time_forward_min_axis": 5.9799, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2.048 | |
}, | |
{ | |
"avg_time_forward_min_axis": 0.9333, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.002 | |
}, | |
{ | |
"avg_time_forward_min_axis": 5.4386, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"mp_sgd_mom_update": [ | |
{ | |
"avg_time_forward_mp_sgd_mom_update": 0.9944, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"mom": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
], | |
"weight32": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3021273.5 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_mom_update": 0.1156, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"mom": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
], | |
"weight32": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3019216.25 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_mom_update": 1.3228, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"mom": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
], | |
"weight32": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3023196.25 | |
} | |
], | |
"mp_sgd_update": [ | |
{ | |
"avg_time_forward_mp_sgd_update": 0.7942, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
], | |
"weight32": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3025390.5 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_update": 0.112, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
], | |
"weight32": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3023333.5 | |
}, | |
{ | |
"avg_time_forward_mp_sgd_update": 0.7465, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
], | |
"weight32": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3027313.5 | |
} | |
], | |
"nanprod": [ | |
{ | |
"avg_time_backward_nanprod": 9.0231, | |
"avg_time_forward_nanprod": 6.306, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_nanprod": 0.1465, | |
"avg_time_forward_nanprod": 0.6736, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_backward_nanprod": 9.7138, | |
"avg_time_forward_nanprod": 6.5393, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"nansum": [ | |
{ | |
"avg_time_backward_nansum": 9.8604, | |
"avg_time_forward_nansum": 7.3423, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_nansum": 0.5921, | |
"avg_time_forward_nansum": 1.0499, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_backward_nansum": 8.1661, | |
"avg_time_forward_nansum": 5.7515, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"negative": [ | |
{ | |
"avg_time_forward_negative": 0.3934, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_negative": 0.1047, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_negative": 0.3348, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"ones_like": [ | |
{ | |
"avg_time_forward_ones_like": 0.2395, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_ones_like": 0.0948, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_ones_like": 0.2301, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"prod": [ | |
{ | |
"avg_time_backward_prod": 10.5303, | |
"avg_time_forward_prod": 7.0034, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_prod": 0.2485, | |
"avg_time_forward_prod": 0.5975, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.002 | |
}, | |
{ | |
"avg_time_backward_prod": 4.5275, | |
"avg_time_forward_prod": 2.597, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"radians": [ | |
{ | |
"avg_time_backward_radians": 0.4904, | |
"avg_time_forward_radians": 0.4181, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_radians": 0.1346, | |
"avg_time_forward_radians": 0.141, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_radians": 0.4377, | |
"avg_time_forward_radians": 0.3979, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_exponential": [ | |
{ | |
"avg_time_forward_random_exponential": 13.923, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_exponential": 0.2173, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_random_exponential": 13.2358, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_gamma": [ | |
{ | |
"avg_time_forward_random_gamma": 47.3766, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_random_gamma": 0.5496, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_gamma": 45.2485, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_generalized_negative_binomial": [ | |
{ | |
"avg_time_forward_random_generalized_negative_binomial": 68.715, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_random_generalized_negative_binomial": 0.7592, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_generalized_negative_binomial": 65.5941, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_negative_binomial": [ | |
{ | |
"avg_time_forward_random_negative_binomial": 60.8086, | |
"inputs": { | |
"k": 1, | |
"p": 1, | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_negative_binomial": 0.6705, | |
"inputs": { | |
"k": 1, | |
"p": 1, | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_negative_binomial": 86.5725, | |
"inputs": { | |
"k": 1, | |
"p": 1, | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_normal": [ | |
{ | |
"avg_time_forward_random_normal": 24.6047, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_random_normal": 0.3177, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_normal": 23.6241, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_pdf_dirichlet": [ | |
{ | |
"avg_time_forward_random_pdf_dirichlet": 0.0724, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_exponential": [ | |
{ | |
"avg_time_forward_random_pdf_exponential": 0.0717, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_gamma": [ | |
{ | |
"avg_time_forward_random_pdf_gamma": 0.078, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_generalized_negative_binomial": [ | |
{ | |
"avg_time_forward_random_pdf_generalized_negative_binomial": 0.0794, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_negative_binomial": [ | |
{ | |
"avg_time_forward_random_pdf_negative_binomial": 0.0783, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_normal": [ | |
{ | |
"avg_time_forward_random_pdf_normal": 0.0743, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_poisson": [ | |
{ | |
"avg_time_forward_random_pdf_poisson": 0.0632, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_pdf_uniform": [ | |
{ | |
"avg_time_forward_random_pdf_uniform": 0.064, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"sample": [ | |
2 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"random_poisson": [ | |
{ | |
"avg_time_forward_random_poisson": 22.1896, | |
"inputs": { | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_random_poisson": 0.2942, | |
"inputs": { | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_random_poisson": 21.2591, | |
"inputs": { | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"random_randint": [ | |
{ | |
"avg_time_forward_random_randint": 5.1779, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_randint": 0.1123, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_random_randint": 5.0975, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"random_uniform": [ | |
{ | |
"avg_time_forward_random_uniform": 21.3069, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_random_uniform": 0.3468, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_random_uniform": 11.884, | |
"inputs": { | |
"high": 5, | |
"low": 0, | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"rcbrt": [ | |
{ | |
"avg_time_backward_rcbrt": 1.9121, | |
"avg_time_forward_rcbrt": 1.7945, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_rcbrt": 0.091, | |
"avg_time_forward_rcbrt": 0.1065, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_rcbrt": 1.8326, | |
"avg_time_forward_rcbrt": 1.711, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"reciprocal": [ | |
{ | |
"avg_time_backward_reciprocal": 0.587, | |
"avg_time_forward_reciprocal": 0.393, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_reciprocal": 0.1687, | |
"avg_time_forward_reciprocal": 0.1328, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_reciprocal": 0.703, | |
"avg_time_forward_reciprocal": 0.535, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"relu": [ | |
{ | |
"avg_time_backward_relu": 1.143, | |
"avg_time_forward_relu": 0.8833, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_relu": 0.0714, | |
"avg_time_forward_relu": 0.076, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_relu": 0.8885, | |
"avg_time_forward_relu": 0.7522, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"rint": [ | |
{ | |
"avg_time_forward_rint": 1.2, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_rint": 0.0836, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_rint": 1.0308, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"rmsprop_update": [ | |
{ | |
"avg_time_forward_rmsprop_update": 7.3989, | |
"inputs": { | |
"epsilon": 1e-08, | |
"gamma1": 0.1, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"n": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3031604.75 | |
}, | |
{ | |
"avg_time_forward_rmsprop_update": 0.3062, | |
"inputs": { | |
"epsilon": 1e-08, | |
"gamma1": 0.5, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"n": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3029547.75 | |
}, | |
{ | |
"avg_time_forward_rmsprop_update": 5.5134, | |
"inputs": { | |
"epsilon": 1e-08, | |
"gamma1": 0.9, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"n": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3033527.75 | |
} | |
], | |
"rmspropalex_update": [ | |
{ | |
"avg_time_forward_rmspropalex_update": 9.6312, | |
"inputs": { | |
"delta": [ | |
1024, | |
1024 | |
], | |
"epsilon": 1e-08, | |
"g": [ | |
1024, | |
1024 | |
], | |
"gamma1": 0.1, | |
"gamma2": 0.1, | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"n": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3035722.0 | |
}, | |
{ | |
"avg_time_forward_rmspropalex_update": 0.2181, | |
"inputs": { | |
"delta": [ | |
10000, | |
1 | |
], | |
"epsilon": 1e-08, | |
"g": [ | |
10000, | |
1 | |
], | |
"gamma1": 0.5, | |
"gamma2": 0.5, | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"n": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3033644.75 | |
}, | |
{ | |
"avg_time_forward_rmspropalex_update": 9.3882, | |
"inputs": { | |
"delta": [ | |
10000, | |
100 | |
], | |
"epsilon": 1e-08, | |
"g": [ | |
10000, | |
100 | |
], | |
"gamma1": 0.9, | |
"gamma2": 0.9, | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"n": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3037644.75 | |
} | |
], | |
"round": [ | |
{ | |
"avg_time_forward_round": 0.7695, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_round": 0.2302, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_round": 0.7344, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"rsqrt": [ | |
{ | |
"avg_time_backward_rsqrt": 1.1092, | |
"avg_time_forward_rsqrt": 0.9533, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_rsqrt": 0.0697, | |
"avg_time_forward_rsqrt": 0.0737, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_rsqrt": 1.0668, | |
"avg_time_forward_rsqrt": 0.9114, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"sample_exponential": [ | |
{ | |
"avg_time_forward_sample_exponential": 31.0119, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_exponential": 0.3513, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 80.0 | |
}, | |
{ | |
"avg_time_forward_sample_exponential": 26.6192, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_gamma": [ | |
{ | |
"avg_time_forward_sample_gamma": 144.6942, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_gamma": 1.2817, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_gamma": 118.0242, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"beta": [ | |
1.0, | |
0.7 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_generalized_negative_binomial": [ | |
{ | |
"avg_time_forward_sample_generalized_negative_binomial": 159.4642, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_generalized_negative_binomial": 2.0433, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 80.0 | |
}, | |
{ | |
"avg_time_forward_sample_generalized_negative_binomial": 262.5047, | |
"inputs": { | |
"alpha": [ | |
0.0, | |
2.5 | |
], | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_negative_binomial": [ | |
{ | |
"avg_time_forward_sample_negative_binomial": 422.5384, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_negative_binomial": 7.9008, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_negative_binomial": 426.3637, | |
"inputs": { | |
"k": [ | |
20, | |
49 | |
], | |
"p": [ | |
0.4, | |
0.77 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_normal": [ | |
{ | |
"avg_time_forward_sample_normal": 67.7962, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_normal": 0.6052, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 80.0 | |
}, | |
{ | |
"avg_time_forward_sample_normal": 47.4559, | |
"inputs": { | |
"mu": [ | |
2.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
], | |
"sigma": [ | |
1.0, | |
3.7 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_poisson": [ | |
{ | |
"avg_time_forward_sample_poisson": 160.9398, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_poisson": 1.7523, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 80.0 | |
}, | |
{ | |
"avg_time_forward_sample_poisson": 176.9847, | |
"inputs": { | |
"lam": [ | |
1.0, | |
8.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sample_uniform": [ | |
{ | |
"avg_time_forward_sample_uniform": 42.8783, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"shape": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8388.6084 | |
}, | |
{ | |
"avg_time_forward_sample_uniform": 0.4408, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_sample_uniform": 44.5541, | |
"inputs": { | |
"high": [ | |
1.0, | |
3.7 | |
], | |
"low": [ | |
0.0, | |
2.5 | |
], | |
"shape": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 8000.0 | |
} | |
], | |
"sgd_mom_update": [ | |
{ | |
"avg_time_forward_sgd_mom_update": 1.3982, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"mom": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3039839.25 | |
}, | |
{ | |
"avg_time_forward_sgd_mom_update": 0.206, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"mom": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3037782.0 | |
}, | |
{ | |
"avg_time_forward_sgd_mom_update": 0.9529, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"mom": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3041762.0 | |
} | |
], | |
"sgd_update": [ | |
{ | |
"avg_time_forward_sgd_update": 0.7064, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lazy_update": 0, | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3046053.5 | |
}, | |
{ | |
"avg_time_forward_sgd_update": 0.3077, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lazy_update": 0, | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3043996.25 | |
}, | |
{ | |
"avg_time_forward_sgd_update": 0.876, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lazy_update": 0, | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3047976.25 | |
} | |
], | |
"shuffle": [ | |
{ | |
"avg_time_forward_shuffle": 1.1555, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_shuffle": 0.9048, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_shuffle": 1.6432, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"sigmoid": [ | |
{ | |
"avg_time_backward_sigmoid": 0.6138, | |
"avg_time_forward_sigmoid": 1.1435, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_sigmoid": 0.157, | |
"avg_time_forward_sigmoid": 0.0763, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_sigmoid": 0.6816, | |
"avg_time_forward_sigmoid": 1.0812, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"sign": [ | |
{ | |
"avg_time_backward_sign": 0.5681, | |
"avg_time_forward_sign": 0.7294, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_sign": 0.116, | |
"avg_time_forward_sign": 0.1898, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_sign": 0.4017, | |
"avg_time_forward_sign": 0.6447, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"signsgd_update": [ | |
{ | |
"avg_time_forward_signsgd_update": 0.6803, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3048073.5 | |
}, | |
{ | |
"avg_time_forward_signsgd_update": 0.0795, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3048113.5 | |
}, | |
{ | |
"avg_time_forward_signsgd_update": 0.7036, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3052093.5 | |
} | |
], | |
"signum_update": [ | |
{ | |
"avg_time_forward_signum_update": 1.0772, | |
"inputs": { | |
"grad": [ | |
1024, | |
1024 | |
], | |
"lr": 0.1, | |
"mom": [ | |
1024, | |
1024 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3054287.75 | |
}, | |
{ | |
"avg_time_forward_signum_update": 0.3863, | |
"inputs": { | |
"grad": [ | |
10000, | |
1 | |
], | |
"lr": 0.5, | |
"mom": [ | |
10000, | |
1 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3052230.75 | |
}, | |
{ | |
"avg_time_forward_signum_update": 0.9827, | |
"inputs": { | |
"grad": [ | |
10000, | |
100 | |
], | |
"lr": 0.9, | |
"mom": [ | |
10000, | |
100 | |
], | |
"rescale_grad": 0.4, | |
"weight": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 3056210.75 | |
} | |
], | |
"sin": [ | |
{ | |
"avg_time_backward_sin": 1.1814, | |
"avg_time_forward_sin": 1.0167, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_sin": 0.0633, | |
"avg_time_forward_sin": 0.0731, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_sin": 1.1648, | |
"avg_time_forward_sin": 0.9739, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"sinh": [ | |
{ | |
"avg_time_backward_sinh": 1.4865, | |
"avg_time_forward_sinh": 2.0738, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_sinh": 0.0687, | |
"avg_time_forward_sinh": 0.0882, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_sinh": 1.7298, | |
"avg_time_forward_sinh": 2.1876, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"size_array": [ | |
{ | |
"avg_time_forward_size_array": 0.049, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.008 | |
}, | |
{ | |
"avg_time_forward_size_array": 0.0511, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_forward_size_array": 0.0535, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
} | |
], | |
"softmax": [ | |
{ | |
"avg_time_backward_softmax": 0.9217, | |
"avg_time_forward_softmax": 2.6614, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
1024, | |
1024 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_softmax": 0.0963, | |
"avg_time_forward_softmax": 0.1139, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
1 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_softmax": 0.8978, | |
"avg_time_forward_softmax": 2.627, | |
"inputs": { | |
"axis": -1, | |
"data": [ | |
10000, | |
100 | |
], | |
"temperature": 0.5 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"softsign": [ | |
{ | |
"avg_time_backward_softsign": 0.667, | |
"avg_time_forward_softsign": 0.5278, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_softsign": 0.1815, | |
"avg_time_forward_softsign": 0.1273, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_backward_softsign": 0.9281, | |
"avg_time_forward_softsign": 0.5495, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"sort": [ | |
{ | |
"avg_time_forward_sort": 115.1558, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 6291.4561 | |
}, | |
{ | |
"avg_time_forward_sort": 3.985, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 60.0 | |
}, | |
{ | |
"avg_time_forward_sort": 99.6573, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 6000.0 | |
} | |
], | |
"space_to_depth": [ | |
{ | |
"avg_time_forward_space_to_depth": 0.1137, | |
"inputs": { | |
"block_size": 2, | |
"data": [ | |
1, | |
4, | |
2, | |
4 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.128 | |
}, | |
{ | |
"avg_time_forward_space_to_depth": 0.9296, | |
"inputs": { | |
"block_size": 5, | |
"data": [ | |
10, | |
25, | |
10, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 1000.0 | |
} | |
], | |
"sqrt": [ | |
{ | |
"avg_time_backward_sqrt": 0.7018, | |
"avg_time_forward_sqrt": 0.9091, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_sqrt": 0.145, | |
"avg_time_forward_sqrt": 0.0844, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_sqrt": 0.4649, | |
"avg_time_forward_sqrt": 0.8684, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"square": [ | |
{ | |
"avg_time_backward_square": 0.4919, | |
"avg_time_forward_square": 0.4462, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_backward_square": 0.1438, | |
"avg_time_forward_square": 0.1341, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_square": 0.4803, | |
"avg_time_forward_square": 0.4281, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"stop_gradient": [ | |
{ | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"sum": [ | |
{ | |
"avg_time_backward_sum": 4.2895, | |
"avg_time_forward_sum": 5.9554, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_backward_sum": 0.3618, | |
"avg_time_forward_sum": 0.7086, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_backward_sum": 5.2577, | |
"avg_time_forward_sum": 6.5736, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"sum_axis": [ | |
{ | |
"avg_time_forward_sum_axis": 2.7129, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_forward_sum_axis": 0.615, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.002 | |
}, | |
{ | |
"avg_time_forward_sum_axis": 2.5259, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.2 | |
} | |
], | |
"swapaxes": [ | |
{ | |
"avg_time_forward_swapaxes": 3.1109, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
], | |
"dim1": 0, | |
"dim2": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_forward_swapaxes": 0.1117, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
], | |
"dim1": 0, | |
"dim2": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_swapaxes": 4.6593, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
], | |
"dim1": 0, | |
"dim2": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"tan": [ | |
{ | |
"avg_time_backward_tan": 0.6286, | |
"avg_time_forward_tan": 1.5975, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_tan": 0.1765, | |
"avg_time_forward_tan": 0.1013, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_tan": 0.544, | |
"avg_time_forward_tan": 1.524, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
], | |
"tanh": [ | |
{ | |
"avg_time_backward_tanh": 0.6067, | |
"avg_time_forward_tanh": 1.9354, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4194.3042 | |
}, | |
{ | |
"avg_time_backward_tanh": 0.1795, | |
"avg_time_forward_tanh": 0.1088, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_backward_tanh": 0.8189, | |
"avg_time_forward_tanh": 1.8891, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"topk": [ | |
{ | |
"avg_time_forward_topk": 82.9471, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
1024, | |
1024 | |
], | |
"k": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 4.096 | |
}, | |
{ | |
"avg_time_forward_topk": 1.0335, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
1 | |
], | |
"k": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.004 | |
}, | |
{ | |
"avg_time_forward_topk": 74.6241, | |
"inputs": { | |
"axis": 0, | |
"data": [ | |
10000, | |
100 | |
], | |
"k": 1 | |
}, | |
"max_storage_mem_alloc_cpu/0": 0.4 | |
} | |
], | |
"transpose": [ | |
{ | |
"avg_time_forward_transpose": 0.7991, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_transpose": 0.346, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 40.0 | |
}, | |
{ | |
"avg_time_forward_transpose": 0.5303, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"trunc": [ | |
{ | |
"avg_time_forward_trunc": 0.7218, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_trunc": 0.2102, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_trunc": 0.6937, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 4000.0 | |
} | |
], | |
"zeros_like": [ | |
{ | |
"avg_time_forward_zeros_like": 0.2364, | |
"inputs": { | |
"data": [ | |
1024, | |
1024 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2097.1521 | |
}, | |
{ | |
"avg_time_forward_zeros_like": 0.0514, | |
"inputs": { | |
"data": [ | |
10000, | |
1 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 20.0 | |
}, | |
{ | |
"avg_time_forward_zeros_like": 0.238, | |
"inputs": { | |
"data": [ | |
10000, | |
100 | |
] | |
}, | |
"max_storage_mem_alloc_cpu/0": 2000.0 | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment