Created
September 12, 2019 23:58
-
-
Save Huyuwei/24d4a5465452803bf38f420d71fb9049 to your computer and use it in GitHub Desktop.
TVM performance analysis on Mask R-CNN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Node Name Ops Time(us) Time(%) Shape Inputs Outputs | |
--------- --- -------- ------- ----- ------ ------- | |
fused_nn_conv2d_add_nn_relu_15 fused_nn_conv2d_add_nn_relu_15 137011.0 5.903 (1, 64, 300, 478) 3 1 | |
fused_nn_max_pool2d fused_nn_max_pool2d 75.205 0.003 (1, 64, 150, 239) 1 1 | |
fused_nn_conv2d_add_nn_relu_14 fused_nn_conv2d_add_nn_relu_14 928.457 0.04 (1, 64, 150, 239) 3 1 | |
fused_nn_conv2d_add_nn_relu_12 fused_nn_conv2d_add_nn_relu_12 9777.41 0.421 (1, 64, 150, 239) 3 1 | |
fused_nn_conv2d_add fused_nn_conv2d_add 1272.02 0.055 (1, 256, 150, 239) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_3 fused_nn_conv2d_add_add_nn_relu_3 1337.09 0.058 (1, 256, 150, 239) 4 1 | |
fused_nn_conv2d_add_nn_relu_13 fused_nn_conv2d_add_nn_relu_13 1254.6 0.054 (1, 64, 150, 239) 3 1 | |
fused_nn_conv2d_add_nn_relu_121 fused_nn_conv2d_add_nn_relu_12 9815.02 0.423 (1, 64, 150, 239) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_31 fused_nn_conv2d_add_add_nn_relu_3 1337.46 0.058 (1, 256, 150, 239) 4 1 | |
fused_nn_conv2d_add_nn_relu_131 fused_nn_conv2d_add_nn_relu_13 1254.59 0.054 (1, 64, 150, 239) 3 1 | |
fused_nn_conv2d_add_nn_relu_122 fused_nn_conv2d_add_nn_relu_12 9785.64 0.422 (1, 64, 150, 239) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_32 fused_nn_conv2d_add_add_nn_relu_3 1337.67 0.058 (1, 256, 150, 239) 4 1 | |
fused_nn_conv2d_add_nn_relu_11 fused_nn_conv2d_add_nn_relu_11 2407.61 0.104 (1, 128, 150, 239) 3 1 | |
fused_nn_conv2d_add_nn_relu_10 fused_nn_conv2d_add_nn_relu_10 531.159 0.023 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_1 fused_nn_conv2d_add_1 1220.39 0.053 (1, 512, 75, 120) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_2 fused_nn_conv2d_add_add_nn_relu_2 353.468 0.015 (1, 512, 75, 120) 4 1 | |
fused_nn_conv2d_add_nn_relu_9 fused_nn_conv2d_add_nn_relu_9 179.663 0.008 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_nn_relu_8 fused_nn_conv2d_add_nn_relu_8 537.014 0.023 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_21 fused_nn_conv2d_add_add_nn_relu_2 350.268 0.015 (1, 512, 75, 120) 4 1 | |
fused_nn_conv2d_add_nn_relu_91 fused_nn_conv2d_add_nn_relu_9 176.562 0.008 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_nn_relu_81 fused_nn_conv2d_add_nn_relu_8 535.204 0.023 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_22 fused_nn_conv2d_add_add_nn_relu_2 350.311 0.015 (1, 512, 75, 120) 4 1 | |
fused_nn_conv2d_add_nn_relu_92 fused_nn_conv2d_add_nn_relu_9 176.955 0.008 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_nn_relu_82 fused_nn_conv2d_add_nn_relu_8 545.402 0.023 (1, 128, 75, 120) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_23 fused_nn_conv2d_add_add_nn_relu_2 351.869 0.015 (1, 512, 75, 120) 4 1 | |
fused_nn_conv2d_add_nn_relu_7 fused_nn_conv2d_add_nn_relu_7 341.612 0.015 (1, 256, 75, 120) 3 1 | |
fused_nn_conv2d_add_nn_relu_6 fused_nn_conv2d_add_nn_relu_6 666.205 0.029 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_2 fused_nn_conv2d_add_2 660.601 0.028 (1, 1024, 38, 60) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_1 fused_nn_conv2d_add_add_nn_relu_1 617.353 0.027 (1, 1024, 38, 60) 4 1 | |
fused_nn_conv2d_add_nn_relu_5 fused_nn_conv2d_add_nn_relu_5 261.651 0.011 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_nn_relu_4 fused_nn_conv2d_add_nn_relu_4 591.863 0.026 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_11 fused_nn_conv2d_add_add_nn_relu_1 612.62 0.026 (1, 1024, 38, 60) 4 1 | |
fused_nn_conv2d_add_nn_relu_51 fused_nn_conv2d_add_nn_relu_5 260.98 0.011 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_nn_relu_41 fused_nn_conv2d_add_nn_relu_4 590.348 0.025 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_12 fused_nn_conv2d_add_add_nn_relu_1 612.327 0.026 (1, 1024, 38, 60) 4 1 | |
fused_nn_conv2d_add_nn_relu_52 fused_nn_conv2d_add_nn_relu_5 260.762 0.011 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_nn_relu_42 fused_nn_conv2d_add_nn_relu_4 590.476 0.025 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_13 fused_nn_conv2d_add_add_nn_relu_1 611.572 0.026 (1, 1024, 38, 60) 4 1 | |
fused_nn_conv2d_add_nn_relu_53 fused_nn_conv2d_add_nn_relu_5 261.461 0.011 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_nn_relu_43 fused_nn_conv2d_add_nn_relu_4 591.482 0.025 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_14 fused_nn_conv2d_add_add_nn_relu_1 614.287 0.026 (1, 1024, 38, 60) 4 1 | |
fused_nn_conv2d_add_nn_relu_54 fused_nn_conv2d_add_nn_relu_5 260.617 0.011 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_nn_relu_44 fused_nn_conv2d_add_nn_relu_4 591.476 0.025 (1, 256, 38, 60) 3 1 | |
fused_nn_conv2d_add_add_nn_relu_15 fused_nn_conv2d_add_add_nn_relu_1 616.343 0.027 (1, 1024, 38, 60) 4 1 | |
fused_nn_conv2d_add_nn_relu_16 fused_nn_conv2d_add_nn_relu_16 9460.91 0.408 (1, 1024, 38, 60) 3 1 | |
fused_nn_conv2d_add_3 fused_nn_conv2d_add_3 146.811 0.006 (1, 60, 38, 60) 3 1 | |
fused_nn_conv2d_add_4 fused_nn_conv2d_add_4 134.608 0.006 (1, 15, 38, 60) 3 1 | |
fused_transpose_reshape_split_multiply_add_multiply_slice_like_reshape_split_sub_3153942748240996957_ fused_transpose_reshape_split_multiply_add_multiply_slice_like_reshape_split_sub_3153942748240996957_ 22.196 0.001 (1, 34200, 5) 5 1 | |
fused_vision_get_valid_counts_1 fused_vision_get_valid_counts_1 114.756 0.005 (1,) 1 2 | |
fused_vision_get_valid_counts_1 fused_vision_get_valid_counts_1 114.756 0.005 (1, 34200, 5) 1 2 | |
fused_vision_non_max_suppression_1 fused_vision_non_max_suppression_1 456505.0 19.668 (1, 34200, 5) 2 1 | |
fused_strided_slice_strided_slice fused_strided_slice_strided_slice 15.419 0.001 (1, 1000, 4) 1 1 | |
fused_reshape_concatenate fused_reshape_concatenate 11.663 0.001 (1000, 5) 2 1 | |
fused_vision_roi_align fused_vision_roi_align 5330.2 0.23 (1000, 1024, 14, 14) 2 1 | |
fused_nn_conv2d_add_nn_relu_3 fused_nn_conv2d_add_nn_relu_3 24656.0 1.062 (1000, 512, 14, 14) 3 1 | |
fused_nn_conv2d_add_nn_relu_2 fused_nn_conv2d_add_nn_relu_2 82699.5 3.563 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_5 fused_nn_conv2d_add_5 58327.7 2.513 (1000, 2048, 7, 7) 3 1 | |
fused_nn_conv2d_add_add_nn_relu fused_nn_conv2d_add_add_nn_relu 12224.7 0.527 (1000, 2048, 7, 7) 4 1 | |
fused_nn_conv2d_add_nn_relu_1 fused_nn_conv2d_add_nn_relu_1 12161.8 0.524 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_nn_relu fused_nn_conv2d_add_nn_relu 38819.6 1.673 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_add_nn_relu1 fused_nn_conv2d_add_add_nn_relu 12241.5 0.527 (1000, 2048, 7, 7) 4 1 | |
fused_nn_conv2d_add_nn_relu_111 fused_nn_conv2d_add_nn_relu_1 12175.0 0.525 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_nn_relu1 fused_nn_conv2d_add_nn_relu 38808.4 1.672 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_add_nn_relu2 fused_nn_conv2d_add_add_nn_relu 12238.0 0.527 (1000, 2048, 7, 7) 4 1 | |
fused_contrib_adaptive_avg_pool2d fused_contrib_adaptive_avg_pool2d 1639.59 0.071 (1000, 2048, 1, 1) 1 1 | |
fused_nn_batch_flatten fused_nn_batch_flatten 29.722 0.001 (1000, 2048) 1 1 | |
fused_nn_dense_add fused_nn_dense_add 416.264 0.018 (1000, 81) 3 1 | |
fused_reshape_2 fused_reshape_2 11.647 0.001 (1, 1000, 81) 1 1 | |
fused_nn_softmax fused_nn_softmax 86.201 0.004 (1, 1000, 81) 1 1 | |
fused_nn_dense_add_1 fused_nn_dense_add_1 1254.04 0.054 (1000, 320) 3 1 | |
fused_strided_slice_greater_cast_strided_slice_zeros_like_add_add_add_add_add_ad_8793637701345642327_ fused_strided_slice_greater_cast_strided_slice_zeros_like_add_add_add_add_add_ad_8793637701345642327_ 22.413 0.001 (80, 1000, 6) 3 1 | |
fused_vision_get_valid_counts fused_vision_get_valid_counts 39.77 0.002 (80,) 1 2 | |
fused_vision_get_valid_counts fused_vision_get_valid_counts 39.77 0.002 (80, 1000, 6) 1 2 | |
fused_vision_non_max_suppression fused_vision_non_max_suppression 5208.63 0.224 (80, 1000, 6) 2 1 | |
fused_reshape_stack fused_reshape_stack 14.225 0.001 (1, 80000, 6) 1 1 | |
fused_strided_slice_1 fused_strided_slice_1 13.062 0.001 (1, 80000, 1) 1 1 | |
fused_squeeze_6 fused_squeeze_6 11.518 0.0 (1, 80000) 1 1 | |
fused_argsort fused_argsort 911272.0 39.262 (1, 80000) 1 1 | |
fused_strided_slice_reshape_stack fused_strided_slice_reshape_stack 18.744 0.001 (2, 1000) 2 1 | |
fused_strided_slice_gather_nd_reshape fused_strided_slice_gather_nd_reshape 12.428 0.001 (1, 1000, 1) 2 1 | |
fused_gather_nd_reshape fused_gather_nd_reshape 11.566 0.0 (1, 1000, 1) 2 1 | |
fused_strided_slice_gather_nd_reshape_1 fused_strided_slice_gather_nd_reshape_1 11.798 0.001 (1, 1000, 4) 2 1 | |
fused_reshape_concatenate_1 fused_reshape_concatenate_1 11.544 0.0 (1000, 5) 2 1 | |
fused_vision_roi_align1 fused_vision_roi_align 5036.18 0.217 (1000, 1024, 14, 14) 2 1 | |
fused_nn_conv2d_add_nn_relu_31 fused_nn_conv2d_add_nn_relu_3 24640.7 1.062 (1000, 512, 14, 14) 3 1 | |
fused_nn_conv2d_add_nn_relu_21 fused_nn_conv2d_add_nn_relu_2 82765.1 3.566 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_51 fused_nn_conv2d_add_5 58306.4 2.512 (1000, 2048, 7, 7) 3 1 | |
fused_nn_conv2d_add_add_nn_relu3 fused_nn_conv2d_add_add_nn_relu 12220.0 0.526 (1000, 2048, 7, 7) 4 1 | |
fused_nn_conv2d_add_nn_relu_123 fused_nn_conv2d_add_nn_relu_1 12142.6 0.523 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_nn_relu2 fused_nn_conv2d_add_nn_relu 38772.8 1.671 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_add_nn_relu4 fused_nn_conv2d_add_add_nn_relu 12232.8 0.527 (1000, 2048, 7, 7) 4 1 | |
fused_nn_conv2d_add_nn_relu_132 fused_nn_conv2d_add_nn_relu_1 12148.2 0.523 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_nn_relu3 fused_nn_conv2d_add_nn_relu 38767.3 1.67 (1000, 512, 7, 7) 3 1 | |
fused_nn_conv2d_add_add_nn_relu5 fused_nn_conv2d_add_add_nn_relu 12229.1 0.527 (1000, 2048, 7, 7) 4 1 | |
fused_nn_conv2d_transpose_add_nn_relu fused_nn_conv2d_transpose_add_nn_relu 122149.0 5.263 (1000, 256, 14, 14) 3 1 | |
fused_nn_conv2d_add_6 fused_nn_conv2d_add_6 874.677 0.038 (1000, 80, 14, 14) 3 1 | |
fused_reshape_reshape_clip_stack_gather_nd_reshape_sigmoid fused_reshape_reshape_clip_stack_gather_nd_reshape_sigmoid 16.307 0.001 (1, 1000, 14, 14) 4 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, how did you get this kind of benchmark?