Last active
February 23, 2022 17:57
-
-
Save davidberard98/ac88a2dc56c7601313652f849dddf387 to your computer and use it in GitHub Desktop.
Failures from torchbench + nvfuser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model | default gpu time | default cpu time | default wall time | nvfuser gpu time | nvfuser cpu time | nvfuser wall time | |
---|---|---|---|---|---|---|---|
BERT_pytorch | 129.053 | 83.838 | 129.193 | 127.947 | 83.956 | 128.093 | |
Super_SloMo | 1723.582 | 1344.799 | 1723.634 | 4159.717 | 3643.625 | 4159.733 | |
alexnet | 6.342 | 1.849 | 6.394 | 6.364 | 1.951 | 6.424 | |
dcgan | 27.126 | 3.318 | 27.175 | 26.033 | 3.363 | 26.086 | |
demucs | 369.007 | 24.469 | 369.072 | 370.552 | 24.257 | 370.600 | |
mnasnet1_0 | 33.424 | 12.444 | 33.475 | 33.517 | 12.585 | 33.581 | |
mobilenet_v2 | 18.324 | 13.989 | 18.378 | 20.660 | 20.441 | 20.711 | |
mobilenet_v3_large | 32.164 | 17.507 | 32.219 | 32.162 | 23.343 | 32.221 | |
moco | 408.424 | 408.413 | 408.478 | 397.519 | 397.497 | 397.578 | |
pyhpc_equation_of_state | 10.318 | 0.310 | 10.393 | 23.303 | 0.382 | 23.376 | |
pyhpc_isoneutral_mixing | 58.591 | 58.588 | 58.655 | 71.534 | 71.528 | 71.600 | |
pyhpc_turbulent_kinetic_energy | 36.755 | 36.291 | 36.813 | 42.664 | 42.568 | 42.736 | |
pytorch_stargan | 49.714 | 18.341 | 49.766 | 76.261 | 8.930 | 76.316 | |
pytorch_unet | 201.971 | 20.203 | 202.021 | 194.776 | 20.728 | 194.841 | |
resnet18 | 14.048 | 4.608 | 14.102 | 14.060 | 4.610 | 14.119 | |
resnet50 | 61.194 | 13.743 | 61.249 | 85.739 | 14.144 | 85.795 | |
resnext50_32x4d | 51.154 | 13.555 | 51.210 | 51.194 | 14.025 | 51.250 | |
shufflenet_v2_x1_0 | 21.004 | 14.983 | 21.053 | 22.277 | 18.056 | 22.328 | |
squeezenet1_1 | 9.488 | 6.204 | 9.538 | 9.484 | 6.190 | 9.532 | |
timm_efficientnet | 78.041 | 25.811 | 78.103 | 81.319 | 28.818 | 81.378 | |
timm_nfnet | 665.246 | 37.111 | 665.307 | 598.934 | 32.479 | 598.989 | |
timm_regnet | 98.151 | 37.255 | 98.207 | 135.576 | 37.145 | 135.635 | |
timm_vovnet | 77.074 | 8.241 | 77.134 | 85.297 | 8.244 | 85.354 | |
vgg16 | 38.914 | 3.054 | 38.964 | 38.935 | 3.231 | 38.990 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BERT_pytorch | |
Super_SloMo | |
alexnet | |
dcgan | |
demucs | |
mnasnet1_0 | |
mobilenet_v2 | |
mobilenet_v3_large | |
moco | |
pyhpc_equation_of_state | |
pyhpc_isoneutral_mixing | |
pyhpc_turbulent_kinetic_energy | |
pytorch_stargan | |
pytorch_unet | |
resnet18 | |
resnet50 | |
resnext50_32x4d | |
shufflenet_v2_x1_0 | |
squeezenet1_1 | |
timm_efficientnet | |
timm_nfnet | |
timm_regnet | |
timm_vovnet | |
vgg16 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Repro ### | |
Usage: | |
1. Check out torchbench: https://github.com/pytorch/benchmark | |
2. Run: python3 run.py [model name] -t eval -m jit -d cuda --no-fp16 --fuser fuser2 (omit --fuser fuser2 to disable nvfuser and use nnc instead) | |
3. To list the available models, run: ls -1 torchbenchmark/models/ | |
### attention_is_all_you_need_pytorch ### | |
# Note: this failure occurs only when PYTORCH_NVFUSER_DISABLE_FALLBACK=1 is set. | |
Running eval method from attention_is_all_you_need_pytorch on cuda in jit mode. | |
Traceback (most recent call last): | |
File "/fsx/users/dberard/benchmark/run.py", line 182, in <module> | |
run_one_step(test, model_flops=model_flops) | |
File "/fsx/users/dberard/benchmark/run.py", line 57, in run_one_step | |
func() | |
File "/fsx/users/dberard/benchmark/torchbenchmark/models/attention_is_all_you_need_pytorch/__init__.py", line 127, in eval | |
self.model(*(src_seq, trg_seq)) | |
File "/fsx/users/dberard/pytorch/torch/nn/modules/module.py", line 1111, in _call_impl | |
return forward_call(*input, **kwargs) | |
RuntimeError: Illegal Cast value from DataType: float to DataType: bool | |
### timm_resnest ### | |
Running eval method from timm_resnest on cuda in jit mode. | |
Traceback (most recent call last): | |
File "/fsx/users/dberard/benchmark/run.py", line 182, in <module> | |
run_one_step(test, model_flops=model_flops) | |
File "/fsx/users/dberard/benchmark/run.py", line 57, in run_one_step | |
func() | |
File "/fsx/users/dberard/benchmark/torchbenchmark/models/timm_resnest/__init__.py", line 70, in eval | |
self._step_eval() | |
File "/fsx/users/dberard/benchmark/torchbenchmark/models/timm_resnest/__init__.py", line 55, in _step_eval | |
output = self.model(self.example_inputs) | |
File "/fsx/users/dberard/pytorch/torch/nn/modules/module.py", line 1111, in _call_impl | |
return forward_call(*input, **kwargs) | |
RuntimeError: view->kind() == prim::view_copy || view->kind() == prim::reshape_copyINTERNAL ASSERT FAILED at "../torch/csrc/jit/codegen/cuda/graph_fuser.cpp":1617, please report a bug to PyTorch. | |
### timm_vision_transformer ### | |
Running eval method from timm_vision_transformer on cuda in jit mode. | |
Traceback (most recent call last): | |
File "/fsx/users/dberard/benchmark/run.py", line 182, in <module> | |
run_one_step(test, model_flops=model_flops) | |
File "/fsx/users/dberard/benchmark/run.py", line 57, in run_one_step | |
func() | |
File "/fsx/users/dberard/benchmark/torchbenchmark/models/timm_vision_transformer/__init__.py", line 72, in eval | |
self._step_eval() | |
File "/fsx/users/dberard/benchmark/torchbenchmark/models/timm_vision_transformer/__init__.py", line 57, in _step_eval | |
output = self.model(self.example_inputs) | |
File "/fsx/users/dberard/pytorch/torch/nn/modules/module.py", line 1111, in _call_impl | |
return forward_call(*input, **kwargs) | |
RuntimeError: fusion_value_to_runtime_size.find(self_value) != fusion_value_to_runtime_size.end()INTERNAL ASSERT FAILED at "../torch/csrc/jit/codegen/cuda/graph_fuser.cpp":1403, please report a bug to PyTorch. Failed to find runtime size for fusion value: aten::add |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model | default gpu time | default cpu time | default wall time | nvfuser gpu time | nvfuser cpu time | nvfuser wall time | |
---|---|---|---|---|---|---|---|
BERT_pytorch | 404.581 | 401.321 | 404.637 | 406.181 | 403.727 | 406.246 | |
Super_SloMo | 3491.842 | 3102.845 | 3491.873 | 7711.835 | 7233.103 | 7711.824 | |
alexnet | 287.524 | 107.881 | 287.593 | 287.318 | 102.409 | 287.390 | |
dcgan | 470.805 | 470.775 | 470.893 | 483.628 | 483.594 | 483.701 | |
mnasnet1_0 | 537.232 | 537.210 | 537.282 | 546.960 | 546.938 | 547.016 | |
mobilenet_v2 | 1085.555 | 1085.534 | 1085.613 | 1085.190 | 1085.164 | 1085.241 | |
mobilenet_v3_large | 616.559 | 616.539 | 616.611 | 621.951 | 621.931 | 622.009 | |
moco | 814.264 | 738.420 | 814.314 | 815.127 | 740.158 | 815.171 | |
pytorch_stargan | 243.187 | 243.164 | 243.242 | 244.144 | 244.128 | 244.201 | |
pytorch_unet | 732.303 | 332.428 | 732.343 | 728.404 | 328.761 | 728.450 | |
resnet18 | 234.218 | 234.201 | 234.278 | 241.843 | 241.827 | 241.905 | |
resnet50 | 891.636 | 891.606 | 891.688 | 891.459 | 891.431 | 891.509 | |
resnext50_32x4d | 595.106 | 595.091 | 595.161 | 607.642 | 607.622 | 607.697 | |
shufflenet_v2_x1_0 | 597.790 | 597.770 | 597.848 | 631.933 | 631.908 | 631.985 | |
timm_efficientnet | 223.196 | 200.771 | 223.249 | 219.823 | 196.861 | 219.879 | |
timm_regnet | 376.412 | 376.404 | 376.470 | 374.565 | 374.560 | 374.625 | |
timm_resnest | 360.266 | 98.682 | 360.320 | 358.060 | 95.989 | 358.118 | |
timm_vovnet | 378.956 | 122.135 | 379.006 | 380.475 | 126.648 | 380.527 | |
vgg16 | 2460.032 | 1090.172 | 2460.081 | 2495.156 | 1102.937 | 2495.206 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment