Skip to content

Instantly share code, notes, and snippets.

@II245
Created February 24, 2021 22:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save II245/270e541547d12d18fbc3b171d422c651 to your computer and use it in GitHub Desktop.
Save II245/270e541547d12d18fbc3b171d422c651 to your computer and use it in GitHub Desktop.
[{"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972770719288, "dur": 2132, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972770725055, "dur": 447, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaDeviceSynchronize", "ph": "X", "cat": "cuda", "ts": 1614203972770727336, "dur": 6223, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972789646883, "dur": 1706, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaSetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972789649325, "dur": 1889, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventCreateWithFlags", "ph": "X", "cat": "cuda", "ts": 1614203972789651914, "dur": 2906, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventRecord", "ph": "X", "cat": "cuda", "ts": 1614203972789655301, "dur": 2888, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaSetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972789664618, "dur": 369, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972789771748, "dur": 732, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972789774993, "dur": 288, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaPointerGetAttributes", "ph": "X", "cat": "cuda", "ts": 1614203972789775988, "dur": 5917, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203972789782525, "dur": 162, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972789782963, "dur": 293, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventQuery", "ph": "X", "cat": "cuda", "ts": 1614203972789785938, "dur": 3692, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventDestroy", "ph": "X", "cat": "cuda", "ts": 1614203972789790111, "dur": 1492, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972798696949, "dur": 9214, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972798719241, "dur": 2478, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaDeviceSynchronize", "ph": "X", "cat": "cuda", "ts": 1614203972798729012, "dur": 33972, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799269632, "dur": 4002, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799291720, "dur": 1840, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799299538, "dur": 1444, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799307445, "dur": 1411, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799313323, "dur": 1373, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799315913, "dur": 1334, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799328750, "dur": 1448, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799330988, "dur": 1383, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799369322, "dur": 1624, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799373519, "dur": 1360, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaMemcpyAsync", "ph": "X", "cat": "cuda", "ts": 1614203972799376733, "dur": 161469, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972799545557, "dur": 1822, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801885818, "dur": 6200, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801894821, "dur": 1877, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801901156, "dur": 1314, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801905185, "dur": 1256, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801907148, "dur": 1256, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801916419, "dur": 1296, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801918408, "dur": 1248, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801942581, "dur": 1567, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203972801952167, "dur": 1428, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203972801960880, "dur": 2329748194, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975131712682, "dur": 1161, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975131715261, "dur": 225, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975131716630, "dur": 3224, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132680137, "dur": 1657, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132684227, "dur": 507, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132688400, "dur": 378, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132697629, "dur": 341, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132698974, "dur": 299, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132736113, "dur": 415, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132736692, "dur": 328, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132740727, "dur": 321, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132756147, "dur": 372, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132759568, "dur": 333, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132767820, "dur": 353, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132772170, "dur": 342, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132772630, "dur": 330, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132776562, "dur": 342, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132777084, "dur": 324, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132779245, "dur": 330, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975132781177, "dur": 329, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventRecord", "ph": "X", "cat": "cuda", "ts": 1614203975132847517, "dur": 10383, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133053480, "dur": 658, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133054654, "dur": 317, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133056994, "dur": 302, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975133192786, "dur": 26148, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975133219413, "dur": 278, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975133248358, "dur": 8539, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975133257129, "dur": 203, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975133261548, "dur": 9780, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975133271554, "dur": 171, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975133271872, "dur": 126, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975133277826, "dur": 9677, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975133287722, "dur": 184, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133349757, "dur": 3473, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133369769, "dur": 363, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133395665, "dur": 375, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975133398196, "dur": 12792, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975133411260, "dur": 199, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133413219, "dur": 381, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133424854, "dur": 496, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133426517, "dur": 318, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975133427297, "dur": 320, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134418047, "dur": 1825, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134422598, "dur": 435, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134434586, "dur": 408, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975134438366, "dur": 21583, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975134460511, "dur": 283, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134461546, "dur": 458, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134462500, "dur": 306, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134625794, "dur": 699, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134627973, "dur": 372, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134655551, "dur": 475, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134668629, "dur": 431, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134826358, "dur": 724, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134828786, "dur": 405, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134830221, "dur": 362, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134833317, "dur": 331, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134834245, "dur": 303, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134837445, "dur": 320, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134837938, "dur": 291, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134849521, "dur": 385, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134853751, "dur": 324, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaLaunchKernel", "ph": "X", "cat": "cuda", "ts": 1614203975134856624, "dur": 20051, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975134877012, "dur": 269, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetLastError", "ph": "X", "cat": "cuda", "ts": 1614203975134877495, "dur": 131, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134877772, "dur": 429, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134923594, "dur": 503, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975134928797, "dur": 359, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaDeviceSynchronize", "ph": "X", "cat": "cuda", "ts": 1614203975134931678, "dur": 7742, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaMemGetInfo", "ph": "X", "cat": "cuda", "ts": 1614203975135086423, "dur": 84872, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975135247428, "dur": 944, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975135251168, "dur": 373, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaDeviceSynchronize", "ph": "X", "cat": "cuda", "ts": 1614203975135253359, "dur": 6202, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975135350852, "dur": 787, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975135522905, "dur": 788, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975135659278, "dur": 676, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975135662177, "dur": 365, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaDeviceSynchronize", "ph": "X", "cat": "cuda", "ts": 1614203975135663760, "dur": 4848, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaGetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975216116722, "dur": 9427, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaSetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975216127254, "dur": 3171, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventCreateWithFlags", "ph": "X", "cat": "cuda", "ts": 1614203975216131352, "dur": 9109, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaEventRecord", "ph": "X", "cat": "cuda", "ts": 1614203975216141837, "dur": 5683, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "cudaSetDevice", "ph": "X", "cat": "cuda", "ts": 1614203975216147865, "dur": 320, "tid": "Thread 1904011072: Runtime API", "pid": "[29085] Process", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:126']}", "ph": "X", "cat": "cuda", "ts": 1614203972771108015, "dur": 81432, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch', 'op': 'zeros', 'args': [{'name': '', 'type': 'int', 'value': 1}]}", "ph": "X", "cat": "cuda", "ts": 1614203972771139318, "dur": 49315, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:126']}", "ph": "X", "cat": "cuda", "ts": 1614203972771237873, "dur": 15104, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch', 'op': 'zeros', 'args': [{'name': '', 'type': 'int', 'value': 1}]}", "ph": "X", "cat": "cuda", "ts": 1614203972771246484, "dur": 6014, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:126']}", "ph": "X", "cat": "cuda", "ts": 1614203972771275938, "dur": 11398, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch', 'op': 'zeros', 'args': [{'name': '', 'type': 'int', 'value': 1}]}", "ph": "X", "cat": "cuda", "ts": 1614203972771282759, "dur": 4148, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:128']}", "ph": "X", "cat": "cuda", "ts": 1614203972771311585, "dur": 18328113, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch', 'op': 'rand', 'args': [{'name': '', 'type': 'int', 'value': 32}, {'name': '', 'type': 'int', 'value': 64}, {'name': '', 'type': 'int', 'value': 1664}]}", "ph": "X", "cat": "cuda", "ts": 1614203972771325613, "dur": 18313221, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:129']}", "ph": "X", "cat": "cuda", "ts": 1614203972789729529, "dur": 8867101, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'pin_memory', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 64, 1664), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203972789763207, "dur": 8826663, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', 'benchmark.py:80']}", "ph": "X", "cat": "cuda", "ts": 1614203972799056497, "dur": 508319, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'to', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 64, 1664), 'dtype': 'float32'}, {'name': '', 'type': 'str', 'value': 'cuda'}, {'name': 'non_blocking', 'type': 'bool', 'value': True}]}", "ph": "X", "cat": "cuda", "ts": 1614203972799211961, "dur": 349794, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:196', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:51', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:51', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:41', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:30']}", "ph": "X", "cat": "cuda", "ts": 1614203972801215118, "dur": 84896, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'is_floating_point', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 64, 1664), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203972801283459, "dur": 14240, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:196', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:51', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:51', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:41', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:31']}", "ph": "X", "cat": "cuda", "ts": 1614203972801571904, "dur": 2330190406, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'to', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 64, 1664), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203972801861551, "dur": 2329898002, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:196', '/home/*/models.py:195', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:137', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/container.py:117', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/conv.py:258']}", "ph": "X", "cat": "cuda", "ts": 1614203975132582035, "dur": 855660, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch.nn.functional', 'op': 'conv1d', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 64, 1664), 'dtype': 'float16'}, {'name': '', 'type': 'tensor', 'shape': (256, 64, 11), 'dtype': 'float16'}, {'name': '', 'type': 'tensor', 'shape': (256,), 'dtype': 'float16'}, {'name': '', 'type': 'tuple', 'value': (2,)}, {'name': '', 'type': 'tuple', 'value': (5,)}, {'name': '', 'type': 'tuple', 'value': (1,)}, {'name': '', 'type': 'int', 'value': 1}]}", "ph": "X", "cat": "cuda", "ts": 1614203975132662079, "dur": 774379, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:196', '/home/*/models.py:195', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:137', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:425']}", "ph": "X", "cat": "cuda", "ts": 1614203975133602667, "dur": 867461, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch.nn.functional', 'op': 'relu', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 256, 832), 'dtype': 'float16'}, {'name': 'inplace', 'type': 'bool', 'value': True}]}", "ph": "X", "cat": "cuda", "ts": 1614203975133634597, "dur": 835003, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:196', '/home/*/models.py:195', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:137', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:425', '/opt/conda/lib/python3.8/site-packages/apex/pyprof/nvtx/nvmarker.py:95', '/opt/conda/lib/python3.8/site-packages/torch/nn/functional.py:1134']}", "ph": "X", "cat": "cuda", "ts": 1614203975134384591, "dur": 81712, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch', 'op': 'relu_', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 256, 832), 'dtype': 'float16'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975134408161, "dur": 57428, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:196', '/home/*/models.py:195', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:137', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/home/*/models.py:426']}", "ph": "X", "cat": "cuda", "ts": 1614203975134567355, "dur": 63189, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'torch.nn.functional', 'op': 'dropout', 'args': [{'name': '', 'type': 'tensor', 'shape': (32, 256, 832), 'dtype': 'float16'}, {'name': 'p', 'type': 'float', 'value': 0.2}, {'name': 'training', 'type': 'bool', 'value': False}]}", "ph": "X", "cat": "cuda", "ts": 1614203975134606744, "dur": 23305, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:198', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:41', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:30']}", "ph": "X", "cat": "cuda", "ts": 1614203975134734913, "dur": 18025, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'is_floating_point', 'args': [{'name': '', 'type': 'tensor', 'shape': (256, 832), 'dtype': 'float16'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975134747515, "dur": 4870, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:132', '/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:727', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:198', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:41', '/opt/conda/lib/python3.8/site-packages/apex/amp/_initialize.py:31']}", "ph": "X", "cat": "cuda", "ts": 1614203975134801573, "dur": 80106, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'to', 'args': [{'name': '', 'type': 'tensor', 'shape': (256, 832), 'dtype': 'float16'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975134814674, "dur": 66355, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:154']}", "ph": "X", "cat": "cuda", "ts": 1614203975135737654, "dur": 166630, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'mean', 'args': [{'name': '', 'type': 'tensor', 'shape': (1,), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975135754935, "dur": 148650, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:154']}", "ph": "X", "cat": "cuda", "ts": 1614203975135941405, "dur": 47718, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': '__float__', 'args': [{'name': '', 'type': 'float', 'value': 2.336169958114624}]}", "ph": "X", "cat": "cuda", "ts": 1614203975135986126, "dur": 2533, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:155']}", "ph": "X", "cat": "cuda", "ts": 1614203975136019940, "dur": 31232, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'mean', 'args': [{'name': '', 'type': 'tensor', 'shape': (1,), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136032670, "dur": 17915, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:155']}", "ph": "X", "cat": "cuda", "ts": 1614203975136077200, "dur": 15683, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': '__float__', 'args': [{'name': '', 'type': 'float', 'value': 0.00031876564025878906}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136090397, "dur": 2020, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:158']}", "ph": "X", "cat": "cuda", "ts": 1614203975136126041, "dur": 26237, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'mean', 'args': [{'name': '', 'type': 'tensor', 'shape': (1,), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136136512, "dur": 15174, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:158']}", "ph": "X", "cat": "cuda", "ts": 1614203975136177352, "dur": 13649, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': '__float__', 'args': [{'name': '', 'type': 'float', 'value': 1.29541015625}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136188704, "dur": 1882, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:159']}", "ph": "X", "cat": "cuda", "ts": 1614203975136218870, "dur": 49183, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'sum', 'args': [{'name': '', 'type': 'tensor', 'shape': (1,), 'dtype': 'float32'}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136231425, "dur": 36109, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:159']}", "ph": "X", "cat": "cuda", "ts": 1614203975136296937, "dur": 451138, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': '__rtruediv__', 'args': [{'name': '', 'type': 'float', 'value': 2.336169958114624}, {'name': '', 'type': 'float', 'value': 532.48}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136316145, "dur": 431521, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:159', '/opt/conda/lib/python3.8/site-packages/apex/pyprof/nvtx/nvmarker.py:95', '/opt/conda/lib/python3.8/site-packages/torch/tensor.py:519']}", "ph": "X", "cat": "cuda", "ts": 1614203975136586433, "dur": 62958, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': 'reciprocal', 'args': [{'name': '', 'type': 'float', 'value': 2.336169958114624}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136602903, "dur": 45904, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:159', '/opt/conda/lib/python3.8/site-packages/apex/pyprof/nvtx/nvmarker.py:95', '/opt/conda/lib/python3.8/site-packages/torch/tensor.py:519']}", "ph": "X", "cat": "cuda", "ts": 1614203975136695789, "dur": 49554, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': '__mul__', 'args': [{'name': '', 'type': 'float', 'value': 0.42805105447769165}, {'name': '', 'type': 'float', 'value': 532.48}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136712750, "dur": 32000, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'traceMarker': ['benchmark.py:159']}", "ph": "X", "cat": "cuda", "ts": 1614203975136776177, "dur": 14028, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "{'mod': 'Tensor', 'op': '__float__', 'args': [{'name': '', 'type': 'float', 'value': 227.92861938476562}]}", "ph": "X", "cat": "cuda", "ts": 1614203975136787813, "dur": 1974, "tid": "Markers and Ranges", "pid": "Markers and Ranges", "args": {}}, {"name": "Memcpy HtoD [async]", "ph": "X", "cat": "cuda", "ts": 1614203972799533231, "dur": 1459126, "tid": "MemCpy (HtoD)", "pid": "[0:1] Overview", "args": {"Size": "13.6MB"}}, {"name": "void at::native::unrolled_elementwise_kernel<at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#10}::operator()() const::{lambda(c10::Half)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#10}::operator()() const::{lambda(c10::Half)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast)", "ph": "X", "cat": "cuda", "ts": 1614203975131698684, "dur": 41984, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 13312, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void at::native::unrolled_elementwise_kernel<at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#10}::operator()() const::{lambda(c10::Half)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#10}::operator()() const::{lambda(c10::Half)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast)", "ph": "X", "cat": "cuda", "ts": 1614203975131698684, "dur": 41984, "tid": "void at::native::unrolled_elementwise_kernel<at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#10}::operator()() const::{lambda(c10::Half)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#10}::operator()() const::{lambda(c10::Half)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 13312, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void nchwToNhwcKernel<__half, __half, float, true, false>(int, int, int, int, __half const*, __half*, float, float)", "ph": "X", "cat": "cuda", "ts": 1614203975133220211, "dur": 23296, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 52, 2, 32 ]", "Block size": "[ 256, 1, 1 ]"}}, {"name": "void nchwToNhwcKernel<__half, __half, float, true, false>(int, int, int, int, __half const*, __half*, float, float)", "ph": "X", "cat": "cuda", "ts": 1614203975133220211, "dur": 23296, "tid": "void nchwToNhwcKernel<__half, __half, float, true, false>(int, int, int, int, __half const*, __half*, float, float)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 52, 2, 32 ]", "Block size": "[ 256, 1, 1 ]"}}, {"name": "void nchwToNhwcKernel<__half, __half, float, true, false>(int, int, int, int, __half const*, __half*, float, float)", "ph": "X", "cat": "cuda", "ts": 1614203975133258898, "dur": 4288, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 1, 2, 256 ]", "Block size": "[ 256, 1, 1 ]"}}, {"name": "void nchwToNhwcKernel<__half, __half, float, true, false>(int, int, int, int, __half const*, __half*, float, float)", "ph": "X", "cat": "cuda", "ts": 1614203975133258898, "dur": 4288, "tid": "void nchwToNhwcKernel<__half, __half, float, true, false>(int, int, int, int, __half const*, __half*, float, float)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 1, 2, 256 ]", "Block size": "[ 256, 1, 1 ]"}}, {"name": "cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams)", "ph": "X", "cat": "cuda", "ts": 1614203975133273458, "dur": 2272, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 7, 1, 1 ]", "Block size": "[ 128, 1, 1 ]"}}, {"name": "cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams)", "ph": "X", "cat": "cuda", "ts": 1614203975133273458, "dur": 2272, "tid": "cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 7, 1, 1 ]", "Block size": "[ 128, 1, 1 ]"}}, {"name": "volta_fp16_s884cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc2nchw_tn_v1", "ph": "X", "cat": "cuda", "ts": 1614203975133289426, "dur": 170495, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 104, 2, 1 ]", "Block size": "[ 256, 1, 1 ]"}}, {"name": "volta_fp16_s884cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc2nchw_tn_v1", "ph": "X", "cat": "cuda", "ts": 1614203975133289426, "dur": 170495, "tid": "volta_fp16_s884cudnn_fp16_256x128_ldg8_relu_f2f_exp_small_nhwc2nchw_tn_v1", "pid": "[0:1] Compute", "args": {"Grid size": "[ 104, 2, 1 ]", "Block size": "[ 256, 1, 1 ]"}}, {"name": "void at::native::unrolled_elementwise_kernel<at::native::AddFunctor<c10::Half>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor<c10::Half>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "ph": "X", "cat": "cuda", "ts": 1614203975133461265, "dur": 67584, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 26624, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void at::native::unrolled_elementwise_kernel<at::native::AddFunctor<c10::Half>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor<c10::Half>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "ph": "X", "cat": "cuda", "ts": 1614203975133461265, "dur": 67584, "tid": "void at::native::unrolled_elementwise_kernel<at::native::AddFunctor<c10::Half>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor<c10::Half>, at::detail::Array<char*, 3>, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 26624, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl<c10::Half>(at::TensorIterator&, c10::Half, c10::Half)::{lambda(c10::Half, c10::Half)#1}, at::detail::Array<char*, 3> >(int, at::native::threshold_kernel_impl<c10::Half>(at::TensorIterator&, c10::Half, c10::Half)::{lambda(c10::Half, c10::Half)#1}, at::detail::Array<char*, 3>)", "ph": "X", "cat": "cuda", "ts": 1614203975134460459, "dur": 46432, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 26624, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl<c10::Half>(at::TensorIterator&, c10::Half, c10::Half)::{lambda(c10::Half, c10::Half)#1}, at::detail::Array<char*, 3> >(int, at::native::threshold_kernel_impl<c10::Half>(at::TensorIterator&, c10::Half, c10::Half)::{lambda(c10::Half, c10::Half)#1}, at::detail::Array<char*, 3>)", "ph": "X", "cat": "cuda", "ts": 1614203975134460459, "dur": 46432, "tid": "void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl<c10::Half>(at::TensorIterator&, c10::Half, c10::Half)::{lambda(c10::Half, c10::Half)#1}, at::detail::Array<char*, 3> >(int, at::native::threshold_kernel_impl<c10::Half>(at::TensorIterator&, c10::Half, c10::Half)::{lambda(c10::Half, c10::Half)#1}, at::detail::Array<char*, 3>)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 26624, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void at::native::unrolled_elementwise_kernel<at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast)", "ph": "X", "cat": "cuda", "ts": 1614203975134877000, "dur": 9632, "tid": "Compute", "pid": "[0:1] Overview", "args": {"Grid size": "[ 832, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}, {"name": "void at::native::unrolled_elementwise_kernel<at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast)", "ph": "X", "cat": "cuda", "ts": 1614203975134877000, "dur": 9632, "tid": "void at::native::unrolled_elementwise_kernel<at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithCast<1>, at::detail::Array<char*, 2>::StoreWithCast)", "pid": "[0:1] Compute", "args": {"Grid size": "[ 832, 1, 1 ]", "Block size": "[ 64, 1, 1 ]"}}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment