Last active
December 8, 2023 10:43
-
-
Save fishmingyu/cc339f22869b8ad9a9ffe74aed1c8e22 to your computer and use it in GitHub Desktop.
GNN breakdown profiling [PT2.0 compiler]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ | |
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ | |
graph_0_cpp_fused_add_exp_index_select_mul_scatter_a... 29.02% 11.966ms 29.02% 11.966ms 11.966ms 1 | |
graph_0_cpp_fused_add_clone_exp_index_select_mul_rel... 28.60% 11.794ms 28.60% 11.794ms 11.794ms 1 | |
graph_0_cpp_fused_add_clone_exp_index_select_mul_new... 27.49% 11.335ms 27.49% 11.335ms 11.335ms 1 | |
aten::scatter_ 5.92% 2.442ms 5.92% 2.442ms 814.000us 3 | |
aten::scatter_reduce_ 4.85% 2.001ms 10.78% 4.443ms 1.481ms 3 | |
aten::mm 1.38% 571.000us 1.38% 571.000us 190.333us 3 | |
CompiledFunction 0.94% 389.000us 99.34% 40.957ms 40.957ms 1 | |
graph_0_cpp_fused_add_index_select_leaky_relu_mul_ne... 0.46% 189.000us 0.46% 189.000us 189.000us 1 | |
ProfilerStep* 0.42% 173.000us 100.00% 41.231ms 41.231ms 1 | |
graph_0_cpp_fused_add_index_select_leaky_relu_mul_sc... 0.37% 152.000us 0.37% 152.000us 152.000us 1 | |
graph_0_cpp_fused_add_index_select_leaky_relu_mul_sc... 0.25% 102.000us 0.25% 102.000us 102.000us 1 | |
TorchDynamo Cache Lookup 0.23% 94.000us 0.23% 94.000us 94.000us 1 | |
aten::empty_strided 0.03% 11.000us 0.03% 11.000us 0.458us 24 | |
inductor::_reinterpret_tensor 0.01% 5.000us 0.01% 5.000us 0.294us 17 | |
detach 0.01% 4.000us 0.01% 4.000us 2.000us 2 | |
aten::detach 0.01% 3.000us 0.02% 7.000us 3.500us 2 | |
aten::resolve_conj 0.00% 0.000us 0.00% 0.000us 0.000us 6 | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ | |
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ | |
graph_0_cpp_fused_add_clone_eq_index_index_select_ma... 31.86% 10.464ms 31.86% 10.464ms 10.464ms 1 | |
graph_0_cpp_fused_add_clone_index_select_mul_relu_sc... 31.03% 10.190ms 31.03% 10.190ms 10.190ms 1 | |
graph_0_cpp_fused_add_index_select_mul_scatter_add_3... 30.51% 10.021ms 30.51% 10.021ms 10.021ms 1 | |
graph_0_cpp_fused_new_zeros_ones_scatter_add_0 3.85% 1.265ms 3.85% 1.265ms 1.265ms 1 | |
aten::mm 1.33% 437.000us 1.33% 437.000us 145.667us 3 | |
CompiledFunction 0.65% 214.000us 99.29% 32.608ms 32.608ms 1 | |
ProfilerStep* 0.45% 149.000us 100.00% 32.842ms 32.842ms 1 | |
TorchDynamo Cache Lookup 0.23% 77.000us 0.23% 77.000us 77.000us 1 | |
aten::empty_strided 0.03% 11.000us 0.03% 11.000us 1.100us 10 | |
detach 0.02% 6.000us 0.02% 6.000us 3.000us 2 | |
inductor::_reinterpret_tensor 0.02% 6.000us 0.02% 6.000us 0.857us 7 | |
aten::detach 0.01% 2.000us 0.02% 8.000us 4.000us 2 | |
aten::resolve_conj 0.00% 0.000us 0.00% 0.000us 0.000us 6 | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ | |
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ | |
graph_0_cpp_fused_add_clone_index_select_mul_relu_sc... 33.41% 11.279ms 33.41% 11.279ms 11.279ms 1 | |
graph_0_cpp_fused_add_index_select_mul_new_zeros_sca... 30.71% 10.366ms 30.71% 10.366ms 10.366ms 1 | |
graph_0_cpp_fused_add_clone_index_select_mul_relu_sc... 30.47% 10.286ms 30.47% 10.286ms 10.286ms 1 | |
aten::addmm 2.23% 753.000us 2.75% 929.000us 154.833us 6 | |
CompiledFunction 0.89% 302.000us 99.14% 33.466ms 33.466ms 1 | |
graph_0_cpp_fused_clone_relu_threshold_backward_1 0.53% 178.000us 0.53% 178.000us 178.000us 1 | |
ProfilerStep* 0.52% 177.000us 100.00% 33.755ms 33.755ms 1 | |
aten::copy_ 0.49% 165.000us 0.49% 165.000us 27.500us 6 | |
TorchDynamo Cache Lookup 0.30% 101.000us 0.30% 101.000us 101.000us 1 | |
graph_0_cpp_fused_clone_relu_threshold_backward_3 0.16% 54.000us 0.16% 54.000us 54.000us 1 | |
graph_0_cpp_fused_clone_relu_threshold_backward_5 0.16% 54.000us 0.16% 54.000us 54.000us 1 | |
aten::empty_strided 0.04% 12.000us 0.04% 12.000us 0.923us 13 | |
aten::expand 0.03% 9.000us 0.03% 11.000us 1.833us 6 | |
aten::detach 0.02% 6.000us 0.03% 11.000us 2.200us 5 | |
inductor::_reinterpret_tensor 0.02% 6.000us 0.02% 6.000us 0.462us 13 | |
detach 0.01% 5.000us 0.01% 5.000us 1.000us 5 | |
aten::as_strided 0.01% 2.000us 0.01% 2.000us 0.333us 6 | |
aten::resolve_conj 0.00% 0.000us 0.00% 0.000us 0.000us 12 | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import os.path as osp | |
import warnings | |
import pytest | |
import torch | |
import torch.nn.functional as F | |
import logging | |
import torch.fx | |
import torch_geometric | |
import torch_geometric.typing | |
from torch_geometric.data import Data | |
from torch_geometric.loader import NeighborLoader | |
from torch_geometric.nn import SAGEConv | |
from torch_geometric.nn.models import GAT, GCN, GIN, PNA, EdgeCNN, GraphSAGE | |
from torch_geometric.profile import benchmark | |
from torch_geometric.testing import ( | |
disableExtensions, | |
onlyFullTest, | |
onlyLinux, | |
withCUDA, | |
withPackage, | |
) | |
from torch._inductor import config | |
# NOTE(review): presumably this is what makes Inductor's generated C++ kernels
# appear as individually named rows (``graph_0_cpp_fused_...``) in the profiler
# table -- confirm against torch._inductor.config.
config.cpp.enable_kernel_profile = True

# Directory that receives one Chrome trace file per completed profiling cycle.
RESULT_DIR = "./prof_trace"

# Profiler schedule: ignore the first SKIP_FIRST steps, then run REPEAT cycles
# of (WAIT idle steps, WARMUP warm-up steps, ACTIVE recorded steps).
SKIP_FIRST = 10
WAIT = 5
WARMUP = 5
ACTIVE = 1
REPEAT = 5


def trace_handler(p):
    """Print the per-operator summary and dump a Chrome trace for one cycle.

    Args:
        p: the active ``torch.profiler.profile`` object, passed in by the
            profiler through ``on_trace_ready``.
    """
    output = p.key_averages().table(sort_by="self_cpu_time_total", row_limit=20)
    print(output)
    # The file is named after the profiler step at which the cycle finished.
    p.export_chrome_trace(os.path.join(RESULT_DIR, f"{p.step_num}.json"))


def main():
    """Profile a compiled 3-layer GCN on a random graph.

    Command-line flags:
        --device:   device on which the tensors and the model are created
                    (default ``"cpu"``).
        --backward: additionally run a backward pass in every step so the
                    backward kernels are profiled too.
    """
    import argparse

    from torch.profiler import ProfilerActivity, profile, schedule

    parser = argparse.ArgumentParser()
    parser.add_argument("--device", type=str, default="cpu")
    parser.add_argument("--backward", action="store_true")
    args = parser.parse_args()

    # Random graph: node features and a random edge list.
    num_nodes, num_edges = 10_000, 200_000
    x = torch.randn(num_nodes, 64, device=args.device)
    edge_index = torch.randint(num_nodes, (2, num_edges), device=args.device)

    kwargs = {"add_self_loops": False}
    model = GCN(64, 64, num_layers=3, **kwargs).to(args.device)
    compiled_model = torch_geometric.compile(model, backend="inductor")

    def run_step():
        """Run one forward pass, plus a backward pass when --backward is set."""
        out = compiled_model(x, edge_index)
        if args.backward:
            # A scalar is needed to start autograd. Gradients are simply left
            # to accumulate across steps; only the timing is of interest here.
            out.sum().backward()

    # First call outside the profiler so compilation is not part of the trace.
    run_step()

    # export_chrome_trace needs the target directory to exist already.
    os.makedirs(RESULT_DIR, exist_ok=True)

    my_schedule = schedule(
        skip_first=SKIP_FIRST,
        wait=WAIT,
        warmup=WARMUP,
        active=ACTIVE,
        repeat=REPEAT,
    )
    # Run exactly as many steps as the schedule needs to finish every cycle;
    # a shorter loop would silently drop the last cycles.
    num_steps = SKIP_FIRST + REPEAT * (WAIT + WARMUP + ACTIVE)

    with profile(
        activities=[ProfilerActivity.CPU],
        schedule=my_schedule,
        on_trace_ready=trace_handler,
    ) as p:
        for _ in range(num_steps):
            run_step()
            p.step()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment