Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save HDCharles/17300b0c0e2cd2e7a3e49d546dc9e19a to your computer and use it in GitHub Desktop.
Save HDCharles/17300b0c0e2cd2e7a3e49d546dc9e19a to your computer and use it in GitHub Desktop.
dynamically_quantize_per_tensor triton graph
===== __compiled_fn_21 =====
<eval_with_key>.144 class GraphModule(torch.nn.Module):
def forward(self, L_x_ : torch.Tensor):
# Traced forward for the dynamic per-tensor quantization shown in the
# "# File: ... code:" lines below. Takes the input tensor and returns the
# tuple (x_q, scale, zero_point): the int8-quantized input, the scale, and
# the int32 zero point.
#
# Reading notes:
#   * Each "# File: ..." comment quotes the original source line; the
#     statements that follow it are the ops captured for that line.
#   * A trailing "; name = None" drops the reference to an intermediate
#     right after its last use. `clamp` and `clamp_1` are never cleared
#     because they are returned.
#   * quant_min / quant_max from the source appear here as the literals
#     -128 / 127, and target_dtype as torch.int8.
#   * NOTE(review): the body lines carry no indentation in this dump, so
#     the text must be re-indented before it can be run as Python.
l_x_ = L_x_
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:29, code: min_val = torch.min(x)
# Global minimum over the whole input (no dim argument is passed).
min_1 = torch.min(l_x_)
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:30, code: max_val = torch.max(x)
# Global maximum over the whole input (no dim argument is passed).
max_1 = torch.max(l_x_)
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:32, code: min_val_neg = torch.min(min_val, torch.zeros_like(min_val))
# min_val_neg = min(min_val, 0): the lower bound of the range is never above zero.
zeros_like = torch.zeros_like(min_1)
min_2 = torch.min(min_1, zeros_like); min_1 = zeros_like = None
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:33, code: max_val_pos = torch.max(max_val, torch.zeros_like(max_val))
# max_val_pos = max(max_val, 0): the upper bound of the range is never below zero.
zeros_like_1 = torch.zeros_like(max_1)
max_2 = torch.max(max_1, zeros_like_1); max_1 = zeros_like_1 = None
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:37, code: scale = (max_val_pos.to(torch.float64) - min_val_neg) / torch.tensor([quant_max - quant_min], dtype=torch.float64).to(x.device)
# scale = (max_val_pos - min_val_neg) / 255, with max_val_pos cast to float64 first.
# 255 is quant_max - quant_min folded to a constant (127 - (-128)).
to = max_2.to(torch.float64); max_2 = None
sub = to - min_2; to = None
# The divisor is created without a device argument and then moved with .to();
# the source's `x.device` was specialized to cuda:0 at trace time.
tensor = torch.tensor([255], dtype = torch.float64)
# NOTE(review): `device` is printed unqualified in this dump; presumably
# torch.device -- confirm before running this text as code.
to_1 = tensor.to(device(type='cuda', index=0)); tensor = None
truediv = sub / to_1; sub = to_1 = None
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:38, code: scale = torch.clamp(scale, min=eps)
# Lower-bound the scale by eps so the divisions below never divide by zero
# (e.g. when min_val_neg == max_val_pos == 0). eps was baked in as
# 1.1920928955078125e-07, i.e. 2**-23 (presumably
# torch.finfo(torch.float32).eps -- confirm against the source).
# `clamp` is the final scale and is returned.
clamp = torch.clamp(truediv, min = 1.1920928955078125e-07); truediv = None
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:40, code: zero_point = quant_min - torch.round(min_val_neg / scale).to(torch.int32)
# zero_point = quant_min - round(min_val_neg / scale), computed as int32.
truediv_1 = min_2 / clamp; min_2 = None
round_1 = torch.round(truediv_1); truediv_1 = None
to_2 = round_1.to(torch.int32); round_1 = None
sub_1 = -128 - to_2; to_2 = None
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:41, code: zero_point = torch.clamp(zero_point, quant_min, quant_max)
# Keep the zero point inside [quant_min, quant_max] = [-128, 127].
# `clamp_1` is the final zero_point and is returned.
clamp_1 = torch.clamp(sub_1, -128, 127); sub_1 = None
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:45, code: x_q = torch.clamp(torch.round(x / scale) + zero_point, quant_min, quant_max).to(target_dtype)
# x_q = clamp(round(x / scale) + zero_point, -128, 127), then cast to int8.
# This is the last use of the input, so l_x_ is released here.
truediv_2 = l_x_ / clamp; l_x_ = None
round_2 = torch.round(truediv_2); truediv_2 = None
add = round_2 + clamp_1; round_2 = None
# Clamp before the int8 cast so every value fits the target range.
clamp_2 = torch.clamp(add, -128, 127); add = None
to_3 = clamp_2.to(torch.int8); clamp_2 = None
# Returns (x_q, scale, zero_point).
return (to_3, clamp, clamp_1)
[2023-05-10 00:24:14,488] torch._dynamo.output_graph.__graph: [DEBUG] TRACED GRAPH
__compiled_fn_21 <eval_with_key>.144 opcode name target args kwargs
------------- ------------ ------------------------------------------------------------- -------------------------------------- -------------------------------
placeholder l_x_ L_x_ () {}
call_function min_1 <built-in method min of type object at 0x7f193520b8a0> (l_x_,) {}
call_function max_1 <built-in method max of type object at 0x7f193520b8a0> (l_x_,) {}
call_function zeros_like <built-in method zeros_like of type object at 0x7f193520b8a0> (min_1,) {}
call_function min_2 <built-in method min of type object at 0x7f193520b8a0> (min_1, zeros_like) {}
call_function zeros_like_1 <built-in method zeros_like of type object at 0x7f193520b8a0> (max_1,) {}
call_function max_2 <built-in method max of type object at 0x7f193520b8a0> (max_1, zeros_like_1) {}
call_method to to (max_2, torch.float64) {}
call_function sub <built-in function sub> (to, min_2) {}
call_function tensor <built-in method tensor of type object at 0x7f193520b8a0> ([255],) {'dtype': torch.float64}
call_method to_1 to (tensor, device(type='cuda', index=0)) {}
call_function truediv <built-in function truediv> (sub, to_1) {}
call_function clamp <built-in method clamp of type object at 0x7f193520b8a0> (truediv,) {'min': 1.1920928955078125e-07}
call_function truediv_1 <built-in function truediv> (min_2, clamp) {}
call_function round_1 <built-in method round of type object at 0x7f193520b8a0> (truediv_1,) {}
call_method to_2 to (round_1, torch.int32) {}
call_function sub_1 <built-in function sub> (-128, to_2) {}
call_function clamp_1 <built-in method clamp of type object at 0x7f193520b8a0> (sub_1, -128, 127) {}
call_function truediv_2 <built-in function truediv> (l_x_, clamp) {}
call_function round_2 <built-in method round of type object at 0x7f193520b8a0> (truediv_2,) {}
call_function add <built-in function add> (round_2, clamp_1) {}
call_function clamp_2 <built-in method clamp of type object at 0x7f193520b8a0> (add, -128, 127) {}
call_method to_3 to (clamp_2, torch.int8) {}
output output output ((to_3, clamp, clamp_1),) {}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment