# justinchuby/quantize.py

## quantize.py
import torch


class M(torch.nn.Module):
    """Minimal model used as the quantization example: one linear layer."""

    def __init__(self):
        """Create the single ``Linear`` layer mapping 5 features to 10."""
        super().__init__()
        self.linear = torch.nn.Linear(in_features=5, out_features=10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return the result."""
        projected = self.linear(x)
        return projected


# End-to-end example script: capture M, quantize it with the PT2E flow and
# XNNPACKQuantizer, re-export the quantized model, and convert it to ONNX.
# Everything below runs at import time, in this exact order.

# A single random (1, 5) tensor, matching the 5 input features of M.linear.
# The same tuple is reused for capture, calibration, and both exports.
example_inputs = (torch.randn(1, 5),)
# Instantiate the model and switch it to eval mode before capturing it.
m = M().eval()

# Step 1. program capture
# NOTE(review): capture_pre_autograd_graph lives in the private torch._export
# namespace -- confirm it is available in the installed torch version.

from torch._export import capture_pre_autograd_graph

pt2e_torch_model = capture_pre_autograd_graph(m, example_inputs)

# Step 2. quantization
from torch.ao.quantization.quantize_pt2e import (
    prepare_pt2e,
    convert_pt2e,
)

from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

# Use one symmetric quantization config as the global setting of the quantizer.
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
# Prepare the captured model for calibration with that quantizer.
pt2e_torch_model = prepare_pt2e(pt2e_torch_model, quantizer)

# Calibration: run the prepared model on sample input data so that its
# internal observers are populated with correct values. The output is discarded.
pt2e_torch_model(*example_inputs)

# Convert the prepared (calibrated) model to a quantized model.
# NOTE(review): fold_quantize=False presumably keeps the weight quantize ops in
# the graph instead of folding them into constants -- confirm against the
# convert_pt2e documentation.
pt2e_torch_model = convert_pt2e(pt2e_torch_model, fold_quantize=False)


# Export the quantized model again with the same example inputs.
program = torch.export.export(pt2e_torch_model, example_inputs)
# The exported program is a model expressed with ATen ops; print it for inspection.
print(program)

# Convert to ONNX
import torch_onnx

# NOTE(review): patch_torch presumably replaces torch.onnx.export with the
# torch_onnx exporter, so it has to run before the export call below;
# error_report=True appears to request an error report -- confirm both.
torch_onnx.patch_torch(error_report=True)

# Export the program to "quantized.textproto".
# NOTE(review): assumes the patched exporter accepts an exported program and
# picks the output format from the file extension -- TODO confirm.
onnx_program = torch.onnx.export(program, example_inputs, "quantized.textproto")
	import torch


	class M(torch.nn.Module):
		"""Minimal model used as the quantization example: one linear layer.

		The nesting of the class body and of both method bodies is restored
		here; with every line at the same indent the class did not parse.
		"""

		def __init__(self):
			"""Create the single ``Linear`` layer mapping 5 features to 10."""
			super().__init__()
			self.linear = torch.nn.Linear(5, 10)

		def forward(self, x):
			"""Apply the linear layer to ``x`` and return the result."""
			return self.linear(x)


	# NOTE(review): this tab-indented section repeats the script above
	# statement for statement. The leading tab is an unexpected indent at
	# module level -- likely a paste/extraction artifact; confirm whether the
	# whole section should be removed.

	# A single random (1, 5) tensor, reused for capture, calibration, and both exports.
	example_inputs = (torch.randn(1, 5),)
	# Instantiate the model and switch it to eval mode before capturing it.
	m = M().eval()

	# Step 1. program capture
	# NOTE(review): capture_pre_autograd_graph lives in the private torch._export
	# namespace -- confirm it is available in the installed torch version.

	from torch._export import capture_pre_autograd_graph

	pt2e_torch_model = capture_pre_autograd_graph(m, example_inputs)

	# Step 2. quantization
	from torch.ao.quantization.quantize_pt2e import (
	prepare_pt2e,
	convert_pt2e,
	)

	from torch.ao.quantization.quantizer.xnnpack_quantizer import (
	XNNPACKQuantizer,
	get_symmetric_quantization_config,
	)

	# Use one symmetric quantization config as the global setting of the quantizer.
	quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
	# Prepare the captured model for calibration with that quantizer.
	pt2e_torch_model = prepare_pt2e(pt2e_torch_model, quantizer)

	# Calibration: run the prepared model on sample input data so that its
	# internal observers are populated with correct values. The output is discarded.
	pt2e_torch_model(*example_inputs)

	# Convert the prepared (calibrated) model to a quantized model.
	# NOTE(review): fold_quantize=False presumably keeps the weight quantize ops
	# in the graph instead of folding them into constants -- confirm.
	pt2e_torch_model = convert_pt2e(pt2e_torch_model, fold_quantize=False)


	# Export the quantized model again with the same example inputs.
	program = torch.export.export(pt2e_torch_model, example_inputs)
	# The exported program is a model expressed with ATen ops; print it for inspection.
	print(program)

	# Convert to ONNX
	import torch_onnx

	# NOTE(review): patch_torch presumably replaces torch.onnx.export with the
	# torch_onnx exporter, so it has to run before the export call below -- confirm.
	torch_onnx.patch_torch(error_report=True)

	# Export the program to "quantized.textproto".
	# NOTE(review): assumes the patched exporter picks the output format from
	# the file extension -- TODO confirm.
	onnx_program = torch.onnx.export(program, example_inputs, "quantized.textproto")