A script to apply static post-training quantization to a BART summarization model (`lidiya/bart-large-xsum-samsum`) with Hugging Face's OVQuantizer from optimum-intel, exporting the result to OpenVINO IR.
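The script targets the optimum-intel OpenVINO integration as it existed around mid-2023 and assumes the package is installed with its OpenVINO and NNCF extras (e.g. `pip install optimum[openvino,nncf]`); later releases have revised parts of this API.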
from functools import partial

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from optimum.intel import OVConfig, OVQuantizer

max_input_length = 512
max_target_length = 128

model_id = "lidiya/bart-large-xsum-samsum"
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The directory where the quantized model will be saved
save_dir = "ptq_model"


def preprocess_function(examples, tokenizer, max_input_length, max_target_length):
    inputs = [doc for doc in examples["dialogue"]]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

    # Set up the tokenizer for targets
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples["summary"], max_length=max_target_length, truncation=True
        )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs


# Define the NNCF quantization configuration describing the quantization we wish
# to apply: mixed-precision preset, 300 calibration samples for range
# initialization, symmetric activation quantization forced on the bmm_0 matmuls,
# and embeddings, residual adds, layer norms and the bmm_1 matmuls excluded
# from quantization.
bart_config = {
    "algorithm": "quantization",
    "preset": "mixed",
    "overflow_fix": "disable",
    "initializer": {
        "range": {"num_init_samples": 300, "type": "mean_min_max"},
        "batchnorm_adaptation": {"num_bn_adaptation_samples": 0},
    },
    "scope_overrides": {"activations": {"{re}.*bmm_0": {"mode": "symmetric"}}},
    "ignored_scopes": [
        "{re}.*Embedding.*",
        "{re}.*add___.*",
        "{re}.*layer_norm_.*",
        "{re}.*bmm_1",
    ],
}
quantization_config = OVConfig(compression=bart_config)

# Instantiate our OVQuantizer using the desired configuration
quantizer = OVQuantizer.from_pretrained(model)

# Create the calibration dataset used to perform static quantization
calibration_dataset = quantizer.get_calibration_dataset(
    "samsum",
    # dataset_config_name="samsum",
    preprocess_function=partial(
        preprocess_function,
        tokenizer=tokenizer,
        max_input_length=max_input_length,
        max_target_length=max_target_length,
    ),
    num_samples=300,
    dataset_split="train",
)

# Apply static quantization and export the resulting quantized model to OpenVINO IR format
quantizer.quantize(
    quantization_config=quantization_config,
    calibration_dataset=calibration_dataset,
    save_directory=save_dir,
)

# Save the tokenizer alongside the quantized model
tokenizer.save_pretrained(save_dir)
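A minimal sketch of how the quantized model could be loaded back for inference with `OVModelForSeq2SeqLM` from optimum-intel, assuming the script above has written the IR to `ptq_model`; the sample dialogue is made up for illustration:

from optimum.intel import OVModelForSeq2SeqLM
from transformers import AutoTokenizer, pipeline

save_dir = "ptq_model"

# Load the quantized OpenVINO IR model and its tokenizer
ov_model = OVModelForSeq2SeqLM.from_pretrained(save_dir)
tokenizer = AutoTokenizer.from_pretrained(save_dir)

# OVModels plug into the standard transformers pipeline API
summarizer = pipeline("summarization", model=ov_model, tokenizer=tokenizer)

dialogue = (
    "Anna: Are we still on for lunch tomorrow?\n"
    "Ben: Yes, 12:30 at the usual place.\n"
    "Anna: Perfect, see you then!"
)
print(summarizer(dialogue, max_length=128)[0]["summary_text"])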