Skip to content

Instantly share code, notes, and snippets.

@yujiepan-work
Last active March 8, 2023 15:12
Show Gist options
  • Save yujiepan-work/f427ba92efa985c063501743ac4dd306 to your computer and use it in GitHub Desktop.
Save yujiepan-work/f427ba92efa985c063501743ac4dd306 to your computer and use it in GitHub Desktop.
from contextlib import contextmanager
from unittest.mock import patch
from optimum.intel.openvino import OVModelForQuestionAnswering
import pandas as pd
import datasets
import evaluate
from evaluate import evaluator
from transformers import AutoTokenizer, pipeline, AutoModelForQuestionAnswering
# Hugging Face Hub checkpoint IDs to benchmark on SQuAD v1.
MODEL_IDS = [
    "yujiepan/test.mobilebert-uncased-squadv1",
]
@contextmanager
def patch_tokenizer(tokenizer):
    """Temporarily force SQuAD-style fixed-length tokenization.

    While the context is active, every call to the tokenizer's class is
    patched so the input is padded to a fixed length (384) with a stride
    of 128, overflowing tokens and offset mappings are returned, and the
    context (second sequence when padding on the right) is truncated.
    The original ``__call__`` is restored on exit.
    """
    original_call = tokenizer.__class__.__call__
    # When padding is on the right, the question comes first and the
    # context is the second sequence — truncate that one.
    question_first = tokenizer.padding_side == "right"

    def fixed_length_call(self, *args, **kwargs):
        kwargs['max_length'] = 384
        kwargs['padding'] = 'max_length'
        kwargs['truncation'] = "only_second" if question_first else "only_first"
        kwargs['return_overflowing_tokens'] = True
        kwargs['return_offsets_mapping'] = True
        kwargs['stride'] = 128
        return original_call(self, *args, **kwargs)

    # Patch by dotted path so every instance of the tokenizer class is affected.
    target = f"{original_call.__module__}.{original_call.__qualname__}"
    with patch(target, fixed_length_call):
        yield
def prepare_dataset():
    """Return the SQuAD v1 validation split used for evaluation."""
    return datasets.load_dataset("squad", split='validation')
def inference(model_id, dataset, is_ovmodel=True):
    """Evaluate one question-answering model on *dataset*.

    Loads the checkpoint (OpenVINO-optimized when ``is_ovmodel`` is true,
    plain transformers otherwise), builds a QA pipeline, and runs the
    ``evaluate`` question-answering evaluator with the SQuAD metric while
    the tokenizer is patched to fixed-length inputs.  Returns the metric
    dict produced by ``compute``.
    """
    print(f'Inference on {model_id}...')
    # Choose the backend class, then build the pipeline.
    model_cls = OVModelForQuestionAnswering if is_ovmodel else AutoModelForQuestionAnswering
    model = model_cls.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
    # Run evaluation with the fixed-length tokenization context active.
    task_evaluator = evaluator("question-answering")
    with patch_tokenizer(tokenizer):
        squad_metric = evaluate.load('squad')
        eval_results = task_evaluator.compute(
            model_or_pipeline=qa_pipeline,
            data=dataset,
            metric=squad_metric,
        )
    return eval_results
# Evaluate every configured model on the SQuAD validation set and print a
# comparison table of the metric records.
dataset = prepare_dataset()
records = [inference(model_id, dataset) for model_id in MODEL_IDS]
# Use the fully-qualified option name: the bare 'max_colwidth' alias relies on
# pandas' regex option matching and has been deprecated/removed in newer
# pandas releases; 'display.max_colwidth' is the canonical key.
pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=MODEL_IDS)
print(df.to_string())
@yujiepan-work
Copy link
Author

yujiepan-work commented Mar 8, 2023

The PyTorch model can reach f1=90.9, so there might be a wrong configuration here for now.

exact_match f1 total_time_in_seconds samples_per_second latency_in_seconds
yujiepan/test.mobilebert-uncased-squadv1 83.7559 90.7483 445.71 23.72 0.042
neuralmagic/mobilebert-uncased-finetuned-squadv1 83.7559 90.7483 983.54 10.77 0.093

@yujiepan-work
Copy link
Author

exact_match f1 total_time_in_seconds samples_per_second latency_in_seconds
yujiepan/test.mobilebert-uncased-squadv1-int8-f1-88.77 80.9745 88.4462 209.995 50.3345 0.0198671

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment