- OV_MODELS or TORCH_MODELS can be a model_id on huggingface or a local folder
- For text tasks, the tokenizer is patched so that every input is padded to a fixed sequence length. The patch can be removed if the model accepts arbitrary input shapes.
Last active
May 15, 2023 13:40
-
-
Save yujiepan-work/142dec832a7e3d68562312189a480b1f to your computer and use it in GitHub Desktop.
optimum pipeline eval
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' | |
from optimum.intel.openvino import OVModelForImageClassification | |
import pandas as pd | |
import numpy as np | |
import datasets | |
import evaluate # Use pip install git+https://github.com/huggingface/evaluate.git | |
from evaluate import evaluator | |
from transformers import AutoTokenizer, pipeline, AutoFeatureExtractor, AutoModelForImageClassification | |
from multiprocessing import Pool | |
from collections import defaultdict | |
import time | |
# Checkpoints to evaluate. Entries in OV_MODELS are loaded with
# OVModelForImageClassification; commented-out ids are kept as alternatives.
OV_MODELS = [
    # "skylord/swin-finetuned-food101",
    # "echarlaix/vit-food101-int8",
    # "helenai/swin-base-food101-jpqd-ov",
    # "vuiseng9/swin-base-food101-int8-structured43-15eph",
    "yujiepan/internal.swin-base-food101-int8-structured38.01",
]

# Entries in TORCH_MODELS are loaded with AutoModelForImageClassification.
TORCH_MODELS = ["skylord/swin-finetuned-food101"]
def prepare_dataset():
    """Load and return the ``validation`` split of the ``food101`` dataset."""
    return datasets.load_dataset('food101', split='validation')
def inference(model_id, dataset):
    """Evaluate one image-classification checkpoint and return its metrics.

    Args:
        model_id: Checkpoint identifier. Ids listed in ``TORCH_MODELS`` are
            loaded with ``AutoModelForImageClassification``; every other id is
            loaded with ``OVModelForImageClassification``.
        dataset: Dataset handed to the evaluator as ``data``.

    Returns:
        The result dict from the evaluator's ``compute`` call, extended with
        a ``latency_in_ms`` entry (``latency_in_seconds`` times 1000).
    """
    print(f'Inference on {model_id}...')
    # use 1 currently. Other values may cause errors because the dataset is non-divisible
    batch_size = 1

    if model_id not in TORCH_MODELS:
        # Load without compiling so the model can be reshaped to
        # (batch_size, 3, 224, 224) before compilation.
        model = OVModelForImageClassification.from_pretrained(model_id, compile=False)
        model.reshape(batch_size, 3, 224, 224)
        model.compile()
    else:
        model = AutoModelForImageClassification.from_pretrained(model_id)

    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
    classifier = pipeline(
        "image-classification",
        model=model,
        feature_extractor=feature_extractor,
        batch_size=batch_size,
    )

    results = evaluator("image-classification").compute(
        model_or_pipeline=classifier,
        data=dataset,
        metric=evaluate.load('accuracy'),
        label_mapping=model.config.label2id,
    )
    results['latency_in_ms'] = results['latency_in_seconds'] * 1000
    print(model_id, results)
    return results
# Evaluate every configured checkpoint on the same dataset and print one
# table row per model.
dataset = prepare_dataset()
all_models = [*OV_MODELS, *TORCH_MODELS]
records = [inference(name, dataset) for name in all_models]

pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df.to_string())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from contextlib import contextmanager | |
from unittest.mock import patch | |
import datasets | |
import evaluate | |
from evaluate import evaluator | |
from transformers import AutoTokenizer, pipeline, AutoModelForQuestionAnswering | |
import pandas as pd | |
from optimum.intel.openvino import OVModelForQuestionAnswering | |
# Checkpoints loaded with OVModelForQuestionAnswering.
OV_MODELS = [
    "yujiepan/test.mobilebert-uncased-squadv1",  # or some local path
]

# Checkpoints loaded with AutoModelForQuestionAnswering (none by default).
TORCH_MODELS = []
@contextmanager
def patch_tokenizer(tokenizer, max_length=384, stride=128):
    """Temporarily force fixed-length tokenization for ``tokenizer``'s class.

    While the context is active, every call to an instance of
    ``type(tokenizer)`` has its padding, truncation, overflow and stride
    keyword arguments overridden, so each encoded input is padded to exactly
    ``max_length`` tokens. The original ``__call__`` is restored on exit,
    including when the body raises.

    Args:
        tokenizer: Tokenizer instance whose class is patched. Its
            ``padding_side`` attribute selects the truncation strategy.
        max_length: Length every input is padded/truncated to.
        stride: Value forced for the ``stride`` keyword argument.

    Yields:
        None. The patch is in effect for the duration of the ``with`` body.
    """
    tokenizer_cls = type(tokenizer)
    original_call = tokenizer_cls.__call__
    pad_on_right = tokenizer.padding_side == "right"
    forced_kwargs = {
        'max_length': max_length,
        'padding': 'max_length',
        'truncation': "only_second" if pad_on_right else "only_first",
        'return_overflowing_tokens': True,
        'return_offsets_mapping': True,
        'stride': stride,
    }

    def _fixed_length_call(self, *args, **kwargs):
        # Caller-supplied values for these keys are deliberately overridden.
        kwargs.update(forced_kwargs)
        return original_call(self, *args, **kwargs)

    # patch.object works on the class object directly, so it does not need
    # the class to be importable by dotted path, and it leaves sibling
    # classes sharing the same base ``__call__`` untouched.
    with patch.object(tokenizer_cls, "__call__", _fixed_length_call):
        yield
def prepare_dataset():
    """Load and return the ``validation`` split of the ``squad`` dataset."""
    return datasets.load_dataset("squad", split='validation')
def inference(model_id, dataset):
    """Evaluate one question-answering checkpoint and return its metrics.

    Args:
        model_id: Checkpoint identifier. Ids listed in ``OV_MODELS`` are
            loaded with ``OVModelForQuestionAnswering``; every other id is
            loaded with ``AutoModelForQuestionAnswering``.
        dataset: Dataset handed to the evaluator as ``data``.

    Returns:
        The result dict from the evaluator's ``compute`` call.
    """
    print(f'Inference on {model_id}...')

    if model_id in OV_MODELS:
        model_cls = OVModelForQuestionAnswering
    else:
        model_cls = AutoModelForQuestionAnswering
    model = model_cls.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

    # Run the evaluation while the tokenizer patch is active.
    qa_evaluator = evaluator("question-answering")
    with patch_tokenizer(tokenizer):
        squad_metric = evaluate.load('squad')
        results = qa_evaluator.compute(
            model_or_pipeline=qa_pipeline,
            data=dataset,
            metric=squad_metric,
        )
    print(model_id, results)
    return results
# Evaluate every configured checkpoint on the same dataset and print one
# table row per model.
dataset = prepare_dataset()
all_models = [*OV_MODELS, *TORCH_MODELS]
records = [inference(name, dataset) for name in all_models]

pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df.to_string())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from contextlib import contextmanager | |
from unittest.mock import patch | |
from optimum.intel.openvino import OVModelForSequenceClassification | |
import pandas as pd | |
import datasets | |
import evaluate | |
from evaluate import evaluator | |
from transformers import AutoTokenizer, pipeline, AutoModelForSequenceClassification | |
# GLUE configuration name passed to both the dataset and the metric loader.
TASK_NAME = "sst2"

# Checkpoints loaded with OVModelForSequenceClassification; commented-out ids
# are kept as alternatives.
OV_MODELS = [
    # "yujiepan/bert-base-uncased-sst2",
    # "yujiepan/bert-base-uncased-sst2-PTQ",
    # "yujiepan/bert-base-uncased-sst2-int8-unstructured80-17epoch",
    "yujiepan/bert-base-uncased-sst2-int8-unstructured80-30epoch",
]

# Checkpoints loaded with AutoModelForSequenceClassification (none by default).
TORCH_MODELS = []
@contextmanager
def patch_tokenizer(tokenizer, max_length=128):
    """Temporarily force fixed-length tokenization for ``tokenizer``'s class.

    While the context is active, every call to an instance of
    ``type(tokenizer)`` has ``max_length``, ``padding`` and ``truncation``
    overridden, so each encoded input is padded/truncated to exactly
    ``max_length`` tokens. The original ``__call__`` is restored on exit,
    including when the body raises.

    Args:
        tokenizer: Tokenizer instance whose class is patched.
        max_length: Length every input is padded/truncated to.

    Yields:
        None. The patch is in effect for the duration of the ``with`` body.
    """
    tokenizer_cls = type(tokenizer)
    original_call = tokenizer_cls.__call__
    forced_kwargs = {
        'max_length': max_length,
        'padding': 'max_length',
        'truncation': True,
    }

    def _fixed_length_call(self, *args, **kwargs):
        # Caller-supplied values for these keys are deliberately overridden.
        kwargs.update(forced_kwargs)
        return original_call(self, *args, **kwargs)

    # patch.object works on the class object directly, so it does not need
    # the class to be importable by dotted path, and it leaves sibling
    # classes sharing the same base ``__call__`` untouched.
    with patch.object(tokenizer_cls, "__call__", _fixed_length_call):
        yield
def prepare_dataset():
    """Load the GLUE task named by ``TASK_NAME`` and derive evaluation inputs.

    Returns:
        A ``(dataset, label2id, input_column)`` tuple:
        ``dataset`` is the object returned by ``datasets.load_dataset``;
        ``label2id`` maps each label name of the ``train`` split to its index;
        ``input_column`` is the name of the task's first text column.

    Raises:
        KeyError: If ``TASK_NAME`` has no entry in the column table below.
    """
    dataset = datasets.load_dataset("glue", TASK_NAME)
    labels = dataset['train'].features['label'].names
    label2id = {name: index for index, name in enumerate(labels)}
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mnli-mm": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }
    # NOTE: only the first text column is returned; for sentence-pair tasks
    # the second column is not passed on to the caller.
    input_column = task_to_keys[TASK_NAME][0]
    return dataset, label2id, input_column
def inference(model_id):
    """Evaluate one text-classification checkpoint on the validation split.

    Reads the module-level ``dataset`` and ``input_column`` produced by
    ``prepare_dataset()``.

    Args:
        model_id: Checkpoint identifier. Ids listed in ``OV_MODELS`` are
            loaded with ``OVModelForSequenceClassification``; every other id
            is loaded with ``AutoModelForSequenceClassification``.

    Returns:
        The result dict from the evaluator's ``compute`` call.
    """
    print(f'Inference on {model_id}...')

    # prepare pipeline
    if model_id in OV_MODELS:
        optimized_model = OVModelForSequenceClassification.from_pretrained(model_id)
    else:
        optimized_model = AutoModelForSequenceClassification.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    ov_sst2_pipeline = pipeline("text-classification", model=optimized_model, tokenizer=tokenizer)

    # inference
    glue_eval = evaluator("text-classification")
    with patch_tokenizer(tokenizer):
        metric = evaluate.load('glue', TASK_NAME)
        ov_eval_results = glue_eval.compute(
            model_or_pipeline=ov_sst2_pipeline,
            data=dataset['validation'],
            metric=metric,
            input_column=input_column,
            # The pipeline emits the model's own label strings, so they are
            # mapped with the model's own table. Falling back to None when the
            # names differ from the dataset's (e.g. "LABEL_0") would hand
            # string labels to the metric.
            label_mapping=optimized_model.config.label2id,
        )
    print(model_id, ov_eval_results)
    return ov_eval_results
# Evaluate every configured checkpoint and print one table row per model.
# ``dataset``, ``label2id`` and ``input_column`` stay module-level because
# ``inference`` reads them as globals.
dataset, label2id, input_column = prepare_dataset()
all_models = OV_MODELS + TORCH_MODELS
records = [inference(model_id) for model_id in all_models]
pd.set_option('max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
# to_string() renders the whole frame instead of the default repr.
print(df.to_string())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' | |
import datasets | |
import evaluate # Use pip install git+https://github.com/huggingface/evaluate.git | |
from evaluate import evaluator | |
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification, pipeline | |
from collections import defaultdict | |
import time | |
import numpy as np | |
import pandas as pd | |
from multiprocessing import Pool | |
from optimum.intel.openvino import OVModelForAudioClassification | |
# Checkpoints loaded with OVModelForAudioClassification; commented-out ids
# are kept as alternatives.
OV_MODELS = [
    # "superb/wav2vec2-base-superb-ks",
    # "anton-l/wav2vec2-base-finetuned-ks",
    # "helenai/wav2vec2-base-superb-ks-jpqd-ov",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured83",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured79",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-inputs",
    "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-feature-extractor",
]

# Checkpoints loaded with AutoModelForAudioClassification.
TORCH_MODELS = [
    # "superb/wav2vec2-base-superb-ks",
    "anton-l/wav2vec2-base-finetuned-ks",
]
def prepare_dataset():
    """Load and return the ``validation`` split of ``superb`` / ``ks``."""
    # To measure latency only, a subset is enough, e.g.
    # ``dataset.select(range(3000))``; keep the full split for actual accuracy.
    return datasets.load_dataset("superb", "ks", split='validation')
def inference(model_id, dataset):
    """Evaluate one audio-classification checkpoint and return its metrics.

    Args:
        model_id: Checkpoint identifier. Ids listed in ``TORCH_MODELS`` are
            loaded with ``AutoModelForAudioClassification``; every other id is
            loaded with ``OVModelForAudioClassification``.
        dataset: Dataset handed to the evaluator as ``data``.

    Returns:
        The result dict from the evaluator's ``compute`` call, extended with
        a ``latency_in_ms`` entry (``latency_in_seconds`` times 1000).
    """
    print(f'Inference on {model_id}...')
    # use 1 currently. Other values may cause errors because the dataset is non-divisible
    batch_size = 1

    if model_id in TORCH_MODELS:
        model_cls = AutoModelForAudioClassification
    else:
        model_cls = OVModelForAudioClassification
    model = model_cls.from_pretrained(model_id)

    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
    classifier = pipeline(
        "audio-classification",
        model=model,
        feature_extractor=feature_extractor,
        batch_size=batch_size,
    )

    results = evaluator("audio-classification").compute(
        model_or_pipeline=classifier,
        data=dataset,
        metric=evaluate.load('accuracy'),
        label_mapping=model.config.label2id,
    )
    results['latency_in_ms'] = results['latency_in_seconds'] * 1000
    print(model_id, results)
    return results
# Evaluate every configured checkpoint on the same dataset and print one
# table row per model.
dataset = prepare_dataset()
all_models = [*OV_MODELS, *TORCH_MODELS]
records = [inference(name, dataset) for name in all_models]

pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df.to_string())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment