@yujiepan-work
Last active May 15, 2023 13:40
optimum pipeline eval
  • Entries in OV_MODELS or TORCH_MODELS can be a model id on the Hugging Face Hub or a path to a local folder (a conversion sketch follows these notes).
  • For text tasks, a tokenizer patch ensures the sequence length is fixed; it can be removed if the model accepts inputs of arbitrary shape.
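A minimal sketch of producing such a local folder (not part of the original scripts; the model id and output path are placeholders, and depending on the optimum-intel version the conversion flag is export=True or, in older releases, from_transformers=True):

from optimum.intel.openvino import OVModelForImageClassification
from transformers import AutoFeatureExtractor

model_id = "skylord/swin-finetuned-food101"  # any Hub id or local transformers checkpoint
save_dir = "./swin-food101-ov"               # placeholder output folder

# Export the PyTorch checkpoint to OpenVINO IR and save it locally,
# so that save_dir can be listed in OV_MODELS instead of a Hub model id.
ov_model = OVModelForImageClassification.from_pretrained(model_id, export=True)
ov_model.save_pretrained(save_dir)

# Save the preprocessor alongside the model so the pipeline can load both from save_dir.
AutoFeatureExtractor.from_pretrained(model_id).save_pretrained(save_dir)

# ===== image classification evaluation on food101 =====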
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
from optimum.intel.openvino import OVModelForImageClassification
import pandas as pd
import numpy as np
import datasets
import evaluate # Use pip install git+https://github.com/huggingface/evaluate.git
from evaluate import evaluator
from transformers import AutoTokenizer, pipeline, AutoFeatureExtractor, AutoModelForImageClassification
from multiprocessing import Pool
from collections import defaultdict
import time
OV_MODELS = [
    # "skylord/swin-finetuned-food101",
    # "echarlaix/vit-food101-int8",
    # "helenai/swin-base-food101-jpqd-ov",
    # "vuiseng9/swin-base-food101-int8-structured43-15eph",
    "yujiepan/internal.swin-base-food101-int8-structured38.01",
]
TORCH_MODELS = [
    "skylord/swin-finetuned-food101",
]
def prepare_dataset():
    dataset = datasets.load_dataset('food101', split='validation')
    return dataset

def inference(model_id, dataset):
    print(f'Inference on {model_id}...')
    batch_size = 1  # keep 1 for now; other values may cause errors because the dataset size is not divisible by the batch size
    if model_id in TORCH_MODELS:
        model = AutoModelForImageClassification.from_pretrained(model_id)
    else:
        # reshape the OpenVINO model to a static input shape before compiling
        model = OVModelForImageClassification.from_pretrained(model_id, compile=False)
        model.reshape(batch_size, 3, 224, 224)
        model.compile()
    tokenizer = AutoFeatureExtractor.from_pretrained(model_id)
    ov_pipeline = pipeline("image-classification", model=model, feature_extractor=tokenizer, batch_size=batch_size)
    task_evaluator = evaluator("image-classification")
    metric = evaluate.load('accuracy')
    ov_eval_results = task_evaluator.compute(
        model_or_pipeline=ov_pipeline,
        data=dataset,
        metric=metric,
        label_mapping=model.config.label2id,
    )
    ov_eval_results['latency_in_ms'] = ov_eval_results['latency_in_seconds'] * 1000
    print(model_id, ov_eval_results)
    return ov_eval_results

dataset = prepare_dataset()
all_models = OV_MODELS + TORCH_MODELS
records = [inference(model_id, dataset) for model_id in all_models]
pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df.to_string())
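
# ===== question answering evaluation on SQuAD =====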
from contextlib import contextmanager
from unittest.mock import patch
import datasets
import evaluate
from evaluate import evaluator
from transformers import AutoTokenizer, pipeline, AutoModelForQuestionAnswering
import pandas as pd
from optimum.intel.openvino import OVModelForQuestionAnswering
OV_MODELS = [
    "yujiepan/test.mobilebert-uncased-squadv1",  # or some local path
]
TORCH_MODELS = [
]
@contextmanager
def patch_tokenizer(tokenizer):
    # ensure the input is padded to a fixed length
    _original_call = tokenizer.__class__.__call__
    pad_on_right = tokenizer.padding_side == "right"

    def _new_call(self, *args, **kwargs):
        kwargs['max_length'] = 384
        kwargs['padding'] = 'max_length'
        kwargs['truncation'] = "only_second" if pad_on_right else "only_first"
        kwargs['return_overflowing_tokens'] = True
        kwargs['return_offsets_mapping'] = True
        kwargs['stride'] = 128
        return _original_call(self, *args, **kwargs)

    with patch('.'.join([_original_call.__module__, _original_call.__qualname__]), _new_call):
        yield

def prepare_dataset():
    # prepare dataset & evaluation metric
    dataset = datasets.load_dataset("squad", split='validation')
    return dataset
def inference(model_id, dataset):
    print(f'Inference on {model_id}...')
    if model_id in OV_MODELS:
        model = OVModelForQuestionAnswering.from_pretrained(model_id)
    else:
        model = AutoModelForQuestionAnswering.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    ov_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
    # inference
    qa_eval = evaluator("question-answering")
    with patch_tokenizer(tokenizer):
        metric = evaluate.load('squad')
        ov_eval_results = qa_eval.compute(
            model_or_pipeline=ov_pipeline,
            data=dataset,
            metric=metric,
        )
    print(model_id, ov_eval_results)
    return ov_eval_results

dataset = prepare_dataset()
all_models = OV_MODELS + TORCH_MODELS
records = [inference(model_id, dataset) for model_id in all_models]
pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df.to_string())
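
# ===== text classification evaluation on GLUE SST-2 =====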
from contextlib import contextmanager
from unittest.mock import patch
from optimum.intel.openvino import OVModelForSequenceClassification
import pandas as pd
import datasets
import evaluate
from evaluate import evaluator
from transformers import AutoTokenizer, pipeline, AutoModelForSequenceClassification
TASK_NAME = "sst2"
OV_MODELS = [
    # "yujiepan/bert-base-uncased-sst2",
    # "yujiepan/bert-base-uncased-sst2-PTQ",
    # "yujiepan/bert-base-uncased-sst2-int8-unstructured80-17epoch",
    "yujiepan/bert-base-uncased-sst2-int8-unstructured80-30epoch",
]
TORCH_MODELS = []
@contextmanager
def patch_tokenizer(tokenizer):
    # ensure the input is padded to a fixed length
    _original_call = tokenizer.__class__.__call__

    def _new_call(self, *args, **kwargs):
        kwargs['max_length'] = 128
        kwargs['padding'] = 'max_length'
        kwargs['truncation'] = True
        return _original_call(self, *args, **kwargs)

    with patch('.'.join([_original_call.__module__, _original_call.__qualname__]), _new_call):
        yield

def prepare_dataset():
    # prepare dataset & evaluation metric
    dataset = datasets.load_dataset("glue", TASK_NAME)
    labels = dataset['train'].features['label'].names
    label2id = dict(zip(labels, range(len(labels))))
    id2label = dict(zip(range(len(labels)), labels))
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mnli-mm": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }
    input_column = task_to_keys[TASK_NAME][0]
    return dataset, label2id, input_column
def inference(model_id):
    print(f'Inference on {model_id}...')
    # prepare pipeline
    if model_id in OV_MODELS:
        optimized_model = OVModelForSequenceClassification.from_pretrained(model_id)
    else:
        optimized_model = AutoModelForSequenceClassification.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    ov_sst2_pipeline = pipeline("text-classification", model=optimized_model, tokenizer=tokenizer)
    # inference
    glue_eval = evaluator("text-classification")
    with patch_tokenizer(tokenizer):
        metric = evaluate.load('glue', TASK_NAME)
        ov_eval_results = glue_eval.compute(
            model_or_pipeline=ov_sst2_pipeline,
            data=dataset['validation'],
            metric=metric,
            input_column=input_column,
            label_mapping=label2id if optimized_model.config.label2id == label2id else None,
        )
    print(model_id, ov_eval_results)
    return ov_eval_results

dataset, label2id, input_column = prepare_dataset()
all_models = OV_MODELS + TORCH_MODELS
records = [inference(model_id) for model_id in all_models]
pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df)
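
# ===== audio classification (keyword spotting) evaluation on SUPERB ks =====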
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import datasets
import evaluate # Use pip install git+https://github.com/huggingface/evaluate.git
from evaluate import evaluator
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification, pipeline
from collections import defaultdict
import time
import numpy as np
import pandas as pd
from multiprocessing import Pool
from optimum.intel.openvino import OVModelForAudioClassification
OV_MODELS = [
    # "superb/wav2vec2-base-superb-ks",
    # "anton-l/wav2vec2-base-finetuned-ks",
    # "helenai/wav2vec2-base-superb-ks-jpqd-ov",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured83",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured79",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-inputs",
    "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-feature-extractor",
]
TORCH_MODELS = [
    # "superb/wav2vec2-base-superb-ks",
    "anton-l/wav2vec2-base-finetuned-ks",
]
def prepare_dataset():
    split_ = 'validation'
    dataset = datasets.load_dataset("superb", "ks", split=split_)
    # dataset = dataset.select(range(3000))  # uncomment to measure latency on a subset; keep it commented to get the actual accuracy
    return dataset

def inference(model_id, dataset):
    print(f'Inference on {model_id}...')
    batch_size = 1  # keep 1 for now; other values may cause errors because the dataset size is not divisible by the batch size
    if model_id in TORCH_MODELS:
        model = AutoModelForAudioClassification.from_pretrained(model_id)
    else:
        model = OVModelForAudioClassification.from_pretrained(model_id)
    tokenizer = AutoFeatureExtractor.from_pretrained(model_id)
    ov_pipeline = pipeline("audio-classification", model=model, feature_extractor=tokenizer, batch_size=batch_size)
    task_evaluator = evaluator("audio-classification")
    metric = evaluate.load('accuracy')
    ov_eval_results = task_evaluator.compute(
        model_or_pipeline=ov_pipeline,
        data=dataset,
        metric=metric,
        label_mapping=model.config.label2id,
    )
    ov_eval_results['latency_in_ms'] = ov_eval_results['latency_in_seconds'] * 1000
    print(model_id, ov_eval_results)
    return ov_eval_results

dataset = prepare_dataset()
all_models = OV_MODELS + TORCH_MODELS
records = [inference(model_id, dataset) for model_id in all_models]
pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=all_models)
print(df.to_string())