Skip to content

Instantly share code, notes, and snippets.

@yujiepan-work
Last active March 22, 2023 10:05
Show Gist options
  • Save yujiepan-work/e71ceecf8c36b340ddfc0951eb68a8ee to your computer and use it in GitHub Desktop.
Save yujiepan-work/e71ceecf8c36b340ddfc0951eb68a8ee to your computer and use it in GitHub Desktop.
ovmodel_pipeline_superb_ks.py
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
from collections import defaultdict
import time
import numpy as np
import pandas as pd
from multiprocessing import Pool
import datasets
import evaluate # Use pip install git+https://github.com/huggingface/evaluate.git
from evaluate import evaluator
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification, pipeline
from optimum.intel.openvino import OVModelForAudioClassification
# Checkpoints that are loaded with the plain transformers model class; any
# model id not listed here is loaded through the OpenVINO model class.
TORCH_MODELS = [
    "superb/wav2vec2-base-superb-ks",
    "anton-l/wav2vec2-base-finetuned-ks",
]

# Every checkpoint to evaluate, in reporting order: the PyTorch checkpoints
# first, followed by the OpenVINO ones. Commented-out ids are optional extras.
MODEL_IDS = TORCH_MODELS + [
    "helenai/wav2vec2-base-superb-ks-jpqd-ov",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured83",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured79",
    # "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-inputs",
    "yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-feature-extractor",
]

# Dataset split to evaluate on; 'validation' is the alternative.
split_ = 'test'

# Only the first 3000 samples are kept, which is enough to measure latency.
# Drop the `.select(...)` call if you want the actual accuracy on the full split.
dataset = datasets.load_dataset("superb", "ks", split=split_).select(range(3000))
print(f'Evaluating ks {split_}')
class Timer:
    """Record wall-clock timings of the stages of a pipeline object.

    ``time.perf_counter`` timestamps are stored per stage name in
    ``timer_start`` and ``timer_end``; ``report_dict`` turns them into mean
    latencies in milliseconds.
    """

    # Stage methods that are instrumented and reported.
    STAGES = ('preprocess', 'forward', 'postprocess')

    def __init__(self) -> None:
        # stage name -> list of perf_counter timestamps, one entry per call
        self.timer_start = defaultdict(list)
        self.timer_end = defaultdict(list)

    def report_dict(self):
        """Return the mean latency of every stage that was called at least once.

        Returns:
            A dict mapping ``'<stage>_latency_in_ms'`` to the mean duration of
            that stage in milliseconds. Stages without samples are omitted.
        """
        result = {}
        for key in self.STAGES:
            if len(self.timer_start[key]) > 0:
                starts = np.array(self.timer_start[key])
                ends = np.array(self.timer_end[key])
                result[key + '_latency_in_ms'] = float(np.mean(ends - starts)) * 1000
        return result

    def add_perf_counter(self, ovpipe, enable=True):
        """Wrap the stage methods of ``ovpipe`` so every call is timed.

        The wrappers are installed on the *instance* only. Patching the class
        instead would stack a new wrapper on top of the old ones each time
        another object of the same class is instrumented, so earlier timers
        would keep collecting samples and their overhead would leak into
        later measurements.

        Args:
            ovpipe: Object exposing ``preprocess``, ``forward`` and
                ``postprocess`` methods.
            enable: When false, ``ovpipe`` is left untouched.
        """
        if not enable:
            return

        import functools  # local import: keeps this block self-contained

        def log_time(fn, name):
            @functools.wraps(fn)
            def timed(*args, **kwargs):
                self.timer_start[name].append(time.perf_counter())
                try:
                    return fn(*args, **kwargs)
                finally:
                    # Record the end even when the stage raises so the start
                    # and end lists always have the same length.
                    self.timer_end[name].append(time.perf_counter())
            return timed

        for stage in self.STAGES:
            # getattr yields the bound method; the wrapper is stored as an
            # instance attribute and therefore shadows the class method for
            # this object only.
            setattr(ovpipe, stage, log_time(getattr(ovpipe, stage), stage))
def inference(model_id):
    """Evaluate one checkpoint on the module-level ``dataset``.

    Args:
        model_id: Model identifier passed to ``from_pretrained``.

    Returns:
        The result dict from the audio-classification evaluator, extended
        with ``'latency_in_ms'`` and the per-stage mean latencies reported by
        ``Timer.report_dict``.
    """
    print(f'Inference on {model_id}...')

    # Ids listed in TORCH_MODELS use the transformers model class; everything
    # else is loaded through the OpenVINO model class.
    if model_id in TORCH_MODELS:
        loader = AutoModelForAudioClassification
    else:
        loader = OVModelForAudioClassification
    model = loader.from_pretrained(model_id)
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)

    # Batch size is 1 for now; other values may cause errors because the
    # dataset is non-divisible.
    classifier = pipeline(
        "audio-classification",
        model=model,
        feature_extractor=feature_extractor,
        batch_size=1,
    )

    # Instrument the pipeline stages before the evaluator drives it.
    stage_timer = Timer()
    stage_timer.add_perf_counter(classifier, enable=True)

    results = evaluator("audio-classification").compute(
        model_or_pipeline=classifier,
        data=dataset,
        metric=evaluate.load('accuracy'),
        label_mapping=model.config.label2id,
    )

    # Add a millisecond view of the evaluator's latency next to the
    # per-stage timings.
    results['latency_in_ms'] = results['latency_in_seconds'] * 1000
    results.update(stage_timer.report_dict())
    return results
# When only the accuracy matters, the models can be evaluated in parallel
# worker processes to reduce the total run time:
# n_process = len(MODEL_IDS)
# pool = Pool(n_process)
# records = pool.map(inference, MODEL_IDS)
# pool.close()
# pool.join()

# When the latency matters, evaluate the models one after another in this
# single process.
records = [inference(model_id) for model_id in MODEL_IDS]

# One row per model id; widen the column limit before printing the table.
pd.set_option('max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=MODEL_IDS)
print(df.to_string())
# df.to_csv(f'superb_ks_pipeline.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment