ovmodel_pipeline_food101.py
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # hide GPUs so the PyTorch baseline also runs on CPU

from optimum.intel.openvino import OVModelForImageClassification
import pandas as pd
import numpy as np
import datasets
import evaluate  # install from source: pip install git+https://github.com/huggingface/evaluate.git
from evaluate import evaluator
from transformers import pipeline, AutoFeatureExtractor, AutoModelForImageClassification
from multiprocessing import Pool
from collections import defaultdict
import time
# Food-101 checkpoints on the HF Hub. Models listed in TORCH_MODELS run through
# PyTorch; all others are loaded as OpenVINO models.
MODEL_IDS = [
    "skylord/swin-finetuned-food101",
    "echarlaix/vit-food101-int8",
    "helenai/swin-base-food101-jpqd-ov",
    # "yujiepan/internal.swin-base-food101-int8-structured40",
    "vuiseng9/swin-base-food101-int8-structured43-15eph",
    # "vuiseng9/swin-base-food101-int8-structured44.5-20eph",
    # "yujiepan/internal.swin-base-food101-int8-structured40",
    # "yujiepan/internal.swin-base-food101-int8-structured38.63",
    "yujiepan/internal.swin-base-food101-int8-structured38.01",
    # "yujiepan/internal.swin-base-food101-int8-structured30.56",
]
TORCH_MODELS = [
    "skylord/swin-finetuned-food101",
]
class Timer:
    """Collects per-stage wall-clock timings by wrapping pipeline methods."""

    def __init__(self) -> None:
        self.timer_start = defaultdict(list)
        self.timer_end = defaultdict(list)

    def report_dict(self):
        # Mean latency per stage, in milliseconds.
        result = {}
        for key in ['preprocess', 'forward', 'postprocess']:
            if len(self.timer_start[key]) > 0:
                starts = np.array(self.timer_start[key])
                ends = np.array(self.timer_end[key])
                result[key + '_latency_in_ms'] = float(np.mean(ends - starts)) * 1000
        return result

    def add_perf_counter(self, ovpipe, enable=True):
        def log_time(fn, name):
            def foo(*args, **kwargs):
                start = time.perf_counter()
                self.timer_start[name].append(start)
                result = fn(*args, **kwargs)
                end = time.perf_counter()
                self.timer_end[name].append(end)
                return result
            return foo

        # Note: this patches the pipeline *class*, not the instance, so wrappers
        # stack if add_perf_counter is called more than once in the same process.
        # Each Timer records into its own lists, so per-model numbers stay correct.
        if enable:
            ovpipe.__class__.preprocess = log_time(ovpipe.__class__.preprocess, 'preprocess')
            ovpipe.__class__.forward = log_time(ovpipe.__class__.forward, 'forward')
            ovpipe.__class__.postprocess = log_time(ovpipe.__class__.postprocess, 'postprocess')
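
# A minimal sketch of how the wrapping above behaves, using a hypothetical
# DummyPipe class (illustration only; uncomment to try it standalone):
#
#   class DummyPipe:
#       def preprocess(self, x): return x
#       def forward(self, x): return x
#       def postprocess(self, x): return x
#
#   pipe = DummyPipe()
#   timer = Timer()
#   timer.add_perf_counter(pipe)
#   pipe.preprocess(0); pipe.forward(0); pipe.postprocess(0)
#   print(timer.report_dict())
#   # -> {'preprocess_latency_in_ms': ..., 'forward_latency_in_ms': ...,
#   #     'postprocess_latency_in_ms': ...}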
dataset = datasets.load_dataset('food101', split='validation')
dataset = dataset.select(range(3000))  # a subset is enough for latency; comment this line out to measure the real accuracy
def inference(model_id):
    print(f'Inference on {model_id}...')
    evaluate.enable_progress_bar()
    batch_size = 1  # keep at 1 for now; other values can fail because the dataset size is not divisible by the batch size
    if model_id in TORCH_MODELS:
        model = AutoModelForImageClassification.from_pretrained(model_id)
    else:
        # Load the OpenVINO model, fix the input to a static shape, then compile.
        model = OVModelForImageClassification.from_pretrained(model_id, compile=False)
        model.reshape(batch_size, 3, 224, 224)
        model.compile()
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
    ov_pipeline = pipeline("image-classification", model=model, feature_extractor=feature_extractor, batch_size=batch_size)
    timer = Timer()
    timer.add_perf_counter(ov_pipeline, enable=True)
    task_evaluator = evaluator("image-classification")
    metric = evaluate.load('accuracy')
    ov_eval_results = task_evaluator.compute(
        model_or_pipeline=ov_pipeline,
        data=dataset,
        metric=metric,
        label_mapping=model.config.label2id,
    )
    ov_eval_results['latency_in_ms'] = ov_eval_results['latency_in_seconds'] * 1000
    ov_eval_results.update(timer.report_dict())
    print(model_id, ov_eval_results)
    return ov_eval_results
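
# A minimal usage sketch. Keys beyond 'accuracy' and the Timer's per-stage means
# are the evaluator's standard perf fields; the values below are placeholders,
# not real measurements:
#
#   results = inference("echarlaix/vit-food101-int8")
#   # e.g. {'accuracy': ..., 'total_time_in_seconds': ..., 'samples_per_second': ...,
#   #       'latency_in_seconds': ..., 'latency_in_ms': ...,
#   #       'preprocess_latency_in_ms': ..., 'forward_latency_in_ms': ...,
#   #       'postprocess_latency_in_ms': ...}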
# If you only need accuracy, run the models in parallel to cut total wall time:
# n_process = len(MODEL_IDS)
# pool = Pool(n_process)
# records = pool.map(inference, MODEL_IDS)
# pool.close()
# pool.join()

# For trustworthy latency numbers, run the models one at a time in a single process:
records = list(map(inference, MODEL_IDS))
pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame.from_records(records, index=MODEL_IDS)
print(df.to_string())
# df.to_csv('food101_pipeline.csv')