Generic script for doing inference on an OpenVINO model
#!/usr/bin/env python
# Example invocation (<model>.xml is a placeholder for your IR file):
#   python openvino_inference.py -m <model>.xml \
#       -l /opt/intel/openvino/inference_engine/lib/libcpu_extension.so
import sys
import os
from argparse import ArgumentParser
import numpy as np
import logging as log
from timeit import default_timer as timer
from openvino.inference_engine import IENetwork, IEPlugin


def load_model(model_xml):
    """
    Given the path to the model's .xml (graph) file, return the paths to
    both files of the OpenVINO IR: the .xml and the matching .bin (weights).
    """
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    return model_xml, model_bin
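
# For example, load_model("saved_model.xml") would return the pair
# ("saved_model.xml", "saved_model.bin").  The file name is illustrative.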


def build_argparser():
    import psutil
    p = psutil.Process()  # The process object for this script

    parser = ArgumentParser()
    parser.add_argument("-bz", "--batch_size",
                        help="Batch size", default=1, type=int)
    parser.add_argument("-number_iter", "--number_iter",
                        help="Number of iterations", default=10, type=int)
    parser.add_argument("--num_threads",
                        default=len(p.cpu_affinity()),  # number of cores available
                        type=int,
                        help="Number of threads to use")
    parser.add_argument("--blocktime", default=1, type=int,
                        help="KMP blocktime")
    parser.add_argument("-stats", "--stats",
                        help="Print performance count statistics",
                        action="store_true")
    parser.add_argument("-l", "--cpu_extension",
                        help="MKLDNN (CPU)-targeted custom layers. "
                             "Absolute path to a shared library with "
                             "the kernels implementation.",
                        type=str, default=None)
    parser.add_argument("-pp", "--plugin_dir",
                        help="Path to a plugin folder",
                        type=str, default=None)
    parser.add_argument("-d", "--device",
                        help="Target device for inference: CPU, GPU, "
                             "FPGA, or MYRIAD. The sample will look for "
                             "a suitable plugin for the specified device "
                             "(CPU by default).",
                        default="CPU", type=str)
    parser.add_argument("-m", "--model",
                        help="Path to the OpenVINO XML file",
                        required=True, type=str)
    return parser
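
# Example invocation showing the benchmark flags (<model>.xml is a placeholder):
#   python openvino_inference.py -m <model>.xml -bz 4 --number_iter 20 --stats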


def load_openvino_model(args):
    """
    Loads the OpenVINO model (.xml for the graph and .bin for the weights).
    """
    # Plugin initialization for the specified device; load the extensions
    # library if one was given on the command line.
    plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    if args.cpu_extension and "CPU" in args.device:
        plugin.add_cpu_extension(args.cpu_extension)

    # Read the IR. If targeting MYRIAD, the FP16 version of the model
    # must be loaded.
    model_xml, model_bin = load_model(args.model)
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in plugin.device:
        supported_layers = plugin.get_supported_layers(net)
        not_supported_layers = [layer for layer in net.layers.keys()
                                if layer not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("The following layers are not supported by the plugin "
                      "for the specified device {}:\n {}".format(
                          plugin.device, ", ".join(not_supported_layers)))
            log.error("Please specify the path to the CPU extensions library "
                      "with the -l or --cpu_extension command-line argument")
            sys.exit(1)

    log.info("The network inputs are:")
    for idx, input_layer in enumerate(net.inputs.keys()):
        log.info("{}: {}, shape = {}".format(idx, input_layer,
                                             net.inputs[input_layer].shape))
    log.info("The network outputs are:")
    for idx, output_layer in enumerate(net.outputs.keys()):
        log.info("{}: {}, shape = {}".format(idx, output_layer,
                                             net.outputs[output_layer].shape))

    net.batch_size = args.batch_size

    log.info("Loading OpenVINO model to the plugin")
    exec_net = plugin.load(network=net)
    del plugin
    return exec_net, net


def print_stats(exec_net):
    perf_counts = exec_net.requests[0].get_perf_counts()
    log.info("Performance counters:")
    log.info("{:<70} {:<15} {:<15} {:<15} {:<10}".format("name",
                                                         "layer_type",
                                                         "exec_type",
                                                         "status",
                                                         "real_time, us"))
    for layer, stats in perf_counts.items():
        log.info("{:<70} {:<15} {:<15} {:<15} {:<10}".format(layer,
                                                             stats["layer_type"],
                                                             stats["exec_type"],
                                                             stats["status"],
                                                             stats["real_time"]))


def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # CPU-specific settings for multi-threading across cores
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Get rid of the AVX, SSE warnings
    os.environ["OMP_NUM_THREADS"] = str(args.num_threads)
    os.environ["KMP_BLOCKTIME"] = str(args.blocktime)
    os.environ["KMP_AFFINITY"] = "granularity=thread,compact,1,0"

    exec_net, net = load_openvino_model(args)

    # Start synchronous inference. Feeding inputs to OpenVINO works much
    # like a TensorFlow feed_dict: a dictionary mapping each input layer
    # name to a data array. The model may have several inputs, so build
    # the dictionary in a loop, generating random data in whatever shape
    # each input expects (the shapes already reflect the batch size).
    inputs_dict = {}
    for input_layer in net.inputs.keys():
        inputs_dict[input_layer] = np.random.random(net.inputs[input_layer].shape)
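    # For comparison, the rough TensorFlow 1.x equivalent would be
    # sess.run(fetches, feed_dict=inputs_dict); sess and fetches are
    # illustrative names, not defined in this script.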
log.info("Starting inference ({} iterations)".format(args.number_iter))
infer_time = np.zeros((args.number_iter))
for idx in range(args.number_iter):
start = timer()
res = exec_net.infer(inputs=inputs_dict)
infer_time[idx] = timer() - start
log.info("Iteration #{}/{}: Finished inference in {:.6f} seconds.".format(idx+1, args.number_iter, infer_time[idx]))
average_inference = infer_time.mean()
log.info("Average running time of one batch: {:.5f} seconds".format(average_inference))
log.info("Standard deviation of one batch: {:.5f} seconds".format(infer_time.std()))
log.info("Images per second = {:.3f}".format(args.batch_size / average_inference))
"""
Statistics print out layer by layer costs in inference time
"""
if args.stats:
print_stats(exec_net)

    # Pull the predictions from the last inference request; any
    # post-processing of each batch element would go here.
    for idx, output_layer in enumerate(net.outputs.keys()):
        res_out = res[output_layer]
        for batch, prediction in enumerate(res_out):
            output_data = prediction

    del exec_net


if __name__ == '__main__':
    sys.exit(main() or 0)
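
For reference, the same (pre-2020) Inference Engine Python API also supports
asynchronous requests, which let multiple batches be in flight at once. A
minimal sketch, assuming exec_net and inputs_dict are built exactly as in
main() above:

    # One in-flight asynchronous request (request_id 0)
    request = exec_net.start_async(request_id=0, inputs=inputs_dict)
    if request.wait(-1) == 0:      # block until the request finishes; 0 means OK
        results = request.outputs  # dict: output layer name -> numpy array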