Last active
March 22, 2023 07:35
-
-
Save yujiepan-work/fd0019b4d5a8ea637eb1650315b5e61f to your computer and use it in GitHub Desktop.
openvino_benchmark_app.bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "xml path: /home/yujiepan/.cache/huggingface/hub/models--yujiepan--internal.wav2vec2-base-superb-ks-int8-structured64-quantize-feature-extractor/snapshots/be3366138b0e5e96b0c10067fe1f94d2e45dca90/openvino_model.xml\n",
      "[Step 1/11] Parsing and validating input arguments\n",
      "[ INFO ] Parsing input parameters\n",
      "[Step 2/11] Loading OpenVINO Runtime\n",
      "[ INFO ] OpenVINO:\n",
      "[ INFO ] Build ................................. 2023.0.0-9771-d7f47aa1228\n",
      "[ INFO ] \n",
      "[ INFO ] Device info:\n",
      "[ INFO ] CPU\n",
      "[ INFO ] Build ................................. 2023.0.0-9771-d7f47aa1228\n",
      "[ INFO ] \n",
      "[ INFO ] \n",
      "[Step 3/11] Setting device configuration\n",
      "[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to LATENCY.\n",
      "[Step 4/11] Reading model files\n",
      "[ INFO ] Loading model files\n",
      "[ INFO ] Read model took 549.28 ms\n",
      "[ INFO ] Original model I/O parameters:\n",
      "[ INFO ] Model inputs:\n",
      "[ INFO ]     input_values (node: input_values) : f32 / [...] / [?,?]\n",
      "[ INFO ] Model outputs:\n",
      "[ INFO ]     logits (node: logits) : f32 / [...] / [?,12]\n",
      "[Step 5/11] Resizing model to match image sizes and given batch\n",
      "[ INFO ] Model batch size: 1\n",
      "[ INFO ] Reshaping model: 'input_values': [1,16000]\n",
      "[ INFO ] Reshape model took 171.31 ms\n",
      "[Step 6/11] Configuring input of the model\n",
      "[ INFO ] Model inputs:\n",
      "[ INFO ]     input_values (node: input_values) : f32 / [...] / [1,16000]\n",
      "[ INFO ] Model outputs:\n",
      "[ INFO ]     logits (node: logits) : f32 / [...] / [1,12]\n",
      "[Step 7/11] Loading the model to the device\n",
      "[ INFO ] Compile model took 2565.29 ms\n",
      "[Step 8/11] Querying optimal runtime parameters\n",
      "[ INFO ] Model:\n",
      "[ INFO ]   NETWORK_NAME: torch_jit\n",
      "[ INFO ]   OPTIMAL_NUMBER_OF_INFER_REQUESTS: 2\n",
      "[ INFO ]   NUM_STREAMS: 2\n",
      "[ INFO ]   AFFINITY: Affinity.CORE\n",
      "[ INFO ]   INFERENCE_NUM_THREADS: 32\n",
      "[ INFO ]   PERF_COUNT: False\n",
      "[ INFO ]   INFERENCE_PRECISION_HINT: <Type: 'float32'>\n",
      "[ INFO ]   PERFORMANCE_HINT: PerformanceMode.LATENCY\n",
      "[ INFO ]   PERFORMANCE_HINT_NUM_REQUESTS: 0\n",
      "[ INFO ]   EXECUTION_DEVICES: ['CPU']\n",
      "[Step 9/11] Creating infer requests and preparing input tensors\n",
      "[ WARNING ] No input files were given for input 'input_values'!. This input will be filled with random values!\n",
      "[ INFO ] Fill input 'input_values' with random values \n",
      "[Step 10/11] Measuring performance (Start inference synchronously, limits: 3000 iterations)\n",
      "[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).\n",
      "[ INFO ] First inference took 22.68 ms\n",
      "[Step 11/11] Dumping statistics report\n",
      "[ INFO ] Execution Devices:['CPU']\n",
      "[ INFO ] Count:            3000 iterations\n",
      "[ INFO ] Duration:         31214.86 ms\n",
      "[ INFO ] Latency:\n",
      "[ INFO ]    Median:        9.83 ms\n",
      "[ INFO ]    Average:       10.29 ms\n",
      "[ INFO ]    Min:           9.08 ms\n",
      "[ INFO ]    Max:           26.44 ms\n",
      "[ INFO ] Throughput:   101.77 FPS\n"
     ]
    }
   ],
   "source": [
    "# First, download the ov files.\n",
    "# ! pip install --upgrade huggingface_hub\n",
    "from huggingface_hub import hf_hub_download\n",
    "\n",
    "model_id = \"yujiepan/internal.wav2vec2-base-superb-ks-int8-structured64-quantize-feature-extractor\"\n",
    "ov_ir_xml_path = hf_hub_download(repo_id=model_id, filename=\"openvino_model.xml\")\n",
    "ov_ir_bin_path = hf_hub_download(repo_id=model_id, filename=\"openvino_model.bin\")\n",
    "print('xml path:', ov_ir_xml_path)\n",
    "\n",
    "# sync mode. focusing on latency. run 3000 iterations.\n",
    "! benchmark_app -m $ov_ir_xml_path -api sync -niter 3000 -shape \"[1,16000]\"\n",
    "\n",
    "# if multiple inputs:\n",
    "# ! benchmark_app -m <path/to/openvino_model.xml> -api sync -niter 3000 -shape \"input_ids[1,64],attention_mask[1,64],token_type_ids[1,64]\"\n",
    "\n",
    "# async mode. focusing on throughput. run 20 seconds.\n",
    "# ! benchmark_app -m $ov_ir_xml_path -api async -t 20 -shape \"[1,16000]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "c30c450e430028f81574c777bb25e3ac5a02c782df8066b610b99127d866d557"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment