Created
May 3, 2021 18:10
-
-
Save msaroufim/62512953e87e29e234cc0c28dc55f69f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ubuntu@ip-172-31-1-60:~/model_analyzer$ docker run -it --rm --gpus all \
> -v /var/run/docker.sock:/var/run/docker.sock \
> -v $HOME/model_analyzer/examples/quick-start:/quick_start_repository \
> --net=host --name model-analyzer \
> model-analyzer /bin/bash
root@ip-172-31-1-60:/opt/triton-model-analyzer# mkdir analysis_results
root@ip-172-31-1-60:/opt/triton-model-analyzer# model-analyzer -m /quick_start_repository -n add_sub --triton-launch-mode=local --export-path=analysis_results
2021-05-03 18:08:04.55 INFO[entrypoint.py:210] Triton Model Analyzer started: config={'model_repository': '/quick_start_repository', 'model_names': [{'model_name': 'add_sub', 'objectives': {'perf_throughput': 10}, 'parameters': {'batch_sizes': [1], 'concurrency': []}}], 'objectives': {'perf_throughput': 10}, 'constraints': {}, 'batch_sizes': [1], 'concurrency': [], 'perf_analyzer_timeout': 600, 'perf_analyzer_cpu_util': 80.0, 'run_config_search_max_concurrency': 1024, 'run_config_search_max_instance_count': 5, 'run_config_search_disable': False, 'run_config_search_max_preferred_batch_size': 16, 'export': True, 'cpu_only': False, 'export_path': 'analysis_results', 'summarize': True, 'num_configs_per_model': 3, 'num_top_model_configs': 0, 'filename_model_inference': 'metrics-model-inference.csv', 'filename_model_gpu': 'metrics-model-gpu.csv', 'filename_server_only': 'metrics-server-only.csv', 'max_retries': 1000, 'duration_seconds': 5, 'monitoring_interval': 0.01, 'client_protocol': 'grpc', 'perf_analyzer_path': 'perf_analyzer', 'perf_measurement_window': 5000, 'perf_output': False, 'perf_analyzer_flags': {}, 'triton_launch_mode': 'local', 'triton_docker_image': 'nvcr.io/nvidia/tritonserver:21.04-py3', 'triton_http_endpoint': 'localhost:8000', 'triton_grpc_endpoint': 'localhost:8001', 'triton_metrics_url': 'http://localhost:8002/metrics', 'triton_server_path': 'tritonserver', 'triton_output_path': None, 'triton_server_flags': {}, 'log_level': 'INFO', 'gpus': ['all'], 'output_model_repository_path': './output_model_repository', 'override_output_model_repository': False, 'config_file': None, 'inference_output_fields': ['model_name', 'batch_size', 'concurrency', 'model_config_path', 'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints', 'perf_throughput', 'perf_latency', 'cpu_used_ram'], 'gpu_output_fields': ['model_name', 'gpu_id', 'batch_size', 'concurrency', 'model_config_path', 'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints', 
'gpu_used_memory', 'gpu_utilization', 'gpu_power_usage'], 'server_output_fields': ['model_name', 'gpu_id', 'gpu_used_memory', 'gpu_utilization', 'gpu_power_usage'], 'plots': [{'name': 'throughput_v_latency', 'title': 'Throughput vs. Latency', 'x_axis': 'perf_latency', 'y_axis': 'perf_throughput', 'monotonic': True}, {'name': 'gpu_mem_v_latency', 'title': 'GPU Memory vs. Latency', 'x_axis': 'perf_latency', 'y_axis': 'gpu_used_memory', 'monotonic': False}]} | |
2021-05-03 18:08:04.58 INFO[entrypoint.py:96] Starting a local Triton Server... | |
2021-05-03 18:08:04.58 INFO[analyzer_state_manager.py:124] No checkpoint file found, starting a fresh run. | |
2021-05-03 18:08:12.832 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:25.337 INFO[server_local.py:81] Triton Server stopped. | |
2021-05-03 18:08:25.337 INFO[analyzer.py:94] Profiling server only metrics... | |
2021-05-03 18:08:25.342 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:36.463 INFO[gpu_monitor.py:73] Using GPU(s) with UUID(s) = { GPU-6addc5c7-faee-102a-eaa9-6357eaac328a,GPU-60f3c226-bb6c-9da4-2e1c-5a9a456df7a4,GPU-06f06c69-abb3-9ae1-9ddc-6285b957f5cc,GPU-22251b86-b7e5-52d3-a631-32a3830efd48,GPU-d7890092-0ab5-8a60-f42a-50db1efff45f,GPU-20c502a2-b8a5-2f88-03ae-469d3ef78627,GPU-513b02ff-c719-1475-aabc-798ba461a33d,GPU-6efc3ca7-ebab-9a22-281c-079c2cb1ddf7 } for the analysis. | |
2021-05-03 18:08:37.835 INFO[server_local.py:81] Triton Server stopped. | |
2021-05-03 18:08:37.835 INFO[run_search.py:143] Will sweep both the concurrency and model config parameters... | |
2021-05-03 18:08:37.836 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and dynamic batching is disabled. | |
2021-05-03 18:08:37.843 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.291 INFO[client.py:82] Model add_sub_i0 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i0', no version is available | |
2021-05-03 18:08:40.292 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and dynamic batching is disabled. | |
2021-05-03 18:08:40.299 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.320 INFO[client.py:82] Model add_sub_i1 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i1', no version is available | |
2021-05-03 18:08:40.321 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and dynamic batching is disabled. | |
2021-05-03 18:08:40.327 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.351 INFO[client.py:82] Model add_sub_i2 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i2', no version is available | |
2021-05-03 18:08:40.352 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and dynamic batching is disabled. | |
2021-05-03 18:08:40.359 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.390 INFO[client.py:82] Model add_sub_i3 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i3', no version is available | |
2021-05-03 18:08:40.391 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and dynamic batching is disabled. | |
2021-05-03 18:08:40.398 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.463 INFO[client.py:82] Model add_sub_i4 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i4', no version is available | |
2021-05-03 18:08:40.464 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and dynamic batching is enabled. | |
2021-05-03 18:08:40.471 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.563 INFO[client.py:82] Model add_sub_i5 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i5', no version is available | |
2021-05-03 18:08:40.563 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and dynamic batching is enabled. | |
2021-05-03 18:08:40.570 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.674 INFO[client.py:82] Model add_sub_i6 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i6', no version is available | |
2021-05-03 18:08:40.674 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and dynamic batching is enabled. | |
2021-05-03 18:08:40.682 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.821 INFO[client.py:82] Model add_sub_i7 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i7', no version is available | |
2021-05-03 18:08:40.822 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and dynamic batching is enabled. | |
2021-05-03 18:08:40.829 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:40.987 INFO[client.py:82] Model add_sub_i8 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i8', no version is available | |
2021-05-03 18:08:40.988 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and dynamic batching is enabled. | |
2021-05-03 18:08:40.995 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:41.164 INFO[client.py:82] Model add_sub_i9 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i9', no version is available | |
2021-05-03 18:08:41.165 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 1. | |
2021-05-03 18:08:41.174 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:41.330 INFO[client.py:82] Model add_sub_i10 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i10', no version is available | |
2021-05-03 18:08:41.331 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 1. | |
2021-05-03 18:08:41.338 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:41.480 INFO[client.py:82] Model add_sub_i11 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i11', no version is available | |
2021-05-03 18:08:41.480 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 1. | |
2021-05-03 18:08:41.488 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:41.627 INFO[client.py:82] Model add_sub_i12 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i12', no version is available | |
2021-05-03 18:08:41.628 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 1. | |
2021-05-03 18:08:41.635 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:41.785 INFO[client.py:82] Model add_sub_i13 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i13', no version is available | |
2021-05-03 18:08:41.786 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 1. | |
2021-05-03 18:08:41.793 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:42.278 INFO[client.py:82] Model add_sub_i14 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i14', no version is available | |
2021-05-03 18:08:42.279 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 2. | |
2021-05-03 18:08:42.286 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:42.904 INFO[client.py:82] Model add_sub_i15 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i15', no version is available | |
2021-05-03 18:08:42.905 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 2. | |
2021-05-03 18:08:42.912 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:43.173 INFO[client.py:82] Model add_sub_i16 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i16', no version is available | |
2021-05-03 18:08:43.174 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 2. | |
2021-05-03 18:08:43.182 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:43.912 INFO[client.py:82] Model add_sub_i17 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i17', no version is available | |
2021-05-03 18:08:43.912 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 2. | |
2021-05-03 18:08:43.920 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:44.516 INFO[client.py:82] Model add_sub_i18 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i18', no version is available | |
2021-05-03 18:08:44.517 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 2. | |
2021-05-03 18:08:44.524 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:44.963 INFO[client.py:82] Model add_sub_i19 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i19', no version is available | |
2021-05-03 18:08:44.964 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 4. | |
2021-05-03 18:08:44.971 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:45.259 INFO[client.py:82] Model add_sub_i20 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i20', no version is available | |
2021-05-03 18:08:45.260 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 4. | |
2021-05-03 18:08:45.268 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:45.693 INFO[client.py:82] Model add_sub_i21 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i21', no version is available | |
2021-05-03 18:08:45.694 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 4. | |
2021-05-03 18:08:45.702 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:46.324 INFO[client.py:82] Model add_sub_i22 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i22', no version is available | |
2021-05-03 18:08:46.325 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 4. | |
2021-05-03 18:08:46.332 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:46.764 INFO[client.py:82] Model add_sub_i23 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i23', no version is available | |
2021-05-03 18:08:46.765 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 4. | |
2021-05-03 18:08:46.773 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:47.41 INFO[client.py:82] Model add_sub_i24 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i24', no version is available | |
2021-05-03 18:08:47.42 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 8. | |
2021-05-03 18:08:47.50 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:47.496 INFO[client.py:82] Model add_sub_i25 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i25', no version is available | |
2021-05-03 18:08:47.497 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 8. | |
2021-05-03 18:08:47.505 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:48.169 INFO[client.py:82] Model add_sub_i26 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i26', no version is available | |
2021-05-03 18:08:48.169 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 8. | |
2021-05-03 18:08:48.178 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:48.696 INFO[client.py:82] Model add_sub_i27 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i27', no version is available | |
2021-05-03 18:08:48.697 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 8. | |
2021-05-03 18:08:48.704 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:49.177 INFO[client.py:82] Model add_sub_i28 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i28', no version is available | |
2021-05-03 18:08:49.178 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 8. | |
2021-05-03 18:08:49.187 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:49.676 INFO[client.py:82] Model add_sub_i29 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i29', no version is available | |
2021-05-03 18:08:49.677 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 16. | |
2021-05-03 18:08:49.685 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:49.898 INFO[client.py:82] Model add_sub_i30 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i30', no version is available | |
2021-05-03 18:08:49.898 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 16. | |
2021-05-03 18:08:49.906 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:50.208 INFO[client.py:82] Model add_sub_i31 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i31', no version is available | |
2021-05-03 18:08:50.208 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 16. | |
2021-05-03 18:08:50.217 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:50.452 INFO[client.py:82] Model add_sub_i32 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i32', no version is available | |
2021-05-03 18:08:50.453 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 16. | |
2021-05-03 18:08:50.462 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:50.752 INFO[client.py:82] Model add_sub_i33 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i33', no version is available | |
2021-05-03 18:08:50.753 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 16. | |
2021-05-03 18:08:50.761 INFO[server_local.py:64] Triton Server started. | |
2021-05-03 18:08:51.8 INFO[client.py:82] Model add_sub_i34 load failed: [StatusCode.INTERNAL] failed to load 'add_sub_i34', no version is available | |
2021-05-03 18:08:51.8 INFO[analyzer_state_manager.py:140] Saved checkpoint to analysis_results/checkpoints/0.ckpt. | |
2021-05-03 18:08:51.8 WARNING[result_manager.py:310] Model add_sub requested for analysis but no results were found. Ensure that this model was actually profiled. | |
2021-05-03 18:08:51.59 WARNING[result_heap.py:95] Requested top 3 configs, but none satisfied constraints. Showing available constraint failing configs for this model. | |
2021-05-03 18:08:51.59 WARNING[result_heap.py:103] Requested top 3 failing configs, but found only 0. Showing all available constraint failing configs for this model. | |
Models (Inference):
Model   Batch   Concurrency   Model Config Path   Instance Group   Preferred Batch Sizes   Satisfies Constraints   Throughput (infer/sec)   p99 Latency (ms)   RAM Usage (MB)
Models (GPU Metrics):
Model   GPU ID   Batch   Concurrency   Model Config Path   Instance Group   Preferred Batch Sizes   Satisfies Constraints   GPU Memory Usage (MB)   GPU Utilization (%)   GPU Power Usage (W)
Server Only:
Model           GPU ID   GPU Memory Usage (MB)   GPU Utilization (%)   GPU Power Usage (W)
triton-server   0        58.0                    0.0                   70.0
triton-server   1        3.0                     0.0                   26.4
triton-server   2        3.0                     0.0                   28.9
triton-server   3        3.0                     0.0                   27.6
triton-server   4        3.0                     0.0                   30.1
triton-server   5        3.0                     0.0                   27.0
triton-server   6        3.0                     0.0                   28.7
triton-server   7        3.0                     0.0                   27.0
2021-05-03 18:08:51.61 INFO[result_manager.py:512] Exporting server only metrics to analysis_results/results/metrics-server-only.csv... | |
2021-05-03 18:08:51.62 INFO[result_manager.py:524] Exporting inference metrics to analysis_results/results/metrics-model-inference.csv... | |
2021-05-03 18:08:51.62 INFO[result_manager.py:526] Exporting GPU metrics to analysis_results/results/metrics-model-gpu.csv... | |
2021-05-03 18:08:51.62 WARNING[legend.py:1225] No handles with labels found to put in legend. | |
2021-05-03 18:08:51.164 WARNING[legend.py:1225] No handles with labels found to put in legend. | |
2021-05-03 18:08:51.378 INFO[server_local.py:81] Triton Server stopped. | |
Traceback (most recent call last):
  File "/usr/local/bin/model-analyzer", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/entrypoint.py", line 234, in main
    analyzer.write_and_export_results()
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/analyzer.py", line 133, in write_and_export_results
    self._report_manager.export_summary(
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/reports/report_manager.py", line 114, in export_summary
    summary = self._build_summary_report(report_key=report_key,
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/reports/report_manager.py", line 151, in _build_summary_report
    table, summary_sentence = self._build_summary_table(
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/reports/report_manager.py", line 253, in _build_summary_table
    best_config = sorted_measurements[0][0]
IndexError: list index out of range |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.