Created April 30, 2021 22:10
Save msaroufim/912ab9a5ae17b5ed444bf790ead0612e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@ip-172-31-1-60:/opt/triton-model-analyzer# rm -r output_model_repository/
root@ip-172-31-1-60:/opt/triton-model-analyzer# ls
CONTRIBUTING.md Dockerfile LICENSE README.md VERSION analysis_results build_wheel.sh docs examples helm-chart model_analyzer qa requirements.txt setup.py tests wheels
root@ip-172-31-1-60:/opt/triton-model-analyzer# model-analyzer -m /quick_start_repository -n add_sub --triton-launch-mode=local --export-path=analysis_results
2021-04-30 22:06:10.93 INFO[entrypoint.py:210] Triton Model Analyzer started: config={'model_repository': '/quick_start_repository', 'model_names': [{'model_name': 'add_sub', 'objectives': {'perf_throughput': 10}, 'parameters': {'batch_sizes': [1], 'concurrency': []}}], 'objectives': {'perf_throughput': 10}, 'constraints': {}, 'batch_sizes': [1], 'concurrency': [], 'perf_analyzer_timeout': 600, 'perf_analyzer_cpu_util': 80.0, 'run_config_search_max_concurrency': 1024, 'run_config_search_max_instance_count': 5, 'run_config_search_disable': False, 'run_config_search_max_preferred_batch_size': 16, 'export': True, 'cpu_only': False, 'export_path': 'analysis_results', 'summarize': True, 'num_configs_per_model': 3, 'num_top_model_configs': 0, 'filename_model_inference': 'metrics-model-inference.csv', 'filename_model_gpu': 'metrics-model-gpu.csv', 'filename_server_only': 'metrics-server-only.csv', 'max_retries': 1000, 'duration_seconds': 5, 'monitoring_interval': 0.01, 'client_protocol': 'grpc', 'perf_analyzer_path': 'perf_analyzer', 'perf_measurement_window': 5000, 'perf_output': False, 'perf_analyzer_flags': {}, 'triton_launch_mode': 'local', 'triton_docker_image': 'nvcr.io/nvidia/tritonserver:21.04-py3', 'triton_http_endpoint': 'localhost:8000', 'triton_grpc_endpoint': 'localhost:8001', 'triton_metrics_url': 'http://localhost:8002/metrics', 'triton_server_path': 'tritonserver', 'triton_output_path': None, 'triton_server_flags': {}, 'log_level': 'INFO', 'gpus': ['all'], 'output_model_repository_path': './output_model_repository', 'override_output_model_repository': False, 'config_file': None, 'inference_output_fields': ['model_name', 'batch_size', 'concurrency', 'model_config_path', 'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints', 'perf_throughput', 'perf_latency', 'cpu_used_ram'], 'gpu_output_fields': ['model_name', 'gpu_id', 'batch_size', 'concurrency', 'model_config_path', 'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints', 'gpu_used_memory', 'gpu_utilization', 'gpu_power_usage'], 'server_output_fields': ['model_name', 'gpu_id', 'gpu_used_memory', 'gpu_utilization', 'gpu_power_usage'], 'plots': [{'name': 'throughput_v_latency', 'title': 'Throughput vs. Latency', 'x_axis': 'perf_latency', 'y_axis': 'perf_throughput', 'monotonic': True}, {'name': 'gpu_mem_v_latency', 'title': 'GPU Memory vs. Latency', 'x_axis': 'perf_latency', 'y_axis': 'gpu_used_memory', 'monotonic': False}]}
2021-04-30 22:06:10.96 INFO[entrypoint.py:96] Starting a local Triton Server...
2021-04-30 22:06:10.96 INFO[analyzer_state_manager.py:109] Loaded checkpoint from file analysis_results/checkpoints/0.ckpt
2021-04-30 22:06:18.858 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:27.559 INFO[server_local.py:81] Triton Server stopped.
2021-04-30 22:06:27.559 INFO[analyzer.py:94] Profiling server only metrics...
2021-04-30 22:06:27.565 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:36.263 INFO[gpu_monitor.py:73] Using GPU(s) with UUID(s) = { GPU-910a5fbb-b7b6-9578-2a1d-7214b1bba733,GPU-ce20e534-f74d-002d-cea1-d8d56845c873,GPU-b346812d-eada-49e7-264e-602cad0884ed,GPU-0b8ce32e-71eb-369b-77ea-45ba6213a908,GPU-df14663d-d7e7-e0e3-84e9-14682a9faddc,GPU-568cfee1-9ecd-99ae-bd59-903adaa1aad6,GPU-40435e07-c0cb-fe8a-1597-598e7317c353,GPU-1b626e27-3cd9-a8c2-ba2d-4c3c5bfb9bc1 } for the analysis.
2021-04-30 22:06:37.353 INFO[server_local.py:81] Triton Server stopped.
2021-04-30 22:06:37.353 INFO[run_search.py:143] Will sweep both the concurrency and model config parameters...
2021-04-30 22:06:37.353 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and dynamic batching is disabled.
2021-04-30 22:06:37.361 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.367 INFO[client.py:82] Model add_sub_i0 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.367 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and dynamic batching is disabled.
2021-04-30 22:06:37.373 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.377 INFO[client.py:82] Model add_sub_i1 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.377 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and dynamic batching is disabled.
2021-04-30 22:06:37.384 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.388 INFO[client.py:82] Model add_sub_i2 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.388 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and dynamic batching is disabled.
2021-04-30 22:06:37.395 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.400 INFO[client.py:82] Model add_sub_i3 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.400 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and dynamic batching is disabled.
2021-04-30 22:06:37.409 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.412 INFO[client.py:82] Model add_sub_i4 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.412 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and dynamic batching is enabled.
2021-04-30 22:06:37.421 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.424 INFO[client.py:82] Model add_sub_i5 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.424 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and dynamic batching is enabled.
2021-04-30 22:06:37.431 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.436 INFO[client.py:82] Model add_sub_i6 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.436 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and dynamic batching is enabled.
2021-04-30 22:06:37.445 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.448 INFO[client.py:82] Model add_sub_i7 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.448 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and dynamic batching is enabled.
2021-04-30 22:06:37.457 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.460 INFO[client.py:82] Model add_sub_i8 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.460 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and dynamic batching is enabled.
2021-04-30 22:06:37.467 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.472 INFO[client.py:82] Model add_sub_i9 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.472 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 1.
2021-04-30 22:06:37.481 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.485 INFO[client.py:82] Model add_sub_i10 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.485 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 1.
2021-04-30 22:06:37.494 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.499 INFO[client.py:82] Model add_sub_i11 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.499 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 1.
2021-04-30 22:06:37.508 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.511 INFO[client.py:82] Model add_sub_i12 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.511 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 1.
2021-04-30 22:06:37.520 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.523 INFO[client.py:82] Model add_sub_i13 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.523 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 1.
2021-04-30 22:06:37.531 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.535 INFO[client.py:82] Model add_sub_i14 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.535 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 2.
2021-04-30 22:06:37.542 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.547 INFO[client.py:82] Model add_sub_i15 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.547 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 2.
2021-04-30 22:06:37.556 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.559 INFO[client.py:82] Model add_sub_i16 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.559 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 2.
2021-04-30 22:06:37.568 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.571 INFO[client.py:82] Model add_sub_i17 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.571 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 2.
2021-04-30 22:06:37.580 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.583 INFO[client.py:82] Model add_sub_i18 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.583 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 2.
2021-04-30 22:06:37.591 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.596 INFO[client.py:82] Model add_sub_i19 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.596 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 4.
2021-04-30 22:06:37.605 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.609 INFO[client.py:82] Model add_sub_i20 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.609 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 4.
2021-04-30 22:06:37.618 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.621 INFO[client.py:82] Model add_sub_i21 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.621 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 4.
2021-04-30 22:06:37.630 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.633 INFO[client.py:82] Model add_sub_i22 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.633 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 4.
2021-04-30 22:06:37.642 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.645 INFO[client.py:82] Model add_sub_i23 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.645 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 4.
2021-04-30 22:06:37.653 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.658 INFO[client.py:82] Model add_sub_i24 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.658 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 8.
2021-04-30 22:06:37.667 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.671 INFO[client.py:82] Model add_sub_i25 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.671 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 8.
2021-04-30 22:06:37.679 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.684 INFO[client.py:82] Model add_sub_i26 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.684 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 8.
2021-04-30 22:06:37.693 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.697 INFO[client.py:82] Model add_sub_i27 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.697 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 8.
2021-04-30 22:06:37.705 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.709 INFO[client.py:82] Model add_sub_i28 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.709 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 8.
2021-04-30 22:06:37.716 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.721 INFO[client.py:82] Model add_sub_i29 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.721 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 1, and preferred batch size is set to 16.
2021-04-30 22:06:37.730 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.733 INFO[client.py:82] Model add_sub_i30 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.733 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 2, and preferred batch size is set to 16.
2021-04-30 22:06:37.742 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.746 INFO[client.py:82] Model add_sub_i31 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.746 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 3, and preferred batch size is set to 16.
2021-04-30 22:06:37.755 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.759 INFO[client.py:82] Model add_sub_i32 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.759 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 4, and preferred batch size is set to 16.
2021-04-30 22:06:37.768 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.772 INFO[client.py:82] Model add_sub_i33 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.772 INFO[run_search.py:284] Concurrency set to 1. Instance count set to 5, and preferred batch size is set to 16.
2021-04-30 22:06:37.780 INFO[server_local.py:64] Triton Server started.
2021-04-30 22:06:37.785 INFO[client.py:82] Model add_sub_i34 load failed: [StatusCode.UNAVAILABLE] explicit model load / unload is not allowed if polling is enabled
2021-04-30 22:06:37.785 INFO[analyzer_state_manager.py:140] Saved checkpoint to analysis_results/checkpoints/0.ckpt.
2021-04-30 22:06:37.787 INFO[server_local.py:81] Triton Server stopped.
Traceback (most recent call last):
  File "/usr/local/bin/model-analyzer", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/entrypoint.py", line 233, in main
    analyzer.run()
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/analyzer.py", line 116, in run
    self._result_manager.collect_and_sort_results(
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/result/result_manager.py", line 309, in collect_and_sort_results
    result_dict = results[model_name]
KeyError: 'add_sub'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.