This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@4316cdb72fd3:/app/tensorrt_llm/TensorRT-LLM# python benchmarks/cpp/prepare_dataset.py --stdout --tokenizer $MODEL_ID token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 1000 > /tmp/synthetic_128_128.txt | |
root@4316cdb72fd3:/app/tensorrt_llm/TensorRT-LLM# trtllm-bench --model $MODEL_ID throughput --dataset /tmp/synthetic_128_128.txt --backend _autodeploy | |
2025-06-13 15:20:14,791 - INFO - flashinfer.jit: Prebuilt kernels not found, using JIT backend | |
[TensorRT-LLM] TensorRT-LLM version: 0.21.0rc1 | |
[06/13/2025-15:20:15] [TRT-LLM] [I] Preparing to run throughput benchmark... | |
Parse safetensors files: 0%| | 0/4 [00:00<?, ?it/Parse safetensors files: 25%|███████████████████▎ | 1/4 [00:00<00:00, 6.78it/Parse safetensors files: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 24 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@4316cdb72fd3:/app/tensorrt_llm/TensorRT-LLM# export MODEL_ID="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
root@4316cdb72fd3:/app/tensorrt_llm/TensorRT-LLM# python benchmarks/cpp/prepare_dataset.py --stdout --tokenizer $MODEL_ID token-norm-dist --input-mean 128 --output-mean 128 --input-stdev 0 --output-stdev 0 --num-requests 3000 > /tmp/synthetic_128_128.txt
root@4316cdb72fd3:/app/tensorrt_llm/TensorRT-LLM# trtllm-bench --model $MODEL_ID throughput --dataset /tmp/synthetic_128_128.txt --backend _autodeploy
2025-06-13 12:02:13,647 - INFO - flashinfer.jit: Prebuilt kernels not found, using JIT backend | |
[TensorRT-LLM] TensorRT-LLM version: 0.21.0rc1 | |
[06/13/2025-12:02:14] [TRT-LLM] [I] Preparing to run throughput benchmark... | |
[06/13/2025-12:02:14] [TRT-LLM] [I] | |
=========================================================== | |
= DATASET DETAILS | |
=========================================================== |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@4316cdb72fd3:/app/tensorrt_llm/TensorRT-LLM# trtllm-bench --model $MODEL_ID throughput --dataset /tmp/synthetic_128_128.txt --backend _autodeploy | |
2025-06-13 11:47:57,126 - INFO - flashinfer.jit: Prebuilt kernels not found, using JIT backend | |
[TensorRT-LLM] TensorRT-LLM version: 0.21.0rc1 | |
[06/13/2025-11:47:57] [TRT-LLM] [I] Preparing to run throughput benchmark... | |
Parse safetensors files:   0%|          | 0/4 [00:00<?, ?it/s]
Parse safetensors files:  25%|███████████████████▎ | 1/4 [00:00<00:00, 7.04it/s]
Parse safetensors files: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 28.14it/s]
[06/13/2025-11:47:58] [TRT-LLM] [I] | |
=========================================================== | |
= DATASET DETAILS | |
=========================================================== |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. Build the Docker image on my server:
2004 git clone https://github.com/nv-auto-deploy/TensorRT-LLM | |
2005 git checkout user/sg/trtllm-bench-fix   # NOTE: issued before `cd TensorRT-LLM/`, so presumably outside the clone; repeated at 2007
2006 cd TensorRT-LLM/ | |
2007 git checkout user/sg/trtllm-bench-fix | |
2008 git branch | |
2009 docker build --pull --target devel --file docker/Dockerfile.multi --tag tensorrt_llm/devel:latest . | |
2010 docker images | |
2011 docker save -o tensorrt_llm_latest tensorrt_llm/devel:latest |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@6e61d1d8b02e:/app/tensorrt_llm# trtllm-bench --model $MODEL_ID throughput --dataset /tmp/syntoy | |
2025-06-11 10:08:54,023 - INFO - flashinfer.jit: Prebuilt kernels not found, using JIT backend | |
[TensorRT-LLM] TensorRT-LLM version: 0.21.0rc1 | |
[06/11/2025-10:08:54] [TRT-LLM] [I] Preparing to run throughput benchmark... | |
Parse safetensors files: 100%|████████████████████████████████████████████████████████████████████ | |
[06/11/2025-10:08:55] [TRT-LLM] [I] | |
=========================================================== | |
= DATASET DETAILS | |
=========================================================== | |
Dataset Path: /tmp/synthetic_128_128.txt |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@53acaad1b40e:/app/tensorrt_llm# trtllm-bench --model $MODEL_ID throughput --dataset /tmp/synthetic_128_128.txt --backend autodeploy | |
2025-06-10 13:11:02,295 - INFO - flashinfer.jit: Prebuilt kernels not found, using JIT backend | |
[TensorRT-LLM] TensorRT-LLM version: 0.21.0rc0 | |
[06/10/2025-13:11:02] [TRT-LLM] [I] Preparing to run throughput benchmark... | |
Parse safetensors files: 100%|███████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 22.03it/s] | |
[06/10/2025-13:11:03] [TRT-LLM] [I] | |
=========================================================== | |
= DATASET DETAILS | |
=========================================================== | |
Dataset Path: /tmp/synthetic_128_128.txt |