Skip to content

Instantly share code, notes, and snippets.

@zachgk
Last active November 10, 2023 22:03
Show Gist options
  • Save zachgk/c0090ab15c00a3fda06b63f7a61a370a to your computer and use it in GitHub Desktop.
Rubikon IB Mistral 7B
[test_name]
concurrency1
[serving_properties]
engine=Python
option.tensor_parallel_degree=1
option.rolling_batch=vllm
option.model_id=mistralai/Mistral-7B-v0.1
option.max_rolling_batch_size=32
[aws_curl]
TOKENIZER=mistralai/Mistral-7B-v0.1 ./awscurl -c 1 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
[test_name]
concurrency16
[serving_properties]
engine=Python
option.tensor_parallel_degree=1
option.rolling_batch=vllm
option.model_id=mistralai/Mistral-7B-v0.1
option.max_rolling_batch_size=32
[aws_curl]
TOKENIZER=mistralai/Mistral-7B-v0.1 ./awscurl -c 16 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
[test_name]
concurrency32
[serving_properties]
engine=Python
option.tensor_parallel_degree=1
option.rolling_batch=vllm
option.model_id=mistralai/Mistral-7B-v0.1
option.max_rolling_batch_size=32
[aws_curl]
TOKENIZER=mistralai/Mistral-7B-v0.1 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment