root@smci350-zts-gtu-c8-25:/mlperf/harness# echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)";
echo "CPU: " && cat /proc/cpuinfo | grep "model name" | sort --unique;
echo "GPU:" && /opt/rocm/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)";
OS:
NAME="Ubuntu"
VERSION="22.04.5 LTS (Jammy Jellyfish)"
CPU:
model name : AMD EPYC 9575F 64-Core Processor
GPU:
Name: AMD EPYC 9575F 64-Core Processor
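Note: rocminfo also reports the host CPU as an HSA agent, which is why the grep above returns the EPYC 9575F as the first Name: match before any GPU entries. To list only the GPU product names, rocm-smi is a shorter path (assuming a standard ROCm install):

/opt/rocm/bin/rocm-smi --showproductname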
root@smci350-zts-gtu-c8-25:/mlperf/harness# /opt/rocm/bin/rocminfo
ROCk module version 6.14.14 is loaded
=====================
HSA System Attributes
=====================
Runtime Version: 1.18
Runtime Ext Version: 1.11
System Timestamp Freq.: 1000.000000MHz
Sig. Max Wait Duration: 18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count)
Machine Model: LARGE
user.conf:
# The format of this config file is 'key = value'.
# The key has the format 'model.scenario.key'. Value is mostly int64_t.
# Model maybe '*' as wildcard. In that case the value applies to all models.
# All times are in milli seconds
#
*.Offline.min_duration = 6000
*.Offline.min_query_count = 4
*.Offline.max_query_count = 4
*.Server.target_qps = 0.5
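Loadgen parses each line as model.scenario.key = value, with '*' matching every model, so the block above caps the Offline smoke run at 4 queries and at least 6 seconds. A hypothetical per-model override (model name assumed) for a full accuracy pass over the 8313-sample set would look like:

llama3_1-405b.Offline.min_query_count = 8313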
shortfin_405b_config_fp4.json:
{
"host": "0.0.0.0",
"port": "8080",
"model_config": "/artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.json",
"tokenizer_json": "/shark-dev/tokenizer.json",
"tokenizer_config_json": "/shark-dev/tokenizer_config.json",
"vmfb": "/artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.vmfb",
"parameters": [
"/shark-dev/weights/fp4/fp4_2025_07_10_fn.irpa"
]
}
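For reference, a minimal sketch of standing the server up from this config via the shortfin LLM entry point; this assumes shortfin_apps.llm.server accepts flags mirroring the JSON keys above (host, port, model_config, vmfb, parameters), which should be verified against the installed shark-ai version:

python3 -m shortfin_apps.llm.server \
  --host 0.0.0.0 --port 8080 \
  --model_config /artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.json \
  --tokenizer_json /shark-dev/tokenizer.json \
  --tokenizer_config_json /shark-dev/tokenizer_config.json \
  --vmfb /artifacts/chi/f4/f4_mi350_bs1_ds2_dc2816.iree0915.shark0915_ce7.vmfb \
  --parameters /shark-dev/weights/fp4/fp4_2025_07_10_fn.irpa \
  --device hip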
llama_harness_355_nightly.dockerfile:
FROM rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
# ######################################################
# # Install MLPerf+Shark reference implementation
# ######################################################
ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"]
# apt dependencies
RUN apt-get --fix-broken install -y && apt-get update && apt-get install -y \
The ROCm version inside the docker is 7.0.0 (base image: FROM rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915).
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export ROCR_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
  --devices "0,1,2,3,4,5,6,7" --scenario "$TEST_SCENARIO" \
  --test_mode "$TEST_MODE" \
  --bs 2 \
  --user_conf_path user.conf \
  --count 8 \
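HIP_VISIBLE_DEVICES and ROCR_VISIBLE_DEVICES restrict which GPUs the HIP runtime and the ROCr layer enumerate, so the harness sees all eight MI35x devices here; --output-format pftrace makes rocprofv3 emit a Perfetto trace that can be opened at https://ui.perfetto.dev. A hypothetical single-GPU variant for a quicker, smaller trace:

export HIP_VISIBLE_DEVICES=0
export ROCR_VISIBLE_DEVICES=0
rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
  --devices "0" --scenario "$TEST_SCENARIO" \
  --test_mode "$TEST_MODE" \
  --bs 2 \
  --user_conf_path user.conf \
  --count 8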
user.conf:
*.Offline.min_duration = 600000
INFO:shortfin_apps.llm.components.service_debug_dumper:[debug_service.py] Please find debug dumps for service.py in /root/.shortfin/debug/llm_service_invocation_dumps/2025-09-16T21:21:31.617026
INFO:root:####################################################################################################################################################################################
Running python3 harness_alt_mi355.py --devices 0,1,2,3,4,5,6,7 --scenario Offline --test_mode AccuracyOnly --bs 2 --user_conf_path user.conf --tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl --logfile_outdir OutputOfflineAccuracyOnly --debug False --verbose False --user_conf_path user.conf --shortfin_config shortfin_405b_config_fp4.json
##############################################################################################################################################################################################
INFO:Llama-405B-Dataset:Loading datas
/opt/rocm/bin/rocprofv3 --output-format pftrace -r -- python3 -u harness_alt_mi355.py \
--devices "0,1,2,3,4,5,6,7" \
--scenario "$TEST_SCENARIO" \
--test_mode "$TEST_MODE" \
--bs 2 \
--user_conf_path user.conf \
--tensor_path /data/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl \
--logfile_outdir "Output${TEST_SCENARIO}${TEST_MODE}" \
--debug "$DEBUG" \
--verbose "$VERBOSE" \
((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ git config --global credential.helper store
git config --global user.name AmosLewis
git config --global user.password ghp_nsRzvxclTLke......
((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ git config --global --list
[6] + 450113 suspended git config --global --list
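Side note: git itself never reads user.password; with credential.helper store the PAT is captured on the first authenticated HTTPS push and saved in plaintext, so the usual flow is:

git config --global credential.helper store
git push                 # prompts once for username and the PAT
cat ~/.git-credentials   # token is stored here in plaintext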
((.venv12) ) ➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_docker_mi355.sh
[+] Building 209.4s (12/23) docker:default
=> [internal] load build definition from llama_harness_355_nightly.dockerfile 0.0s
build_docker_mi355.sh runs:
docker build --no-cache --platform linux/amd64 --tag mlperf_llama_mi350:405b_chi --file LLAMA_inference/llama_harness_355_nightly.dockerfile .
➜ 2024q2-sdxl-mlperf-sprint git:(mi355_llama_working_harness_v1) ✗ ./LLAMA_inference/build_docker_mi355.sh
DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
Install the buildx component to build images with BuildKit:
https://docs.docker.com/go/buildx/
Sending build context to Docker daemon 46.69MB
Step 1/31 : FROM ghcr.io/rocm/no_rocm_image_ubuntu24_04:main
---> e6ad78a4d6b1
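Per the deprecation notice, the same image builds under BuildKit once buildx is installed; a one-liner mirroring the script's command would be:

docker buildx build --no-cache --platform linux/amd64 \
  --tag mlperf_llama_mi350:405b_chi \
  --file LLAMA_inference/llama_harness_355_nightly.dockerfile .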