[Rocm][CI] Fix LM Eval Large Models (H100) test group (#34750)
Signed-off-by: charlifu <charlifu@amd.com>
This commit is contained in:
@@ -1 +1,2 @@
Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
Qwen3-235B-A22B-Instruct-2507-FP8.yaml
@@ -1544,8 +1544,8 @@ steps:
    - export VLLM_WORKER_MULTIPROC_METHOD=spawn
    - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4

##### H100 test #####
- label: LM Eval Large Models (H100) # optional
##### FP8 test #####
- label: LM Eval Large Models (H100) # optional, still use H100 for consistency
  gpu: h100
  optional: true
  mirror_hardwares: [amdexperimental, amdproduction]
||||
@@ -1557,8 +1557,8 @@ steps:
  - csrc/
  - vllm/model_executor/layers/quantization
  commands:
  - export VLLM_USE_DEEP_GEMM=0 # We found Triton is faster than DeepGEMM for H100
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-hopper.txt --tp-size=4
  - export VLLM_USE_DEEP_GEMM=0
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=4

##### H200 test #####
Reference in New Issue
Block a user