[ROCm][CI] Fix LM Eval Large Models (H100) test group (#34750)

Signed-off-by: charlifu <charlifu@amd.com>
This commit is contained in:
Charlie Fu
2026-03-02 01:43:38 -06:00
committed by GitHub
parent cb21972a97
commit 3fd1d4ec2c
2 changed files with 5 additions and 4 deletions

View File

@@ -1 +1,2 @@
Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
Qwen3-235B-A22B-Instruct-2507-FP8.yaml

View File

@@ -1544,8 +1544,8 @@ steps:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
##### H100 test #####
- label: LM Eval Large Models (H100) # optional
##### FP8 test #####
- label: LM Eval Large Models (H100) # optional; still uses H100 for consistency
gpu: h100
optional: true
mirror_hardwares: [amdexperimental, amdproduction]
@@ -1557,8 +1557,8 @@ steps:
- csrc/
- vllm/model_executor/layers/quantization
commands:
- export VLLM_USE_DEEP_GEMM=0 # We found Triton is faster than DeepGEMM for H100
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-hopper.txt --tp-size=4
- export VLLM_USE_DEEP_GEMM=0
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=4
##### H200 test #####