[Rocm][CI] Fix LM Eval Large Models (H100) test group (#34750)
Signed-off-by: charlifu <charlifu@amd.com>
This commit is contained in:
@@ -1 +1,2 @@
Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
Qwen3-235B-A22B-Instruct-2507-FP8.yaml
@@ -1544,8 +1544,8 @@ steps:
    - export VLLM_WORKER_MULTIPROC_METHOD=spawn
    - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4

##### H100 test #####
- label: LM Eval Large Models (H100) # optional
##### FP8 test #####
- label: LM Eval Large Models (H100) # optional, still use H100 for consistency
  gpu: h100
  optional: true
  mirror_hardwares: [amdexperimental, amdproduction]
||||
@@ -1557,8 +1557,8 @@ steps:
  - csrc/
  - vllm/model_executor/layers/quantization
  commands:
  - export VLLM_USE_DEEP_GEMM=0 # We found Triton is faster than DeepGEMM for H100
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-hopper.txt --tp-size=4
  - export VLLM_USE_DEEP_GEMM=0
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=4

##### H200 test #####
Reference in New Issue
Block a user